Merge tag 'pinctrl-v4.6-1' of git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-pinctrl Pull pin control updates from Linus Walleij: "An almost purely driver related set of changes with no major changes to the framework, only one patch adding an unlocked version of the pinctrl_find_gpio_range_from_pin() library call. New drivers: - ST Microelectronics STM32 MCU support: this is a non-MMU low-end platform for IoT things (etc). - Microchip PIC32 MCU support: same story as for STM32. New subdrivers: - Allwinner SunXi H3 R_PIO controller support. - Qualcomm IPQ4019 support. - MediaTek MT2701 and MT7623. - Allwinner A64 Non-critical fixes: - gpio_disable_free() for the Vybrid. - pinctrl single: use a separate lockdep class. Misc: - Substantial cleanups and rewrites for the Super-H PFC driver and subdrivers. - Various fixes and cleanups, especially Paul Gortmakers work to make nonmodular drivers nonmodular" * tag 'pinctrl-v4.6-1' of git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-pinctrl: (75 commits) pinctrl: single: Use a separate lockdep class drivers: pinctrl: add driver for Allwinner A64 SoC pinctrl: Broadcom Northstar2 pinctrl device tree bindings pinctrl: amlogic: Make driver independent from two-domain configuration pinctrl: amlogic: Separate some pin functions for Meson8 / Meson8b pinctrl: at91: use __maybe_unused to hide pm functions pinctrl: sh-pfc: core: don't open code of_device_get_match_data() pinctrl: uniphier: rename CONFIG options and file names pinctrl: sunxi: make A80 explicitly non-modular pinctrl: stm32: make explicitly non-modular pinctrl: sh-pfc: make explicitly non-modular pinctrl: meson: make explicitly non-modular pinctrl: pinctrl-mt6397 driver explicitly non-modular pinctrl: sunxi: does not need module.h pinctrl: pxa2xx: export symbols pinctrl: sunxi: Change mux setting on PI irq pins pinctrl: sunxi: Remove non existing irq's pinctrl: imx: attach iomuxc device to gpr syscon pinctrl-bcm2835: Fix cut-and-paste error in "pull" parsing pinctrl: lpc1850-scu: document nxp,gpio-pin-interrupt ...

commit: 5ca5446ec5ba5e79a6f271cd026bb153d6850fcc [log] [tgz]
author: Linus Torvalds <torvalds@linux-foundation.org> Tue Mar 15 20:23:13 2016 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> Tue Mar 15 20:23:13 2016 -0700
tree: ba6b9a309d5f8730a01002db389e05ea7f784f9c
parent: 710d60cbf1b312a8075a2158cbfbbd9c66132dcc [diff]
parent: 3c177a166253653bf9c377eb28a5155ea2d9b631 [diff]
diff --git a/Documentation/DocBook/media/v4l/media-types.xml b/Documentation/DocBook/media/v4l/media-types.xml
index 1af3842..0ee0f33 100644
--- a/Documentation/DocBook/media/v4l/media-types.xml
+++ b/Documentation/DocBook/media/v4l/media-types.xml

@@ -57,10 +57,6 @@
 	    <entry>Connector for a RGB composite signal.</entry>
 	  </row>
 	  <row>
-	    <entry><constant>MEDIA_ENT_F_CONN_TEST</constant></entry>
-	    <entry>Connector for a test generator.</entry>
-	  </row>
-	  <row>
 	    <entry><constant>MEDIA_ENT_F_CAM_SENSOR</constant></entry>
 	    <entry>Camera video sensor entity.</entry>
 	  </row>

diff --git a/Documentation/devicetree/bindings/arm/omap/omap.txt b/Documentation/devicetree/bindings/arm/omap/omap.txt
index a2bd593..66422d6 100644
--- a/Documentation/devicetree/bindings/arm/omap/omap.txt
+++ b/Documentation/devicetree/bindings/arm/omap/omap.txt

@@ -23,6 +23,7 @@
   during suspend.
 - ti,no-reset-on-init: When present, the module should not be reset at init
 - ti,no-idle-on-init: When present, the module should not be idled at init
+- ti,no-idle: When present, the module is never allowed to idle.
 
 Example:
 

diff --git a/Documentation/devicetree/bindings/interrupt-controller/al,alpine-msix.txt b/Documentation/devicetree/bindings/interrupt-controller/al,alpine-msix.txt
new file mode 100644
index 0000000..f6f1c14
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/al,alpine-msix.txt

@@ -0,0 +1,26 @@
+Alpine MSIX controller
+
+See arm,gic-v3.txt for SPI and MSI definitions.
+
+Required properties:
+
+- compatible: should be "al,alpine-msix"
+- reg: physical base address and size of the registers
+- interrupt-parent: specifies the parent interrupt controller.
+- interrupt-controller: identifies the node as an interrupt controller
+- msi-controller: identifies the node as an PCI Message Signaled Interrupt
+		  controller
+- al,msi-base-spi: SPI base of the MSI frame
+- al,msi-num-spis: number of SPIs assigned to the MSI frame, relative to SPI0
+
+Example:
+
+msix: msix {
+	compatible = "al,alpine-msix";
+	reg = <0x0 0xfbe00000 0x0 0x100000>;
+	interrupt-parent = <&gic>;
+	interrupt-controller;
+	msi-controller;
+	al,msi-base-spi = <160>;
+	al,msi-num-spis = <160>;
+};

diff --git a/Documentation/devicetree/bindings/interrupt-controller/arm,gic.txt b/Documentation/devicetree/bindings/interrupt-controller/arm,gic.txt
index 5a1cb4b..793c20f 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/arm,gic.txt
+++ b/Documentation/devicetree/bindings/interrupt-controller/arm,gic.txt

@@ -16,6 +16,7 @@
 	"arm,cortex-a15-gic"
 	"arm,cortex-a7-gic"
 	"arm,cortex-a9-gic"
+	"arm,eb11mp-gic"
 	"arm,gic-400"
 	"arm,pl390"
 	"arm,tc11mp-gic"

diff --git a/Documentation/devicetree/bindings/interrupt-controller/marvell,odmi-controller.txt b/Documentation/devicetree/bindings/interrupt-controller/marvell,odmi-controller.txt
new file mode 100644
index 0000000..8af0a8e
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/marvell,odmi-controller.txt

@@ -0,0 +1,44 @@
+
+* Marvell ODMI for MSI support
+
+Some Marvell SoCs have an On-Die Message Interrupt (ODMI) controller
+which can be used by on-board peripheral for MSI interrupts.
+
+Required properties:
+
+- compatible           : The value here should contain:
+
+    "marvell,ap806-odmi-controller", "marvell,odmi-controller".
+
+- interrupt,controller : Identifies the node as an interrupt controller.
+
+- msi-controller       : Identifies the node as an MSI controller.
+
+- marvell,odmi-frames  : Number of ODMI frames available. Each frame
+                         provides a number of events.
+
+- reg                  : List of register definitions, one for each
+                         ODMI frame.
+
+- marvell,spi-base     : List of GIC base SPI interrupts, one for each
+                         ODMI frame. Those SPI interrupts are 0-based,
+                         i.e marvell,spi-base = <128> will use SPI #96.
+                         See Documentation/devicetree/bindings/interrupt-controller/arm,gic.txt
+                         for details about the GIC Device Tree binding.
+
+- interrupt-parent     : Reference to the parent interrupt controller.
+
+Example:
+
+	odmi: odmi@300000 {
+		compatible = "marvell,ap806-odm-controller",
+			     "marvell,odmi-controller";
+		interrupt-controller;
+		msi-controller;
+		marvell,odmi-frames = <4>;
+		reg = <0x300000 0x4000>,
+		      <0x304000 0x4000>,
+		      <0x308000 0x4000>,
+		      <0x30C000 0x4000>;
+		marvell,spi-base = <128>, <136>, <144>, <152>;
+	};

diff --git a/Documentation/devicetree/bindings/interrupt-controller/mips-gic.txt b/Documentation/devicetree/bindings/interrupt-controller/mips-gic.txt
index aae4c38..1735953 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/mips-gic.txt
+++ b/Documentation/devicetree/bindings/interrupt-controller/mips-gic.txt

@@ -23,6 +23,12 @@
 - mti,reserved-cpu-vectors : Specifies the list of CPU interrupt vectors
   to which the GIC may not route interrupts.  Valid values are 2 - 7.
   This property is ignored if the CPU is started in EIC mode.
+- mti,reserved-ipi-vectors : Specifies the range of GIC interrupts that are
+  reserved for IPIs.
+  It accepts 2 values, the 1st is the starting interrupt and the 2nd is the size
+  of the reserved range.
+  If not specified, the driver will allocate the last 2 * number of VPEs in the
+  system.
 
 Required properties for timer sub-node:
 - compatible : Should be "mti,gic-timer".
@@ -44,6 +50,7 @@
 		#interrupt-cells = <3>;
 
 		mti,reserved-cpu-vectors = <7>;
+		mti,reserved-ipi-vectors = <40 8>;
 
 		timer {
 			compatible = "mti,gic-timer";

diff --git a/Documentation/devicetree/bindings/interrupt-controller/sigma,smp8642-intc.txt b/Documentation/devicetree/bindings/interrupt-controller/sigma,smp8642-intc.txt
new file mode 100644
index 0000000..1f441fa
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/sigma,smp8642-intc.txt

@@ -0,0 +1,49 @@
+Sigma Designs SMP86xx/SMP87xx secondary interrupt controller
+
+Required properties:
+- compatible: should be "sigma,smp8642-intc"
+- reg: physical address of MMIO region
+- ranges: address space mapping of child nodes
+- interrupt-parent: phandle of parent interrupt controller
+- interrupt-controller: boolean
+- #address-cells: should be <1>
+- #size-cells: should be <1>
+
+One child node per control block with properties:
+- reg: address of registers for this control block
+- interrupt-controller: boolean
+- #interrupt-cells: should be <2>, interrupt index and flags per interrupts.txt
+- interrupts: interrupt spec of primary interrupt controller
+
+Example:
+
+interrupt-controller@6e000 {
+	compatible = "sigma,smp8642-intc";
+	reg = <0x6e000 0x400>;
+	ranges = <0x0 0x6e000 0x400>;
+	interrupt-parent = <&gic>;
+	interrupt-controller;
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	irq0: interrupt-controller@0 {
+		reg = <0x000 0x100>;
+		interrupt-controller;
+		#interrupt-cells = <2>;
+		interrupts = <GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>;
+	};
+
+	irq1: interrupt-controller@100 {
+		reg = <0x100 0x100>;
+		interrupt-controller;
+		#interrupt-cells = <2>;
+		interrupts = <GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>;
+	};
+
+	irq2: interrupt-controller@300 {
+		reg = <0x300 0x100>;
+		interrupt-controller;
+		#interrupt-cells = <2>;
+		interrupts = <GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>;
+	};
+};

diff --git a/Documentation/devicetree/bindings/net/renesas,ravb.txt b/Documentation/devicetree/bindings/net/renesas,ravb.txt
index 81a9f9e..c8ac222 100644
--- a/Documentation/devicetree/bindings/net/renesas,ravb.txt
+++ b/Documentation/devicetree/bindings/net/renesas,ravb.txt

@@ -82,8 +82,8 @@
 				  "ch16", "ch17", "ch18", "ch19",
 				  "ch20", "ch21", "ch22", "ch23",
 				  "ch24";
-		clocks = <&mstp8_clks R8A7795_CLK_ETHERAVB>;
-		power-domains = <&cpg_clocks>;
+		clocks = <&cpg CPG_MOD 812>;
+		power-domains = <&cpg>;
 		phy-mode = "rgmii-id";
 		phy-handle = <&phy0>;
 

diff --git a/Documentation/devicetree/bindings/regulator/tps65217.txt b/Documentation/devicetree/bindings/regulator/tps65217.txt
index d181096..4f05d20 100644
--- a/Documentation/devicetree/bindings/regulator/tps65217.txt
+++ b/Documentation/devicetree/bindings/regulator/tps65217.txt

@@ -26,11 +26,7 @@
 		ti,pmic-shutdown-controller;
 
 		regulators {
-			#address-cells = <1>;
-			#size-cells = <0>;
-
 			dcdc1_reg: dcdc1 {
-				reg = <0>;
 				regulator-min-microvolt = <900000>;
 				regulator-max-microvolt = <1800000>;
 				regulator-boot-on;
@@ -38,7 +34,6 @@
 			};
 
 			dcdc2_reg: dcdc2 {
-				reg = <1>;
 				regulator-min-microvolt = <900000>;
 				regulator-max-microvolt = <3300000>;
 				regulator-boot-on;
@@ -46,7 +41,6 @@
 			};
 
 			dcdc3_reg: dcc3 {
-				reg = <2>;
 				regulator-min-microvolt = <900000>;
 				regulator-max-microvolt = <1500000>;
 				regulator-boot-on;
@@ -54,7 +48,6 @@
 			};
 
 			ldo1_reg: ldo1 {
-				reg = <3>;
 				regulator-min-microvolt = <1000000>;
 				regulator-max-microvolt = <3300000>;
 				regulator-boot-on;
@@ -62,7 +55,6 @@
 			};
 
 			ldo2_reg: ldo2 {
-				reg = <4>;
 				regulator-min-microvolt = <900000>;
 				regulator-max-microvolt = <3300000>;
 				regulator-boot-on;
@@ -70,7 +62,6 @@
 			};
 
 			ldo3_reg: ldo3 {
-				reg = <5>;
 				regulator-min-microvolt = <1800000>;
 				regulator-max-microvolt = <3300000>;
 				regulator-boot-on;
@@ -78,7 +69,6 @@
 			};
 
 			ldo4_reg: ldo4 {
-				reg = <6>;
 				regulator-min-microvolt = <1800000>;
 				regulator-max-microvolt = <3300000>;
 				regulator-boot-on;

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 9a53c92..4d9ca7d 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt

@@ -666,7 +666,7 @@
 
 	clearcpuid=BITNUM [X86]
 			Disable CPUID feature X for the kernel. See
-			arch/x86/include/asm/cpufeature.h for the valid bit
+			arch/x86/include/asm/cpufeatures.h for the valid bit
 			numbers. Note the Linux specific bits are not necessarily
 			stable over kernel options, but the vendor specific
 			ones should be.
@@ -1687,6 +1687,15 @@
 	ip=		[IP_PNP]
 			See Documentation/filesystems/nfs/nfsroot.txt.
 
+	irqaffinity=	[SMP] Set the default irq affinity mask
+			Format:
+			<cpu number>,...,<cpu number>
+			or
+			<cpu number>-<cpu number>
+			(must be a positive range in ascending order)
+			or a mixture
+			<cpu number>,...,<cpu number>-<cpu number>
+
 	irqfixup	[HW]
 			When an interrupt is not handled search all handlers
 			for it. Intended to get systems with badly broken
@@ -2566,6 +2575,8 @@
 
 	nointroute	[IA-64]
 
+	noinvpcid	[X86] Disable the INVPCID cpu feature.
+
 	nojitter	[IA-64] Disables jitter checking for ITC timers.
 
 	no-kvmclock	[X86,KVM] Disable paravirtualized KVM clock driver
@@ -3491,6 +3502,10 @@
 
 	ro		[KNL] Mount root device read-only on boot
 
+	rodata=		[KNL]
+		on	Mark read-only kernel memory as read-only (default).
+		off	Leave read-only kernel memory writable for debugging.
+
 	root=		[KNL] Root filesystem
 			See name_to_dev_t comment in init/do_mounts.c.
 
@@ -3528,6 +3543,11 @@
 
 	sched_debug	[KNL] Enables verbose scheduler debug messages.
 
+	schedstats=	[KNL,X86] Enable or disable scheduled statistics.
+			Allowed values are enable and disable. This feature
+			incurs a small amount of overhead in the scheduler
+			but is useful for debugging and performance tuning.
+
 	skew_tick=	[KNL] Offset the periodic timer tick per cpu to mitigate
 			xtime_lock contention on larger systems, and/or RCU lock
 			contention on all systems with CONFIG_MAXSMP set.

diff --git a/Documentation/ptp/testptp.c b/Documentation/ptp/testptp.c
index 6c6247a..d99012f 100644
--- a/Documentation/ptp/testptp.c
+++ b/Documentation/ptp/testptp.c

@@ -277,13 +277,15 @@
 			       "  %d external time stamp channels\n"
 			       "  %d programmable periodic signals\n"
 			       "  %d pulse per second\n"
-			       "  %d programmable pins\n",
+			       "  %d programmable pins\n"
+			       "  %d cross timestamping\n",
 			       caps.max_adj,
 			       caps.n_alarm,
 			       caps.n_ext_ts,
 			       caps.n_per_out,
 			       caps.pps,
-			       caps.n_pins);
+			       caps.n_pins,
+			       caps.cross_timestamping);
 		}
 	}
 

diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index a93b414..f4444c9 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt

@@ -58,6 +58,8 @@
 - panic_on_stackoverflow
 - panic_on_unrecovered_nmi
 - panic_on_warn
+- perf_cpu_time_max_percent
+- perf_event_paranoid
 - pid_max
 - powersave-nap               [ PPC only ]
 - printk
@@ -639,6 +641,17 @@
 
 ==============================================================
 
+perf_event_paranoid:
+
+Controls use of the performance events system by unprivileged
+users (without CAP_SYS_ADMIN).  The default value is 1.
+
+ -1: Allow use of (almost) all events by all users
+>=0: Disallow raw tracepoint access by users without CAP_IOC_LOCK
+>=1: Disallow CPU event access by users without CAP_SYS_ADMIN
+>=2: Disallow kernel profiling by users without CAP_SYS_ADMIN
+
+==============================================================
 
 pid_max:
 
@@ -760,6 +773,14 @@
 
 ==============================================================
 
+sched_schedstats:
+
+Enables/disables scheduler statistics. Enabling this feature
+incurs a small amount of overhead in the scheduler but is
+useful for debugging and performance tuning.
+
+==============================================================
+
 sg-big-buff:
 
 This file shows the size of the generic SCSI (sg) buffer.

diff --git a/Documentation/virtual/kvm/mmu.txt b/Documentation/virtual/kvm/mmu.txt
index daf9c0f..c817310 100644
--- a/Documentation/virtual/kvm/mmu.txt
+++ b/Documentation/virtual/kvm/mmu.txt

@@ -358,7 +358,8 @@
 - if CR4.SMEP is enabled: since we've turned the page into a kernel page,
   the kernel may now execute it.  We handle this by also setting spte.nx.
   If we get a user fetch or read fault, we'll change spte.u=1 and
-  spte.nx=gpte.nx back.
+  spte.nx=gpte.nx back.  For this to work, KVM forces EFER.NX to 1 when
+  shadow paging is in use.
 - if CR4.SMAP is disabled: since the page has been changed to a kernel
   page, it can not be reused when CR4.SMAP is enabled. We set
   CR4.SMAP && !CR0.WP into shadow page's role to avoid this case. Note,

diff --git a/Documentation/watchdog/watchdog-parameters.txt b/Documentation/watchdog/watchdog-parameters.txt
index 9f9ec9f..4e4b6f1 100644
--- a/Documentation/watchdog/watchdog-parameters.txt
+++ b/Documentation/watchdog/watchdog-parameters.txt

@@ -400,3 +400,7 @@
 nowayout: Watchdog cannot be stopped once started
 	(default=kernel config parameter)
 -------------------------------------------------
+sun4v_wdt:
+timeout_ms: Watchdog timeout in milliseconds 1..180000, default=60000)
+nowayout: Watchdog cannot be stopped once started
+-------------------------------------------------

diff --git a/Documentation/x86/early-microcode.txt b/Documentation/x86/early-microcode.txt
index d62bea6..c956d99 100644
--- a/Documentation/x86/early-microcode.txt
+++ b/Documentation/x86/early-microcode.txt

@@ -40,3 +40,28 @@
 find . | cpio -o -H newc >../ucode.cpio
 cd ..
 cat ucode.cpio /boot/initrd-3.5.0.img >/boot/initrd-3.5.0.ucode.img
+
+Builtin microcode
+=================
+
+We can also load builtin microcode supplied through the regular firmware
+builtin method CONFIG_FIRMWARE_IN_KERNEL. Here's an example:
+
+CONFIG_FIRMWARE_IN_KERNEL=y
+CONFIG_EXTRA_FIRMWARE="intel-ucode/06-3a-09 amd-ucode/microcode_amd_fam15h.bin"
+CONFIG_EXTRA_FIRMWARE_DIR="/lib/firmware"
+
+This basically means, you have the following tree structure locally:
+
+/lib/firmware/
+|-- amd-ucode
+...
+|   |-- microcode_amd_fam15h.bin
+...
+|-- intel-ucode
+...
+|   |-- 06-3a-09
+...
+
+so that the build system can find those files and integrate them into
+the final kernel image. The early loader finds them and applies them.

diff --git a/Documentation/x86/exception-tables.txt b/Documentation/x86/exception-tables.txt
index 32901aa..e396bcd 100644
--- a/Documentation/x86/exception-tables.txt
+++ b/Documentation/x86/exception-tables.txt

@@ -290,3 +290,38 @@
 only use exceptions for code in the .text section.  Any other section
 will cause the exception table to not be sorted correctly, and the
 exceptions will fail.
+
+Things changed when 64-bit support was added to x86 Linux. Rather than
+double the size of the exception table by expanding the two entries
+from 32-bits to 64 bits, a clever trick was used to store addresses
+as relative offsets from the table itself. The assembly code changed
+from:
+	.long 1b,3b
+to:
+        .long (from) - .
+        .long (to) - .
+
+and the C-code that uses these values converts back to absolute addresses
+like this:
+
+	ex_insn_addr(const struct exception_table_entry *x)
+	{
+		return (unsigned long)&x->insn + x->insn;
+	}
+
+In v4.6 the exception table entry was expanded with a new field "handler".
+This is also 32-bits wide and contains a third relative function
+pointer which points to one of:
+
+1) int ex_handler_default(const struct exception_table_entry *fixup)
+   This is legacy case that just jumps to the fixup code
+2) int ex_handler_fault(const struct exception_table_entry *fixup)
+   This case provides the fault number of the trap that occurred at
+   entry->insn. It is used to distinguish page faults from machine
+   check.
+3) int ex_handler_ext(const struct exception_table_entry *fixup)
+   This case is used for uaccess_err ... we need to set a flag
+   in the task structure. Before the handler functions existed this
+   case was handled by adding a large offset to the fixup to tag
+   it as special.
+More functions can easily be added.

diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt
index 68ed311..0965a71 100644
--- a/Documentation/x86/x86_64/boot-options.txt
+++ b/Documentation/x86/x86_64/boot-options.txt

@@ -60,6 +60,8 @@
 		threshold to 1. Enabling this may make memory predictive failure
 		analysis less effective if the bios sets thresholds for memory
 		errors since we will not see details for all errors.
+   mce=recovery
+		Force-enable recoverable machine check code paths
 
    nomce (for compatibility with i386): same as mce=off
 

diff --git a/MAINTAINERS b/MAINTAINERS
index 4978dc1..57adf39 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS

@@ -920,17 +920,24 @@
 S:	Maintained
 F:	drivers/clk/sunxi/
 
-ARM/Amlogic MesonX SoC support
+ARM/Amlogic Meson SoC support
 M:	Carlo Caione <carlo@caione.org>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+L:	linux-meson@googlegroups.com
+W:	http://linux-meson.com/
 S:	Maintained
-F:	drivers/media/rc/meson-ir.c
-N:	meson[x68]
+F:	arch/arm/mach-meson/
+F:	arch/arm/boot/dts/meson*
+N:	meson
 
 ARM/Annapurna Labs ALPINE ARCHITECTURE
 M:	Tsahee Zidenberg <tsahee@annapurnalabs.com>
+M:	Antoine Tenart <antoine.tenart@free-electrons.com>
 S:	Maintained
 F:	arch/arm/mach-alpine/
+F:	arch/arm/boot/dts/alpine*
+F:	arch/arm64/boot/dts/al/
+F:	drivers/*/*alpine*
 
 ARM/ATMEL AT91RM9200, AT91SAM9 AND SAMA5 SOC SUPPORT
 M:	Nicolas Ferre <nicolas.ferre@atmel.com>
@@ -2415,6 +2422,7 @@
 F:	arch/mips/include/asm/mach-bmips/*
 F:	arch/mips/kernel/*bmips*
 F:	arch/mips/boot/dts/brcm/bcm*.dts*
+F:	drivers/irqchip/irq-bcm63*
 F:	drivers/irqchip/irq-bcm7*
 F:	drivers/irqchip/irq-brcmstb*
 F:	include/linux/bcm963xx_nvram.h
@@ -3444,7 +3452,6 @@
 DESIGNWARE USB3 DRD IP DRIVER
 M:	Felipe Balbi <balbi@kernel.org>
 L:	linux-usb@vger.kernel.org
-L:	linux-omap@vger.kernel.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git
 S:	Maintained
 F:	drivers/usb/dwc3/
@@ -4512,6 +4519,12 @@
 S:	Maintained
 F:	drivers/dma/fsldma.*
 
+FREESCALE GPMI NAND DRIVER
+M:	Han Xu <han.xu@nxp.com>
+L:	linux-mtd@lists.infradead.org
+S:	Maintained
+F:	drivers/mtd/nand/gpmi-nand/*
+
 FREESCALE I2C CPM DRIVER
 M:	Jochen Friedrich <jochen@scram.de>
 L:	linuxppc-dev@lists.ozlabs.org
@@ -4528,7 +4541,7 @@
 F:	drivers/video/fbdev/imxfb.c
 
 FREESCALE QUAD SPI DRIVER
-M:	Han Xu <han.xu@freescale.com>
+M:	Han Xu <han.xu@nxp.com>
 L:	linux-mtd@lists.infradead.org
 S:	Maintained
 F:	drivers/mtd/spi-nor/fsl-quadspi.c
@@ -4542,6 +4555,15 @@
 F:	drivers/net/ethernet/freescale/fs_enet/
 F:	include/linux/fs_enet_pd.h
 
+FREESCALE IMX / MXC FEC DRIVER
+M:	Fugang Duan <fugang.duan@nxp.com>
+L:	netdev@vger.kernel.org
+S:	Maintained
+F:	drivers/net/ethernet/freescale/fec_main.c
+F:	drivers/net/ethernet/freescale/fec_ptp.c
+F:	drivers/net/ethernet/freescale/fec.h
+F:	Documentation/devicetree/bindings/net/fsl-fec.txt
+
 FREESCALE QUICC ENGINE LIBRARY
 L:	linuxppc-dev@lists.ozlabs.org
 S:	Orphan
@@ -6758,6 +6780,7 @@
 F:	Documentation/networking/mac80211-injection.txt
 F:	include/net/mac80211.h
 F:	net/mac80211/
+F:	drivers/net/wireless/mac80211_hwsim.[ch]
 
 MACVLAN DRIVER
 M:	Patrick McHardy <kaber@trash.net>
@@ -7354,7 +7377,7 @@
 F:	include/linux/isicom.h
 
 MUSB MULTIPOINT HIGH SPEED DUAL-ROLE CONTROLLER
-M:	Felipe Balbi <balbi@kernel.org>
+M:	Bin Liu <b-liu@ti.com>
 L:	linux-usb@vger.kernel.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git
 S:	Maintained
@@ -7377,6 +7400,17 @@
 S:	Supported
 F:	drivers/net/ethernet/myricom/myri10ge/
 
+NAND FLASH SUBSYSTEM
+M:	Boris Brezillon <boris.brezillon@free-electrons.com>
+R:	Richard Weinberger <richard@nod.at>
+L:	linux-mtd@lists.infradead.org
+W:	http://www.linux-mtd.infradead.org/
+Q:	http://patchwork.ozlabs.org/project/linux-mtd/list/
+T:	git git://github.com/linux-nand/linux.git
+S:	Maintained
+F:	drivers/mtd/nand/
+F:	include/linux/mtd/nand*.h
+
 NATSEMI ETHERNET DRIVER (DP8381x)
 S:	Orphan
 F:	drivers/net/ethernet/natsemi/natsemi.c
@@ -7686,13 +7720,13 @@
 F:	arch/nios2/
 
 NOKIA N900 POWER SUPPLY DRIVERS
-M:	Pali Rohár <pali.rohar@gmail.com>
-S:	Maintained
+R:	Pali Rohár <pali.rohar@gmail.com>
 F:	include/linux/power/bq2415x_charger.h
 F:	include/linux/power/bq27xxx_battery.h
 F:	include/linux/power/isp1704_charger.h
 F:	drivers/power/bq2415x_charger.c
 F:	drivers/power/bq27xxx_battery.c
+F:	drivers/power/bq27xxx_battery_i2c.c
 F:	drivers/power/isp1704_charger.c
 F:	drivers/power/rx51_battery.c
 
@@ -7923,11 +7957,9 @@
 F:	drivers/staging/media/omap4iss/
 
 OMAP USB SUPPORT
-M:	Felipe Balbi <balbi@kernel.org>
 L:	linux-usb@vger.kernel.org
 L:	linux-omap@vger.kernel.org
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git
-S:	Maintained
+S:	Orphan
 F:	drivers/usb/*/*omap*
 F:	arch/arm/*omap*/usb*
 
@@ -8444,6 +8476,7 @@
 M:	Peter Zijlstra <peterz@infradead.org>
 M:	Ingo Molnar <mingo@redhat.com>
 M:	Arnaldo Carvalho de Melo <acme@kernel.org>
+R:	Alexander Shishkin <alexander.shishkin@linux.intel.com>
 L:	linux-kernel@vger.kernel.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf/core
 S:	Supported
@@ -9558,6 +9591,12 @@
 S:	Maintained
 F:	drivers/thunderbolt/
 
+TI BQ27XXX POWER SUPPLY DRIVER
+R:	Andrew F. Davis <afd@ti.com>
+F:	include/linux/power/bq27xxx_battery.h
+F:	drivers/power/bq27xxx_battery.c
+F:	drivers/power/bq27xxx_battery_i2c.c
+
 TIMEKEEPING, CLOCKSOURCE CORE, NTP, ALARMTIMER
 M:	John Stultz <john.stultz@linaro.org>
 M:	Thomas Gleixner <tglx@linutronix.de>
@@ -12013,7 +12052,6 @@
 F:	arch/arm64/include/asm/xen/
 
 XEN NETWORK BACKEND DRIVER
-M:	Ian Campbell <ian.campbell@citrix.com>
 M:	Wei Liu <wei.liu2@citrix.com>
 L:	xen-devel@lists.xenproject.org (moderated for non-subscribers)
 L:	netdev@vger.kernel.org

diff --git a/Makefile b/Makefile
index fbe1b92..7b3ecdc 100644
--- a/Makefile
+++ b/Makefile

@@ -1,7 +1,7 @@
 VERSION = 4
 PATCHLEVEL = 5
 SUBLEVEL = 0
-EXTRAVERSION = -rc5
+EXTRAVERSION =
 NAME = Blurry Fish Butt
 
 # *DOCUMENTATION*

diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c
index 2f24447f..46bf263 100644
--- a/arch/alpha/kernel/smp.c
+++ b/arch/alpha/kernel/smp.c

@@ -168,7 +168,7 @@
 	      cpuid, current, current->active_mm));
 
 	preempt_disable();
-	cpu_startup_entry(CPUHP_ONLINE);
+	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 }
 
 /* Wait until hwrpb->txrdy is clear for cpu.  Return -1 on timeout.  */

diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index 0655495..8a188bc 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig

@@ -12,8 +12,6 @@
 	select BUILDTIME_EXTABLE_SORT
 	select COMMON_CLK
 	select CLONE_BACKWARDS
-	# ARC Busybox based initramfs absolutely relies on DEVTMPFS for /dev
-	select DEVTMPFS if !INITRAMFS_SOURCE=""
 	select GENERIC_ATOMIC64
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_FIND_FIRST_BIT
@@ -275,14 +273,6 @@
 	default "0xA0000000"
 	depends on ARC_HAS_DCCM
 
-config ARC_HAS_HW_MPY
-	bool "Use Hardware Multiplier (Normal or Faster XMAC)"
-	default y
-	help
-	  Influences how gcc generates code for MPY operations.
-	  If enabled, MPYxx insns are generated, provided by Standard/XMAC
-	  Multipler. Otherwise software multipy lib is used
-
 choice
 	prompt "MMU Version"
 	default ARC_MMU_V3 if ARC_CPU_770
@@ -542,14 +532,6 @@
 	  Counts number of I and D TLB Misses and exports them via Debugfs
 	  The counters can be cleared via Debugfs as well
 
-if SMP
-
-config ARC_IPI_DBG
-	bool "Debug Inter Core interrupts"
-	default n
-
-endif
-
 endif
 
 config ARC_UBOOT_SUPPORT

diff --git a/arch/arc/Makefile b/arch/arc/Makefile
index aeb1902..c8230f3 100644
--- a/arch/arc/Makefile
+++ b/arch/arc/Makefile

@@ -74,10 +74,6 @@
 # --build-id w/o "-marclinux". Default arc-elf32-ld is OK
 ldflags-$(upto_gcc44)			+= -marclinux
 
-ifndef CONFIG_ARC_HAS_HW_MPY
-	cflags-y	+= -mno-mpy
-endif
-
 LIBGCC	:= $(shell $(CC) $(cflags-y) --print-libgcc-file-name)
 
 # Modules with short calls might break for calls into builtin-kernel

diff --git a/arch/arc/configs/axs101_defconfig b/arch/arc/configs/axs101_defconfig
index f1ac981..5d4e2a0 100644
--- a/arch/arc/configs/axs101_defconfig
+++ b/arch/arc/configs/axs101_defconfig

@@ -39,6 +39,7 @@
 # CONFIG_INET_XFRM_MODE_TUNNEL is not set
 # CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
+CONFIG_DEVTMPFS=y
 # CONFIG_STANDALONE is not set
 # CONFIG_PREVENT_FIRMWARE_BUILD is not set
 # CONFIG_FIRMWARE_IN_KERNEL is not set
@@ -73,7 +74,6 @@
 CONFIG_I2C_DESIGNWARE_PLATFORM=y
 # CONFIG_HWMON is not set
 CONFIG_FB=y
-# CONFIG_VGA_CONSOLE is not set
 CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
 CONFIG_LOGO=y
@@ -91,12 +91,10 @@
 CONFIG_MMC_DW=y
 # CONFIG_IOMMU_SUPPORT is not set
 CONFIG_EXT3_FS=y
-CONFIG_EXT4_FS=y
 CONFIG_MSDOS_FS=y
 CONFIG_VFAT_FS=y
 CONFIG_NTFS_FS=y
 CONFIG_TMPFS=y
-CONFIG_JFFS2_FS=y
 CONFIG_NFS_FS=y
 CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_ISO8859_1=y

diff --git a/arch/arc/configs/axs103_defconfig b/arch/arc/configs/axs103_defconfig
index 323486d..87ee46b 100644
--- a/arch/arc/configs/axs103_defconfig
+++ b/arch/arc/configs/axs103_defconfig

@@ -39,14 +39,10 @@
 # CONFIG_INET_XFRM_MODE_TUNNEL is not set
 # CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
+CONFIG_DEVTMPFS=y
 # CONFIG_STANDALONE is not set
 # CONFIG_PREVENT_FIRMWARE_BUILD is not set
 # CONFIG_FIRMWARE_IN_KERNEL is not set
-CONFIG_MTD=y
-CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_BLOCK=y
-CONFIG_MTD_NAND=y
-CONFIG_MTD_NAND_AXS=y
 CONFIG_SCSI=y
 CONFIG_BLK_DEV_SD=y
 CONFIG_NETDEVICES=y
@@ -78,14 +74,12 @@
 CONFIG_I2C_DESIGNWARE_PLATFORM=y
 # CONFIG_HWMON is not set
 CONFIG_FB=y
-# CONFIG_VGA_CONSOLE is not set
 CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
 CONFIG_LOGO=y
 # CONFIG_LOGO_LINUX_MONO is not set
 # CONFIG_LOGO_LINUX_VGA16 is not set
 # CONFIG_LOGO_LINUX_CLUT224 is not set
-CONFIG_USB=y
 CONFIG_USB_EHCI_HCD=y
 CONFIG_USB_EHCI_HCD_PLATFORM=y
 CONFIG_USB_OHCI_HCD=y
@@ -97,12 +91,10 @@
 CONFIG_MMC_DW=y
 # CONFIG_IOMMU_SUPPORT is not set
 CONFIG_EXT3_FS=y
-CONFIG_EXT4_FS=y
 CONFIG_MSDOS_FS=y
 CONFIG_VFAT_FS=y
 CONFIG_NTFS_FS=y
 CONFIG_TMPFS=y
-CONFIG_JFFS2_FS=y
 CONFIG_NFS_FS=y
 CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_ISO8859_1=y

diff --git a/arch/arc/configs/axs103_smp_defconfig b/arch/arc/configs/axs103_smp_defconfig
index 66191cd..d80daf4 100644
--- a/arch/arc/configs/axs103_smp_defconfig
+++ b/arch/arc/configs/axs103_smp_defconfig

@@ -40,14 +40,10 @@
 # CONFIG_INET_XFRM_MODE_TUNNEL is not set
 # CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
+CONFIG_DEVTMPFS=y
 # CONFIG_STANDALONE is not set
 # CONFIG_PREVENT_FIRMWARE_BUILD is not set
 # CONFIG_FIRMWARE_IN_KERNEL is not set
-CONFIG_MTD=y
-CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_BLOCK=y
-CONFIG_MTD_NAND=y
-CONFIG_MTD_NAND_AXS=y
 CONFIG_SCSI=y
 CONFIG_BLK_DEV_SD=y
 CONFIG_NETDEVICES=y
@@ -79,14 +75,12 @@
 CONFIG_I2C_DESIGNWARE_PLATFORM=y
 # CONFIG_HWMON is not set
 CONFIG_FB=y
-# CONFIG_VGA_CONSOLE is not set
 CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
 CONFIG_LOGO=y
 # CONFIG_LOGO_LINUX_MONO is not set
 # CONFIG_LOGO_LINUX_VGA16 is not set
 # CONFIG_LOGO_LINUX_CLUT224 is not set
-CONFIG_USB=y
 CONFIG_USB_EHCI_HCD=y
 CONFIG_USB_EHCI_HCD_PLATFORM=y
 CONFIG_USB_OHCI_HCD=y
@@ -98,12 +92,10 @@
 CONFIG_MMC_DW=y
 # CONFIG_IOMMU_SUPPORT is not set
 CONFIG_EXT3_FS=y
-CONFIG_EXT4_FS=y
 CONFIG_MSDOS_FS=y
 CONFIG_VFAT_FS=y
 CONFIG_NTFS_FS=y
 CONFIG_TMPFS=y
-CONFIG_JFFS2_FS=y
 CONFIG_NFS_FS=y
 CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_ISO8859_1=y

diff --git a/arch/arc/configs/nsim_700_defconfig b/arch/arc/configs/nsim_700_defconfig
index 138f9d8..f410953 100644
--- a/arch/arc/configs/nsim_700_defconfig
+++ b/arch/arc/configs/nsim_700_defconfig

@@ -4,6 +4,7 @@
 # CONFIG_SWAP is not set
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
+# CONFIG_CROSS_MEMORY_ATTACH is not set
 CONFIG_HIGH_RES_TIMERS=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
@@ -26,7 +27,6 @@
 CONFIG_ARC_BUILTIN_DTB_NAME="nsim_700"
 CONFIG_PREEMPT=y
 # CONFIG_COMPACTION is not set
-# CONFIG_CROSS_MEMORY_ATTACH is not set
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
@@ -34,6 +34,7 @@
 CONFIG_NET_KEY=y
 CONFIG_INET=y
 # CONFIG_IPV6 is not set
+CONFIG_DEVTMPFS=y
 # CONFIG_STANDALONE is not set
 # CONFIG_PREVENT_FIRMWARE_BUILD is not set
 # CONFIG_FIRMWARE_IN_KERNEL is not set
@@ -51,7 +52,6 @@
 CONFIG_SERIAL_ARC_CONSOLE=y
 # CONFIG_HW_RANDOM is not set
 # CONFIG_HWMON is not set
-# CONFIG_VGA_CONSOLE is not set
 # CONFIG_HID is not set
 # CONFIG_USB_SUPPORT is not set
 # CONFIG_IOMMU_SUPPORT is not set
@@ -63,4 +63,3 @@
 # CONFIG_ENABLE_WARN_DEPRECATED is not set
 # CONFIG_ENABLE_MUST_CHECK is not set
 # CONFIG_DEBUG_PREEMPT is not set
-CONFIG_XZ_DEC=y

diff --git a/arch/arc/configs/nsim_hs_defconfig b/arch/arc/configs/nsim_hs_defconfig
index f68838e..cfaa33c 100644
--- a/arch/arc/configs/nsim_hs_defconfig
+++ b/arch/arc/configs/nsim_hs_defconfig

@@ -35,6 +35,7 @@
 CONFIG_NET_KEY=y
 CONFIG_INET=y
 # CONFIG_IPV6 is not set
+CONFIG_DEVTMPFS=y
 # CONFIG_STANDALONE is not set
 # CONFIG_PREVENT_FIRMWARE_BUILD is not set
 # CONFIG_FIRMWARE_IN_KERNEL is not set
@@ -49,7 +50,6 @@
 CONFIG_SERIAL_ARC_CONSOLE=y
 # CONFIG_HW_RANDOM is not set
 # CONFIG_HWMON is not set
-# CONFIG_VGA_CONSOLE is not set
 # CONFIG_HID is not set
 # CONFIG_USB_SUPPORT is not set
 # CONFIG_IOMMU_SUPPORT is not set
@@ -61,4 +61,3 @@
 # CONFIG_ENABLE_WARN_DEPRECATED is not set
 # CONFIG_ENABLE_MUST_CHECK is not set
 # CONFIG_DEBUG_PREEMPT is not set
-CONFIG_XZ_DEC=y

diff --git a/arch/arc/configs/nsim_hs_smp_defconfig b/arch/arc/configs/nsim_hs_smp_defconfig
index 96bd1c2..bb2a8dc 100644
--- a/arch/arc/configs/nsim_hs_smp_defconfig
+++ b/arch/arc/configs/nsim_hs_smp_defconfig

@@ -2,6 +2,7 @@
 # CONFIG_LOCALVERSION_AUTO is not set
 CONFIG_DEFAULT_HOSTNAME="ARCLinux"
 # CONFIG_SWAP is not set
+# CONFIG_CROSS_MEMORY_ATTACH is not set
 CONFIG_HIGH_RES_TIMERS=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
@@ -21,13 +22,11 @@
 # CONFIG_IOSCHED_DEADLINE is not set
 # CONFIG_IOSCHED_CFQ is not set
 CONFIG_ARC_PLAT_SIM=y
-CONFIG_ARC_BOARD_ML509=y
 CONFIG_ISA_ARCV2=y
 CONFIG_SMP=y
 CONFIG_ARC_BUILTIN_DTB_NAME="nsim_hs_idu"
 CONFIG_PREEMPT=y
 # CONFIG_COMPACTION is not set
-# CONFIG_CROSS_MEMORY_ATTACH is not set
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
@@ -35,6 +34,7 @@
 CONFIG_NET_KEY=y
 CONFIG_INET=y
 # CONFIG_IPV6 is not set
+CONFIG_DEVTMPFS=y
 # CONFIG_STANDALONE is not set
 # CONFIG_PREVENT_FIRMWARE_BUILD is not set
 # CONFIG_FIRMWARE_IN_KERNEL is not set
@@ -49,7 +49,6 @@
 CONFIG_SERIAL_ARC_CONSOLE=y
 # CONFIG_HW_RANDOM is not set
 # CONFIG_HWMON is not set
-# CONFIG_VGA_CONSOLE is not set
 # CONFIG_HID is not set
 # CONFIG_USB_SUPPORT is not set
 # CONFIG_IOMMU_SUPPORT is not set
@@ -60,4 +59,3 @@
 CONFIG_NFS_FS=y
 # CONFIG_ENABLE_WARN_DEPRECATED is not set
 # CONFIG_ENABLE_MUST_CHECK is not set
-CONFIG_XZ_DEC=y

diff --git a/arch/arc/configs/nsimosci_defconfig b/arch/arc/configs/nsimosci_defconfig
index 31e1d95..646182e 100644
--- a/arch/arc/configs/nsimosci_defconfig
+++ b/arch/arc/configs/nsimosci_defconfig

@@ -33,6 +33,7 @@
 CONFIG_NET_KEY=y
 CONFIG_INET=y
 # CONFIG_IPV6 is not set
+CONFIG_DEVTMPFS=y
 # CONFIG_STANDALONE is not set
 # CONFIG_PREVENT_FIRMWARE_BUILD is not set
 # CONFIG_FIRMWARE_IN_KERNEL is not set
@@ -58,7 +59,6 @@
 # CONFIG_HW_RANDOM is not set
 # CONFIG_HWMON is not set
 CONFIG_FB=y
-# CONFIG_VGA_CONSOLE is not set
 CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_LOGO=y
 # CONFIG_HID is not set

diff --git a/arch/arc/configs/nsimosci_hs_defconfig b/arch/arc/configs/nsimosci_hs_defconfig
index fcae666..ceca254 100644
--- a/arch/arc/configs/nsimosci_hs_defconfig
+++ b/arch/arc/configs/nsimosci_hs_defconfig

@@ -34,12 +34,12 @@
 CONFIG_NET_KEY=y
 CONFIG_INET=y
 # CONFIG_IPV6 is not set
+CONFIG_DEVTMPFS=y
 # CONFIG_STANDALONE is not set
 # CONFIG_PREVENT_FIRMWARE_BUILD is not set
 # CONFIG_FIRMWARE_IN_KERNEL is not set
 # CONFIG_BLK_DEV is not set
 CONFIG_NETDEVICES=y
-CONFIG_NET_OSCI_LAN=y
 CONFIG_INPUT_EVDEV=y
 # CONFIG_MOUSE_PS2_ALPS is not set
 # CONFIG_MOUSE_PS2_LOGIPS2PP is not set
@@ -58,7 +58,6 @@
 # CONFIG_HW_RANDOM is not set
 # CONFIG_HWMON is not set
 CONFIG_FB=y
-# CONFIG_VGA_CONSOLE is not set
 CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_LOGO=y
 # CONFIG_HID is not set

diff --git a/arch/arc/configs/nsimosci_hs_smp_defconfig b/arch/arc/configs/nsimosci_hs_smp_defconfig
index b01b659..4b6da90 100644
--- a/arch/arc/configs/nsimosci_hs_smp_defconfig
+++ b/arch/arc/configs/nsimosci_hs_smp_defconfig

@@ -2,6 +2,7 @@
 CONFIG_DEFAULT_HOSTNAME="ARCLinux"
 # CONFIG_SWAP is not set
 CONFIG_SYSVIPC=y
+# CONFIG_CROSS_MEMORY_ATTACH is not set
 CONFIG_NO_HZ=y
 CONFIG_HIGH_RES_TIMERS=y
 CONFIG_IKCONFIG=y
@@ -18,15 +19,11 @@
 # CONFIG_IOSCHED_DEADLINE is not set
 # CONFIG_IOSCHED_CFQ is not set
 CONFIG_ARC_PLAT_SIM=y
-CONFIG_ARC_BOARD_ML509=y
 CONFIG_ISA_ARCV2=y
 CONFIG_SMP=y
-CONFIG_ARC_HAS_LL64=y
-# CONFIG_ARC_HAS_RTSC is not set
 CONFIG_ARC_BUILTIN_DTB_NAME="nsimosci_hs_idu"
 CONFIG_PREEMPT=y
 # CONFIG_COMPACTION is not set
-# CONFIG_CROSS_MEMORY_ATTACH is not set
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_PACKET_DIAG=y
@@ -40,6 +37,7 @@
 # CONFIG_INET_LRO is not set
 # CONFIG_IPV6 is not set
 # CONFIG_WIRELESS is not set
+CONFIG_DEVTMPFS=y
 # CONFIG_STANDALONE is not set
 # CONFIG_PREVENT_FIRMWARE_BUILD is not set
 # CONFIG_FIRMWARE_IN_KERNEL is not set
@@ -56,14 +54,11 @@
 # CONFIG_NET_VENDOR_STMICRO is not set
 # CONFIG_NET_VENDOR_VIA is not set
 # CONFIG_NET_VENDOR_WIZNET is not set
-CONFIG_NET_OSCI_LAN=y
 # CONFIG_WLAN is not set
 CONFIG_INPUT_EVDEV=y
 CONFIG_MOUSE_PS2_TOUCHKIT=y
 # CONFIG_SERIO_SERPORT is not set
-CONFIG_SERIO_LIBPS2=y
 CONFIG_SERIO_ARC_PS2=y
-CONFIG_VT_HW_CONSOLE_BINDING=y
 # CONFIG_LEGACY_PTYS is not set
 # CONFIG_DEVKMEM is not set
 CONFIG_SERIAL_8250=y
@@ -75,9 +70,6 @@
 # CONFIG_HW_RANDOM is not set
 # CONFIG_HWMON is not set
 CONFIG_FB=y
-CONFIG_ARCPGU_RGB888=y
-CONFIG_ARCPGU_DISPTYPE=0
-# CONFIG_VGA_CONSOLE is not set
 CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_LOGO=y
 # CONFIG_HID is not set

diff --git a/arch/arc/configs/tb10x_defconfig b/arch/arc/configs/tb10x_defconfig
index 3b4dc9c..9b342ea 100644
--- a/arch/arc/configs/tb10x_defconfig
+++ b/arch/arc/configs/tb10x_defconfig

@@ -3,6 +3,7 @@
 CONFIG_DEFAULT_HOSTNAME="tb10x"
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
+# CONFIG_CROSS_MEMORY_ATTACH is not set
 CONFIG_HIGH_RES_TIMERS=y
 CONFIG_BSD_PROCESS_ACCT=y
 CONFIG_BSD_PROCESS_ACCT_V3=y
@@ -26,12 +27,10 @@
 # CONFIG_BLOCK is not set
 CONFIG_ARC_PLAT_TB10X=y
 CONFIG_ARC_CACHE_LINE_SHIFT=5
-CONFIG_ARC_STACK_NONEXEC=y
 CONFIG_HZ=250
 CONFIG_ARC_BUILTIN_DTB_NAME="abilis_tb100_dvk"
 CONFIG_PREEMPT_VOLUNTARY=y
 # CONFIG_COMPACTION is not set
-# CONFIG_CROSS_MEMORY_ATTACH is not set
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
@@ -44,8 +43,8 @@
 # CONFIG_INET_DIAG is not set
 # CONFIG_IPV6 is not set
 # CONFIG_WIRELESS is not set
+CONFIG_DEVTMPFS=y
 # CONFIG_FIRMWARE_IN_KERNEL is not set
-CONFIG_PROC_DEVICETREE=y
 CONFIG_NETDEVICES=y
 # CONFIG_NET_CADENCE is not set
 # CONFIG_NET_VENDOR_BROADCOM is not set
@@ -55,9 +54,6 @@
 # CONFIG_NET_VENDOR_NATSEMI is not set
 # CONFIG_NET_VENDOR_SEEQ is not set
 CONFIG_STMMAC_ETH=y
-CONFIG_STMMAC_DEBUG_FS=y
-CONFIG_STMMAC_DA=y
-CONFIG_STMMAC_CHAINED=y
 # CONFIG_NET_VENDOR_WIZNET is not set
 # CONFIG_WLAN is not set
 # CONFIG_INPUT is not set
@@ -91,7 +87,6 @@
 CONFIG_LEDS_TRIGGER_TRANSIENT=y
 CONFIG_DMADEVICES=y
 CONFIG_DW_DMAC=y
-CONFIG_NET_DMA=y
 CONFIG_ASYNC_TX_DMA=y
 # CONFIG_IOMMU_SUPPORT is not set
 # CONFIG_DNOTIFY is not set
@@ -100,17 +95,16 @@
 CONFIG_CONFIGFS_FS=y
 # CONFIG_MISC_FILESYSTEMS is not set
 # CONFIG_NETWORK_FILESYSTEMS is not set
+CONFIG_DEBUG_INFO=y
 # CONFIG_ENABLE_WARN_DEPRECATED is not set
-CONFIG_MAGIC_SYSRQ=y
 CONFIG_STRIP_ASM_SYMS=y
 CONFIG_DEBUG_FS=y
 CONFIG_HEADERS_CHECK=y
 CONFIG_DEBUG_SECTION_MISMATCH=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_MEMORY_INIT=y
+CONFIG_DEBUG_STACKOVERFLOW=y
 CONFIG_DETECT_HUNG_TASK=y
 CONFIG_SCHEDSTATS=y
 CONFIG_TIMER_STATS=y
-CONFIG_DEBUG_INFO=y
-CONFIG_DEBUG_MEMORY_INIT=y
-CONFIG_DEBUG_STACKOVERFLOW=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
 # CONFIG_CRYPTO_HW is not set

diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h
index fdc5be5..f9f4c6f 100644
--- a/arch/arc/include/asm/arcregs.h
+++ b/arch/arc/include/asm/arcregs.h

@@ -10,7 +10,8 @@
 #define _ASM_ARC_ARCREGS_H
 
 /* Build Configuration Registers */
-#define ARC_REG_DCCMBASE_BCR	0x61	/* DCCM Base Addr */
+#define ARC_REG_AUX_DCCM	0x18	/* DCCM Base Addr ARCv2 */
+#define ARC_REG_DCCM_BASE_BUILD	0x61	/* DCCM Base Addr ARCompact */
 #define ARC_REG_CRC_BCR		0x62
 #define ARC_REG_VECBASE_BCR	0x68
 #define ARC_REG_PERIBASE_BCR	0x69
@@ -18,10 +19,10 @@
 #define ARC_REG_DPFP_BCR	0x6C	/* ARCompact: Dbl Precision FPU */
 #define ARC_REG_FP_V2_BCR	0xc8	/* ARCv2 FPU */
 #define ARC_REG_SLC_BCR		0xce
-#define ARC_REG_DCCM_BCR	0x74	/* DCCM Present + SZ */
+#define ARC_REG_DCCM_BUILD	0x74	/* DCCM size (common) */
 #define ARC_REG_TIMERS_BCR	0x75
 #define ARC_REG_AP_BCR		0x76
-#define ARC_REG_ICCM_BCR	0x78
+#define ARC_REG_ICCM_BUILD	0x78	/* ICCM size (common) */
 #define ARC_REG_XY_MEM_BCR	0x79
 #define ARC_REG_MAC_BCR		0x7a
 #define ARC_REG_MUL_BCR		0x7b
@@ -36,6 +37,7 @@
 #define ARC_REG_IRQ_BCR		0xF3
 #define ARC_REG_SMART_BCR	0xFF
 #define ARC_REG_CLUSTER_BCR	0xcf
+#define ARC_REG_AUX_ICCM	0x208	/* ICCM Base Addr (ARCv2) */
 
 /* status32 Bits Positions */
 #define STATUS_AE_BIT		5	/* Exception active */
@@ -246,7 +248,7 @@
 #endif
 };
 
-struct bcr_iccm {
+struct bcr_iccm_arcompact {
 #ifdef CONFIG_CPU_BIG_ENDIAN
 	unsigned int base:16, pad:5, sz:3, ver:8;
 #else
@@ -254,17 +256,15 @@
 #endif
 };
 
-/* DCCM Base Address Register: ARC_REG_DCCMBASE_BCR */
-struct bcr_dccm_base {
+struct bcr_iccm_arcv2 {
 #ifdef CONFIG_CPU_BIG_ENDIAN
-	unsigned int addr:24, ver:8;
+	unsigned int pad:8, sz11:4, sz01:4, sz10:4, sz00:4, ver:8;
 #else
-	unsigned int ver:8, addr:24;
+	unsigned int ver:8, sz00:4, sz10:4, sz01:4, sz11:4, pad:8;
 #endif
 };
 
-/* DCCM RAM Configuration Register: ARC_REG_DCCM_BCR */
-struct bcr_dccm {
+struct bcr_dccm_arcompact {
 #ifdef CONFIG_CPU_BIG_ENDIAN
 	unsigned int res:21, sz:3, ver:8;
 #else
@@ -272,6 +272,14 @@
 #endif
 };
 
+struct bcr_dccm_arcv2 {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+	unsigned int pad2:12, cyc:3, pad1:1, sz1:4, sz0:4, ver:8;
+#else
+	unsigned int ver:8, sz0:4, sz1:4, pad1:1, cyc:3, pad2:12;
+#endif
+};
+
 /* ARCompact: Both SP and DP FPU BCRs have same format */
 struct bcr_fp_arcompact {
 #ifdef CONFIG_CPU_BIG_ENDIAN
@@ -315,9 +323,9 @@
 
 struct bcr_generic {
 #ifdef CONFIG_CPU_BIG_ENDIAN
-	unsigned int pad:24, ver:8;
+	unsigned int info:24, ver:8;
 #else
-	unsigned int ver:8, pad:24;
+	unsigned int ver:8, info:24;
 #endif
 };
 

diff --git a/arch/arc/include/asm/irq.h b/arch/arc/include/asm/irq.h
index 4fd7d62..49014f0 100644
--- a/arch/arc/include/asm/irq.h
+++ b/arch/arc/include/asm/irq.h

@@ -16,11 +16,9 @@
 #ifdef CONFIG_ISA_ARCOMPACT
 #define TIMER0_IRQ      3
 #define TIMER1_IRQ      4
-#define IPI_IRQ		(NR_CPU_IRQS-1) /* dummy to enable SMP build for up hardware */
 #else
 #define TIMER0_IRQ      16
 #define TIMER1_IRQ      17
-#define IPI_IRQ         19
 #endif
 
 #include <linux/interrupt.h>

diff --git a/arch/arc/include/asm/irqflags-arcv2.h b/arch/arc/include/asm/irqflags-arcv2.h
index 1fc18ee..37c2f75 100644
--- a/arch/arc/include/asm/irqflags-arcv2.h
+++ b/arch/arc/include/asm/irqflags-arcv2.h

@@ -22,6 +22,7 @@
 #define AUX_IRQ_CTRL		0x00E
 #define AUX_IRQ_ACT		0x043	/* Active Intr across all levels */
 #define AUX_IRQ_LVL_PEND	0x200	/* Pending Intr across all levels */
+#define AUX_IRQ_HINT		0x201	/* For generating Soft Interrupts */
 #define AUX_IRQ_PRIORITY	0x206
 #define ICAUSE			0x40a
 #define AUX_IRQ_SELECT		0x40b
@@ -115,6 +116,16 @@
 	return arch_irqs_disabled_flags(arch_local_save_flags());
 }
 
+static inline void arc_softirq_trigger(int irq)
+{
+	write_aux_reg(AUX_IRQ_HINT, irq);
+}
+
+static inline void arc_softirq_clear(int irq)
+{
+	write_aux_reg(AUX_IRQ_HINT, 0);
+}
+
 #else
 
 .macro IRQ_DISABLE  scratch

diff --git a/arch/arc/kernel/entry-arcv2.S b/arch/arc/kernel/entry-arcv2.S
index b178302..c126460 100644
--- a/arch/arc/kernel/entry-arcv2.S
+++ b/arch/arc/kernel/entry-arcv2.S

@@ -45,11 +45,12 @@
 VECTOR	handle_interrupt	; (16) Timer0
 VECTOR	handle_interrupt	; unused (Timer1)
 VECTOR	handle_interrupt	; unused (WDT)
-VECTOR	handle_interrupt	; (19) ICI (inter core interrupt)
-VECTOR	handle_interrupt
-VECTOR	handle_interrupt
-VECTOR	handle_interrupt
-VECTOR	handle_interrupt	; (23) End of fixed IRQs
+VECTOR	handle_interrupt	; (19) Inter core Interrupt (IPI)
+VECTOR	handle_interrupt	; (20) perf Interrupt
+VECTOR	handle_interrupt	; (21) Software Triggered Intr (Self IPI)
+VECTOR	handle_interrupt	; unused
+VECTOR	handle_interrupt	; (23) unused
+# End of fixed IRQs
 
 .rept CONFIG_ARC_NUMBER_OF_INTERRUPTS - 8
 	VECTOR	handle_interrupt

diff --git a/arch/arc/kernel/intc-compact.c b/arch/arc/kernel/intc-compact.c
index 06bcedf..224d1c3 100644
--- a/arch/arc/kernel/intc-compact.c
+++ b/arch/arc/kernel/intc-compact.c

@@ -81,9 +81,6 @@
 {
 	switch (irq) {
 	case TIMER0_IRQ:
-#ifdef CONFIG_SMP
-	case IPI_IRQ:
-#endif
 		irq_set_chip_and_handler(irq, &onchip_intc, handle_percpu_irq);
 		break;
 	default:

diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c
index bc771f5..c41c364 100644
--- a/arch/arc/kernel/mcip.c
+++ b/arch/arc/kernel/mcip.c

@@ -11,9 +11,13 @@
 #include <linux/smp.h>
 #include <linux/irq.h>
 #include <linux/spinlock.h>
+#include <asm/irqflags-arcv2.h>
 #include <asm/mcip.h>
 #include <asm/setup.h>
 
+#define IPI_IRQ		19
+#define SOFTIRQ_IRQ	21
+
 static char smp_cpuinfo_buf[128];
 static int idu_detected;
 
@@ -22,6 +26,7 @@
 static void mcip_setup_per_cpu(int cpu)
 {
 	smp_ipi_irq_setup(cpu, IPI_IRQ);
+	smp_ipi_irq_setup(cpu, SOFTIRQ_IRQ);
 }
 
 static void mcip_ipi_send(int cpu)
@@ -29,46 +34,44 @@
 	unsigned long flags;
 	int ipi_was_pending;
 
+	/* ARConnect can only send IPI to others */
+	if (unlikely(cpu == raw_smp_processor_id())) {
+		arc_softirq_trigger(SOFTIRQ_IRQ);
+		return;
+	}
+
+	raw_spin_lock_irqsave(&mcip_lock, flags);
+
 	/*
-	 * NOTE: We must spin here if the other cpu hasn't yet
-	 * serviced a previous message. This can burn lots
-	 * of time, but we MUST follows this protocol or
-	 * ipi messages can be lost!!!
-	 * Also, we must release the lock in this loop because
-	 * the other side may get to this same loop and not
-	 * be able to ack -- thus causing deadlock.
+	 * If receiver already has a pending interrupt, elide sending this one.
+	 * Linux cross core calling works well with concurrent IPIs
+	 * coalesced into one
+	 * see arch/arc/kernel/smp.c: ipi_send_msg_one()
 	 */
+	__mcip_cmd(CMD_INTRPT_READ_STATUS, cpu);
+	ipi_was_pending = read_aux_reg(ARC_REG_MCIP_READBACK);
+	if (!ipi_was_pending)
+		__mcip_cmd(CMD_INTRPT_GENERATE_IRQ, cpu);
 
-	do {
-		raw_spin_lock_irqsave(&mcip_lock, flags);
-		__mcip_cmd(CMD_INTRPT_READ_STATUS, cpu);
-		ipi_was_pending = read_aux_reg(ARC_REG_MCIP_READBACK);
-		if (ipi_was_pending == 0)
-			break; /* break out but keep lock */
-		raw_spin_unlock_irqrestore(&mcip_lock, flags);
-	} while (1);
-
-	__mcip_cmd(CMD_INTRPT_GENERATE_IRQ, cpu);
 	raw_spin_unlock_irqrestore(&mcip_lock, flags);
-
-#ifdef CONFIG_ARC_IPI_DBG
-	if (ipi_was_pending)
-		pr_info("IPI ACK delayed from cpu %d\n", cpu);
-#endif
 }
 
 static void mcip_ipi_clear(int irq)
 {
 	unsigned int cpu, c;
 	unsigned long flags;
-	unsigned int __maybe_unused copy;
+
+	if (unlikely(irq == SOFTIRQ_IRQ)) {
+		arc_softirq_clear(irq);
+		return;
+	}
 
 	raw_spin_lock_irqsave(&mcip_lock, flags);
 
 	/* Who sent the IPI */
 	__mcip_cmd(CMD_INTRPT_CHECK_SOURCE, 0);
 
-	copy = cpu = read_aux_reg(ARC_REG_MCIP_READBACK);	/* 1,2,4,8... */
+	cpu = read_aux_reg(ARC_REG_MCIP_READBACK);	/* 1,2,4,8... */
 
 	/*
 	 * In rare case, multiple concurrent IPIs sent to same target can
@@ -82,12 +85,6 @@
 	} while (cpu);
 
 	raw_spin_unlock_irqrestore(&mcip_lock, flags);
-
-#ifdef CONFIG_ARC_IPI_DBG
-	if (c != __ffs(copy))
-		pr_info("IPIs from %x coalesced to %x\n",
-			copy, raw_smp_processor_id());
-#endif
 }
 
 static void mcip_probe_n_setup(void)
@@ -111,10 +108,11 @@
 	READ_BCR(ARC_REG_MCIP_BCR, mp);
 
 	sprintf(smp_cpuinfo_buf,
-		"Extn [SMP]\t: ARConnect (v%d): %d cores with %s%s%s%s\n",
+		"Extn [SMP]\t: ARConnect (v%d): %d cores with %s%s%s%s%s\n",
 		mp.ver, mp.num_cores,
 		IS_AVAIL1(mp.ipi, "IPI "),
 		IS_AVAIL1(mp.idu, "IDU "),
+		IS_AVAIL1(mp.llm, "LLM "),
 		IS_AVAIL1(mp.dbg, "DEBUG "),
 		IS_AVAIL1(mp.gfrc, "GFRC"));
 

diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
index a7edceb..cdc821d 100644
--- a/arch/arc/kernel/setup.c
+++ b/arch/arc/kernel/setup.c

@@ -42,6 +42,53 @@
 
 struct cpuinfo_arc cpuinfo_arc700[NR_CPUS];
 
+static void read_decode_ccm_bcr(struct cpuinfo_arc *cpu)
+{
+	if (is_isa_arcompact()) {
+		struct bcr_iccm_arcompact iccm;
+		struct bcr_dccm_arcompact dccm;
+
+		READ_BCR(ARC_REG_ICCM_BUILD, iccm);
+		if (iccm.ver) {
+			cpu->iccm.sz = 4096 << iccm.sz;	/* 8K to 512K */
+			cpu->iccm.base_addr = iccm.base << 16;
+		}
+
+		READ_BCR(ARC_REG_DCCM_BUILD, dccm);
+		if (dccm.ver) {
+			unsigned long base;
+			cpu->dccm.sz = 2048 << dccm.sz;	/* 2K to 256K */
+
+			base = read_aux_reg(ARC_REG_DCCM_BASE_BUILD);
+			cpu->dccm.base_addr = base & ~0xF;
+		}
+	} else {
+		struct bcr_iccm_arcv2 iccm;
+		struct bcr_dccm_arcv2 dccm;
+		unsigned long region;
+
+		READ_BCR(ARC_REG_ICCM_BUILD, iccm);
+		if (iccm.ver) {
+			cpu->iccm.sz = 256 << iccm.sz00;	/* 512B to 16M */
+			if (iccm.sz00 == 0xF && iccm.sz01 > 0)
+				cpu->iccm.sz <<= iccm.sz01;
+
+			region = read_aux_reg(ARC_REG_AUX_ICCM);
+			cpu->iccm.base_addr = region & 0xF0000000;
+		}
+
+		READ_BCR(ARC_REG_DCCM_BUILD, dccm);
+		if (dccm.ver) {
+			cpu->dccm.sz = 256 << dccm.sz0;
+			if (dccm.sz0 == 0xF && dccm.sz1 > 0)
+				cpu->dccm.sz <<= dccm.sz1;
+
+			region = read_aux_reg(ARC_REG_AUX_DCCM);
+			cpu->dccm.base_addr = region & 0xF0000000;
+		}
+	}
+}
+
 static void read_arc_build_cfg_regs(void)
 {
 	struct bcr_perip uncached_space;
@@ -76,36 +123,11 @@
 	cpu->extn.swap = read_aux_reg(ARC_REG_SWAP_BCR) ? 1 : 0;        /* 1,3 */
 	cpu->extn.crc = read_aux_reg(ARC_REG_CRC_BCR) ? 1 : 0;
 	cpu->extn.minmax = read_aux_reg(ARC_REG_MIXMAX_BCR) > 1 ? 1 : 0; /* 2 */
-
-	/* Note that we read the CCM BCRs independent of kernel config
-	 * This is to catch the cases where user doesn't know that
-	 * CCMs are present in hardware build
-	 */
-	{
-		struct bcr_iccm iccm;
-		struct bcr_dccm dccm;
-		struct bcr_dccm_base dccm_base;
-		unsigned int bcr_32bit_val;
-
-		bcr_32bit_val = read_aux_reg(ARC_REG_ICCM_BCR);
-		if (bcr_32bit_val) {
-			iccm = *((struct bcr_iccm *)&bcr_32bit_val);
-			cpu->iccm.base_addr = iccm.base << 16;
-			cpu->iccm.sz = 0x2000 << (iccm.sz - 1);
-		}
-
-		bcr_32bit_val = read_aux_reg(ARC_REG_DCCM_BCR);
-		if (bcr_32bit_val) {
-			dccm = *((struct bcr_dccm *)&bcr_32bit_val);
-			cpu->dccm.sz = 0x800 << (dccm.sz);
-
-			READ_BCR(ARC_REG_DCCMBASE_BCR, dccm_base);
-			cpu->dccm.base_addr = dccm_base.addr << 8;
-		}
-	}
-
 	READ_BCR(ARC_REG_XY_MEM_BCR, cpu->extn_xymem);
 
+	/* Read CCM BCRs for boot reporting even if not enabled in Kconfig */
+	read_decode_ccm_bcr(cpu);
+
 	read_decode_mmu_bcr();
 	read_decode_cache_bcr();
 
@@ -237,8 +259,6 @@
 
 			n += scnprintf(buf + n, len - n, "mpy[opt %d] ", opt);
 		}
-		n += scnprintf(buf + n, len - n, "%s",
-			       IS_USED_CFG(CONFIG_ARC_HAS_HW_MPY));
 	}
 
 	n += scnprintf(buf + n, len - n, "%s%s%s%s%s%s%s%s\n",

diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c
index ef6e9e1..4cb3add 100644
--- a/arch/arc/kernel/smp.c
+++ b/arch/arc/kernel/smp.c

@@ -142,7 +142,7 @@
 
 	local_irq_enable();
 	preempt_disable();
-	cpu_startup_entry(CPUHP_ONLINE);
+	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 }
 
 /*
@@ -336,11 +336,8 @@
 		int rc;
 
 		rc = __do_IPI(msg);
-#ifdef CONFIG_ARC_IPI_DBG
-		/* IPI received but no valid @msg */
 		if (rc)
 			pr_info("IPI with bogus msg %ld in %ld\n", msg, copy);
-#endif
 		pending &= ~(1U << msg);
 	} while (pending);
 

diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
index 7a6a58e..43788b1 100644
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile

@@ -195,5 +195,7 @@
 $(obj)/font.c: $(FONTC)
 	$(call cmd,shipped)
 
+AFLAGS_hyp-stub.o := -Wa,-march=armv7-a
+
 $(obj)/hyp-stub.S: $(srctree)/arch/$(SRCARCH)/kernel/hyp-stub.S
 	$(call cmd,shipped)

diff --git a/arch/arm/boot/dts/am335x-bone-common.dtsi b/arch/arm/boot/dts/am335x-bone-common.dtsi
index f3db13d..0cc150b 100644
--- a/arch/arm/boot/dts/am335x-bone-common.dtsi
+++ b/arch/arm/boot/dts/am335x-bone-common.dtsi

@@ -285,8 +285,10 @@
 	};
 };
 
+
+/include/ "tps65217.dtsi"
+
 &tps {
-	compatible = "ti,tps65217";
 	/*
 	 * Configure pmic to enter OFF-state instead of SLEEP-state ("RTC-only
 	 * mode") at poweroff.  Most BeagleBone versions do not support RTC-only
@@ -307,17 +309,12 @@
 	ti,pmic-shutdown-controller;
 
 	regulators {
-		#address-cells = <1>;
-		#size-cells = <0>;
-
 		dcdc1_reg: regulator@0 {
-			reg = <0>;
 			regulator-name = "vdds_dpr";
 			regulator-always-on;
 		};
 
 		dcdc2_reg: regulator@1 {
-			reg = <1>;
 			/* VDD_MPU voltage limits 0.95V - 1.26V with +/-4% tolerance */
 			regulator-name = "vdd_mpu";
 			regulator-min-microvolt = <925000>;
@@ -327,7 +324,6 @@
 		};
 
 		dcdc3_reg: regulator@2 {
-			reg = <2>;
 			/* VDD_CORE voltage limits 0.95V - 1.1V with +/-4% tolerance */
 			regulator-name = "vdd_core";
 			regulator-min-microvolt = <925000>;
@@ -337,25 +333,21 @@
 		};
 
 		ldo1_reg: regulator@3 {
-			reg = <3>;
 			regulator-name = "vio,vrtc,vdds";
 			regulator-always-on;
 		};
 
 		ldo2_reg: regulator@4 {
-			reg = <4>;
 			regulator-name = "vdd_3v3aux";
 			regulator-always-on;
 		};
 
 		ldo3_reg: regulator@5 {
-			reg = <5>;
 			regulator-name = "vdd_1v8";
 			regulator-always-on;
 		};
 
 		ldo4_reg: regulator@6 {
-			reg = <6>;
 			regulator-name = "vdd_3v3a";
 			regulator-always-on;
 		};

diff --git a/arch/arm/boot/dts/am335x-chilisom.dtsi b/arch/arm/boot/dts/am335x-chilisom.dtsi
index fda457b..857d989 100644
--- a/arch/arm/boot/dts/am335x-chilisom.dtsi
+++ b/arch/arm/boot/dts/am335x-chilisom.dtsi

@@ -128,21 +128,16 @@
 
 };
 
+/include/ "tps65217.dtsi"
+
 &tps {
-	compatible = "ti,tps65217";
-
 	regulators {
-		#address-cells = <1>;
-		#size-cells = <0>;
-
 		dcdc1_reg: regulator@0 {
-			reg = <0>;
 			regulator-name = "vdds_dpr";
 			regulator-always-on;
 		};
 
 		dcdc2_reg: regulator@1 {
-			reg = <1>;
 			/* VDD_MPU voltage limits 0.95V - 1.26V with +/-4% tolerance */
 			regulator-name = "vdd_mpu";
 			regulator-min-microvolt = <925000>;
@@ -152,7 +147,6 @@
 		};
 
 		dcdc3_reg: regulator@2 {
-			reg = <2>;
 			/* VDD_CORE voltage limits 0.95V - 1.1V with +/-4% tolerance */
 			regulator-name = "vdd_core";
 			regulator-min-microvolt = <925000>;
@@ -162,28 +156,24 @@
 		};
 
 		ldo1_reg: regulator@3 {
-			reg = <3>;
 			regulator-name = "vio,vrtc,vdds";
 			regulator-boot-on;
 			regulator-always-on;
 		};
 
 		ldo2_reg: regulator@4 {
-			reg = <4>;
 			regulator-name = "vdd_3v3aux";
 			regulator-boot-on;
 			regulator-always-on;
 		};
 
 		ldo3_reg: regulator@5 {
-			reg = <5>;
 			regulator-name = "vdd_1v8";
 			regulator-boot-on;
 			regulator-always-on;
 		};
 
 		ldo4_reg: regulator@6 {
-			reg = <6>;
 			regulator-name = "vdd_3v3d";
 			regulator-boot-on;
 			regulator-always-on;

diff --git a/arch/arm/boot/dts/am335x-nano.dts b/arch/arm/boot/dts/am335x-nano.dts
index 77559a1..f313999 100644
--- a/arch/arm/boot/dts/am335x-nano.dts
+++ b/arch/arm/boot/dts/am335x-nano.dts

@@ -375,15 +375,11 @@
 	wp-gpios = <&gpio3 18 0>;
 };
 
+#include "tps65217.dtsi"
+
 &tps {
-	compatible = "ti,tps65217";
-
 	regulators {
-		#address-cells = <1>;
-		#size-cells = <0>;
-
 		dcdc1_reg: regulator@0 {
-			reg = <0>;
 			/* +1.5V voltage with ±4% tolerance */
 			regulator-min-microvolt = <1450000>;
 			regulator-max-microvolt = <1550000>;
@@ -392,7 +388,6 @@
 		};
 
 		dcdc2_reg: regulator@1 {
-			reg = <1>;
 			/* VDD_MPU voltage limits 0.95V - 1.1V with ±4% tolerance */
 			regulator-name = "vdd_mpu";
 			regulator-min-microvolt = <915000>;
@@ -402,7 +397,6 @@
 		};
 
 		dcdc3_reg: regulator@2 {
-			reg = <2>;
 			/* VDD_CORE voltage limits 0.95V - 1.1V with ±4% tolerance */
 			regulator-name = "vdd_core";
 			regulator-min-microvolt = <915000>;
@@ -412,7 +406,6 @@
 		};
 
 		ldo1_reg: regulator@3 {
-			reg = <3>;
 			/* +1.8V voltage with ±4% tolerance */
 			regulator-min-microvolt = <1750000>;
 			regulator-max-microvolt = <1870000>;
@@ -421,7 +414,6 @@
 		};
 
 		ldo2_reg: regulator@4 {
-			reg = <4>;
 			/* +3.3V voltage with ±4% tolerance */
 			regulator-min-microvolt = <3175000>;
 			regulator-max-microvolt = <3430000>;
@@ -430,7 +422,6 @@
 		};
 
 		ldo3_reg: regulator@5 {
-			reg = <5>;
 			/* +1.8V voltage with ±4% tolerance */
 			regulator-min-microvolt = <1750000>;
 			regulator-max-microvolt = <1870000>;
@@ -439,7 +430,6 @@
 		};
 
 		ldo4_reg: regulator@6 {
-			reg = <6>;
 			/* +3.3V voltage with ±4% tolerance */
 			regulator-min-microvolt = <3175000>;
 			regulator-max-microvolt = <3430000>;

diff --git a/arch/arm/boot/dts/am335x-pepper.dts b/arch/arm/boot/dts/am335x-pepper.dts
index 471a3a7..8867aaa 100644
--- a/arch/arm/boot/dts/am335x-pepper.dts
+++ b/arch/arm/boot/dts/am335x-pepper.dts

@@ -420,9 +420,9 @@
 	vin-supply = <&vbat>;
 };
 
-&tps {
-	compatible = "ti,tps65217";
+/include/ "tps65217.dtsi"
 
+&tps {
 	backlight {
 		isel = <1>; /* ISET1 */
 		fdim = <200>; /* TPS65217_BL_FDIM_200HZ */
@@ -430,17 +430,12 @@
 	};
 
 	regulators {
-		#address-cells = <1>;
-		#size-cells = <0>;
-
 		dcdc1_reg: regulator@0 {
-			reg = <0>;
 			/* VDD_1V8 system supply */
 			regulator-always-on;
 		};
 
 		dcdc2_reg: regulator@1 {
-			reg = <1>;
 			/* VDD_CORE voltage limits 0.95V - 1.26V with +/-4% tolerance */
 			regulator-name = "vdd_core";
 			regulator-min-microvolt = <925000>;
@@ -450,7 +445,6 @@
 		};
 
 		dcdc3_reg: regulator@2 {
-			reg = <2>;
 			/* VDD_MPU voltage limits 0.95V - 1.1V with +/-4% tolerance */
 			regulator-name = "vdd_mpu";
 			regulator-min-microvolt = <925000>;
@@ -460,21 +454,18 @@
 		};
 
 		ldo1_reg: regulator@3 {
-			reg = <3>;
 			/* VRTC 1.8V always-on supply */
 			regulator-name = "vrtc,vdds";
 			regulator-always-on;
 		};
 
 		ldo2_reg: regulator@4 {
-			reg = <4>;
 			/* 3.3V rail */
 			regulator-name = "vdd_3v3aux";
 			regulator-always-on;
 		};
 
 		ldo3_reg: regulator@5 {
-			reg = <5>;
 			/* VDD_3V3A 3.3V rail */
 			regulator-name = "vdd_3v3a";
 			regulator-min-microvolt = <3300000>;
@@ -482,7 +473,6 @@
 		};
 
 		ldo4_reg: regulator@6 {
-			reg = <6>;
 			/* VDD_3V3B 3.3V rail */
 			regulator-name = "vdd_3v3b";
 			regulator-always-on;

diff --git a/arch/arm/boot/dts/am335x-shc.dts b/arch/arm/boot/dts/am335x-shc.dts
index 1b5b044..865de85 100644
--- a/arch/arm/boot/dts/am335x-shc.dts
+++ b/arch/arm/boot/dts/am335x-shc.dts

@@ -46,7 +46,7 @@
 			gpios = <&gpio1 29 GPIO_ACTIVE_HIGH>;
 			linux,code = <KEY_BACK>;
 			debounce-interval = <1000>;
-			gpio-key,wakeup;
+			wakeup-source;
 		};
 
 		front_button {
@@ -54,7 +54,7 @@
 			gpios = <&gpio1 25 GPIO_ACTIVE_HIGH>;
 			linux,code = <KEY_FRONT>;
 			debounce-interval = <1000>;
-			gpio-key,wakeup;
+			wakeup-source;
 		};
 	};
 

diff --git a/arch/arm/boot/dts/am335x-sl50.dts b/arch/arm/boot/dts/am335x-sl50.dts
index d38edfa..3303c28 100644
--- a/arch/arm/boot/dts/am335x-sl50.dts
+++ b/arch/arm/boot/dts/am335x-sl50.dts

@@ -375,19 +375,16 @@
 	pinctrl-0 = <&uart4_pins>;
 };
 
+#include "tps65217.dtsi"
+
 &tps {
-	compatible = "ti,tps65217";
 	ti,pmic-shutdown-controller;
 
 	interrupt-parent = <&intc>;
 	interrupts = <7>;	/* NNMI */
 
 	regulators {
-		#address-cells = <1>;
-		#size-cells = <0>;
-
 		dcdc1_reg: regulator@0 {
-			reg = <0>;
 			/* VDDS_DDR */
 			regulator-min-microvolt = <1500000>;
 			regulator-max-microvolt = <1500000>;
@@ -395,7 +392,6 @@
 		};
 
 		dcdc2_reg: regulator@1 {
-			reg = <1>;
 			/* VDD_MPU voltage limits 0.95V - 1.26V with +/-4% tolerance */
 			regulator-name = "vdd_mpu";
 			regulator-min-microvolt = <925000>;
@@ -405,7 +401,6 @@
 		};
 
 		dcdc3_reg: regulator@2 {
-			reg = <2>;
 			/* VDD_CORE voltage limits 0.95V - 1.1V with +/-4% tolerance */
 			regulator-name = "vdd_core";
 			regulator-min-microvolt = <925000>;
@@ -415,7 +410,6 @@
 		};
 
 		ldo1_reg: regulator@3 {
-			reg = <3>;
 			/* VRTC / VIO / VDDS*/
 			regulator-always-on;
 			regulator-min-microvolt = <1800000>;
@@ -423,7 +417,6 @@
 		};
 
 		ldo2_reg: regulator@4 {
-			reg = <4>;
 			/* VDD_3V3AUX */
 			regulator-always-on;
 			regulator-min-microvolt = <3300000>;
@@ -431,7 +424,6 @@
 		};
 
 		ldo3_reg: regulator@5 {
-			reg = <5>;
 			/* VDD_1V8 */
 			regulator-min-microvolt = <1800000>;
 			regulator-max-microvolt = <1800000>;
@@ -439,7 +431,6 @@
 		};
 
 		ldo4_reg: regulator@6 {
-			reg = <6>;
 			/* VDD_3V3A */
 			regulator-min-microvolt = <3300000>;
 			regulator-max-microvolt = <3300000>;

diff --git a/arch/arm/boot/dts/am57xx-beagle-x15.dts b/arch/arm/boot/dts/am57xx-beagle-x15.dts
index 36c0fa6..a0986c6 100644
--- a/arch/arm/boot/dts/am57xx-beagle-x15.dts
+++ b/arch/arm/boot/dts/am57xx-beagle-x15.dts

@@ -173,6 +173,8 @@
 
 		sound0_master: simple-audio-card,codec {
 			sound-dai = <&tlv320aic3104>;
+			assigned-clocks = <&clkoutmux2_clk_mux>;
+			assigned-clock-parents = <&sys_clk2_dclk_div>;
 			clocks = <&clkout2_clk>;
 		};
 	};
@@ -796,6 +798,8 @@
 	pinctrl-names = "default", "sleep";
 	pinctrl-0 = <&mcasp3_pins_default>;
 	pinctrl-1 = <&mcasp3_pins_sleep>;
+	assigned-clocks = <&mcasp3_ahclkx_mux>;
+	assigned-clock-parents = <&sys_clkin2>;
 	status = "okay";
 
 	op-mode = <0>;	/* MCASP_IIS_MODE */

diff --git a/arch/arm/boot/dts/am57xx-cl-som-am57x.dts b/arch/arm/boot/dts/am57xx-cl-som-am57x.dts
index 8d93882..1c06cb7 100644
--- a/arch/arm/boot/dts/am57xx-cl-som-am57x.dts
+++ b/arch/arm/boot/dts/am57xx-cl-som-am57x.dts

@@ -545,7 +545,7 @@
 		ti,debounce-tol = /bits/ 16 <10>;
 		ti,debounce-rep = /bits/ 16 <1>;
 
-		linux,wakeup;
+		wakeup-source;
 	};
 };
 

diff --git a/arch/arm/boot/dts/armada-xp-axpwifiap.dts b/arch/arm/boot/dts/armada-xp-axpwifiap.dts
index 23fc670..5c21b23 100644
--- a/arch/arm/boot/dts/armada-xp-axpwifiap.dts
+++ b/arch/arm/boot/dts/armada-xp-axpwifiap.dts

@@ -70,8 +70,8 @@
 	soc {
 		ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xf1000000 0x100000
 			  MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000
-			  MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000
-			  MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000>;
+			  MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000
+			  MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>;
 
 		pcie-controller {
 			status = "okay";

diff --git a/arch/arm/boot/dts/armada-xp-db.dts b/arch/arm/boot/dts/armada-xp-db.dts
index f774101..ebe1d26 100644
--- a/arch/arm/boot/dts/armada-xp-db.dts
+++ b/arch/arm/boot/dts/armada-xp-db.dts

@@ -76,8 +76,8 @@
 		ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xf1000000 0x100000
 			  MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000
 			  MBUS_ID(0x01, 0x2f) 0 0 0xf0000000 0x1000000
-			  MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000
-			  MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000>;
+			  MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000
+			  MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>;
 
 		devbus-bootcs {
 			status = "okay";

diff --git a/arch/arm/boot/dts/armada-xp-gp.dts b/arch/arm/boot/dts/armada-xp-gp.dts
index 4878d73..5730b87 100644
--- a/arch/arm/boot/dts/armada-xp-gp.dts
+++ b/arch/arm/boot/dts/armada-xp-gp.dts

@@ -95,8 +95,8 @@
 		ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xf1000000 0x100000
 			  MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000
 			  MBUS_ID(0x01, 0x2f) 0 0 0xf0000000 0x1000000
-			  MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000
-			  MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000>;
+			  MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000
+			  MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>;
 
 		devbus-bootcs {
 			status = "okay";

diff --git a/arch/arm/boot/dts/armada-xp-lenovo-ix4-300d.dts b/arch/arm/boot/dts/armada-xp-lenovo-ix4-300d.dts
index fb9e1bb..8af463f 100644
--- a/arch/arm/boot/dts/armada-xp-lenovo-ix4-300d.dts
+++ b/arch/arm/boot/dts/armada-xp-lenovo-ix4-300d.dts

@@ -65,8 +65,8 @@
 	soc {
 		ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xd0000000 0x100000
 			MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000
-			MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000
-			MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000>;
+			MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000
+			MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>;
 
 		pcie-controller {
 			status = "okay";

diff --git a/arch/arm/boot/dts/armada-xp-linksys-mamba.dts b/arch/arm/boot/dts/armada-xp-linksys-mamba.dts
index 6e9820e..b89e6cf 100644
--- a/arch/arm/boot/dts/armada-xp-linksys-mamba.dts
+++ b/arch/arm/boot/dts/armada-xp-linksys-mamba.dts

@@ -70,8 +70,8 @@
 	soc {
 		ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xf1000000 0x100000
 			  MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000
-			  MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000
-			  MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000>;
+			  MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000
+			  MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>;
 
 		pcie-controller {
 			status = "okay";

diff --git a/arch/arm/boot/dts/armada-xp-matrix.dts b/arch/arm/boot/dts/armada-xp-matrix.dts
index 6ab3383..6522b04 100644
--- a/arch/arm/boot/dts/armada-xp-matrix.dts
+++ b/arch/arm/boot/dts/armada-xp-matrix.dts

@@ -68,8 +68,8 @@
 	soc {
 		ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xf1000000 0x100000
 			  MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000
-			  MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000
-			  MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000>;
+			  MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000
+			  MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>;
 
 		internal-regs {
 			serial@12000 {

diff --git a/arch/arm/boot/dts/armada-xp-netgear-rn2120.dts b/arch/arm/boot/dts/armada-xp-netgear-rn2120.dts
index 62175a8..d19f44c 100644
--- a/arch/arm/boot/dts/armada-xp-netgear-rn2120.dts
+++ b/arch/arm/boot/dts/armada-xp-netgear-rn2120.dts

@@ -64,8 +64,8 @@
 	soc {
 		ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xd0000000 0x100000
 			  MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000
-			  MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000
-			  MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000>;
+			  MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000
+			  MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>;
 
 		pcie-controller {
 			status = "okay";

diff --git a/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts b/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts
index a5db177..853bd39 100644
--- a/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts
+++ b/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts

@@ -65,9 +65,9 @@
 	soc {
 		ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xd0000000 0x100000
 			  MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000
-			  MBUS_ID(0x01, 0x2f) 0 0 0xf0000000 0x8000000
-			  MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000
-			  MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000>;
+			  MBUS_ID(0x01, 0x2f) 0 0 0xe8000000 0x8000000
+			  MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000
+			  MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>;
 
 		devbus-bootcs {
 			status = "okay";

diff --git a/arch/arm/boot/dts/armada-xp-synology-ds414.dts b/arch/arm/boot/dts/armada-xp-synology-ds414.dts
index 2391b11..d17dab0 100644
--- a/arch/arm/boot/dts/armada-xp-synology-ds414.dts
+++ b/arch/arm/boot/dts/armada-xp-synology-ds414.dts

@@ -78,8 +78,8 @@
 	soc {
 		ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xf1000000 0x100000
 			  MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000
-			  MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000
-			  MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000>;
+			  MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000
+			  MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>;
 
 		pcie-controller {
 			status = "okay";

diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi
index c4d9175..f82aa44 100644
--- a/arch/arm/boot/dts/dra7.dtsi
+++ b/arch/arm/boot/dts/dra7.dtsi

@@ -1500,6 +1500,16 @@
 			       0x48485200 0x2E00>;
 			#address-cells = <1>;
 			#size-cells = <1>;
+
+			/*
+			 * Do not allow gating of cpsw clock as workaround
+			 * for errata i877. Keeping internal clock disabled
+			 * causes the device switching characteristics
+			 * to degrade over time and eventually fail to meet
+			 * the data manual delay time/skew specs.
+			 */
+			ti,no-idle;
+
 			/*
 			 * rx_thresh_pend
 			 * rx_pend

diff --git a/arch/arm/boot/dts/imx6qdl.dtsi b/arch/arm/boot/dts/imx6qdl.dtsi
index 4f6ae92..f74d3db 100644
--- a/arch/arm/boot/dts/imx6qdl.dtsi
+++ b/arch/arm/boot/dts/imx6qdl.dtsi

@@ -896,7 +896,6 @@
 				#size-cells = <1>;
 				reg = <0x2100000 0x10000>;
 				ranges = <0 0x2100000 0x10000>;
-				interrupt-parent = <&intc>;
 				clocks = <&clks IMX6QDL_CLK_CAAM_MEM>,
 					 <&clks IMX6QDL_CLK_CAAM_ACLK>,
 					 <&clks IMX6QDL_CLK_CAAM_IPG>,

diff --git a/arch/arm/boot/dts/kirkwood-ds112.dts b/arch/arm/boot/dts/kirkwood-ds112.dts
index bf4143c..b84af3d 100644
--- a/arch/arm/boot/dts/kirkwood-ds112.dts
+++ b/arch/arm/boot/dts/kirkwood-ds112.dts

@@ -14,7 +14,7 @@
 #include "kirkwood-synology.dtsi"
 
 / {
-	model = "Synology DS111";
+	model = "Synology DS112";
 	compatible = "synology,ds111", "marvell,kirkwood";
 
 	memory {

diff --git a/arch/arm/boot/dts/orion5x-linkstation-lswtgl.dts b/arch/arm/boot/dts/orion5x-linkstation-lswtgl.dts
index 4207882..aae8a7a 100644
--- a/arch/arm/boot/dts/orion5x-linkstation-lswtgl.dts
+++ b/arch/arm/boot/dts/orion5x-linkstation-lswtgl.dts

@@ -228,6 +228,37 @@
 	};
 };
 
+&devbus_bootcs {
+	status = "okay";
+	devbus,keep-config;
+
+	flash@0 {
+		compatible = "jedec-flash";
+		reg = <0 0x40000>;
+		bank-width = <1>;
+
+		partitions {
+			compatible = "fixed-partitions";
+			#address-cells = <1>;
+			#size-cells = <1>;
+
+			header@0 {
+				reg = <0 0x30000>;
+				read-only;
+			};
+
+			uboot@30000 {
+				reg = <0x30000 0xF000>;
+				read-only;
+			};
+
+			uboot_env@3F000 {
+				reg = <0x3F000 0x1000>;
+			};
+		};
+	};
+};
+
 &mdio {
 	status = "okay";
 

diff --git a/arch/arm/boot/dts/r8a7791-porter.dts b/arch/arm/boot/dts/r8a7791-porter.dts
index 6713b1e..01d239c 100644
--- a/arch/arm/boot/dts/r8a7791-porter.dts
+++ b/arch/arm/boot/dts/r8a7791-porter.dts

@@ -283,7 +283,6 @@
 	pinctrl-names = "default";
 
 	status = "okay";
-	renesas,enable-gpio = <&gpio5 31 GPIO_ACTIVE_HIGH>;
 };
 
 &usbphy {

diff --git a/arch/arm/boot/dts/sama5d2-pinfunc.h b/arch/arm/boot/dts/sama5d2-pinfunc.h
index 1afe246..b0c912fe 100644
--- a/arch/arm/boot/dts/sama5d2-pinfunc.h
+++ b/arch/arm/boot/dts/sama5d2-pinfunc.h

@@ -90,7 +90,7 @@
 #define PIN_PA14__I2SC1_MCK		PINMUX_PIN(PIN_PA14, 4, 2)
 #define PIN_PA14__FLEXCOM3_IO2		PINMUX_PIN(PIN_PA14, 5, 1)
 #define PIN_PA14__D9			PINMUX_PIN(PIN_PA14, 6, 2)
-#define PIN_PA15			14
+#define PIN_PA15			15
 #define PIN_PA15__GPIO			PINMUX_PIN(PIN_PA15, 0, 0)
 #define PIN_PA15__SPI0_MOSI		PINMUX_PIN(PIN_PA15, 1, 1)
 #define PIN_PA15__TF1			PINMUX_PIN(PIN_PA15, 2, 1)

diff --git a/arch/arm/boot/dts/tps65217.dtsi b/arch/arm/boot/dts/tps65217.dtsi
new file mode 100644
index 0000000..a632724
--- /dev/null
+++ b/arch/arm/boot/dts/tps65217.dtsi

@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * Integrated Power Management Chip
+ * http://www.ti.com/lit/ds/symlink/tps65217.pdf
+ */
+
+&tps {
+	compatible = "ti,tps65217";
+
+	regulators {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		dcdc1_reg: regulator@0 {
+			reg = <0>;
+			regulator-compatible = "dcdc1";
+		};
+
+		dcdc2_reg: regulator@1 {
+			reg = <1>;
+			regulator-compatible = "dcdc2";
+		};
+
+		dcdc3_reg: regulator@2 {
+			reg = <2>;
+			regulator-compatible = "dcdc3";
+		};
+
+		ldo1_reg: regulator@3 {
+			reg = <3>;
+			regulator-compatible = "ldo1";
+		};
+
+		ldo2_reg: regulator@4 {
+			reg = <4>;
+			regulator-compatible = "ldo2";
+		};
+
+		ldo3_reg: regulator@5 {
+			reg = <5>;
+			regulator-compatible = "ldo3";
+		};
+
+		ldo4_reg: regulator@6 {
+			reg = <6>;
+			regulator-compatible = "ldo4";
+		};
+	};
+};

diff --git a/arch/arm/include/asm/arch_gicv3.h b/arch/arm/include/asm/arch_gicv3.h
index 7da5503..e08d151 100644
--- a/arch/arm/include/asm/arch_gicv3.h
+++ b/arch/arm/include/asm/arch_gicv3.h

@@ -117,6 +117,7 @@
 	u32 irqstat;
 
 	asm volatile("mrc " __stringify(ICC_IAR1) : "=r" (irqstat));
+	dsb(sy);
 	return irqstat;
 }
 

diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index d5525bf..9156fc3 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h

@@ -491,7 +491,6 @@
 #endif
 
 #ifdef CONFIG_DEBUG_RODATA
-void mark_rodata_ro(void);
 void set_kernel_text_rw(void);
 void set_kernel_text_ro(void);
 #else

diff --git a/arch/arm/include/asm/xen/page-coherent.h b/arch/arm/include/asm/xen/page-coherent.h
index 0375c8c..9408a99 100644
--- a/arch/arm/include/asm/xen/page-coherent.h
+++ b/arch/arm/include/asm/xen/page-coherent.h

@@ -35,14 +35,21 @@
 	     dma_addr_t dev_addr, unsigned long offset, size_t size,
 	     enum dma_data_direction dir, struct dma_attrs *attrs)
 {
-	bool local = XEN_PFN_DOWN(dev_addr) == page_to_xen_pfn(page);
+	unsigned long page_pfn = page_to_xen_pfn(page);
+	unsigned long dev_pfn = XEN_PFN_DOWN(dev_addr);
+	unsigned long compound_pages =
+		(1<<compound_order(page)) * XEN_PFN_PER_PAGE;
+	bool local = (page_pfn <= dev_pfn) &&
+		(dev_pfn - page_pfn < compound_pages);
+
 	/*
-	 * Dom0 is mapped 1:1, while the Linux page can be spanned accross
-	 * multiple Xen page, it's not possible to have a mix of local and
-	 * foreign Xen page. So if the first xen_pfn == mfn the page is local
-	 * otherwise it's a foreign page grant-mapped in dom0. If the page is
-	 * local we can safely call the native dma_ops function, otherwise we
-	 * call the xen specific function.
+	 * Dom0 is mapped 1:1, while the Linux page can span across
+	 * multiple Xen pages, it's not possible for it to contain a
+	 * mix of local and foreign Xen pages. So if the first xen_pfn
+	 * == mfn the page is local otherwise it's a foreign page
+	 * grant-mapped in dom0. If the page is local we can safely
+	 * call the native dma_ops function, otherwise we call the xen
+	 * specific function.
 	 */
 	if (local)
 		__generic_dma_ops(hwdev)->map_page(hwdev, page, offset, size, dir, attrs);

diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 2c5f160..ad325a8 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile

@@ -88,6 +88,7 @@
 obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
 
 obj-$(CONFIG_ARM_VIRT_EXT)	+= hyp-stub.o
+AFLAGS_hyp-stub.o		:=-Wa,-march=armv7-a
 ifeq ($(CONFIG_ARM_PSCI),y)
 obj-$(CONFIG_SMP)		+= psci_smp.o
 endif

diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 7d0cba6f..139791e 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c

@@ -176,13 +176,13 @@
 		.name = "Kernel code",
 		.start = 0,
 		.end = 0,
-		.flags = IORESOURCE_MEM
+		.flags = IORESOURCE_SYSTEM_RAM
 	},
 	{
 		.name = "Kernel data",
 		.start = 0,
 		.end = 0,
-		.flags = IORESOURCE_MEM
+		.flags = IORESOURCE_SYSTEM_RAM
 	}
 };
 
@@ -851,7 +851,7 @@
 		res->name  = "System RAM";
 		res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region));
 		res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1;
-		res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+		res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
 
 		request_resource(&iomem_resource, res);
 

diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 37312f6..baee702 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c

@@ -409,7 +409,7 @@
 	/*
 	 * OK, it's off to the idle thread for us
 	 */
-	cpu_startup_entry(CPUHP_ONLINE);
+	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 }
 
 void __init smp_cpus_done(unsigned int max_cpus)

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index dda1959..08e49c4 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c

@@ -506,18 +506,18 @@
 	struct kvm_vcpu *vcpu;
 
 	kvm_for_each_vcpu(i, vcpu, kvm) {
-		wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu);
+		struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu);
 
 		vcpu->arch.pause = false;
-		wake_up_interruptible(wq);
+		swake_up(wq);
 	}
 }
 
 static void vcpu_sleep(struct kvm_vcpu *vcpu)
 {
-	wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu);
+	struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu);
 
-	wait_event_interruptible(*wq, ((!vcpu->arch.power_off) &&
+	swait_event_interruptible(*wq, ((!vcpu->arch.power_off) &&
 				       (!vcpu->arch.pause)));
 }
 

diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index 5fa69d7..99361f1 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c

@@ -161,7 +161,7 @@
 	u64 val;
 
 	val = kvm_arm_timer_get_reg(vcpu, reg->id);
-	return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id));
+	return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)) ? -EFAULT : 0;
 }
 
 static unsigned long num_core_regs(void)

diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c
index 7f33b20..0f6600f 100644
--- a/arch/arm/kvm/mmio.c
+++ b/arch/arm/kvm/mmio.c

@@ -206,7 +206,8 @@
 	run->mmio.is_write	= is_write;
 	run->mmio.phys_addr	= fault_ipa;
 	run->mmio.len		= len;
-	memcpy(run->mmio.data, data_buf, len);
+	if (is_write)
+		memcpy(run->mmio.data, data_buf, len);
 
 	if (!ret) {
 		/* We handled the access successfully in the kernel. */

diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
index a9b3b90..c2b1315 100644
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c

@@ -70,7 +70,7 @@
 {
 	struct kvm *kvm = source_vcpu->kvm;
 	struct kvm_vcpu *vcpu = NULL;
-	wait_queue_head_t *wq;
+	struct swait_queue_head *wq;
 	unsigned long cpu_id;
 	unsigned long context_id;
 	phys_addr_t target_pc;
@@ -119,7 +119,7 @@
 	smp_mb();		/* Make sure the above is visible */
 
 	wq = kvm_arch_vcpu_wq(vcpu);
-	wake_up_interruptible(wq);
+	swake_up(wq);
 
 	return PSCI_RET_SUCCESS;
 }

diff --git a/arch/arm/mach-lpc32xx/phy3250.c b/arch/arm/mach-lpc32xx/phy3250.c
index 77d6b1b..ee06fab 100644
--- a/arch/arm/mach-lpc32xx/phy3250.c
+++ b/arch/arm/mach-lpc32xx/phy3250.c

@@ -86,8 +86,8 @@
 {
 	dma_addr_t dma;
 
-	fb->fb.screen_base = dma_alloc_writecombine(&fb->dev->dev,
-		PANEL_SIZE, &dma, GFP_KERNEL);
+	fb->fb.screen_base = dma_alloc_wc(&fb->dev->dev, PANEL_SIZE, &dma,
+					  GFP_KERNEL);
 	if (!fb->fb.screen_base) {
 		printk(KERN_ERR "CLCD: unable to map framebuffer\n");
 		return -ENOMEM;
@@ -116,15 +116,14 @@
 
 static int lpc32xx_clcd_mmap(struct clcd_fb *fb, struct vm_area_struct *vma)
 {
-	return dma_mmap_writecombine(&fb->dev->dev, vma,
-		fb->fb.screen_base, fb->fb.fix.smem_start,
-		fb->fb.fix.smem_len);
+	return dma_mmap_wc(&fb->dev->dev, vma, fb->fb.screen_base,
+			   fb->fb.fix.smem_start, fb->fb.fix.smem_len);
 }
 
 static void lpc32xx_clcd_remove(struct clcd_fb *fb)
 {
-	dma_free_writecombine(&fb->dev->dev, fb->fb.fix.smem_len,
-		fb->fb.screen_base, fb->fb.fix.smem_start);
+	dma_free_wc(&fb->dev->dev, fb->fb.fix.smem_len, fb->fb.screen_base,
+		    fb->fb.fix.smem_start);
 }
 
 /*

diff --git a/arch/arm/mach-mvebu/Kconfig b/arch/arm/mach-mvebu/Kconfig
index 64e3d2c..b003e3a 100644
--- a/arch/arm/mach-mvebu/Kconfig
+++ b/arch/arm/mach-mvebu/Kconfig

@@ -3,7 +3,6 @@
 	depends on ARCH_MULTI_V7 || ARCH_MULTI_V5
 	select ARCH_SUPPORTS_BIG_ENDIAN
 	select CLKSRC_MMIO
-	select GENERIC_IRQ_CHIP
 	select PINCTRL
 	select PLAT_ORION
 	select SOC_BUS
@@ -29,6 +28,7 @@
 	bool "Marvell Armada 370 boards"
 	depends on ARCH_MULTI_V7
 	select ARMADA_370_CLK
+	select ARMADA_370_XP_IRQ
 	select CPU_PJ4B
 	select MACH_MVEBU_V7
 	select PINCTRL_ARMADA_370
@@ -39,6 +39,7 @@
 config MACH_ARMADA_375
 	bool "Marvell Armada 375 boards"
 	depends on ARCH_MULTI_V7
+	select ARMADA_370_XP_IRQ
 	select ARM_ERRATA_720789
 	select ARM_ERRATA_753970
 	select ARM_GIC
@@ -58,6 +59,7 @@
 	select ARM_ERRATA_720789
 	select ARM_ERRATA_753970
 	select ARM_GIC
+	select ARMADA_370_XP_IRQ
 	select ARMADA_38X_CLK
 	select HAVE_ARM_SCU
 	select HAVE_ARM_TWD if SMP
@@ -72,6 +74,7 @@
 	bool "Marvell Armada 39x boards"
 	depends on ARCH_MULTI_V7
 	select ARM_GIC
+	select ARMADA_370_XP_IRQ
 	select ARMADA_39X_CLK
 	select CACHE_L2X0
 	select HAVE_ARM_SCU
@@ -86,6 +89,7 @@
 config MACH_ARMADA_XP
 	bool "Marvell Armada XP boards"
 	depends on ARCH_MULTI_V7
+	select ARMADA_370_XP_IRQ
 	select ARMADA_XP_CLK
 	select CPU_PJ4B
 	select MACH_MVEBU_V7

diff --git a/arch/arm/mach-netx/fb.c b/arch/arm/mach-netx/fb.c
index d122ee6..8814ee5 100644
--- a/arch/arm/mach-netx/fb.c
+++ b/arch/arm/mach-netx/fb.c

@@ -42,8 +42,8 @@
 
 	fb->panel = netx_panel;
 
-	fb->fb.screen_base = dma_alloc_writecombine(&fb->dev->dev, 1024*1024,
-						    &dma, GFP_KERNEL);
+	fb->fb.screen_base = dma_alloc_wc(&fb->dev->dev, 1024 * 1024, &dma,
+					  GFP_KERNEL);
 	if (!fb->fb.screen_base) {
 		printk(KERN_ERR "CLCD: unable to map framebuffer\n");
 		return -ENOMEM;
@@ -57,16 +57,14 @@
 
 int netx_clcd_mmap(struct clcd_fb *fb, struct vm_area_struct *vma)
 {
-	return dma_mmap_writecombine(&fb->dev->dev, vma,
-				     fb->fb.screen_base,
-				     fb->fb.fix.smem_start,
-				     fb->fb.fix.smem_len);
+	return dma_mmap_wc(&fb->dev->dev, vma, fb->fb.screen_base,
+			   fb->fb.fix.smem_start, fb->fb.fix.smem_len);
 }
 
 void netx_clcd_remove(struct clcd_fb *fb)
 {
-	dma_free_writecombine(&fb->dev->dev, fb->fb.fix.smem_len,
-			      fb->fb.screen_base, fb->fb.fix.smem_start);
+	dma_free_wc(&fb->dev->dev, fb->fb.fix.smem_len, fb->fb.screen_base,
+		    fb->fb.fix.smem_start);
 }
 
 static AMBA_AHB_DEVICE(fb, "fb", 0, 0x00104000, { NETX_IRQ_LCD }, NULL);

diff --git a/arch/arm/mach-nspire/clcd.c b/arch/arm/mach-nspire/clcd.c
index abea126..ea0e5b2 100644
--- a/arch/arm/mach-nspire/clcd.c
+++ b/arch/arm/mach-nspire/clcd.c

@@ -90,8 +90,8 @@
 	panel_size = ((panel->mode.xres * panel->mode.yres) * panel->bpp) / 8;
 	panel_size = ALIGN(panel_size, PAGE_SIZE);
 
-	fb->fb.screen_base = dma_alloc_writecombine(&fb->dev->dev,
-		panel_size, &dma, GFP_KERNEL);
+	fb->fb.screen_base = dma_alloc_wc(&fb->dev->dev, panel_size, &dma,
+					  GFP_KERNEL);
 
 	if (!fb->fb.screen_base) {
 		pr_err("CLCD: unable to map framebuffer\n");
@@ -107,13 +107,12 @@
 
 int nspire_clcd_mmap(struct clcd_fb *fb, struct vm_area_struct *vma)
 {
-	return dma_mmap_writecombine(&fb->dev->dev, vma,
-		fb->fb.screen_base, fb->fb.fix.smem_start,
-		fb->fb.fix.smem_len);
+	return dma_mmap_wc(&fb->dev->dev, vma, fb->fb.screen_base,
+			   fb->fb.fix.smem_start, fb->fb.fix.smem_len);
 }
 
 void nspire_clcd_remove(struct clcd_fb *fb)
 {
-	dma_free_writecombine(&fb->dev->dev, fb->fb.fix.smem_len,
-		fb->fb.screen_base, fb->fb.fix.smem_start);
+	dma_free_wc(&fb->dev->dev, fb->fb.fix.smem_len, fb->fb.screen_base,
+		    fb->fb.fix.smem_start);
 }

diff --git a/arch/arm/mach-omap2/board-generic.c b/arch/arm/mach-omap2/board-generic.c
index 8098272..bab814d 100644
--- a/arch/arm/mach-omap2/board-generic.c
+++ b/arch/arm/mach-omap2/board-generic.c

@@ -18,6 +18,7 @@
 
 #include <asm/setup.h>
 #include <asm/mach/arch.h>
+#include <asm/system_info.h>
 
 #include "common.h"
 
@@ -77,12 +78,31 @@
 	NULL,
 };
 
+/* Set system_rev from atags */
+static void __init rx51_set_system_rev(const struct tag *tags)
+{
+	const struct tag *tag;
+
+	if (tags->hdr.tag != ATAG_CORE)
+		return;
+
+	for_each_tag(tag, tags) {
+		if (tag->hdr.tag == ATAG_REVISION) {
+			system_rev = tag->u.revision.rev;
+			break;
+		}
+	}
+}
+
 /* Legacy userspace on Nokia N900 needs ATAGS exported in /proc/atags,
  * save them while the data is still not overwritten
  */
 static void __init rx51_reserve(void)
 {
-	save_atags((const struct tag *)(PAGE_OFFSET + 0x100));
+	const struct tag *tags = (const struct tag *)(PAGE_OFFSET + 0x100);
+
+	save_atags(tags);
+	rx51_set_system_rev(tags);
 	omap_reserve();
 }
 

diff --git a/arch/arm/mach-omap2/gpmc-onenand.c b/arch/arm/mach-omap2/gpmc-onenand.c
index 7b76ce0..8633c70 100644
--- a/arch/arm/mach-omap2/gpmc-onenand.c
+++ b/arch/arm/mach-omap2/gpmc-onenand.c

@@ -101,10 +101,8 @@
 
 static void set_onenand_cfg(void __iomem *onenand_base)
 {
-	u32 reg;
+	u32 reg = ONENAND_SYS_CFG1_RDY | ONENAND_SYS_CFG1_INT;
 
-	reg = readw(onenand_base + ONENAND_REG_SYS_CFG1);
-	reg &= ~((0x7 << ONENAND_SYS_CFG1_BRL_SHIFT) | (0x7 << 9));
 	reg |=	(latency << ONENAND_SYS_CFG1_BRL_SHIFT) |
 		ONENAND_SYS_CFG1_BL_16;
 	if (onenand_flags & ONENAND_FLAG_SYNCREAD)
@@ -123,6 +121,7 @@
 		reg |= ONENAND_SYS_CFG1_VHF;
 	else
 		reg &= ~ONENAND_SYS_CFG1_VHF;
+
 	writew(reg, onenand_base + ONENAND_REG_SYS_CFG1);
 }
 
@@ -289,6 +288,7 @@
 		}
 	}
 
+	onenand_async.sync_write = true;
 	omap2_onenand_calc_async_timings(&t);
 
 	ret = gpmc_cs_program_settings(gpmc_onenand_data->cs, &onenand_async);

diff --git a/arch/arm/mach-omap2/omap_device.c b/arch/arm/mach-omap2/omap_device.c
index 0437537..f7ff3b9 100644
--- a/arch/arm/mach-omap2/omap_device.c
+++ b/arch/arm/mach-omap2/omap_device.c

@@ -191,12 +191,22 @@
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	struct omap_device *od;
+	int err;
 
 	switch (event) {
 	case BUS_NOTIFY_DEL_DEVICE:
 		if (pdev->archdata.od)
 			omap_device_delete(pdev->archdata.od);
 		break;
+	case BUS_NOTIFY_UNBOUND_DRIVER:
+		od = to_omap_device(pdev);
+		if (od && (od->_state == OMAP_DEVICE_STATE_ENABLED)) {
+			dev_info(dev, "enabled after unload, idling\n");
+			err = omap_device_idle(pdev);
+			if (err)
+				dev_err(dev, "failed to idle\n");
+		}
+		break;
 	case BUS_NOTIFY_ADD_DEVICE:
 		if (pdev->dev.of_node)
 			omap_device_build_from_dt(pdev);
@@ -602,8 +612,10 @@
 	int ret;
 
 	ret = omap_device_enable(pdev);
-	if (ret)
+	if (ret) {
+		dev_err(dev, "use pm_runtime_put_sync_suspend() in driver?\n");
 		return ret;
+	}
 
 	return pm_generic_runtime_resume(dev);
 }

diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c
index e9f65fe..b6d62e4 100644
--- a/arch/arm/mach-omap2/omap_hwmod.c
+++ b/arch/arm/mach-omap2/omap_hwmod.c

@@ -2200,6 +2200,11 @@
  */
 static int _idle(struct omap_hwmod *oh)
 {
+	if (oh->flags & HWMOD_NO_IDLE) {
+		oh->_int_flags |= _HWMOD_SKIP_ENABLE;
+		return 0;
+	}
+
 	pr_debug("omap_hwmod: %s: idling\n", oh->name);
 
 	if (oh->_state != _HWMOD_STATE_ENABLED) {
@@ -2504,6 +2509,8 @@
 			oh->flags |= HWMOD_INIT_NO_RESET;
 		if (of_find_property(np, "ti,no-idle-on-init", NULL))
 			oh->flags |= HWMOD_INIT_NO_IDLE;
+		if (of_find_property(np, "ti,no-idle", NULL))
+			oh->flags |= HWMOD_NO_IDLE;
 	}
 
 	oh->_state = _HWMOD_STATE_INITIALIZED;
@@ -2630,7 +2637,7 @@
 	 * XXX HWMOD_INIT_NO_IDLE does not belong in hwmod data -
 	 * it should be set by the core code as a runtime flag during startup
 	 */
-	if ((oh->flags & HWMOD_INIT_NO_IDLE) &&
+	if ((oh->flags & (HWMOD_INIT_NO_IDLE | HWMOD_NO_IDLE)) &&
 	    (postsetup_state == _HWMOD_STATE_IDLE)) {
 		oh->_int_flags |= _HWMOD_SKIP_ENABLE;
 		postsetup_state = _HWMOD_STATE_ENABLED;

diff --git a/arch/arm/mach-omap2/omap_hwmod.h b/arch/arm/mach-omap2/omap_hwmod.h
index 76bce11..7c7a311 100644
--- a/arch/arm/mach-omap2/omap_hwmod.h
+++ b/arch/arm/mach-omap2/omap_hwmod.h

@@ -525,6 +525,8 @@
  *     or idled.
  * HWMOD_OPT_CLKS_NEEDED: The optional clocks are needed for the module to
  *     operate and they need to be handled at the same time as the main_clk.
+ * HWMOD_NO_IDLE: Do not idle the hwmod at all. Useful to handle certain
+ *     IPs like CPSW on DRA7, where clocks to this module cannot be disabled.
  */
 #define HWMOD_SWSUP_SIDLE			(1 << 0)
 #define HWMOD_SWSUP_MSTANDBY			(1 << 1)
@@ -541,6 +543,7 @@
 #define HWMOD_SWSUP_SIDLE_ACT			(1 << 12)
 #define HWMOD_RECONFIG_IO_CHAIN			(1 << 13)
 #define HWMOD_OPT_CLKS_NEEDED			(1 << 14)
+#define HWMOD_NO_IDLE				(1 << 15)
 
 /*
  * omap_hwmod._int_flags definitions

diff --git a/arch/arm/mach-shmobile/common.h b/arch/arm/mach-shmobile/common.h
index 9cb1121..b3a4ed5 100644
--- a/arch/arm/mach-shmobile/common.h
+++ b/arch/arm/mach-shmobile/common.h

@@ -4,7 +4,6 @@
 extern void shmobile_init_delay(void);
 extern void shmobile_boot_vector(void);
 extern unsigned long shmobile_boot_fn;
-extern unsigned long shmobile_boot_arg;
 extern unsigned long shmobile_boot_size;
 extern void shmobile_smp_boot(void);
 extern void shmobile_smp_sleep(void);

diff --git a/arch/arm/mach-shmobile/headsmp-scu.S b/arch/arm/mach-shmobile/headsmp-scu.S
index fa5248c..5e503d9 100644
--- a/arch/arm/mach-shmobile/headsmp-scu.S
+++ b/arch/arm/mach-shmobile/headsmp-scu.S

@@ -38,9 +38,3 @@
 
 	b	secondary_startup
 ENDPROC(shmobile_boot_scu)
-
-	.text
-	.align	2
-	.globl	shmobile_scu_base
-shmobile_scu_base:
-	.space	4

diff --git a/arch/arm/mach-shmobile/headsmp.S b/arch/arm/mach-shmobile/headsmp.S
index 330c1fc..32e0bf6 100644
--- a/arch/arm/mach-shmobile/headsmp.S
+++ b/arch/arm/mach-shmobile/headsmp.S

@@ -24,7 +24,6 @@
 	.arm
 	.align  12
 ENTRY(shmobile_boot_vector)
-	ldr     r0, 2f
 	ldr     r1, 1f
 	bx	r1
 
@@ -34,9 +33,6 @@
 	.globl	shmobile_boot_fn
 shmobile_boot_fn:
 1:	.space	4
-	.globl	shmobile_boot_arg
-shmobile_boot_arg:
-2:	.space	4
 	.globl	shmobile_boot_size
 shmobile_boot_size:
 	.long	. - shmobile_boot_vector
@@ -46,13 +42,15 @@
  */
 
 ENTRY(shmobile_smp_boot)
-						@ r0 = MPIDR_HWID_BITMASK
 	mrc	p15, 0, r1, c0, c0, 5		@ r1 = MPIDR
-	and	r0, r1, r0			@ r0 = cpu_logical_map() value
+	and	r0, r1, #0xffffff		@ MPIDR_HWID_BITMASK
+						@ r0 = cpu_logical_map() value
 	mov	r1, #0				@ r1 = CPU index
-	adr	r5, 1f				@ array of per-cpu mpidr values
-	adr	r6, 2f				@ array of per-cpu functions
-	adr	r7, 3f				@ array of per-cpu arguments
+	adr	r2, 1f
+	ldmia	r2, {r5, r6, r7}
+	add	r5, r5, r2			@ array of per-cpu mpidr values
+	add	r6, r6, r2			@ array of per-cpu functions
+	add	r7, r7, r2			@ array of per-cpu arguments
 
 shmobile_smp_boot_find_mpidr:
 	ldr	r8, [r5, r1, lsl #2]
@@ -80,12 +78,18 @@
 	b	shmobile_smp_boot
 ENDPROC(shmobile_smp_sleep)
 
+	.align	2
+1:	.long	shmobile_smp_mpidr - .
+	.long	shmobile_smp_fn - 1b
+	.long	shmobile_smp_arg - 1b
+
+	.bss
 	.globl	shmobile_smp_mpidr
 shmobile_smp_mpidr:
-1:	.space	NR_CPUS * 4
+	.space	NR_CPUS * 4
 	.globl	shmobile_smp_fn
 shmobile_smp_fn:
-2:	.space	NR_CPUS * 4
+	.space	NR_CPUS * 4
 	.globl	shmobile_smp_arg
 shmobile_smp_arg:
-3:	.space	NR_CPUS * 4
+	.space	NR_CPUS * 4

diff --git a/arch/arm/mach-shmobile/platsmp-apmu.c b/arch/arm/mach-shmobile/platsmp-apmu.c
index 911884f..aba75c8 100644
--- a/arch/arm/mach-shmobile/platsmp-apmu.c
+++ b/arch/arm/mach-shmobile/platsmp-apmu.c

@@ -123,7 +123,6 @@
 {
 	/* install boot code shared by all CPUs */
 	shmobile_boot_fn = virt_to_phys(shmobile_smp_boot);
-	shmobile_boot_arg = MPIDR_HWID_BITMASK;
 
 	/* perform per-cpu setup */
 	apmu_parse_cfg(apmu_init_cpu, apmu_config, num);

diff --git a/arch/arm/mach-shmobile/platsmp-scu.c b/arch/arm/mach-shmobile/platsmp-scu.c
index 6466311..081a097 100644
--- a/arch/arm/mach-shmobile/platsmp-scu.c
+++ b/arch/arm/mach-shmobile/platsmp-scu.c

@@ -17,6 +17,9 @@
 #include <asm/smp_scu.h>
 #include "common.h"
 
+
+void __iomem *shmobile_scu_base;
+
 static int shmobile_smp_scu_notifier_call(struct notifier_block *nfb,
 					  unsigned long action, void *hcpu)
 {
@@ -41,7 +44,6 @@
 {
 	/* install boot code shared by all CPUs */
 	shmobile_boot_fn = virt_to_phys(shmobile_smp_boot);
-	shmobile_boot_arg = MPIDR_HWID_BITMASK;
 
 	/* enable SCU and cache coherency on booting CPU */
 	scu_enable(shmobile_scu_base);

diff --git a/arch/arm/mach-shmobile/smp-r8a7779.c b/arch/arm/mach-shmobile/smp-r8a7779.c
index b854fe2..0b024a9 100644
--- a/arch/arm/mach-shmobile/smp-r8a7779.c
+++ b/arch/arm/mach-shmobile/smp-r8a7779.c

@@ -92,8 +92,6 @@
 {
 	/* Map the reset vector (in headsmp-scu.S, headsmp.S) */
 	__raw_writel(__pa(shmobile_boot_vector), AVECR);
-	shmobile_boot_fn = virt_to_phys(shmobile_boot_scu);
-	shmobile_boot_arg = (unsigned long)shmobile_scu_base;
 
 	/* setup r8a7779 specific SCU bits */
 	shmobile_scu_base = IOMEM(R8A7779_SCU_BASE);

diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c
index 4b4058d..66353ca 100644
--- a/arch/arm/mm/mmap.c
+++ b/arch/arm/mm/mmap.c

@@ -173,7 +173,7 @@
 {
 	unsigned long rnd;
 
-	rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1);
+	rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
 
 	return rnd << PAGE_SHIFT;
 }

diff --git a/arch/arm/mm/pageattr.c b/arch/arm/mm/pageattr.c
index cf30daf..d19b1ad 100644
--- a/arch/arm/mm/pageattr.c
+++ b/arch/arm/mm/pageattr.c

@@ -49,6 +49,9 @@
 		WARN_ON_ONCE(1);
 	}
 
+	if (!numpages)
+		return 0;
+
 	if (start < MODULES_VADDR || start >= MODULES_END)
 		return -EINVAL;
 

diff --git a/arch/arm/plat-samsung/pm-check.c b/arch/arm/plat-samsung/pm-check.c
index 04aff2c..70f2f69 100644
--- a/arch/arm/plat-samsung/pm-check.c
+++ b/arch/arm/plat-samsung/pm-check.c

@@ -53,8 +53,8 @@
 		if (ptr->child != NULL)
 			s3c_pm_run_res(ptr->child, fn, arg);
 
-		if ((ptr->flags & IORESOURCE_MEM) &&
-		    strcmp(ptr->name, "System RAM") == 0) {
+		if ((ptr->flags & IORESOURCE_SYSTEM_RAM)
+				== IORESOURCE_SYSTEM_RAM) {
 			S3C_PMDBG("Found system RAM at %08lx..%08lx\n",
 				  (unsigned long)ptr->start,
 				  (unsigned long)ptr->end);

diff --git a/arch/arm/vdso/vdso.S b/arch/arm/vdso/vdso.S
index b2b97e3..a62a7b6 100644
--- a/arch/arm/vdso/vdso.S
+++ b/arch/arm/vdso/vdso.S

@@ -23,9 +23,8 @@
 #include <linux/const.h>
 #include <asm/page.h>
 
-	__PAGE_ALIGNED_DATA
-
 	.globl vdso_start, vdso_end
+	.section .data..ro_after_init
 	.balign PAGE_SIZE
 vdso_start:
 	.incbin "arch/arm/vdso/vdso.so"

diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 7fc294c..22dda61 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h

@@ -156,8 +156,4 @@
 int set_memory_x(unsigned long addr, int numpages);
 int set_memory_nx(unsigned long addr, int numpages);
 
-#ifdef CONFIG_DEBUG_RODATA
-void mark_rodata_ro(void);
-#endif
-
 #endif

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index bf464de..819aff5 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h

@@ -34,7 +34,7 @@
 /*
  * VMALLOC and SPARSEMEM_VMEMMAP ranges.
  *
- * VMEMAP_SIZE: allows the whole VA space to be covered by a struct page array
+ * VMEMAP_SIZE: allows the whole linear region to be covered by a struct page array
  *	(rounded up to PUD_SIZE).
  * VMALLOC_START: beginning of the kernel VA space
  * VMALLOC_END: extends to the available space below vmmemmap, PCI I/O space,
@@ -51,7 +51,9 @@
 
 #define VMALLOC_END		(PAGE_OFFSET - PUD_SIZE - VMEMMAP_SIZE - SZ_64K)
 
-#define vmemmap			((struct page *)(VMALLOC_END + SZ_64K))
+#define VMEMMAP_START		(VMALLOC_END + SZ_64K)
+#define vmemmap			((struct page *)VMEMMAP_START - \
+				 SECTION_ALIGN_DOWN(memstart_addr >> PAGE_SHIFT))
 
 #define FIRST_USER_ADDRESS	0UL
 

diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 8119479..450987d 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c

@@ -73,13 +73,13 @@
 		.name = "Kernel code",
 		.start = 0,
 		.end = 0,
-		.flags = IORESOURCE_MEM
+		.flags = IORESOURCE_SYSTEM_RAM
 	},
 	{
 		.name = "Kernel data",
 		.start = 0,
 		.end = 0,
-		.flags = IORESOURCE_MEM
+		.flags = IORESOURCE_SYSTEM_RAM
 	}
 };
 
@@ -210,7 +210,7 @@
 		res->name  = "System RAM";
 		res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region));
 		res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1;
-		res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+		res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
 
 		request_resource(&iomem_resource, res);
 

diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
index e33fe33..fd10eb6 100644
--- a/arch/arm64/kernel/sleep.S
+++ b/arch/arm64/kernel/sleep.S

@@ -145,6 +145,10 @@
 ENDPROC(cpu_resume_mmu)
 	.popsection
 cpu_resume_after_mmu:
+#ifdef CONFIG_KASAN
+	mov	x0, sp
+	bl	kasan_unpoison_remaining_stack
+#endif
 	mov	x0, #0			// return zero on success
 	ldp	x19, x20, [sp, #16]
 	ldp	x21, x22, [sp, #32]

diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index b1adc51..4607657 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c

@@ -195,7 +195,7 @@
 	/*
 	 * OK, it's off to the idle thread for us
 	 */
-	cpu_startup_entry(CPUHP_ONLINE);
+	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 }
 
 #ifdef CONFIG_HOTPLUG_CPU

diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index fcb7788..9e54ad7 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c

@@ -194,7 +194,7 @@
 	u64 val;
 
 	val = kvm_arm_timer_get_reg(vcpu, reg->id);
-	return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id));
+	return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)) ? -EFAULT : 0;
 }
 
 /**

diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c
index 9142e08..5dd2a26 100644
--- a/arch/arm64/kvm/hyp/vgic-v3-sr.c
+++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c

@@ -149,16 +149,6 @@
 
 	switch (nr_pri_bits) {
 	case 7:
-		 write_gicreg(cpu_if->vgic_ap1r[3], ICH_AP1R3_EL2);
-		 write_gicreg(cpu_if->vgic_ap1r[2], ICH_AP1R2_EL2);
-	case 6:
-		 write_gicreg(cpu_if->vgic_ap1r[1], ICH_AP1R1_EL2);
-	default:
-		 write_gicreg(cpu_if->vgic_ap1r[0], ICH_AP1R0_EL2);
-	}	 	                           
-		 	                           
-	switch (nr_pri_bits) {
-	case 7:
 		 write_gicreg(cpu_if->vgic_ap0r[3], ICH_AP0R3_EL2);
 		 write_gicreg(cpu_if->vgic_ap0r[2], ICH_AP0R2_EL2);
 	case 6:
@@ -167,6 +157,16 @@
 		 write_gicreg(cpu_if->vgic_ap0r[0], ICH_AP0R0_EL2);
 	}
 
+	switch (nr_pri_bits) {
+	case 7:
+		 write_gicreg(cpu_if->vgic_ap1r[3], ICH_AP1R3_EL2);
+		 write_gicreg(cpu_if->vgic_ap1r[2], ICH_AP1R2_EL2);
+	case 6:
+		 write_gicreg(cpu_if->vgic_ap1r[1], ICH_AP1R1_EL2);
+	default:
+		 write_gicreg(cpu_if->vgic_ap1r[0], ICH_AP1R0_EL2);
+	}
+
 	switch (max_lr_idx) {
 	case 15:
 		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(15)], ICH_LR15_EL2);

diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index 82d607c..da30529 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c

@@ -306,10 +306,6 @@
 		hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
 	} else if (ps == PUD_SIZE) {
 		hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
-	} else if (ps == (PAGE_SIZE * CONT_PTES)) {
-		hugetlb_add_hstate(CONT_PTE_SHIFT);
-	} else if (ps == (PMD_SIZE * CONT_PMDS)) {
-		hugetlb_add_hstate((PMD_SHIFT + CONT_PMD_SHIFT) - PAGE_SHIFT);
 	} else {
 		pr_err("hugepagesz: Unsupported page size %lu K\n", ps >> 10);
 		return 0;
@@ -317,13 +313,3 @@
 	return 1;
 }
 __setup("hugepagesz=", setup_hugepagesz);
-
-#ifdef CONFIG_ARM64_64K_PAGES
-static __init int add_default_hugepagesz(void)
-{
-	if (size_to_hstate(CONT_PTES * PAGE_SIZE) == NULL)
-		hugetlb_add_hstate(CONT_PMD_SHIFT);
-	return 0;
-}
-arch_initcall(add_default_hugepagesz);
-#endif

diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index f3b061e..7802f21 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c

@@ -319,8 +319,8 @@
 #endif
 		  MLG(VMALLOC_START, VMALLOC_END),
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
-		  MLG((unsigned long)vmemmap,
-		      (unsigned long)vmemmap + VMEMMAP_SIZE),
+		  MLG(VMEMMAP_START,
+		      VMEMMAP_START + VMEMMAP_SIZE),
 		  MLM((unsigned long)virt_to_page(PAGE_OFFSET),
 		      (unsigned long)virt_to_page(high_memory)),
 #endif

diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c
index 4c893b5..232f787 100644
--- a/arch/arm64/mm/mmap.c
+++ b/arch/arm64/mm/mmap.c

@@ -53,10 +53,10 @@
 
 #ifdef CONFIG_COMPAT
 	if (test_thread_flag(TIF_32BIT))
-		rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_compat_bits) - 1);
+		rnd = get_random_long() & ((1UL << mmap_rnd_compat_bits) - 1);
 	else
 #endif
-		rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1);
+		rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
 	return rnd << PAGE_SHIFT;
 }
 

diff --git a/arch/avr32/kernel/setup.c b/arch/avr32/kernel/setup.c
index 209ae5a..e692889 100644
--- a/arch/avr32/kernel/setup.c
+++ b/arch/avr32/kernel/setup.c

@@ -49,13 +49,13 @@
 	.name	= "Kernel data",
 	.start	= 0,
 	.end	= 0,
-	.flags	= IORESOURCE_MEM,
+	.flags	= IORESOURCE_SYSTEM_RAM,
 };
 static struct resource __initdata kernel_code = {
 	.name	= "Kernel code",
 	.start	= 0,
 	.end	= 0,
-	.flags	= IORESOURCE_MEM,
+	.flags	= IORESOURCE_SYSTEM_RAM,
 	.sibling = &kernel_data,
 };
 
@@ -134,7 +134,7 @@
 	new->start = start;
 	new->end = end;
 	new->name = "System RAM";
-	new->flags = IORESOURCE_MEM;
+	new->flags = IORESOURCE_SYSTEM_RAM;
 
 	*pprev = new;
 }

diff --git a/arch/blackfin/mach-common/smp.c b/arch/blackfin/mach-common/smp.c
index 0030e21..23c4ef5 100644
--- a/arch/blackfin/mach-common/smp.c
+++ b/arch/blackfin/mach-common/smp.c

@@ -333,7 +333,7 @@
 
 	/* We are done with local CPU inits, unblock the boot CPU. */
 	set_cpu_online(cpu, true);
-	cpu_startup_entry(CPUHP_ONLINE);
+	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 }
 
 void __init smp_prepare_boot_cpu(void)

diff --git a/arch/c6x/kernel/setup.c b/arch/c6x/kernel/setup.c
index 72e17f7..786e36e 100644
--- a/arch/c6x/kernel/setup.c
+++ b/arch/c6x/kernel/setup.c

@@ -281,8 +281,6 @@
 	 */
 	set_ist(_vectors_start);
 
-	lockdep_init();
-
 	/*
 	 * dtb is passed in from bootloader.
 	 * fdt is linked in blob.

diff --git a/arch/hexagon/kernel/smp.c b/arch/hexagon/kernel/smp.c
index ff759f2..983bae7d2 100644
--- a/arch/hexagon/kernel/smp.c
+++ b/arch/hexagon/kernel/smp.c

@@ -180,7 +180,7 @@
 
 	local_irq_enable();
 
-	cpu_startup_entry(CPUHP_ONLINE);
+	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 }
 
 

diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index caae3f4..300dac3 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c

@@ -1178,7 +1178,7 @@
 	efi_memory_desc_t *md;
 	u64 efi_desc_size;
 	char *name;
-	unsigned long flags;
+	unsigned long flags, desc;
 
 	efi_map_start = __va(ia64_boot_param->efi_memmap);
 	efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
@@ -1193,6 +1193,8 @@
 			continue;
 
 		flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+		desc = IORES_DESC_NONE;
+
 		switch (md->type) {
 
 			case EFI_MEMORY_MAPPED_IO:
@@ -1207,14 +1209,17 @@
 				if (md->attribute & EFI_MEMORY_WP) {
 					name = "System ROM";
 					flags |= IORESOURCE_READONLY;
-				} else if (md->attribute == EFI_MEMORY_UC)
+				} else if (md->attribute == EFI_MEMORY_UC) {
 					name = "Uncached RAM";
-				else
+				} else {
 					name = "System RAM";
+					flags |= IORESOURCE_SYSRAM;
+				}
 				break;
 
 			case EFI_ACPI_MEMORY_NVS:
 				name = "ACPI Non-volatile Storage";
+				desc = IORES_DESC_ACPI_NV_STORAGE;
 				break;
 
 			case EFI_UNUSABLE_MEMORY:
@@ -1224,6 +1229,7 @@
 
 			case EFI_PERSISTENT_MEMORY:
 				name = "Persistent Memory";
+				desc = IORES_DESC_PERSISTENT_MEMORY;
 				break;
 
 			case EFI_RESERVED_TYPE:
@@ -1246,6 +1252,7 @@
 		res->start = md->phys_addr;
 		res->end = md->phys_addr + efi_md_size(md) - 1;
 		res->flags = flags;
+		res->desc = desc;
 
 		if (insert_resource(&iomem_resource, res) < 0)
 			kfree(res);

diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index 4f118b0..2029a38 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c

@@ -80,17 +80,17 @@
 
 static struct resource data_resource = {
 	.name	= "Kernel data",
-	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
+	.flags	= IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
 };
 
 static struct resource code_resource = {
 	.name	= "Kernel code",
-	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
+	.flags	= IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
 };
 
 static struct resource bss_resource = {
 	.name	= "Kernel bss",
-	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
+	.flags	= IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
 };
 
 unsigned long ia64_max_cacheline_size;

diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
index 0e76fad..74fe317 100644
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c

@@ -454,7 +454,7 @@
 	preempt_disable();
 	smp_callin();
 
-	cpu_startup_entry(CPUHP_ONLINE);
+	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 	return 0;
 }
 

diff --git a/arch/m32r/kernel/setup.c b/arch/m32r/kernel/setup.c
index a5ecef7..136c69f 100644
--- a/arch/m32r/kernel/setup.c
+++ b/arch/m32r/kernel/setup.c

@@ -70,14 +70,14 @@
 	.name   = "Kernel data",
 	.start  = 0,
 	.end    = 0,
-	.flags  = IORESOURCE_BUSY | IORESOURCE_MEM
+	.flags  = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
 };
 
 static struct resource code_resource = {
 	.name   = "Kernel code",
 	.start  = 0,
 	.end    = 0,
-	.flags  = IORESOURCE_BUSY | IORESOURCE_MEM
+	.flags  = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
 };
 
 unsigned long memory_start;

diff --git a/arch/m32r/kernel/smpboot.c b/arch/m32r/kernel/smpboot.c
index a468467..f98d2f6 100644
--- a/arch/m32r/kernel/smpboot.c
+++ b/arch/m32r/kernel/smpboot.c

@@ -432,7 +432,7 @@
 	 */
 	local_flush_tlb_all();
 
-	cpu_startup_entry(CPUHP_ONLINE);
+	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 	return 0;
 }
 

diff --git a/arch/metag/kernel/smp.c b/arch/metag/kernel/smp.c
index c3c6f08..bad1323 100644
--- a/arch/metag/kernel/smp.c
+++ b/arch/metag/kernel/smp.c

@@ -396,7 +396,7 @@
 	/*
 	 * OK, it's off to the idle thread for us
 	 */
-	cpu_startup_entry(CPUHP_ONLINE);
+	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 }
 
 void __init smp_cpus_done(unsigned int max_cpus)

diff --git a/arch/microblaze/kernel/setup.c b/arch/microblaze/kernel/setup.c
index 89a2a939..f31ebb5 100644
--- a/arch/microblaze/kernel/setup.c
+++ b/arch/microblaze/kernel/setup.c

@@ -130,8 +130,6 @@
 	memset(__bss_start, 0, __bss_stop-__bss_start);
 	memset(_ssbss, 0, _esbss-_ssbss);
 
-	lockdep_init();
-
 /* initialize device tree for usage in early_printk */
 	early_init_devtree(_fdt_start);
 

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 74a3db9..a65eacf 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig

@@ -151,6 +151,7 @@
 	select CSRC_R4K
 	select SYNC_R4K
 	select COMMON_CLK
+	select BCM6345_L1_IRQ
 	select BCM7038_L1_IRQ
 	select BCM7120_L2_IRQ
 	select BRCMSTB_L2_IRQ
@@ -2169,7 +2170,6 @@
 	select CPU_MIPSR2_IRQ_VI
 	select CPU_MIPSR2_IRQ_EI
 	select SYNC_R4K
-	select MIPS_GIC_IPI
 	select MIPS_MT
 	select SMP
 	select SMP_UP
@@ -2267,7 +2267,6 @@
 config MIPS_CMP
 	bool "MIPS CMP framework support (DEPRECATED)"
 	depends on SYS_SUPPORTS_MIPS_CMP && !CPU_MIPSR6
-	select MIPS_GIC_IPI
 	select SMP
 	select SYNC_R4K
 	select SYS_SUPPORTS_SMP
@@ -2287,7 +2286,6 @@
 	select MIPS_CM
 	select MIPS_CPC
 	select MIPS_CPS_PM if HOTPLUG_CPU
-	select MIPS_GIC_IPI
 	select SMP
 	select SYNC_R4K if (CEVT_R4K || CSRC_R4K)
 	select SYS_SUPPORTS_HOTPLUG_CPU
@@ -2305,9 +2303,6 @@
 	select MIPS_CPC
 	bool
 
-config MIPS_GIC_IPI
-	bool
-
 config MIPS_CM
 	bool
 

diff --git a/arch/mips/ath79/irq.c b/arch/mips/ath79/irq.c
index 511c065..2dfff1f 100644
--- a/arch/mips/ath79/irq.c
+++ b/arch/mips/ath79/irq.c

@@ -26,90 +26,6 @@
 #include "common.h"
 #include "machtypes.h"
 
-static void __init ath79_misc_intc_domain_init(
-	struct device_node *node, int irq);
-
-static void ath79_misc_irq_handler(struct irq_desc *desc)
-{
-	struct irq_domain *domain = irq_desc_get_handler_data(desc);
-	void __iomem *base = domain->host_data;
-	u32 pending;
-
-	pending = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_STATUS) &
-		  __raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE);
-
-	if (!pending) {
-		spurious_interrupt();
-		return;
-	}
-
-	while (pending) {
-		int bit = __ffs(pending);
-
-		generic_handle_irq(irq_linear_revmap(domain, bit));
-		pending &= ~BIT(bit);
-	}
-}
-
-static void ar71xx_misc_irq_unmask(struct irq_data *d)
-{
-	void __iomem *base = irq_data_get_irq_chip_data(d);
-	unsigned int irq = d->hwirq;
-	u32 t;
-
-	t = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE);
-	__raw_writel(t | (1 << irq), base + AR71XX_RESET_REG_MISC_INT_ENABLE);
-
-	/* flush write */
-	__raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE);
-}
-
-static void ar71xx_misc_irq_mask(struct irq_data *d)
-{
-	void __iomem *base = irq_data_get_irq_chip_data(d);
-	unsigned int irq = d->hwirq;
-	u32 t;
-
-	t = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE);
-	__raw_writel(t & ~(1 << irq), base + AR71XX_RESET_REG_MISC_INT_ENABLE);
-
-	/* flush write */
-	__raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE);
-}
-
-static void ar724x_misc_irq_ack(struct irq_data *d)
-{
-	void __iomem *base = irq_data_get_irq_chip_data(d);
-	unsigned int irq = d->hwirq;
-	u32 t;
-
-	t = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_STATUS);
-	__raw_writel(t & ~(1 << irq), base + AR71XX_RESET_REG_MISC_INT_STATUS);
-
-	/* flush write */
-	__raw_readl(base + AR71XX_RESET_REG_MISC_INT_STATUS);
-}
-
-static struct irq_chip ath79_misc_irq_chip = {
-	.name		= "MISC",
-	.irq_unmask	= ar71xx_misc_irq_unmask,
-	.irq_mask	= ar71xx_misc_irq_mask,
-};
-
-static void __init ath79_misc_irq_init(void)
-{
-	if (soc_is_ar71xx() || soc_is_ar913x())
-		ath79_misc_irq_chip.irq_mask_ack = ar71xx_misc_irq_mask;
-	else if (soc_is_ar724x() ||
-		 soc_is_ar933x() ||
-		 soc_is_ar934x() ||
-		 soc_is_qca955x())
-		ath79_misc_irq_chip.irq_ack = ar724x_misc_irq_ack;
-	else
-		BUG();
-
-	ath79_misc_intc_domain_init(NULL, ATH79_CPU_IRQ(6));
-}
 
 static void ar934x_ip2_irq_dispatch(struct irq_desc *desc)
 {
@@ -212,142 +128,12 @@
 	irq_set_chained_handler(ATH79_CPU_IRQ(3), qca955x_ip3_irq_dispatch);
 }
 
-/*
- * The IP2/IP3 lines are tied to a PCI/WMAC/USB device. Drivers for
- * these devices typically allocate coherent DMA memory, however the
- * DMA controller may still have some unsynchronized data in the FIFO.
- * Issue a flush in the handlers to ensure that the driver sees
- * the update.
- *
- * This array map the interrupt lines to the DDR write buffer channels.
- */
-
-static unsigned irq_wb_chan[8] = {
-	-1, -1, -1, -1, -1, -1, -1, -1,
-};
-
-asmlinkage void plat_irq_dispatch(void)
-{
-	unsigned long pending;
-	int irq;
-
-	pending = read_c0_status() & read_c0_cause() & ST0_IM;
-
-	if (!pending) {
-		spurious_interrupt();
-		return;
-	}
-
-	pending >>= CAUSEB_IP;
-	while (pending) {
-		irq = fls(pending) - 1;
-		if (irq < ARRAY_SIZE(irq_wb_chan) && irq_wb_chan[irq] != -1)
-			ath79_ddr_wb_flush(irq_wb_chan[irq]);
-		do_IRQ(MIPS_CPU_IRQ_BASE + irq);
-		pending &= ~BIT(irq);
-	}
-}
-
-static int misc_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw)
-{
-	irq_set_chip_and_handler(irq, &ath79_misc_irq_chip, handle_level_irq);
-	irq_set_chip_data(irq, d->host_data);
-	return 0;
-}
-
-static const struct irq_domain_ops misc_irq_domain_ops = {
-	.xlate = irq_domain_xlate_onecell,
-	.map = misc_map,
-};
-
-static void __init ath79_misc_intc_domain_init(
-	struct device_node *node, int irq)
-{
-	void __iomem *base = ath79_reset_base;
-	struct irq_domain *domain;
-
-	domain = irq_domain_add_legacy(node, ATH79_MISC_IRQ_COUNT,
-			ATH79_MISC_IRQ_BASE, 0, &misc_irq_domain_ops, base);
-	if (!domain)
-		panic("Failed to add MISC irqdomain");
-
-	/* Disable and clear all interrupts */
-	__raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_ENABLE);
-	__raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_STATUS);
-
-	irq_set_chained_handler_and_data(irq, ath79_misc_irq_handler, domain);
-}
-
-static int __init ath79_misc_intc_of_init(
-	struct device_node *node, struct device_node *parent)
-{
-	int irq;
-
-	irq = irq_of_parse_and_map(node, 0);
-	if (!irq)
-		panic("Failed to get MISC IRQ");
-
-	ath79_misc_intc_domain_init(node, irq);
-	return 0;
-}
-
-static int __init ar7100_misc_intc_of_init(
-	struct device_node *node, struct device_node *parent)
-{
-	ath79_misc_irq_chip.irq_mask_ack = ar71xx_misc_irq_mask;
-	return ath79_misc_intc_of_init(node, parent);
-}
-
-IRQCHIP_DECLARE(ar7100_misc_intc, "qca,ar7100-misc-intc",
-		ar7100_misc_intc_of_init);
-
-static int __init ar7240_misc_intc_of_init(
-	struct device_node *node, struct device_node *parent)
-{
-	ath79_misc_irq_chip.irq_ack = ar724x_misc_irq_ack;
-	return ath79_misc_intc_of_init(node, parent);
-}
-
-IRQCHIP_DECLARE(ar7240_misc_intc, "qca,ar7240-misc-intc",
-		ar7240_misc_intc_of_init);
-
-static int __init ar79_cpu_intc_of_init(
-	struct device_node *node, struct device_node *parent)
-{
-	int err, i, count;
-
-	/* Fill the irq_wb_chan table */
-	count = of_count_phandle_with_args(
-		node, "qca,ddr-wb-channels", "#qca,ddr-wb-channel-cells");
-
-	for (i = 0; i < count; i++) {
-		struct of_phandle_args args;
-		u32 irq = i;
-
-		of_property_read_u32_index(
-			node, "qca,ddr-wb-channel-interrupts", i, &irq);
-		if (irq >= ARRAY_SIZE(irq_wb_chan))
-			continue;
-
-		err = of_parse_phandle_with_args(
-			node, "qca,ddr-wb-channels",
-			"#qca,ddr-wb-channel-cells",
-			i, &args);
-		if (err)
-			return err;
-
-		irq_wb_chan[irq] = args.args[0];
-		pr_info("IRQ: Set flush channel of IRQ%d to %d\n",
-			irq, args.args[0]);
-	}
-
-	return mips_cpu_irq_of_init(node, parent);
-}
-IRQCHIP_DECLARE(ar79_cpu_intc, "qca,ar7100-cpu-intc",
-		ar79_cpu_intc_of_init);
-
 void __init arch_init_irq(void)
 {
+	unsigned irq_wb_chan2 = -1;
+	unsigned irq_wb_chan3 = -1;
+	bool misc_is_ar71xx;
+
 	if (mips_machtype == ATH79_MACH_GENERIC_OF) {
 		irqchip_init();
 		return;
@@ -355,14 +141,26 @@
 
 	if (soc_is_ar71xx() || soc_is_ar724x() ||
 	    soc_is_ar913x() || soc_is_ar933x()) {
-		irq_wb_chan[2] = 3;
-		irq_wb_chan[3] = 2;
+		irq_wb_chan2 = 3;
+		irq_wb_chan3 = 2;
 	} else if (soc_is_ar934x()) {
-		irq_wb_chan[3] = 2;
+		irq_wb_chan3 = 2;
 	}
 
-	mips_cpu_irq_init();
-	ath79_misc_irq_init();
+	ath79_cpu_irq_init(irq_wb_chan2, irq_wb_chan3);
+
+	if (soc_is_ar71xx() || soc_is_ar913x())
+		misc_is_ar71xx = true;
+	else if (soc_is_ar724x() ||
+		 soc_is_ar933x() ||
+		 soc_is_ar934x() ||
+		 soc_is_qca955x())
+		misc_is_ar71xx = false;
+	else
+		BUG();
+	ath79_misc_irq_init(
+		ath79_reset_base + AR71XX_RESET_REG_MISC_INT_STATUS,
+		ATH79_CPU_IRQ(6), ATH79_MISC_IRQ_BASE, misc_is_ar71xx);
 
 	if (soc_is_ar934x())
 		ar934x_ip2_irq_init();

diff --git a/arch/mips/bmips/irq.c b/arch/mips/bmips/irq.c
index e7fc6f934..7efefcf 100644
--- a/arch/mips/bmips/irq.c
+++ b/arch/mips/bmips/irq.c

@@ -15,6 +15,12 @@
 #include <asm/irq_cpu.h>
 #include <asm/time.h>
 
+static const struct of_device_id smp_intc_dt_match[] = {
+	{ .compatible = "brcm,bcm7038-l1-intc" },
+	{ .compatible = "brcm,bcm6345-l1-intc" },
+	{}
+};
+
 unsigned int get_c0_compare_int(void)
 {
 	return CP0_LEGACY_COMPARE_IRQ;
@@ -24,8 +30,8 @@
 {
 	struct device_node *dn;
 
-	/* Only the STB (bcm7038) controller supports SMP IRQ affinity */
-	dn = of_find_compatible_node(NULL, NULL, "brcm,bcm7038-l1-intc");
+	/* Only these controllers support SMP IRQ affinity */
+	dn = of_find_matching_node(NULL, smp_intc_dt_match);
 	if (dn)
 		of_node_put(dn);
 	else

diff --git a/arch/mips/boot/compressed/uart-16550.c b/arch/mips/boot/compressed/uart-16550.c
index 408799a..f752114 100644
--- a/arch/mips/boot/compressed/uart-16550.c
+++ b/arch/mips/boot/compressed/uart-16550.c

@@ -17,7 +17,7 @@
 #define PORT(offset) (CKSEG1ADDR(AR7_REGS_UART0) + (4 * offset))
 #endif
 
-#ifdef CONFIG_MACH_JZ4740
+#if defined(CONFIG_MACH_JZ4740) || defined(CONFIG_MACH_JZ4780)
 #include <asm/mach-jz4740/base.h>
 #define PORT(offset) (CKSEG1ADDR(JZ4740_UART0_BASE_ADDR) + (4 * offset))
 #endif

diff --git a/arch/mips/include/asm/mach-ath79/ath79.h b/arch/mips/include/asm/mach-ath79/ath79.h
index 2b34872..441faa9 100644
--- a/arch/mips/include/asm/mach-ath79/ath79.h
+++ b/arch/mips/include/asm/mach-ath79/ath79.h

@@ -144,4 +144,8 @@
 void ath79_device_reset_set(u32 mask);
 void ath79_device_reset_clear(u32 mask);
 
+void ath79_cpu_irq_init(unsigned irq_wb_chan2, unsigned irq_wb_chan3);
+void ath79_misc_irq_init(void __iomem *regs, int irq,
+			int irq_base, bool is_ar71xx);
+
 #endif /* __ASM_MACH_ATH79_H */

diff --git a/arch/mips/include/asm/smp-ops.h b/arch/mips/include/asm/smp-ops.h
index 6ba1fb8..db7c322 100644
--- a/arch/mips/include/asm/smp-ops.h
+++ b/arch/mips/include/asm/smp-ops.h

@@ -44,8 +44,9 @@
 	mp_ops->smp_setup();
 }
 
-extern void gic_send_ipi_single(int cpu, unsigned int action);
-extern void gic_send_ipi_mask(const struct cpumask *mask, unsigned int action);
+extern void mips_smp_send_ipi_single(int cpu, unsigned int action);
+extern void mips_smp_send_ipi_mask(const struct cpumask *mask,
+				      unsigned int action);
 
 #else /* !CONFIG_SMP */
 

diff --git a/arch/mips/jz4740/gpio.c b/arch/mips/jz4740/gpio.c
index 8c6d76c..d9907e5 100644
--- a/arch/mips/jz4740/gpio.c
+++ b/arch/mips/jz4740/gpio.c

@@ -270,7 +270,7 @@
 }
 EXPORT_SYMBOL(jz_gpio_port_get_value);
 
-#define IRQ_TO_BIT(irq) BIT(irq_to_gpio(irq) & 0x1f)
+#define IRQ_TO_BIT(irq) BIT((irq - JZ4740_IRQ_GPIO(0)) & 0x1f)
 
 static void jz_gpio_check_trigger_both(struct jz_gpio_chip *chip, unsigned int irq)
 {

diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile
index 68e2b7d..b0988fd 100644
--- a/arch/mips/kernel/Makefile
+++ b/arch/mips/kernel/Makefile

@@ -52,7 +52,6 @@
 obj-$(CONFIG_MIPS_CMP)		+= smp-cmp.o
 obj-$(CONFIG_MIPS_CPS)		+= smp-cps.o cps-vec.o
 obj-$(CONFIG_MIPS_CPS_NS16550)	+= cps-vec-ns16550.o
-obj-$(CONFIG_MIPS_GIC_IPI)	+= smp-gic.o
 obj-$(CONFIG_MIPS_SPRAM)	+= spram.o
 
 obj-$(CONFIG_MIPS_VPE_LOADER)	+= vpe.o

diff --git a/arch/mips/kernel/r2300_fpu.S b/arch/mips/kernel/r2300_fpu.S
index 5ce3b74..b4ac637 100644
--- a/arch/mips/kernel/r2300_fpu.S
+++ b/arch/mips/kernel/r2300_fpu.S

@@ -125,7 +125,7 @@
 	END(_restore_fp_context)
 	.set	reorder
 
-	.type	fault@function
+	.type	fault, @function
 	.ent	fault
 fault:	li	v0, -EFAULT
 	jr	ra

diff --git a/arch/mips/kernel/r4k_fpu.S b/arch/mips/kernel/r4k_fpu.S
index f09546e..17732f8 100644
--- a/arch/mips/kernel/r4k_fpu.S
+++ b/arch/mips/kernel/r4k_fpu.S

@@ -358,7 +358,7 @@
 
 	.set	reorder
 
-	.type	fault@function
+	.type	fault, @function
 	.ent	fault
 fault:	li	v0, -EFAULT				# failure
 	jr	ra

diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index 5fdaf8b..4f60734 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c

@@ -732,21 +732,23 @@
 			end = HIGHMEM_START - 1;
 
 		res = alloc_bootmem(sizeof(struct resource));
+
+		res->start = start;
+		res->end = end;
+		res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+
 		switch (boot_mem_map.map[i].type) {
 		case BOOT_MEM_RAM:
 		case BOOT_MEM_INIT_RAM:
 		case BOOT_MEM_ROM_DATA:
 			res->name = "System RAM";
+			res->flags |= IORESOURCE_SYSRAM;
 			break;
 		case BOOT_MEM_RESERVED:
 		default:
 			res->name = "reserved";
 		}
 
-		res->start = start;
-		res->end = end;
-
-		res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
 		request_resource(&iomem_resource, res);
 
 		/*

diff --git a/arch/mips/kernel/smp-cmp.c b/arch/mips/kernel/smp-cmp.c
index d5e0f94..7692334 100644
--- a/arch/mips/kernel/smp-cmp.c
+++ b/arch/mips/kernel/smp-cmp.c

@@ -149,8 +149,8 @@
 }
 
 struct plat_smp_ops cmp_smp_ops = {
-	.send_ipi_single	= gic_send_ipi_single,
-	.send_ipi_mask		= gic_send_ipi_mask,
+	.send_ipi_single	= mips_smp_send_ipi_single,
+	.send_ipi_mask		= mips_smp_send_ipi_mask,
 	.init_secondary		= cmp_init_secondary,
 	.smp_finish		= cmp_smp_finish,
 	.boot_secondary		= cmp_boot_secondary,

diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c
index 2ad4e4c..253e140 100644
--- a/arch/mips/kernel/smp-cps.c
+++ b/arch/mips/kernel/smp-cps.c

@@ -472,8 +472,8 @@
 	.boot_secondary		= cps_boot_secondary,
 	.init_secondary		= cps_init_secondary,
 	.smp_finish		= cps_smp_finish,
-	.send_ipi_single	= gic_send_ipi_single,
-	.send_ipi_mask		= gic_send_ipi_mask,
+	.send_ipi_single	= mips_smp_send_ipi_single,
+	.send_ipi_mask		= mips_smp_send_ipi_mask,
 #ifdef CONFIG_HOTPLUG_CPU
 	.cpu_disable		= cps_cpu_disable,
 	.cpu_die		= cps_cpu_die,

diff --git a/arch/mips/kernel/smp-mt.c b/arch/mips/kernel/smp-mt.c
index 86311a1..4f9570a 100644
--- a/arch/mips/kernel/smp-mt.c
+++ b/arch/mips/kernel/smp-mt.c

@@ -121,7 +121,7 @@
 
 #ifdef CONFIG_MIPS_GIC
 	if (gic_present) {
-		gic_send_ipi_single(cpu, action);
+		mips_smp_send_ipi_single(cpu, action);
 		return;
 	}
 #endif

diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index bd4385a..37708d9 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c

@@ -33,12 +33,16 @@
 #include <linux/cpu.h>
 #include <linux/err.h>
 #include <linux/ftrace.h>
+#include <linux/irqdomain.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
 
 #include <linux/atomic.h>
 #include <asm/cpu.h>
 #include <asm/processor.h>
 #include <asm/idle.h>
 #include <asm/r4k-timer.h>
+#include <asm/mips-cpc.h>
 #include <asm/mmu_context.h>
 #include <asm/time.h>
 #include <asm/setup.h>
@@ -79,6 +83,11 @@
 
 cpumask_t cpu_coherent_mask;
 
+#ifdef CONFIG_GENERIC_IRQ_IPI
+static struct irq_desc *call_desc;
+static struct irq_desc *sched_desc;
+#endif
+
 static inline void set_cpu_sibling_map(int cpu)
 {
 	int i;
@@ -121,6 +130,7 @@
 	cpumask_t temp_foreign_map;
 
 	/* Re-calculate the mask */
+	cpumask_clear(&temp_foreign_map);
 	for_each_online_cpu(i) {
 		core_present = 0;
 		for_each_cpu(k, &temp_foreign_map)
@@ -145,6 +155,133 @@
 	mp_ops = ops;
 }
 
+#ifdef CONFIG_GENERIC_IRQ_IPI
+void mips_smp_send_ipi_single(int cpu, unsigned int action)
+{
+	mips_smp_send_ipi_mask(cpumask_of(cpu), action);
+}
+
+void mips_smp_send_ipi_mask(const struct cpumask *mask, unsigned int action)
+{
+	unsigned long flags;
+	unsigned int core;
+	int cpu;
+
+	local_irq_save(flags);
+
+	switch (action) {
+	case SMP_CALL_FUNCTION:
+		__ipi_send_mask(call_desc, mask);
+		break;
+
+	case SMP_RESCHEDULE_YOURSELF:
+		__ipi_send_mask(sched_desc, mask);
+		break;
+
+	default:
+		BUG();
+	}
+
+	if (mips_cpc_present()) {
+		for_each_cpu(cpu, mask) {
+			core = cpu_data[cpu].core;
+
+			if (core == current_cpu_data.core)
+				continue;
+
+			while (!cpumask_test_cpu(cpu, &cpu_coherent_mask)) {
+				mips_cpc_lock_other(core);
+				write_cpc_co_cmd(CPC_Cx_CMD_PWRUP);
+				mips_cpc_unlock_other();
+			}
+		}
+	}
+
+	local_irq_restore(flags);
+}
+
+
+static irqreturn_t ipi_resched_interrupt(int irq, void *dev_id)
+{
+	scheduler_ipi();
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t ipi_call_interrupt(int irq, void *dev_id)
+{
+	generic_smp_call_function_interrupt();
+
+	return IRQ_HANDLED;
+}
+
+static struct irqaction irq_resched = {
+	.handler	= ipi_resched_interrupt,
+	.flags		= IRQF_PERCPU,
+	.name		= "IPI resched"
+};
+
+static struct irqaction irq_call = {
+	.handler	= ipi_call_interrupt,
+	.flags		= IRQF_PERCPU,
+	.name		= "IPI call"
+};
+
+static __init void smp_ipi_init_one(unsigned int virq,
+				    struct irqaction *action)
+{
+	int ret;
+
+	irq_set_handler(virq, handle_percpu_irq);
+	ret = setup_irq(virq, action);
+	BUG_ON(ret);
+}
+
+static int __init mips_smp_ipi_init(void)
+{
+	unsigned int call_virq, sched_virq;
+	struct irq_domain *ipidomain;
+	struct device_node *node;
+
+	node = of_irq_find_parent(of_root);
+	ipidomain = irq_find_matching_host(node, DOMAIN_BUS_IPI);
+
+	/*
+	 * Some platforms have half DT setup. So if we found irq node but
+	 * didn't find an ipidomain, try to search for one that is not in the
+	 * DT.
+	 */
+	if (node && !ipidomain)
+		ipidomain = irq_find_matching_host(NULL, DOMAIN_BUS_IPI);
+
+	BUG_ON(!ipidomain);
+
+	call_virq = irq_reserve_ipi(ipidomain, cpu_possible_mask);
+	BUG_ON(!call_virq);
+
+	sched_virq = irq_reserve_ipi(ipidomain, cpu_possible_mask);
+	BUG_ON(!sched_virq);
+
+	if (irq_domain_is_ipi_per_cpu(ipidomain)) {
+		int cpu;
+
+		for_each_cpu(cpu, cpu_possible_mask) {
+			smp_ipi_init_one(call_virq + cpu, &irq_call);
+			smp_ipi_init_one(sched_virq + cpu, &irq_resched);
+		}
+	} else {
+		smp_ipi_init_one(call_virq, &irq_call);
+		smp_ipi_init_one(sched_virq, &irq_resched);
+	}
+
+	call_desc = irq_to_desc(call_virq);
+	sched_desc = irq_to_desc(sched_virq);
+
+	return 0;
+}
+early_initcall(mips_smp_ipi_init);
+#endif
+
 /*
  * First C code run on the secondary CPUs after being started up by
  * the master.
@@ -191,7 +328,7 @@
 	WARN_ON_ONCE(!irqs_disabled());
 	mp_ops->smp_finish();
 
-	cpu_startup_entry(CPUHP_ONLINE);
+	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 }
 
 static void stop_this_cpu(void *dummy)

diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index ae790c5..bf14da9 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c

@@ -690,15 +690,15 @@
 asmlinkage void do_ov(struct pt_regs *regs)
 {
 	enum ctx_state prev_state;
-	siginfo_t info;
+	siginfo_t info = {
+		.si_signo = SIGFPE,
+		.si_code = FPE_INTOVF,
+		.si_addr = (void __user *)regs->cp0_epc,
+	};
 
 	prev_state = exception_enter();
 	die_if_kernel("Integer overflow", regs);
 
-	info.si_code = FPE_INTOVF;
-	info.si_signo = SIGFPE;
-	info.si_errno = 0;
-	info.si_addr = (void __user *) regs->cp0_epc;
 	force_sig_info(SIGFPE, &info, current);
 	exception_exit(prev_state);
 }
@@ -874,7 +874,7 @@
 void do_trap_or_bp(struct pt_regs *regs, unsigned int code,
 	const char *str)
 {
-	siginfo_t info;
+	siginfo_t info = { 0 };
 	char b[40];
 
 #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
@@ -903,7 +903,6 @@
 		else
 			info.si_code = FPE_INTOVF;
 		info.si_signo = SIGFPE;
-		info.si_errno = 0;
 		info.si_addr = (void __user *) regs->cp0_epc;
 		force_sig_info(SIGFPE, &info, current);
 		break;

diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index 8bc3977..70ef1a4 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c

@@ -445,8 +445,8 @@
 
 	dvcpu->arch.wait = 0;
 
-	if (waitqueue_active(&dvcpu->wq))
-		wake_up_interruptible(&dvcpu->wq);
+	if (swait_active(&dvcpu->wq))
+		swake_up(&dvcpu->wq);
 
 	return 0;
 }
@@ -702,7 +702,7 @@
 	} else if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U128) {
 		void __user *uaddr = (void __user *)(long)reg->addr;
 
-		return copy_to_user(uaddr, vs, 16);
+		return copy_to_user(uaddr, vs, 16) ? -EFAULT : 0;
 	} else {
 		return -EINVAL;
 	}
@@ -732,7 +732,7 @@
 	} else if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U128) {
 		void __user *uaddr = (void __user *)(long)reg->addr;
 
-		return copy_from_user(vs, uaddr, 16);
+		return copy_from_user(vs, uaddr, 16) ? -EFAULT : 0;
 	} else {
 		return -EINVAL;
 	}
@@ -1174,8 +1174,8 @@
 	kvm_mips_callbacks->queue_timer_int(vcpu);
 
 	vcpu->arch.wait = 0;
-	if (waitqueue_active(&vcpu->wq))
-		wake_up_interruptible(&vcpu->wq);
+	if (swait_active(&vcpu->wq))
+		swake_up(&vcpu->wq);
 }
 
 /* low level hrtimer wake routine */

diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c
index 5c81fdd..3530376 100644
--- a/arch/mips/mm/mmap.c
+++ b/arch/mips/mm/mmap.c

@@ -146,7 +146,7 @@
 {
 	unsigned long rnd;
 
-	rnd = (unsigned long)get_random_int();
+	rnd = get_random_long();
 	rnd <<= PAGE_SHIFT;
 	if (TASK_IS_32BIT_ADDR)
 		rnd &= 0xfffffful;
@@ -174,7 +174,7 @@
 
 static inline unsigned long brk_rnd(void)
 {
-	unsigned long rnd = get_random_int();
+	unsigned long rnd = get_random_long();
 
 	rnd = rnd << PAGE_SHIFT;
 	/* 8MB for 32bit, 256MB for 64bit */

diff --git a/arch/mips/mm/sc-mips.c b/arch/mips/mm/sc-mips.c
index 2496475..91dec32 100644
--- a/arch/mips/mm/sc-mips.c
+++ b/arch/mips/mm/sc-mips.c

@@ -164,11 +164,13 @@
 
 	sets = cfg & CM_GCR_L2_CONFIG_SET_SIZE_MSK;
 	sets >>= CM_GCR_L2_CONFIG_SET_SIZE_SHF;
-	c->scache.sets = 64 << sets;
+	if (sets)
+		c->scache.sets = 64 << sets;
 
 	line_sz = cfg & CM_GCR_L2_CONFIG_LINE_SIZE_MSK;
 	line_sz >>= CM_GCR_L2_CONFIG_LINE_SIZE_SHF;
-	c->scache.linesz = 2 << line_sz;
+	if (line_sz)
+		c->scache.linesz = 2 << line_sz;
 
 	assoc = cfg & CM_GCR_L2_CONFIG_ASSOC_MSK;
 	assoc >>= CM_GCR_L2_CONFIG_ASSOC_SHF;
@@ -176,9 +178,12 @@
 	c->scache.waysize = c->scache.sets * c->scache.linesz;
 	c->scache.waybit = __ffs(c->scache.waysize);
 
-	c->scache.flags &= ~MIPS_CACHE_NOT_PRESENT;
+	if (c->scache.linesz) {
+		c->scache.flags &= ~MIPS_CACHE_NOT_PRESENT;
+		return 1;
+	}
 
-	return 1;
+	return 0;
 }
 
 static inline int __init mips_sc_probe(void)

diff --git a/arch/mn10300/kernel/smp.c b/arch/mn10300/kernel/smp.c
index f984193..426173c 100644
--- a/arch/mn10300/kernel/smp.c
+++ b/arch/mn10300/kernel/smp.c

@@ -675,7 +675,7 @@
 #ifdef CONFIG_GENERIC_CLOCKEVENTS
 	init_clockevents();
 #endif
-	cpu_startup_entry(CPUHP_ONLINE);
+	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 	return 0;
 }
 

diff --git a/arch/parisc/include/asm/cache.h b/arch/parisc/include/asm/cache.h
index 3d0e17b..df0f52b 100644
--- a/arch/parisc/include/asm/cache.h
+++ b/arch/parisc/include/asm/cache.h

@@ -22,6 +22,9 @@
 
 #define __read_mostly __attribute__((__section__(".data..read_mostly")))
 
+/* Read-only memory is marked before mark_rodata_ro() is called. */
+#define __ro_after_init	__read_mostly
+
 void parisc_cache_init(void);	/* initializes cache-flushing */
 void disable_sr_hashing_asm(int); /* low level support for above */
 void disable_sr_hashing(void);   /* turns off space register hashing */

diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h
index 845272c..7bd69bd 100644
--- a/arch/parisc/include/asm/cacheflush.h
+++ b/arch/parisc/include/asm/cacheflush.h

@@ -121,10 +121,6 @@
 	}
 }
 
-#ifdef CONFIG_DEBUG_RODATA
-void mark_rodata_ro(void);
-#endif
-
 #include <asm/kmap_types.h>
 
 #define ARCH_HAS_KMAP

diff --git a/arch/parisc/include/asm/floppy.h b/arch/parisc/include/asm/floppy.h
index f84ff12..6d8276cd 100644
--- a/arch/parisc/include/asm/floppy.h
+++ b/arch/parisc/include/asm/floppy.h

@@ -33,7 +33,7 @@
  * floppy accesses go through the track buffer.
  */
 #define _CROSS_64KB(a,s,vdma) \
-(!vdma && ((unsigned long)(a)/K_64 != ((unsigned long)(a) + (s) - 1) / K_64))
+(!(vdma) && ((unsigned long)(a)/K_64 != ((unsigned long)(a) + (s) - 1) / K_64))
 
 #define CROSS_64KB(a,s) _CROSS_64KB(a,s,use_virtual_dma & 1)
 

diff --git a/arch/parisc/include/uapi/asm/unistd.h b/arch/parisc/include/uapi/asm/unistd.h
index 35bdccb..b75039f 100644
--- a/arch/parisc/include/uapi/asm/unistd.h
+++ b/arch/parisc/include/uapi/asm/unistd.h

@@ -361,8 +361,9 @@
 #define __NR_membarrier		(__NR_Linux + 343)
 #define __NR_userfaultfd	(__NR_Linux + 344)
 #define __NR_mlock2		(__NR_Linux + 345)
+#define __NR_copy_file_range	(__NR_Linux + 346)
 
-#define __NR_Linux_syscalls	(__NR_mlock2 + 1)
+#define __NR_Linux_syscalls	(__NR_copy_file_range + 1)
 
 
 #define __IGNORE_select		/* newselect */

diff --git a/arch/parisc/kernel/ptrace.c b/arch/parisc/kernel/ptrace.c
index 9585c81..ce0b2b4 100644
--- a/arch/parisc/kernel/ptrace.c
+++ b/arch/parisc/kernel/ptrace.c

@@ -269,14 +269,19 @@
 
 long do_syscall_trace_enter(struct pt_regs *regs)
 {
-	long ret = 0;
-
 	/* Do the secure computing check first. */
 	secure_computing_strict(regs->gr[20]);
 
 	if (test_thread_flag(TIF_SYSCALL_TRACE) &&
-	    tracehook_report_syscall_entry(regs))
-		ret = -1L;
+	    tracehook_report_syscall_entry(regs)) {
+		/*
+		 * Tracing decided this syscall should not happen or the
+		 * debugger stored an invalid system call number. Skip
+		 * the system call and the system call restart handling.
+		 */
+		regs->gr[20] = -1UL;
+		goto out;
+	}
 
 #ifdef CONFIG_64BIT
 	if (!is_compat_task())
@@ -290,7 +295,8 @@
 			regs->gr[24] & 0xffffffff,
 			regs->gr[23] & 0xffffffff);
 
-	return ret ? : regs->gr[20];
+out:
+	return regs->gr[20];
 }
 
 void do_syscall_trace_exit(struct pt_regs *regs)

diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c
index 52e8597..c2a9cc5 100644
--- a/arch/parisc/kernel/smp.c
+++ b/arch/parisc/kernel/smp.c

@@ -305,7 +305,7 @@
 
 	local_irq_enable();  /* Interrupts have been off until now */
 
-	cpu_startup_entry(CPUHP_ONLINE);
+	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 
 	/* NOTREACHED */
 	panic("smp_callin() AAAAaaaaahhhh....\n");

diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S
index 3fbd725..fbafa0d 100644
--- a/arch/parisc/kernel/syscall.S
+++ b/arch/parisc/kernel/syscall.S

@@ -343,7 +343,7 @@
 #endif
 
 	comiclr,>>=	__NR_Linux_syscalls, %r20, %r0
-	b,n	.Lsyscall_nosys
+	b,n	.Ltracesys_nosys
 
 	LDREGX  %r20(%r19), %r19
 
@@ -359,6 +359,9 @@
 	be      0(%sr7,%r19)
 	ldo	R%tracesys_exit(%r2),%r2
 
+.Ltracesys_nosys:
+	ldo	-ENOSYS(%r0),%r28		/* set errno */
+
 	/* Do *not* call this function on the gateway page, because it
 	makes a direct call to syscall_trace. */
 	

diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S
index d4ffcfb..585d50f 100644
--- a/arch/parisc/kernel/syscall_table.S
+++ b/arch/parisc/kernel/syscall_table.S

@@ -441,6 +441,7 @@
 	ENTRY_SAME(membarrier)
 	ENTRY_SAME(userfaultfd)
 	ENTRY_SAME(mlock2)		/* 345 */
+	ENTRY_SAME(copy_file_range)
 
 
 .ifne (. - 90b) - (__NR_Linux_syscalls * (91b - 90b))

diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index 1b366c4..3c07d6b 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c

@@ -55,12 +55,12 @@
 
 static struct resource data_resource = {
 	.name	= "Kernel data",
-	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
 };
 
 static struct resource code_resource = {
 	.name	= "Kernel code",
-	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
 };
 
 static struct resource pdcdata_resource = {
@@ -201,7 +201,7 @@
 		res->name = "System RAM";
 		res->start = pmem_ranges[i].start_pfn << PAGE_SHIFT;
 		res->end = res->start + (pmem_ranges[i].pages << PAGE_SHIFT)-1;
-		res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+		res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
 		request_resource(&iomem_resource, res);
 	}
 

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 9d08d8c..c98afa5 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h

@@ -289,7 +289,7 @@
 	struct list_head runnable_threads;
 	struct list_head preempt_list;
 	spinlock_t lock;
-	wait_queue_head_t wq;
+	struct swait_queue_head wq;
 	spinlock_t stoltb_lock;	/* protects stolen_tb and preempt_tb */
 	u64 stolen_tb;
 	u64 preempt_tb;
@@ -629,7 +629,7 @@
 	u8 prodded;
 	u32 last_inst;
 
-	wait_queue_head_t *wqp;
+	struct swait_queue_head *wqp;
 	struct kvmppc_vcore *vcore;
 	int ret;
 	int trap;

diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 301be31..650cfb3 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c

@@ -418,8 +418,7 @@
 		eeh_pcid_put(dev);
 		if (driver->err_handler &&
 		    driver->err_handler->error_detected &&
-		    driver->err_handler->slot_reset &&
-		    driver->err_handler->resume)
+		    driver->err_handler->slot_reset)
 			return NULL;
 	}
 

diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index 05e804c..aec9a1b 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c

@@ -109,8 +109,9 @@
 	 * If the breakpoint is unregistered between a hw_breakpoint_handler()
 	 * and the single_step_dabr_instruction(), then cleanup the breakpoint
 	 * restoration variables to prevent dangling pointers.
+	 * FIXME, this should not be using bp->ctx at all! Sayeth peterz.
 	 */
-	if (bp->ctx && bp->ctx->task)
+	if (bp->ctx && bp->ctx->task && bp->ctx->task != ((void *)-1L))
 		bp->ctx->task->thread.last_hit_ubp = NULL;
 }
 

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index dccc87e..3c5736e 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c

@@ -1768,9 +1768,9 @@
 
 	/* 8MB for 32bit, 1GB for 64bit */
 	if (is_32bit_task())
-		rnd = (long)(get_random_int() % (1<<(23-PAGE_SHIFT)));
+		rnd = (get_random_long() % (1UL<<(23-PAGE_SHIFT)));
 	else
-		rnd = (long)(get_random_int() % (1<<(30-PAGE_SHIFT)));
+		rnd = (get_random_long() % (1UL<<(30-PAGE_SHIFT)));
 
 	return rnd << PAGE_SHIFT;
 }

diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index ad8c9db..d544fa3 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c

@@ -114,8 +114,6 @@
 
 notrace void __init machine_init(u64 dt_ptr)
 {
-	lockdep_init();
-
 	/* Enable early debugging if any specified (see udbg.h) */
 	udbg_early_init();
 

diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 5c03a6a..f98be83 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c

@@ -255,9 +255,6 @@
 	setup_paca(&boot_paca);
 	fixup_boot_paca();
 
-	/* Initialize lockdep early or else spinlocks will blow */
-	lockdep_init();
-
 	/* -------- printk is now safe to use ------- */
 
 	/* Enable early debugging if any specified (see udbg.h) */

diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index ec9ec20..cc13d4c 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c

@@ -727,7 +727,7 @@
 
 	local_irq_enable();
 
-	cpu_startup_entry(CPUHP_ONLINE);
+	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 
 	BUG();
 }

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index baeddb0..f1187bb 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c

@@ -114,11 +114,11 @@
 static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
 {
 	int cpu;
-	wait_queue_head_t *wqp;
+	struct swait_queue_head *wqp;
 
 	wqp = kvm_arch_vcpu_wq(vcpu);
-	if (waitqueue_active(wqp)) {
-		wake_up_interruptible(wqp);
+	if (swait_active(wqp)) {
+		swake_up(wqp);
 		++vcpu->stat.halt_wakeup;
 	}
 
@@ -701,8 +701,8 @@
 		tvcpu->arch.prodded = 1;
 		smp_mb();
 		if (vcpu->arch.ceded) {
-			if (waitqueue_active(&vcpu->wq)) {
-				wake_up_interruptible(&vcpu->wq);
+			if (swait_active(&vcpu->wq)) {
+				swake_up(&vcpu->wq);
 				vcpu->stat.halt_wakeup++;
 			}
 		}
@@ -1459,7 +1459,7 @@
 	INIT_LIST_HEAD(&vcore->runnable_threads);
 	spin_lock_init(&vcore->lock);
 	spin_lock_init(&vcore->stoltb_lock);
-	init_waitqueue_head(&vcore->wq);
+	init_swait_queue_head(&vcore->wq);
 	vcore->preempt_tb = TB_NIL;
 	vcore->lpcr = kvm->arch.lpcr;
 	vcore->first_vcpuid = core * threads_per_subcore;
@@ -2531,10 +2531,9 @@
 {
 	struct kvm_vcpu *vcpu;
 	int do_sleep = 1;
+	DECLARE_SWAITQUEUE(wait);
 
-	DEFINE_WAIT(wait);
-
-	prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
+	prepare_to_swait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
 
 	/*
 	 * Check one last time for pending exceptions and ceded state after
@@ -2548,7 +2547,7 @@
 	}
 
 	if (!do_sleep) {
-		finish_wait(&vc->wq, &wait);
+		finish_swait(&vc->wq, &wait);
 		return;
 	}
 
@@ -2556,7 +2555,7 @@
 	trace_kvmppc_vcore_blocked(vc, 0);
 	spin_unlock(&vc->lock);
 	schedule();
-	finish_wait(&vc->wq, &wait);
+	finish_swait(&vc->wq, &wait);
 	spin_lock(&vc->lock);
 	vc->vcore_state = VCORE_INACTIVE;
 	trace_kvmppc_vcore_blocked(vc, 1);
@@ -2612,7 +2611,7 @@
 			kvmppc_start_thread(vcpu, vc);
 			trace_kvm_guest_enter(vcpu);
 		} else if (vc->vcore_state == VCORE_SLEEPING) {
-			wake_up(&vc->wq);
+			swake_up(&vc->wq);
 		}
 
 	}

diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 6ee26de..25ae2c9 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S

@@ -1370,6 +1370,20 @@
 	std	r6, VCPU_ACOP(r9)
 	stw	r7, VCPU_GUEST_PID(r9)
 	std	r8, VCPU_WORT(r9)
+	/*
+	 * Restore various registers to 0, where non-zero values
+	 * set by the guest could disrupt the host.
+	 */
+	li	r0, 0
+	mtspr	SPRN_IAMR, r0
+	mtspr	SPRN_CIABR, r0
+	mtspr	SPRN_DAWRX, r0
+	mtspr	SPRN_TCSCR, r0
+	mtspr	SPRN_WORT, r0
+	/* Set MMCRS to 1<<31 to freeze and disable the SPMC counters */
+	li	r0, 1
+	sldi	r0, r0, 31
+	mtspr	SPRN_MMCRS, r0
 8:
 
 	/* Save and reset AMR and UAMOR before turning on the MMU */

diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c
index 0762c1e..edb0991 100644
--- a/arch/powerpc/mm/hash64_64k.c
+++ b/arch/powerpc/mm/hash64_64k.c

@@ -111,7 +111,13 @@
 	 */
 	if (!(old_pte & _PAGE_COMBO)) {
 		flush_hash_page(vpn, rpte, MMU_PAGE_64K, ssize, flags);
-		old_pte &= ~_PAGE_HASHPTE | _PAGE_F_GIX | _PAGE_F_SECOND;
+		/*
+		 * clear the old slot details from the old and new pte.
+		 * On hash insert failure we use old pte value and we don't
+		 * want slot information there if we have a insert failure.
+		 */
+		old_pte &= ~(_PAGE_HASHPTE | _PAGE_F_GIX | _PAGE_F_SECOND);
+		new_pte &= ~(_PAGE_HASHPTE | _PAGE_F_GIX | _PAGE_F_SECOND);
 		goto htab_insert_hpte;
 	}
 	/*

diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c
index 49b152b..eb2accd 100644
--- a/arch/powerpc/mm/hugepage-hash64.c
+++ b/arch/powerpc/mm/hugepage-hash64.c

@@ -78,9 +78,19 @@
 		 * base page size. This is because demote_segment won't flush
 		 * hash page table entries.
 		 */
-		if ((old_pmd & _PAGE_HASHPTE) && !(old_pmd & _PAGE_COMBO))
+		if ((old_pmd & _PAGE_HASHPTE) && !(old_pmd & _PAGE_COMBO)) {
 			flush_hash_hugepage(vsid, ea, pmdp, MMU_PAGE_64K,
 					    ssize, flags);
+			/*
+			 * With THP, we also clear the slot information with
+			 * respect to all the 64K hash pte mapping the 16MB
+			 * page. They are all invalid now. This make sure we
+			 * don't find the slot valid when we fault with 4k
+			 * base page size.
+			 *
+			 */
+			memset(hpte_slot_array, 0, PTE_FRAG_SIZE);
+		}
 	}
 
 	valid = hpte_valid(hpte_slot_array, index);

diff --git a/arch/powerpc/mm/hugetlbpage-book3e.c b/arch/powerpc/mm/hugetlbpage-book3e.c
index 7e6d088..83a8be7 100644
--- a/arch/powerpc/mm/hugetlbpage-book3e.c
+++ b/arch/powerpc/mm/hugetlbpage-book3e.c

@@ -8,6 +8,8 @@
 #include <linux/mm.h>
 #include <linux/hugetlb.h>
 
+#include <asm/mmu.h>
+
 #ifdef CONFIG_PPC_FSL_BOOK3E
 #ifdef CONFIG_PPC64
 static inline int tlb1_next(void)
@@ -60,6 +62,14 @@
 	unsigned long tmp;
 	int token = smp_processor_id() + 1;
 
+	/*
+	 * Besides being unnecessary in the absence of SMT, this
+	 * check prevents trying to do lbarx/stbcx. on e5500 which
+	 * doesn't implement either feature.
+	 */
+	if (!cpu_has_feature(CPU_FTR_SMT))
+		return;
+
 	asm volatile("1: lbarx %0, 0, %1;"
 		     "cmpwi %0, 0;"
 		     "bne 2f;"
@@ -80,6 +90,9 @@
 {
 	struct paca_struct *paca = get_paca();
 
+	if (!cpu_has_feature(CPU_FTR_SMT))
+		return;
+
 	isync();
 	paca->tcd_ptr->lock = 0;
 }

diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index d0f0a51..f078a1f 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c

@@ -541,7 +541,7 @@
 			res->name = "System RAM";
 			res->start = base;
 			res->end = base + size - 1;
-			res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+			res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
 			WARN_ON(request_resource(&iomem_resource, res) < 0);
 		}
 	}

diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c
index 0f0502e..4087705 100644
--- a/arch/powerpc/mm/mmap.c
+++ b/arch/powerpc/mm/mmap.c

@@ -59,9 +59,9 @@
 
 	/* 8MB for 32bit, 1GB for 64bit */
 	if (is_32bit_task())
-		rnd = (unsigned long)get_random_int() % (1<<(23-PAGE_SHIFT));
+		rnd = get_random_long() % (1<<(23-PAGE_SHIFT));
 	else
-		rnd = (unsigned long)get_random_int() % (1<<(30-PAGE_SHIFT));
+		rnd = get_random_long() % (1UL<<(30-PAGE_SHIFT));
 
 	return rnd << PAGE_SHIFT;
 }

diff --git a/arch/s390/include/asm/fpu/internal.h b/arch/s390/include/asm/fpu/internal.h
index ea91ddf..629c908 100644
--- a/arch/s390/include/asm/fpu/internal.h
+++ b/arch/s390/include/asm/fpu/internal.h

@@ -40,6 +40,7 @@
 static inline void fpregs_store(_s390_fp_regs *fpregs, struct fpu *fpu)
 {
 	fpregs->pad = 0;
+	fpregs->fpc = fpu->fpc;
 	if (MACHINE_HAS_VX)
 		convert_vx_to_fp((freg_t *)&fpregs->fprs, fpu->vxrs);
 	else
@@ -49,6 +50,7 @@
 
 static inline void fpregs_load(_s390_fp_regs *fpregs, struct fpu *fpu)
 {
+	fpu->fpc = fpregs->fpc;
 	if (MACHINE_HAS_VX)
 		convert_fp_to_vx(fpu->vxrs, (freg_t *)&fpregs->fprs);
 	else

diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 8959ebb..b0c8ad0 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h

@@ -467,7 +467,7 @@
 struct kvm_s390_local_interrupt {
 	spinlock_t lock;
 	struct kvm_s390_float_interrupt *float_int;
-	wait_queue_head_t *wq;
+	struct swait_queue_head *wq;
 	atomic_t *cpuflags;
 	DECLARE_BITMAP(sigp_emerg_pending, KVM_MAX_VCPUS);
 	struct kvm_s390_irq_payload irq;

diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index fb1b93e..e485817 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h

@@ -15,17 +15,25 @@
 static inline int init_new_context(struct task_struct *tsk,
 				   struct mm_struct *mm)
 {
+	spin_lock_init(&mm->context.list_lock);
+	INIT_LIST_HEAD(&mm->context.pgtable_list);
+	INIT_LIST_HEAD(&mm->context.gmap_list);
 	cpumask_clear(&mm->context.cpu_attach_mask);
 	atomic_set(&mm->context.attach_count, 0);
 	mm->context.flush_mm = 0;
-	mm->context.asce_bits = _ASCE_TABLE_LENGTH | _ASCE_USER_BITS;
-	mm->context.asce_bits |= _ASCE_TYPE_REGION3;
 #ifdef CONFIG_PGSTE
 	mm->context.alloc_pgste = page_table_allocate_pgste;
 	mm->context.has_pgste = 0;
 	mm->context.use_skey = 0;
 #endif
-	mm->context.asce_limit = STACK_TOP_MAX;
+	if (mm->context.asce_limit == 0) {
+		/* context created by exec, set asce limit to 4TB */
+		mm->context.asce_bits = _ASCE_TABLE_LENGTH |
+			_ASCE_USER_BITS | _ASCE_TYPE_REGION3;
+		mm->context.asce_limit = STACK_TOP_MAX;
+	} else if (mm->context.asce_limit == (1UL << 31)) {
+		mm_inc_nr_pmds(mm);
+	}
 	crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
 	return 0;
 }
@@ -111,8 +119,6 @@
 static inline void arch_dup_mmap(struct mm_struct *oldmm,
 				 struct mm_struct *mm)
 {
-	if (oldmm->context.asce_limit < mm->context.asce_limit)
-		crst_table_downgrade(mm, oldmm->context.asce_limit);
 }
 
 static inline void arch_exit_mmap(struct mm_struct *mm)

diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index 7b7858f..d7cc79f 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h

@@ -100,12 +100,26 @@
 
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
-	spin_lock_init(&mm->context.list_lock);
-	INIT_LIST_HEAD(&mm->context.pgtable_list);
-	INIT_LIST_HEAD(&mm->context.gmap_list);
-	return (pgd_t *) crst_table_alloc(mm);
+	unsigned long *table = crst_table_alloc(mm);
+
+	if (!table)
+		return NULL;
+	if (mm->context.asce_limit == (1UL << 31)) {
+		/* Forking a compat process with 2 page table levels */
+		if (!pgtable_pmd_page_ctor(virt_to_page(table))) {
+			crst_table_free(mm, table);
+			return NULL;
+		}
+	}
+	return (pgd_t *) table;
 }
-#define pgd_free(mm, pgd) crst_table_free(mm, (unsigned long *) pgd)
+
+static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+	if (mm->context.asce_limit == (1UL << 31))
+		pgtable_pmd_page_dtor(virt_to_page(pgd));
+	crst_table_free(mm, (unsigned long *) pgd);
+}
 
 static inline void pmd_populate(struct mm_struct *mm,
 				pmd_t *pmd, pgtable_t pte)

diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
index 66c9441..4af6037 100644
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c

@@ -271,7 +271,7 @@
 
 	/* Restore high gprs from signal stack */
 	if (__copy_from_user(&gprs_high, &sregs_ext->gprs_high,
-			     sizeof(&sregs_ext->gprs_high)))
+			     sizeof(sregs_ext->gprs_high)))
 		return -EFAULT;
 	for (i = 0; i < NUM_GPRS; i++)
 		*(__u32 *)&regs->gprs[i] = gprs_high[i];

diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index c55576b..a0684de 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c

@@ -448,7 +448,6 @@
 	rescue_initrd();
 	clear_bss_section();
 	init_kernel_storage_key();
-	lockdep_init();
 	lockdep_off();
 	setup_lowcore_early();
 	setup_facility_list();

diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index c5febe8..03c2b46 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S

@@ -16,7 +16,7 @@
 
 __HEAD
 ENTRY(startup_continue)
-	tm	__LC_STFLE_FAC_LIST+6,0x80	# LPP available ?
+	tm	__LC_STFLE_FAC_LIST+5,0x80	# LPP available ?
 	jz	0f
 	xc	__LC_LPP+1(7,0),__LC_LPP+1	# clear lpp and current_pid
 	mvi	__LC_LPP,0x80			#   and set LPP_MAGIC

diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 9220db5..cedb019 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c

@@ -374,17 +374,17 @@
 
 static struct resource code_resource = {
 	.name  = "Kernel code",
-	.flags = IORESOURCE_BUSY | IORESOURCE_MEM,
+	.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
 };
 
 static struct resource data_resource = {
 	.name = "Kernel data",
-	.flags = IORESOURCE_BUSY | IORESOURCE_MEM,
+	.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
 };
 
 static struct resource bss_resource = {
 	.name = "Kernel bss",
-	.flags = IORESOURCE_BUSY | IORESOURCE_MEM,
+	.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
 };
 
 static struct resource __initdata *standard_resources[] = {
@@ -408,7 +408,7 @@
 
 	for_each_memblock(memory, reg) {
 		res = alloc_bootmem_low(sizeof(*res));
-		res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
+		res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;
 
 		res->name = "System RAM";
 		res->start = reg->base;

diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 3c65a8e..40a6b4f 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c

@@ -798,7 +798,7 @@
 	set_cpu_online(smp_processor_id(), true);
 	inc_irq_stat(CPU_RST);
 	local_irq_enable();
-	cpu_startup_entry(CPUHP_ONLINE);
+	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 }
 
 /* Upping and downing of CPUs */

diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index f88ca72..9ffc732 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c

@@ -966,13 +966,13 @@
 
 void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu)
 {
-	if (waitqueue_active(&vcpu->wq)) {
+	if (swait_active(&vcpu->wq)) {
 		/*
 		 * The vcpu gave up the cpu voluntarily, mark it as a good
 		 * yield-candidate.
 		 */
 		vcpu->preempted = true;
-		wake_up_interruptible(&vcpu->wq);
+		swake_up(&vcpu->wq);
 		vcpu->stat.halt_wakeup++;
 	}
 }

diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 4af21c7..03dfe9c 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c

@@ -2381,7 +2381,7 @@
 
 	/* manually convert vector registers if necessary */
 	if (MACHINE_HAS_VX) {
-		convert_vx_to_fp(fprs, current->thread.fpu.vxrs);
+		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
 				     fprs, 128);
 	} else {

diff --git a/arch/score/kernel/setup.c b/arch/score/kernel/setup.c
index b48459a..f3a0649 100644
--- a/arch/score/kernel/setup.c
+++ b/arch/score/kernel/setup.c

@@ -101,7 +101,7 @@
 	res->name = "System RAM";
 	res->start = MEMORY_START;
 	res->end = MEMORY_START + MEMORY_SIZE - 1;
-	res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+	res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
 	request_resource(&iomem_resource, res);
 
 	request_resource(res, &code_resource);

diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c
index de19cfa..3f1c18b 100644
--- a/arch/sh/kernel/setup.c
+++ b/arch/sh/kernel/setup.c

@@ -78,17 +78,17 @@
 
 static struct resource code_resource = {
 	.name = "Kernel code",
-	.flags = IORESOURCE_BUSY | IORESOURCE_MEM,
+	.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
 };
 
 static struct resource data_resource = {
 	.name = "Kernel data",
-	.flags = IORESOURCE_BUSY | IORESOURCE_MEM,
+	.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
 };
 
 static struct resource bss_resource = {
 	.name	= "Kernel bss",
-	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
 };
 
 unsigned long memory_start;
@@ -202,7 +202,7 @@
 	res->name = "System RAM";
 	res->start = start;
 	res->end = end - 1;
-	res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+	res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
 
 	if (request_resource(&iomem_resource, res)) {
 		pr_err("unable to request memory_resource 0x%lx 0x%lx\n",

diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c
index de6be00..13f633a 100644
--- a/arch/sh/kernel/smp.c
+++ b/arch/sh/kernel/smp.c

@@ -203,7 +203,7 @@
 	set_cpu_online(cpu, true);
 	per_cpu(cpu_state, cpu) = CPU_ONLINE;
 
-	cpu_startup_entry(CPUHP_ONLINE);
+	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 }
 
 extern struct {

diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile
index eaee146..8496a07 100644
--- a/arch/sparc/Makefile
+++ b/arch/sparc/Makefile

@@ -24,7 +24,13 @@
 export BITS    := 32
 UTS_MACHINE    := sparc
 
+# We are adding -Wa,-Av8 to KBUILD_CFLAGS to deal with a specs bug in some
+# versions of gcc.  Some gcc versions won't pass -Av8 to binutils when you
+# give -mcpu=v8.  This silently worked with older bintutils versions but
+# does not any more.
 KBUILD_CFLAGS  += -m32 -mcpu=v8 -pipe -mno-fpu -fcall-used-g5 -fcall-used-g7
+KBUILD_CFLAGS  += -Wa,-Av8
+
 KBUILD_AFLAGS  += -m32 -Wa,-Av8
 
 else

diff --git a/arch/sparc/include/uapi/asm/unistd.h b/arch/sparc/include/uapi/asm/unistd.h
index 1c26d44..b6de8b1 100644
--- a/arch/sparc/include/uapi/asm/unistd.h
+++ b/arch/sparc/include/uapi/asm/unistd.h

@@ -422,8 +422,9 @@
 #define __NR_listen		354
 #define __NR_setsockopt		355
 #define __NR_mlock2		356
+#define __NR_copy_file_range	357
 
-#define NR_syscalls		357
+#define NR_syscalls		358
 
 /* Bitmask values returned from kern_features system call.  */
 #define KERN_FEATURE_MIXED_MODE_STACK	0x00000001

diff --git a/arch/sparc/kernel/entry.S b/arch/sparc/kernel/entry.S
index 33c02b1..a83707c 100644
--- a/arch/sparc/kernel/entry.S
+++ b/arch/sparc/kernel/entry.S

@@ -948,7 +948,24 @@
 	cmp	%o0, 0
 	bne	3f
 	 mov	-ENOSYS, %o0
+
+	/* Syscall tracing can modify the registers.  */
+	ld	[%sp + STACKFRAME_SZ + PT_G1], %g1
+	sethi	%hi(sys_call_table), %l7
+	ld	[%sp + STACKFRAME_SZ + PT_I0], %i0
+	or	%l7, %lo(sys_call_table), %l7
+	ld	[%sp + STACKFRAME_SZ + PT_I1], %i1
+	ld	[%sp + STACKFRAME_SZ + PT_I2], %i2
+	ld	[%sp + STACKFRAME_SZ + PT_I3], %i3
+	ld	[%sp + STACKFRAME_SZ + PT_I4], %i4
+	ld	[%sp + STACKFRAME_SZ + PT_I5], %i5
+	cmp	%g1, NR_syscalls
+	bgeu	3f
+	 mov	-ENOSYS, %o0
+
+	sll	%g1, 2, %l4
 	mov	%i0, %o0
+	ld	[%l7 + %l4], %l7
 	mov	%i1, %o1
 	mov	%i2, %o2
 	mov	%i3, %o3

diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
index f2d30ca..cd1f592 100644
--- a/arch/sparc/kernel/head_64.S
+++ b/arch/sparc/kernel/head_64.S

@@ -696,14 +696,6 @@
 	call	__bzero
 	 sub	%o1, %o0, %o1
 
-#ifdef CONFIG_LOCKDEP
-	/* We have this call this super early, as even prom_init can grab
-	 * spinlocks and thus call into the lockdep code.
-	 */
-	call	lockdep_init
-	 nop
-#endif
-
 	call	prom_init
 	 mov	%l7, %o0			! OpenPROM cif handler
 

diff --git a/arch/sparc/kernel/hvcalls.S b/arch/sparc/kernel/hvcalls.S
index afbaba5..d127130 100644
--- a/arch/sparc/kernel/hvcalls.S
+++ b/arch/sparc/kernel/hvcalls.S

@@ -338,8 +338,9 @@
 	mov	%o1, %o4
 	mov	HV_FAST_MACH_SET_WATCHDOG, %o5
 	ta	HV_FAST_TRAP
+	brnz,a,pn %o4, 0f
 	stx	%o1, [%o4]
-	retl
+0:	retl
 	 nop
 ENDPROC(sun4v_mach_set_watchdog)
 

diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c
index d88beff4..39aaec1 100644
--- a/arch/sparc/kernel/signal_64.c
+++ b/arch/sparc/kernel/signal_64.c

@@ -52,7 +52,7 @@
 	unsigned char fenab;
 	int err;
 
-	flush_user_windows();
+	synchronize_user_stack();
 	if (get_thread_wsaved()					||
 	    (((unsigned long)ucp) & (sizeof(unsigned long)-1))	||
 	    (!__access_ok(ucp, sizeof(*ucp))))

diff --git a/arch/sparc/kernel/smp_32.c b/arch/sparc/kernel/smp_32.c
index b3a5d81..fb30e7c 100644
--- a/arch/sparc/kernel/smp_32.c
+++ b/arch/sparc/kernel/smp_32.c

@@ -364,7 +364,7 @@
 	local_irq_enable();
 
 	wmb();
-	cpu_startup_entry(CPUHP_ONLINE);
+	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 
 	/* We should never reach here! */
 	BUG();

diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index 19cd08d..8a6151a 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c

@@ -134,7 +134,7 @@
 
 	local_irq_enable();
 
-	cpu_startup_entry(CPUHP_ONLINE);
+	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 }
 
 void cpu_panic(void)

diff --git a/arch/sparc/kernel/sparc_ksyms_64.c b/arch/sparc/kernel/sparc_ksyms_64.c
index a92d5d2..9e034f2 100644
--- a/arch/sparc/kernel/sparc_ksyms_64.c
+++ b/arch/sparc/kernel/sparc_ksyms_64.c

@@ -37,6 +37,7 @@
 EXPORT_SYMBOL(sun4v_niagara_setperf);
 EXPORT_SYMBOL(sun4v_niagara2_getperf);
 EXPORT_SYMBOL(sun4v_niagara2_setperf);
+EXPORT_SYMBOL(sun4v_mach_set_watchdog);
 
 /* from hweight.S */
 EXPORT_SYMBOL(__arch_hweight8);

diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c
index c690c8e..b489e97 100644
--- a/arch/sparc/kernel/sys_sparc_64.c
+++ b/arch/sparc/kernel/sys_sparc_64.c

@@ -264,7 +264,7 @@
 	unsigned long rnd = 0UL;
 
 	if (current->flags & PF_RANDOMIZE) {
-		unsigned long val = get_random_int();
+		unsigned long val = get_random_long();
 		if (test_thread_flag(TIF_32BIT))
 			rnd = (val % (1UL << (23UL-PAGE_SHIFT)));
 		else

diff --git a/arch/sparc/kernel/syscalls.S b/arch/sparc/kernel/syscalls.S
index bb00089..c4a1b5c 100644
--- a/arch/sparc/kernel/syscalls.S
+++ b/arch/sparc/kernel/syscalls.S

@@ -158,7 +158,25 @@
 	 add	%sp, PTREGS_OFF, %o0
 	brnz,pn	%o0, 3f
 	 mov	-ENOSYS, %o0
+
+	/* Syscall tracing can modify the registers.  */
+	ldx	[%sp + PTREGS_OFF + PT_V9_G1], %g1
+	sethi	%hi(sys_call_table32), %l7
+	ldx	[%sp + PTREGS_OFF + PT_V9_I0], %i0
+	or	%l7, %lo(sys_call_table32), %l7
+	ldx	[%sp + PTREGS_OFF + PT_V9_I1], %i1
+	ldx	[%sp + PTREGS_OFF + PT_V9_I2], %i2
+	ldx	[%sp + PTREGS_OFF + PT_V9_I3], %i3
+	ldx	[%sp + PTREGS_OFF + PT_V9_I4], %i4
+	ldx	[%sp + PTREGS_OFF + PT_V9_I5], %i5
+
+	cmp	%g1, NR_syscalls
+	bgeu,pn	%xcc, 3f
+	 mov	-ENOSYS, %o0
+
+	sll	%g1, 2, %l4
 	srl	%i0, 0, %o0
+	lduw	[%l7 + %l4], %l7
 	srl	%i4, 0, %o4
 	srl	%i1, 0, %o1
 	srl	%i2, 0, %o2
@@ -170,7 +188,25 @@
 	 add	%sp, PTREGS_OFF, %o0
 	brnz,pn	%o0, 3f
 	 mov	-ENOSYS, %o0
+
+	/* Syscall tracing can modify the registers.  */
+	ldx	[%sp + PTREGS_OFF + PT_V9_G1], %g1
+	sethi	%hi(sys_call_table64), %l7
+	ldx	[%sp + PTREGS_OFF + PT_V9_I0], %i0
+	or	%l7, %lo(sys_call_table64), %l7
+	ldx	[%sp + PTREGS_OFF + PT_V9_I1], %i1
+	ldx	[%sp + PTREGS_OFF + PT_V9_I2], %i2
+	ldx	[%sp + PTREGS_OFF + PT_V9_I3], %i3
+	ldx	[%sp + PTREGS_OFF + PT_V9_I4], %i4
+	ldx	[%sp + PTREGS_OFF + PT_V9_I5], %i5
+
+	cmp	%g1, NR_syscalls
+	bgeu,pn	%xcc, 3f
+	 mov	-ENOSYS, %o0
+
+	sll	%g1, 2, %l4
 	mov	%i0, %o0
+	lduw	[%l7 + %l4], %l7
 	mov	%i1, %o1
 	mov	%i2, %o2
 	mov	%i3, %o3

diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S
index e663b6c..6c3dd6c 100644
--- a/arch/sparc/kernel/systbls_32.S
+++ b/arch/sparc/kernel/systbls_32.S

@@ -88,4 +88,4 @@
 /*340*/	.long sys_ni_syscall, sys_kcmp, sys_finit_module, sys_sched_setattr, sys_sched_getattr
 /*345*/	.long sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf
 /*350*/	.long sys_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen
-/*355*/	.long sys_setsockopt, sys_mlock2
+/*355*/	.long sys_setsockopt, sys_mlock2, sys_copy_file_range

diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S
index 1557121..12b524c 100644
--- a/arch/sparc/kernel/systbls_64.S
+++ b/arch/sparc/kernel/systbls_64.S

@@ -89,7 +89,7 @@
 /*340*/	.word sys_kern_features, sys_kcmp, sys_finit_module, sys_sched_setattr, sys_sched_getattr
 	.word sys32_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf
 /*350*/	.word sys32_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen
-	.word compat_sys_setsockopt, sys_mlock2
+	.word compat_sys_setsockopt, sys_mlock2, sys_copy_file_range
 
 #endif /* CONFIG_COMPAT */
 
@@ -170,4 +170,4 @@
 /*340*/	.word sys_kern_features, sys_kcmp, sys_finit_module, sys_sched_setattr, sys_sched_getattr
 	.word sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf
 /*350*/	.word sys64_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen
-	.word sys_setsockopt, sys_mlock2
+	.word sys_setsockopt, sys_mlock2, sys_copy_file_range

diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 6f21685..1cfe6aa 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c

@@ -2863,17 +2863,17 @@
 
 static struct resource code_resource = {
 	.name	= "Kernel code",
-	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
+	.flags	= IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
 };
 
 static struct resource data_resource = {
 	.name	= "Kernel data",
-	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
+	.flags	= IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
 };
 
 static struct resource bss_resource = {
 	.name	= "Kernel bss",
-	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
+	.flags	= IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
 };
 
 static inline resource_size_t compute_kern_paddr(void *addr)
@@ -2909,7 +2909,7 @@
 		res->name = "System RAM";
 		res->start = pavail[i].phys_addr;
 		res->end = pavail[i].phys_addr + pavail[i].reg_size - 1;
-		res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
+		res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;
 
 		if (insert_resource(&iomem_resource, res) < 0) {
 			pr_warn("Resource insertion failed.\n");

diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c
index bbb855d..a992238 100644
--- a/arch/tile/kernel/setup.c
+++ b/arch/tile/kernel/setup.c

@@ -1632,14 +1632,14 @@
 	.name	= "Kernel data",
 	.start	= 0,
 	.end	= 0,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
+	.flags	= IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
 };
 
 static struct resource code_resource = {
 	.name	= "Kernel code",
 	.start	= 0,
 	.end	= 0,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
+	.flags	= IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
 };
 
 /*
@@ -1673,10 +1673,15 @@
 		kzalloc(sizeof(struct resource), GFP_ATOMIC);
 	if (!res)
 		return NULL;
-	res->name = reserved ? "Reserved" : "System RAM";
 	res->start = start_pfn << PAGE_SHIFT;
 	res->end = (end_pfn << PAGE_SHIFT) - 1;
 	res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
+	if (reserved) {
+		res->name = "Reserved";
+	} else {
+		res->name = "System RAM";
+		res->flags |= IORESOURCE_SYSRAM;
+	}
 	if (insert_resource(&iomem_resource, res)) {
 		kfree(res);
 		return NULL;

diff --git a/arch/tile/kernel/smpboot.c b/arch/tile/kernel/smpboot.c
index 20d52a9..6c0abaa 100644
--- a/arch/tile/kernel/smpboot.c
+++ b/arch/tile/kernel/smpboot.c

@@ -208,7 +208,7 @@
 	/* Set up tile-timer clock-event device on this cpu */
 	setup_tile_timer();
 
-	cpu_startup_entry(CPUHP_ONLINE);
+	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 }
 
 int __cpu_up(unsigned int cpu, struct task_struct *tidle)

diff --git a/arch/um/kernel/reboot.c b/arch/um/kernel/reboot.c
index 9bdf67a..b60a9f8 100644
--- a/arch/um/kernel/reboot.c
+++ b/arch/um/kernel/reboot.c

@@ -12,6 +12,7 @@
 #include <skas.h>
 
 void (*pm_power_off)(void);
+EXPORT_SYMBOL(pm_power_off);
 
 static void kill_off_processes(void)
 {

diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c
index fc8be0e..57acbd6 100644
--- a/arch/um/kernel/signal.c
+++ b/arch/um/kernel/signal.c

@@ -69,7 +69,7 @@
 	struct ksignal ksig;
 	int handled_sig = 0;
 
-	if (get_signal(&ksig)) {
+	while (get_signal(&ksig)) {
 		handled_sig = 1;
 		/* Whee!  Actually deliver the signal.  */
 		handle_signal(&ksig, regs);

diff --git a/arch/unicore32/kernel/setup.c b/arch/unicore32/kernel/setup.c
index 3fa317f..c2bffa5 100644
--- a/arch/unicore32/kernel/setup.c
+++ b/arch/unicore32/kernel/setup.c

@@ -72,13 +72,13 @@
 		.name = "Kernel code",
 		.start = 0,
 		.end = 0,
-		.flags = IORESOURCE_MEM
+		.flags = IORESOURCE_SYSTEM_RAM
 	},
 	{
 		.name = "Kernel data",
 		.start = 0,
 		.end = 0,
-		.flags = IORESOURCE_MEM
+		.flags = IORESOURCE_SYSTEM_RAM
 	}
 };
 
@@ -211,7 +211,7 @@
 		res->name  = "System RAM";
 		res->start = mi->bank[i].start;
 		res->end   = mi->bank[i].start + mi->bank[i].size - 1;
-		res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+		res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
 
 		request_resource(&iomem_resource, res);
 

diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild
index 1538562..eb3abf8 100644
--- a/arch/x86/Kbuild
+++ b/arch/x86/Kbuild

@@ -1,6 +1,7 @@
-
 obj-y += entry/
 
+obj-$(CONFIG_PERF_EVENTS) += events/
+
 obj-$(CONFIG_KVM) += kvm/
 
 # Xen paravirtualization support

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index c46662f..8f2e665 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig

@@ -303,6 +303,9 @@
 config FIX_EARLYCON_MEM
 	def_bool y
 
+config DEBUG_RODATA
+	def_bool y
+
 config PGTABLE_LEVELS
 	int
 	default 4 if X86_64
@@ -1160,22 +1163,23 @@
 	bool "CPU microcode loading support"
 	default y
 	depends on CPU_SUP_AMD || CPU_SUP_INTEL
-	depends on BLK_DEV_INITRD
 	select FW_LOADER
 	---help---
-
 	  If you say Y here, you will be able to update the microcode on
-	  certain Intel and AMD processors. The Intel support is for the
-	  IA32 family, e.g. Pentium Pro, Pentium II, Pentium III, Pentium 4,
-	  Xeon etc. The AMD support is for families 0x10 and later. You will
-	  obviously need the actual microcode binary data itself which is not
-	  shipped with the Linux kernel.
+	  Intel and AMD processors. The Intel support is for the IA32 family,
+	  e.g. Pentium Pro, Pentium II, Pentium III, Pentium 4, Xeon etc. The
+	  AMD support is for families 0x10 and later. You will obviously need
+	  the actual microcode binary data itself which is not shipped with
+	  the Linux kernel.
 
-	  This option selects the general module only, you need to select
-	  at least one vendor specific module as well.
+	  The preferred method to load microcode from a detached initrd is described
+	  in Documentation/x86/early-microcode.txt. For that you need to enable
+	  CONFIG_BLK_DEV_INITRD in order for the loader to be able to scan the
+	  initrd for microcode blobs.
 
-	  To compile this driver as a module, choose M here: the module
-	  will be called microcode.
+	  In addition, you can build-in the microcode into the kernel. For that you
+	  need to enable FIRMWARE_IN_KERNEL and add the vendor-supplied microcode
+	  to the CONFIG_EXTRA_FIRMWARE config option.
 
 config MICROCODE_INTEL
 	bool "Intel microcode loading support"

diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 9b18ed9..67eec55 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug

@@ -74,28 +74,16 @@
 	  issues with the mapping of the EFI runtime regions into that
 	  table.
 
-config DEBUG_RODATA
-	bool "Write protect kernel read-only data structures"
-	default y
-	depends on DEBUG_KERNEL
-	---help---
-	  Mark the kernel read-only data as write-protected in the pagetables,
-	  in order to catch accidental (and incorrect) writes to such const
-	  data. This is recommended so that we can catch kernel bugs sooner.
-	  If in doubt, say "Y".
-
 config DEBUG_RODATA_TEST
-	bool "Testcase for the DEBUG_RODATA feature"
-	depends on DEBUG_RODATA
+	bool "Testcase for the marking rodata read-only"
 	default y
 	---help---
-	  This option enables a testcase for the DEBUG_RODATA
-	  feature as well as for the change_page_attr() infrastructure.
+	  This option enables a testcase for the setting rodata read-only
+	  as well as for the change_page_attr() infrastructure.
 	  If in doubt, say "N"
 
 config DEBUG_WX
 	bool "Warn on W+X mappings at boot"
-	depends on DEBUG_RODATA
 	select X86_PTDUMP_CORE
 	---help---
 	  Generate a warning if any W+X mappings are found at boot.
@@ -350,16 +338,6 @@
 
 	  If unsure say N here.
 
-config X86_DEBUG_STATIC_CPU_HAS
-	bool "Debug alternatives"
-	depends on DEBUG_KERNEL
-	---help---
-	  This option causes additional code to be generated which
-	  fails if static_cpu_has() is used before alternatives have
-	  run.
-
-	  If unsure, say N.
-
 config X86_DEBUG_FPU
 	bool "Debug the x86 FPU code"
 	depends on DEBUG_KERNEL

diff --git a/arch/x86/boot/cpuflags.h b/arch/x86/boot/cpuflags.h
index ea97697..4cb404f 100644
--- a/arch/x86/boot/cpuflags.h
+++ b/arch/x86/boot/cpuflags.h

@@ -1,7 +1,7 @@
 #ifndef BOOT_CPUFLAGS_H
 #define BOOT_CPUFLAGS_H
 
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/processor-flags.h>
 
 struct cpu_features {

diff --git a/arch/x86/boot/mkcpustr.c b/arch/x86/boot/mkcpustr.c
index 637097e..f72498d 100644
--- a/arch/x86/boot/mkcpustr.c
+++ b/arch/x86/boot/mkcpustr.c

@@ -17,7 +17,7 @@
 
 #include "../include/asm/required-features.h"
 #include "../include/asm/disabled-features.h"
-#include "../include/asm/cpufeature.h"
+#include "../include/asm/cpufeatures.h"
 #include "../kernel/cpu/capflags.c"
 
 int main(void)

diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c
index a7661c4..0702d25 100644
--- a/arch/x86/boot/tools/build.c
+++ b/arch/x86/boot/tools/build.c

@@ -49,7 +49,6 @@
 
 /* This must be large enough to hold the entire setup */
 u8 buf[SETUP_SECT_MAX*512];
-int is_big_kernel;
 
 #define PECOFF_RELOC_RESERVE 0x20
 

diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 028be48..e25a163 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig

@@ -288,7 +288,7 @@
 CONFIG_NLS_UTF8=y
 CONFIG_PRINTK_TIME=y
 # CONFIG_ENABLE_WARN_DEPRECATED is not set
-CONFIG_FRAME_WARN=2048
+CONFIG_FRAME_WARN=1024
 CONFIG_MAGIC_SYSRQ=y
 # CONFIG_UNUSED_SYMBOLS is not set
 CONFIG_DEBUG_KERNEL=y

diff --git a/arch/x86/crypto/crc32-pclmul_glue.c b/arch/x86/crypto/crc32-pclmul_glue.c
index 07d2c6c..27226df 100644
--- a/arch/x86/crypto/crc32-pclmul_glue.c
+++ b/arch/x86/crypto/crc32-pclmul_glue.c

@@ -33,7 +33,7 @@
 #include <linux/crc32.h>
 #include <crypto/internal/hash.h>
 
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/cpu_device_id.h>
 #include <asm/fpu/api.h>
 

diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c
index 0e98716..0857b1a 100644
--- a/arch/x86/crypto/crc32c-intel_glue.c
+++ b/arch/x86/crypto/crc32c-intel_glue.c

@@ -30,7 +30,7 @@
 #include <linux/kernel.h>
 #include <crypto/internal/hash.h>
 
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/cpu_device_id.h>
 #include <asm/fpu/internal.h>
 

diff --git a/arch/x86/crypto/crct10dif-pclmul_glue.c b/arch/x86/crypto/crct10dif-pclmul_glue.c
index a3fcfc9..cd4df93 100644
--- a/arch/x86/crypto/crct10dif-pclmul_glue.c
+++ b/arch/x86/crypto/crct10dif-pclmul_glue.c

@@ -30,7 +30,7 @@
 #include <linux/string.h>
 #include <linux/kernel.h>
 #include <asm/fpu/api.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/cpu_device_id.h>
 
 asmlinkage __u16 crc_t10dif_pcl(__u16 crc, const unsigned char *buf,

diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index e32206e..9a9e588 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h

@@ -201,37 +201,6 @@
 	.byte 0xf1
 	.endm
 
-#else /* CONFIG_X86_64 */
-
-/*
- * For 32bit only simplified versions of SAVE_ALL/RESTORE_ALL. These
- * are different from the entry_32.S versions in not changing the segment
- * registers. So only suitable for in kernel use, not when transitioning
- * from or to user space. The resulting stack frame is not a standard
- * pt_regs frame. The main use case is calling C code from assembler
- * when all the registers need to be preserved.
- */
-
-	.macro SAVE_ALL
-	pushl %eax
-	pushl %ebp
-	pushl %edi
-	pushl %esi
-	pushl %edx
-	pushl %ecx
-	pushl %ebx
-	.endm
-
-	.macro RESTORE_ALL
-	popl %ebx
-	popl %ecx
-	popl %edx
-	popl %esi
-	popl %edi
-	popl %ebp
-	popl %eax
-	.endm
-
 #endif /* CONFIG_X86_64 */
 
 /*

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 0366374..e79d93d 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c

@@ -26,6 +26,7 @@
 #include <asm/traps.h>
 #include <asm/vdso.h>
 #include <asm/uaccess.h>
+#include <asm/cpufeature.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/syscalls.h>
@@ -44,6 +45,8 @@
 	CT_WARN_ON(ct_state() != CONTEXT_USER);
 	user_exit();
 }
+#else
+static inline void enter_from_user_mode(void) {}
 #endif
 
 static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch)
@@ -84,17 +87,6 @@
 
 	work = ACCESS_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY;
 
-#ifdef CONFIG_CONTEXT_TRACKING
-	/*
-	 * If TIF_NOHZ is set, we are required to call user_exit() before
-	 * doing anything that could touch RCU.
-	 */
-	if (work & _TIF_NOHZ) {
-		enter_from_user_mode();
-		work &= ~_TIF_NOHZ;
-	}
-#endif
-
 #ifdef CONFIG_SECCOMP
 	/*
 	 * Do seccomp first -- it should minimize exposure of other
@@ -171,16 +163,6 @@
 	if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
 		BUG_ON(regs != task_pt_regs(current));
 
-	/*
-	 * If we stepped into a sysenter/syscall insn, it trapped in
-	 * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP.
-	 * If user-mode had set TF itself, then it's still clear from
-	 * do_debug() and we need to set it again to restore the user
-	 * state.  If we entered on the slow path, TF was already set.
-	 */
-	if (work & _TIF_SINGLESTEP)
-		regs->flags |= X86_EFLAGS_TF;
-
 #ifdef CONFIG_SECCOMP
 	/*
 	 * Call seccomp_phase2 before running the other hooks so that
@@ -268,6 +250,7 @@
 /* Called with IRQs disabled. */
 __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
 {
+	struct thread_info *ti = pt_regs_to_thread_info(regs);
 	u32 cached_flags;
 
 	if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!irqs_disabled()))
@@ -275,12 +258,22 @@
 
 	lockdep_sys_exit();
 
-	cached_flags =
-		READ_ONCE(pt_regs_to_thread_info(regs)->flags);
+	cached_flags = READ_ONCE(ti->flags);
 
 	if (unlikely(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS))
 		exit_to_usermode_loop(regs, cached_flags);
 
+#ifdef CONFIG_COMPAT
+	/*
+	 * Compat syscalls set TS_COMPAT.  Make sure we clear it before
+	 * returning to user mode.  We need to clear it *after* signal
+	 * handling, because syscall restart has a fixup for compat
+	 * syscalls.  The fixup is exercised by the ptrace_syscall_32
+	 * selftest.
+	 */
+	ti->status &= ~TS_COMPAT;
+#endif
+
 	user_enter();
 }
 
@@ -332,33 +325,45 @@
 	if (unlikely(cached_flags & SYSCALL_EXIT_WORK_FLAGS))
 		syscall_slow_exit_work(regs, cached_flags);
 
-#ifdef CONFIG_COMPAT
-	/*
-	 * Compat syscalls set TS_COMPAT.  Make sure we clear it before
-	 * returning to user mode.
-	 */
-	ti->status &= ~TS_COMPAT;
-#endif
-
 	local_irq_disable();
 	prepare_exit_to_usermode(regs);
 }
 
+#ifdef CONFIG_X86_64
+__visible void do_syscall_64(struct pt_regs *regs)
+{
+	struct thread_info *ti = pt_regs_to_thread_info(regs);
+	unsigned long nr = regs->orig_ax;
+
+	enter_from_user_mode();
+	local_irq_enable();
+
+	if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY)
+		nr = syscall_trace_enter(regs);
+
+	/*
+	 * NB: Native and x32 syscalls are dispatched from the same
+	 * table.  The only functional difference is the x32 bit in
+	 * regs->orig_ax, which changes the behavior of some syscalls.
+	 */
+	if (likely((nr & __SYSCALL_MASK) < NR_syscalls)) {
+		regs->ax = sys_call_table[nr & __SYSCALL_MASK](
+			regs->di, regs->si, regs->dx,
+			regs->r10, regs->r8, regs->r9);
+	}
+
+	syscall_return_slowpath(regs);
+}
+#endif
+
 #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
 /*
- * Does a 32-bit syscall.  Called with IRQs on and does all entry and
- * exit work and returns with IRQs off.  This function is extremely hot
- * in workloads that use it, and it's usually called from
+ * Does a 32-bit syscall.  Called with IRQs on in CONTEXT_KERNEL.  Does
+ * all entry and exit work and returns with IRQs off.  This function is
+ * extremely hot in workloads that use it, and it's usually called from
  * do_fast_syscall_32, so forcibly inline it to improve performance.
  */
-#ifdef CONFIG_X86_32
-/* 32-bit kernels use a trap gate for INT80, and the asm code calls here. */
-__visible
-#else
-/* 64-bit kernels use do_syscall_32_irqs_off() instead. */
-static
-#endif
-__always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
+static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
 {
 	struct thread_info *ti = pt_regs_to_thread_info(regs);
 	unsigned int nr = (unsigned int)regs->orig_ax;
@@ -393,14 +398,13 @@
 	syscall_return_slowpath(regs);
 }
 
-#ifdef CONFIG_X86_64
-/* Handles INT80 on 64-bit kernels */
-__visible void do_syscall_32_irqs_off(struct pt_regs *regs)
+/* Handles int $0x80 */
+__visible void do_int80_syscall_32(struct pt_regs *regs)
 {
+	enter_from_user_mode();
 	local_irq_enable();
 	do_syscall_32_irqs_on(regs);
 }
-#endif
 
 /* Returns 0 to return using IRET or 1 to return using SYSEXIT/SYSRETL. */
 __visible long do_fast_syscall_32(struct pt_regs *regs)
@@ -420,12 +424,11 @@
 	 */
 	regs->ip = landing_pad;
 
-	/*
-	 * Fetch EBP from where the vDSO stashed it.
-	 *
-	 * WARNING: We are in CONTEXT_USER and RCU isn't paying attention!
-	 */
+	enter_from_user_mode();
+
 	local_irq_enable();
+
+	/* Fetch EBP from where the vDSO stashed it. */
 	if (
 #ifdef CONFIG_X86_64
 		/*
@@ -443,9 +446,6 @@
 		/* User code screwed up. */
 		local_irq_disable();
 		regs->ax = -EFAULT;
-#ifdef CONFIG_CONTEXT_TRACKING
-		enter_from_user_mode();
-#endif
 		prepare_exit_to_usermode(regs);
 		return 0;	/* Keep it simple: use IRET. */
 	}

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 77d8c51..10868aa 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S

@@ -40,7 +40,7 @@
 #include <asm/processor-flags.h>
 #include <asm/ftrace.h>
 #include <asm/irq_vectors.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/alternative-asm.h>
 #include <asm/asm.h>
 #include <asm/smap.h>
@@ -287,7 +287,58 @@
 END(resume_kernel)
 #endif
 
-	# SYSENTER  call handler stub
+GLOBAL(__begin_SYSENTER_singlestep_region)
+/*
+ * All code from here through __end_SYSENTER_singlestep_region is subject
+ * to being single-stepped if a user program sets TF and executes SYSENTER.
+ * There is absolutely nothing that we can do to prevent this from happening
+ * (thanks Intel!).  To keep our handling of this situation as simple as
+ * possible, we handle TF just like AC and NT, except that our #DB handler
+ * will ignore all of the single-step traps generated in this range.
+ */
+
+#ifdef CONFIG_XEN
+/*
+ * Xen doesn't set %esp to be precisely what the normal SYSENTER
+ * entry point expects, so fix it up before using the normal path.
+ */
+ENTRY(xen_sysenter_target)
+	addl	$5*4, %esp			/* remove xen-provided frame */
+	jmp	sysenter_past_esp
+#endif
+
+/*
+ * 32-bit SYSENTER entry.
+ *
+ * 32-bit system calls through the vDSO's __kernel_vsyscall enter here
+ * if X86_FEATURE_SEP is available.  This is the preferred system call
+ * entry on 32-bit systems.
+ *
+ * The SYSENTER instruction, in principle, should *only* occur in the
+ * vDSO.  In practice, a small number of Android devices were shipped
+ * with a copy of Bionic that inlined a SYSENTER instruction.  This
+ * never happened in any of Google's Bionic versions -- it only happened
+ * in a narrow range of Intel-provided versions.
+ *
+ * SYSENTER loads SS, ESP, CS, and EIP from previously programmed MSRs.
+ * IF and VM in RFLAGS are cleared (IOW: interrupts are off).
+ * SYSENTER does not save anything on the stack,
+ * and does not save old EIP (!!!), ESP, or EFLAGS.
+ *
+ * To avoid losing track of EFLAGS.VM (and thus potentially corrupting
+ * user and/or vm86 state), we explicitly disable the SYSENTER
+ * instruction in vm86 mode by reprogramming the MSRs.
+ *
+ * Arguments:
+ * eax  system call number
+ * ebx  arg1
+ * ecx  arg2
+ * edx  arg3
+ * esi  arg4
+ * edi  arg5
+ * ebp  user stack
+ * 0(%ebp) arg6
+ */
 ENTRY(entry_SYSENTER_32)
 	movl	TSS_sysenter_sp0(%esp), %esp
 sysenter_past_esp:
@@ -301,6 +352,29 @@
 	SAVE_ALL pt_regs_ax=$-ENOSYS	/* save rest */
 
 	/*
+	 * SYSENTER doesn't filter flags, so we need to clear NT, AC
+	 * and TF ourselves.  To save a few cycles, we can check whether
+	 * either was set instead of doing an unconditional popfq.
+	 * This needs to happen before enabling interrupts so that
+	 * we don't get preempted with NT set.
+	 *
+	 * If TF is set, we will single-step all the way to here -- do_debug
+	 * will ignore all the traps.  (Yes, this is slow, but so is
+	 * single-stepping in general.  This allows us to avoid having
+	 * a more complicated code to handle the case where a user program
+	 * forces us to single-step through the SYSENTER entry code.)
+	 *
+	 * NB.: .Lsysenter_fix_flags is a label with the code under it moved
+	 * out-of-line as an optimization: NT is unlikely to be set in the
+	 * majority of the cases and instead of polluting the I$ unnecessarily,
+	 * we're keeping that code behind a branch which will predict as
+	 * not-taken and therefore its instructions won't be fetched.
+	 */
+	testl	$X86_EFLAGS_NT|X86_EFLAGS_AC|X86_EFLAGS_TF, PT_EFLAGS(%esp)
+	jnz	.Lsysenter_fix_flags
+.Lsysenter_flags_fixed:
+
+	/*
 	 * User mode is traced as though IRQs are on, and SYSENTER
 	 * turned them off.
 	 */
@@ -326,6 +400,15 @@
 	popl	%eax			/* pt_regs->ax */
 
 	/*
+	 * Restore all flags except IF. (We restore IF separately because
+	 * STI gives a one-instruction window in which we won't be interrupted,
+	 * whereas POPF does not.)
+	 */
+	addl	$PT_EFLAGS-PT_DS, %esp	/* point esp at pt_regs->flags */
+	btr	$X86_EFLAGS_IF_BIT, (%esp)
+	popfl
+
+	/*
 	 * Return back to the vDSO, which will pop ecx and edx.
 	 * Don't bother with DS and ES (they already contain __USER_DS).
 	 */
@@ -338,28 +421,63 @@
 .popsection
 	_ASM_EXTABLE(1b, 2b)
 	PTGS_TO_GS_EX
+
+.Lsysenter_fix_flags:
+	pushl	$X86_EFLAGS_FIXED
+	popfl
+	jmp	.Lsysenter_flags_fixed
+GLOBAL(__end_SYSENTER_singlestep_region)
 ENDPROC(entry_SYSENTER_32)
 
-	# system call handler stub
+/*
+ * 32-bit legacy system call entry.
+ *
+ * 32-bit x86 Linux system calls traditionally used the INT $0x80
+ * instruction.  INT $0x80 lands here.
+ *
+ * This entry point can be used by any 32-bit perform system calls.
+ * Instances of INT $0x80 can be found inline in various programs and
+ * libraries.  It is also used by the vDSO's __kernel_vsyscall
+ * fallback for hardware that doesn't support a faster entry method.
+ * Restarted 32-bit system calls also fall back to INT $0x80
+ * regardless of what instruction was originally used to do the system
+ * call.  (64-bit programs can use INT $0x80 as well, but they can
+ * only run on 64-bit kernels and therefore land in
+ * entry_INT80_compat.)
+ *
+ * This is considered a slow path.  It is not used by most libc
+ * implementations on modern hardware except during process startup.
+ *
+ * Arguments:
+ * eax  system call number
+ * ebx  arg1
+ * ecx  arg2
+ * edx  arg3
+ * esi  arg4
+ * edi  arg5
+ * ebp  arg6
+ */
 ENTRY(entry_INT80_32)
 	ASM_CLAC
 	pushl	%eax			/* pt_regs->orig_ax */
 	SAVE_ALL pt_regs_ax=$-ENOSYS	/* save rest */
 
 	/*
-	 * User mode is traced as though IRQs are on.  Unlike the 64-bit
-	 * case, INT80 is a trap gate on 32-bit kernels, so interrupts
-	 * are already on (unless user code is messing around with iopl).
+	 * User mode is traced as though IRQs are on, and the interrupt gate
+	 * turned them off.
 	 */
+	TRACE_IRQS_OFF
 
 	movl	%esp, %eax
-	call	do_syscall_32_irqs_on
+	call	do_int80_syscall_32
 .Lsyscall_32_done:
 
 restore_all:
 	TRACE_IRQS_IRET
 restore_all_notrace:
 #ifdef CONFIG_X86_ESPFIX32
+	ALTERNATIVE	"jmp restore_nocheck", "", X86_BUG_ESPFIX
+
 	movl	PT_EFLAGS(%esp), %eax		# mix EFLAGS, SS and CS
 	/*
 	 * Warning: PT_OLDSS(%esp) contains the wrong/random values if we
@@ -386,19 +504,6 @@
 
 #ifdef CONFIG_X86_ESPFIX32
 ldt_ss:
-#ifdef CONFIG_PARAVIRT
-	/*
-	 * The kernel can't run on a non-flat stack if paravirt mode
-	 * is active.  Rather than try to fixup the high bits of
-	 * ESP, bypass this code entirely.  This may break DOSemu
-	 * and/or Wine support in a paravirt VM, although the option
-	 * is still available to implement the setting of the high
-	 * 16-bits in the INTERRUPT_RETURN paravirt-op.
-	 */
-	cmpl	$0, pv_info+PARAVIRT_enabled
-	jne	restore_nocheck
-#endif
-
 /*
  * Setup and switch to ESPFIX stack
  *
@@ -631,14 +736,6 @@
 END(spurious_interrupt_bug)
 
 #ifdef CONFIG_XEN
-/*
- * Xen doesn't set %esp to be precisely what the normal SYSENTER
- * entry point expects, so fix it up before using the normal path.
- */
-ENTRY(xen_sysenter_target)
-	addl	$5*4, %esp			/* remove xen-provided frame */
-	jmp	sysenter_past_esp
-
 ENTRY(xen_hypervisor_callback)
 	pushl	$-1				/* orig_ax = -1 => not a system call */
 	SAVE_ALL
@@ -938,51 +1035,48 @@
 	jmp	ret_from_exception
 END(page_fault)
 
-/*
- * Debug traps and NMI can happen at the one SYSENTER instruction
- * that sets up the real kernel stack. Check here, since we can't
- * allow the wrong stack to be used.
- *
- * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have
- * already pushed 3 words if it hits on the sysenter instruction:
- * eflags, cs and eip.
- *
- * We just load the right stack, and push the three (known) values
- * by hand onto the new stack - while updating the return eip past
- * the instruction that would have done it for sysenter.
- */
-.macro FIX_STACK offset ok label
-	cmpw	$__KERNEL_CS, 4(%esp)
-	jne	\ok
-\label:
-	movl	TSS_sysenter_sp0 + \offset(%esp), %esp
-	pushfl
-	pushl	$__KERNEL_CS
-	pushl	$sysenter_past_esp
-.endm
-
 ENTRY(debug)
+	/*
+	 * #DB can happen at the first instruction of
+	 * entry_SYSENTER_32 or in Xen's SYSENTER prologue.  If this
+	 * happens, then we will be running on a very small stack.  We
+	 * need to detect this condition and switch to the thread
+	 * stack before calling any C code at all.
+	 *
+	 * If you edit this code, keep in mind that NMIs can happen in here.
+	 */
 	ASM_CLAC
-	cmpl	$entry_SYSENTER_32, (%esp)
-	jne	debug_stack_correct
-	FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn
-debug_stack_correct:
 	pushl	$-1				# mark this as an int
 	SAVE_ALL
-	TRACE_IRQS_OFF
 	xorl	%edx, %edx			# error code 0
 	movl	%esp, %eax			# pt_regs pointer
+
+	/* Are we currently on the SYSENTER stack? */
+	PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
+	subl	%eax, %ecx	/* ecx = (end of SYSENTER_stack) - esp */
+	cmpl	$SIZEOF_SYSENTER_stack, %ecx
+	jb	.Ldebug_from_sysenter_stack
+
+	TRACE_IRQS_OFF
 	call	do_debug
 	jmp	ret_from_exception
+
+.Ldebug_from_sysenter_stack:
+	/* We're on the SYSENTER stack.  Switch off. */
+	movl	%esp, %ebp
+	movl	PER_CPU_VAR(cpu_current_top_of_stack), %esp
+	TRACE_IRQS_OFF
+	call	do_debug
+	movl	%ebp, %esp
+	jmp	ret_from_exception
 END(debug)
 
 /*
- * NMI is doubly nasty. It can happen _while_ we're handling
- * a debug fault, and the debug fault hasn't yet been able to
- * clear up the stack. So we first check whether we got  an
- * NMI on the sysenter entry path, but after that we need to
- * check whether we got an NMI on the debug path where the debug
- * fault happened on the sysenter path.
+ * NMI is doubly nasty.  It can happen on the first instruction of
+ * entry_SYSENTER_32 (just like #DB), but it can also interrupt the beginning
+ * of the #DB handler even if that #DB in turn hit before entry_SYSENTER_32
+ * switched stacks.  We handle both conditions by simply checking whether we
+ * interrupted kernel code running on the SYSENTER stack.
  */
 ENTRY(nmi)
 	ASM_CLAC
@@ -993,41 +1087,32 @@
 	popl	%eax
 	je	nmi_espfix_stack
 #endif
-	cmpl	$entry_SYSENTER_32, (%esp)
-	je	nmi_stack_fixup
-	pushl	%eax
-	movl	%esp, %eax
-	/*
-	 * Do not access memory above the end of our stack page,
-	 * it might not exist.
-	 */
-	andl	$(THREAD_SIZE-1), %eax
-	cmpl	$(THREAD_SIZE-20), %eax
-	popl	%eax
-	jae	nmi_stack_correct
-	cmpl	$entry_SYSENTER_32, 12(%esp)
-	je	nmi_debug_stack_check
-nmi_stack_correct:
-	pushl	%eax
+
+	pushl	%eax				# pt_regs->orig_ax
 	SAVE_ALL
 	xorl	%edx, %edx			# zero error code
 	movl	%esp, %eax			# pt_regs pointer
+
+	/* Are we currently on the SYSENTER stack? */
+	PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
+	subl	%eax, %ecx	/* ecx = (end of SYSENTER_stack) - esp */
+	cmpl	$SIZEOF_SYSENTER_stack, %ecx
+	jb	.Lnmi_from_sysenter_stack
+
+	/* Not on SYSENTER stack. */
 	call	do_nmi
 	jmp	restore_all_notrace
 
-nmi_stack_fixup:
-	FIX_STACK 12, nmi_stack_correct, 1
-	jmp	nmi_stack_correct
-
-nmi_debug_stack_check:
-	cmpw	$__KERNEL_CS, 16(%esp)
-	jne	nmi_stack_correct
-	cmpl	$debug, (%esp)
-	jb	nmi_stack_correct
-	cmpl	$debug_esp_fix_insn, (%esp)
-	ja	nmi_stack_correct
-	FIX_STACK 24, nmi_stack_correct, 1
-	jmp	nmi_stack_correct
+.Lnmi_from_sysenter_stack:
+	/*
+	 * We're on the SYSENTER stack.  Switch off.  No one (not even debug)
+	 * is using the thread stack right now, so it's safe for us to use it.
+	 */
+	movl	%esp, %ebp
+	movl	PER_CPU_VAR(cpu_current_top_of_stack), %esp
+	call	do_nmi
+	movl	%ebp, %esp
+	jmp	restore_all_notrace
 
 #ifdef CONFIG_X86_ESPFIX32
 nmi_espfix_stack:

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 9d34d3c..858b555 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S

@@ -103,6 +103,16 @@
 /*
  * 64-bit SYSCALL instruction entry. Up to 6 arguments in registers.
  *
+ * This is the only entry point used for 64-bit system calls.  The
+ * hardware interface is reasonably well designed and the register to
+ * argument mapping Linux uses fits well with the registers that are
+ * available when SYSCALL is used.
+ *
+ * SYSCALL instructions can be found inlined in libc implementations as
+ * well as some other programs and libraries.  There are also a handful
+ * of SYSCALL instructions in the vDSO used, for example, as a
+ * clock_gettimeofday fallback.
+ *
  * 64-bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11,
  * then loads new ss, cs, and rip from previously programmed MSRs.
  * rflags gets masked by a value from another MSR (so CLD and CLAC
@@ -145,17 +155,11 @@
 	movq	%rsp, PER_CPU_VAR(rsp_scratch)
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 
+	TRACE_IRQS_OFF
+
 	/* Construct struct pt_regs on stack */
 	pushq	$__USER_DS			/* pt_regs->ss */
 	pushq	PER_CPU_VAR(rsp_scratch)	/* pt_regs->sp */
-	/*
-	 * Re-enable interrupts.
-	 * We use 'rsp_scratch' as a scratch space, hence irq-off block above
-	 * must execute atomically in the face of possible interrupt-driven
-	 * task preemption. We must enable interrupts only after we're done
-	 * with using rsp_scratch:
-	 */
-	ENABLE_INTERRUPTS(CLBR_NONE)
 	pushq	%r11				/* pt_regs->flags */
 	pushq	$__USER_CS			/* pt_regs->cs */
 	pushq	%rcx				/* pt_regs->ip */
@@ -171,9 +175,21 @@
 	pushq	%r11				/* pt_regs->r11 */
 	sub	$(6*8), %rsp			/* pt_regs->bp, bx, r12-15 not saved */
 
-	testl	$_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
-	jnz	tracesys
+	/*
+	 * If we need to do entry work or if we guess we'll need to do
+	 * exit work, go straight to the slow path.
+	 */
+	testl	$_TIF_WORK_SYSCALL_ENTRY|_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
+	jnz	entry_SYSCALL64_slow_path
+
 entry_SYSCALL_64_fastpath:
+	/*
+	 * Easy case: enable interrupts and issue the syscall.  If the syscall
+	 * needs pt_regs, we'll call a stub that disables interrupts again
+	 * and jumps to the slow path.
+	 */
+	TRACE_IRQS_ON
+	ENABLE_INTERRUPTS(CLBR_NONE)
 #if __SYSCALL_MASK == ~0
 	cmpq	$__NR_syscall_max, %rax
 #else
@@ -182,103 +198,56 @@
 #endif
 	ja	1f				/* return -ENOSYS (already in pt_regs->ax) */
 	movq	%r10, %rcx
+
+	/*
+	 * This call instruction is handled specially in stub_ptregs_64.
+	 * It might end up jumping to the slow path.  If it jumps, RAX
+	 * and all argument registers are clobbered.
+	 */
 	call	*sys_call_table(, %rax, 8)
+.Lentry_SYSCALL_64_after_fastpath_call:
+
 	movq	%rax, RAX(%rsp)
 1:
-/*
- * Syscall return path ending with SYSRET (fast path).
- * Has incompletely filled pt_regs.
- */
-	LOCKDEP_SYS_EXIT
+
 	/*
-	 * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
-	 * it is too small to ever cause noticeable irq latency.
+	 * If we get here, then we know that pt_regs is clean for SYSRET64.
+	 * If we see that no exit work is required (which we are required
+	 * to check with IRQs off), then we can go straight to SYSRET64.
 	 */
 	DISABLE_INTERRUPTS(CLBR_NONE)
-
-	/*
-	 * We must check ti flags with interrupts (or at least preemption)
-	 * off because we must *never* return to userspace without
-	 * processing exit work that is enqueued if we're preempted here.
-	 * In particular, returning to userspace with any of the one-shot
-	 * flags (TIF_NOTIFY_RESUME, TIF_USER_RETURN_NOTIFY, etc) set is
-	 * very bad.
-	 */
+	TRACE_IRQS_OFF
 	testl	$_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
-	jnz	int_ret_from_sys_call_irqs_off	/* Go to the slow path */
+	jnz	1f
 
-	RESTORE_C_REGS_EXCEPT_RCX_R11
+	LOCKDEP_SYS_EXIT
+	TRACE_IRQS_ON		/* user mode is traced as IRQs on */
 	movq	RIP(%rsp), %rcx
 	movq	EFLAGS(%rsp), %r11
+	RESTORE_C_REGS_EXCEPT_RCX_R11
 	movq	RSP(%rsp), %rsp
-	/*
-	 * 64-bit SYSRET restores rip from rcx,
-	 * rflags from r11 (but RF and VM bits are forced to 0),
-	 * cs and ss are loaded from MSRs.
-	 * Restoration of rflags re-enables interrupts.
-	 *
-	 * NB: On AMD CPUs with the X86_BUG_SYSRET_SS_ATTRS bug, the ss
-	 * descriptor is not reinitialized.  This means that we should
-	 * avoid SYSRET with SS == NULL, which could happen if we schedule,
-	 * exit the kernel, and re-enter using an interrupt vector.  (All
-	 * interrupt entries on x86_64 set SS to NULL.)  We prevent that
-	 * from happening by reloading SS in __switch_to.  (Actually
-	 * detecting the failure in 64-bit userspace is tricky but can be
-	 * done.)
-	 */
 	USERGS_SYSRET64
 
-GLOBAL(int_ret_from_sys_call_irqs_off)
+1:
+	/*
+	 * The fast path looked good when we started, but something changed
+	 * along the way and we need to switch to the slow path.  Calling
+	 * raise(3) will trigger this, for example.  IRQs are off.
+	 */
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
-	jmp int_ret_from_sys_call
-
-	/* Do syscall entry tracing */
-tracesys:
-	movq	%rsp, %rdi
-	movl	$AUDIT_ARCH_X86_64, %esi
-	call	syscall_trace_enter_phase1
-	test	%rax, %rax
-	jnz	tracesys_phase2			/* if needed, run the slow path */
-	RESTORE_C_REGS_EXCEPT_RAX		/* else restore clobbered regs */
-	movq	ORIG_RAX(%rsp), %rax
-	jmp	entry_SYSCALL_64_fastpath	/* and return to the fast path */
-
-tracesys_phase2:
-	SAVE_EXTRA_REGS
-	movq	%rsp, %rdi
-	movl	$AUDIT_ARCH_X86_64, %esi
-	movq	%rax, %rdx
-	call	syscall_trace_enter_phase2
-
-	/*
-	 * Reload registers from stack in case ptrace changed them.
-	 * We don't reload %rax because syscall_trace_entry_phase2() returned
-	 * the value it wants us to use in the table lookup.
-	 */
-	RESTORE_C_REGS_EXCEPT_RAX
-	RESTORE_EXTRA_REGS
-#if __SYSCALL_MASK == ~0
-	cmpq	$__NR_syscall_max, %rax
-#else
-	andl	$__SYSCALL_MASK, %eax
-	cmpl	$__NR_syscall_max, %eax
-#endif
-	ja	1f				/* return -ENOSYS (already in pt_regs->ax) */
-	movq	%r10, %rcx			/* fixup for C */
-	call	*sys_call_table(, %rax, 8)
-	movq	%rax, RAX(%rsp)
-1:
-	/* Use IRET because user could have changed pt_regs->foo */
-
-/*
- * Syscall return path ending with IRET.
- * Has correct iret frame.
- */
-GLOBAL(int_ret_from_sys_call)
 	SAVE_EXTRA_REGS
 	movq	%rsp, %rdi
 	call	syscall_return_slowpath	/* returns with IRQs disabled */
+	jmp	return_from_SYSCALL_64
+
+entry_SYSCALL64_slow_path:
+	/* IRQs are off. */
+	SAVE_EXTRA_REGS
+	movq	%rsp, %rdi
+	call	do_syscall_64		/* returns with IRQs disabled */
+
+return_from_SYSCALL_64:
 	RESTORE_EXTRA_REGS
 	TRACE_IRQS_IRETQ		/* we're about to change IF */
 
@@ -355,83 +324,45 @@
 	jmp	restore_c_regs_and_iret
 END(entry_SYSCALL_64)
 
-
-	.macro FORK_LIKE func
-ENTRY(stub_\func)
-	SAVE_EXTRA_REGS 8
-	jmp	sys_\func
-END(stub_\func)
-	.endm
-
-	FORK_LIKE  clone
-	FORK_LIKE  fork
-	FORK_LIKE  vfork
-
-ENTRY(stub_execve)
-	call	sys_execve
-return_from_execve:
-	testl	%eax, %eax
-	jz	1f
-	/* exec failed, can use fast SYSRET code path in this case */
-	ret
-1:
-	/* must use IRET code path (pt_regs->cs may have changed) */
-	addq	$8, %rsp
-	ZERO_EXTRA_REGS
-	movq	%rax, RAX(%rsp)
-	jmp	int_ret_from_sys_call
-END(stub_execve)
-/*
- * Remaining execve stubs are only 7 bytes long.
- * ENTRY() often aligns to 16 bytes, which in this case has no benefits.
- */
-	.align	8
-GLOBAL(stub_execveat)
-	call	sys_execveat
-	jmp	return_from_execve
-END(stub_execveat)
-
-#if defined(CONFIG_X86_X32_ABI)
-	.align	8
-GLOBAL(stub_x32_execve)
-	call	compat_sys_execve
-	jmp	return_from_execve
-END(stub_x32_execve)
-	.align	8
-GLOBAL(stub_x32_execveat)
-	call	compat_sys_execveat
-	jmp	return_from_execve
-END(stub_x32_execveat)
-#endif
-
-/*
- * sigreturn is special because it needs to restore all registers on return.
- * This cannot be done with SYSRET, so use the IRET return path instead.
- */
-ENTRY(stub_rt_sigreturn)
+ENTRY(stub_ptregs_64)
 	/*
-	 * SAVE_EXTRA_REGS result is not normally needed:
-	 * sigreturn overwrites all pt_regs->GPREGS.
-	 * But sigreturn can fail (!), and there is no easy way to detect that.
-	 * To make sure RESTORE_EXTRA_REGS doesn't restore garbage on error,
-	 * we SAVE_EXTRA_REGS here.
+	 * Syscalls marked as needing ptregs land here.
+	 * If we are on the fast path, we need to save the extra regs,
+	 * which we achieve by trying again on the slow path.  If we are on
+	 * the slow path, the extra regs are already saved.
+	 *
+	 * RAX stores a pointer to the C function implementing the syscall.
+	 * IRQs are on.
 	 */
-	SAVE_EXTRA_REGS 8
-	call	sys_rt_sigreturn
-return_from_stub:
-	addq	$8, %rsp
-	RESTORE_EXTRA_REGS
-	movq	%rax, RAX(%rsp)
-	jmp	int_ret_from_sys_call
-END(stub_rt_sigreturn)
+	cmpq	$.Lentry_SYSCALL_64_after_fastpath_call, (%rsp)
+	jne	1f
 
-#ifdef CONFIG_X86_X32_ABI
-ENTRY(stub_x32_rt_sigreturn)
-	SAVE_EXTRA_REGS 8
-	call	sys32_x32_rt_sigreturn
-	jmp	return_from_stub
-END(stub_x32_rt_sigreturn)
-#endif
+	/*
+	 * Called from fast path -- disable IRQs again, pop return address
+	 * and jump to slow path
+	 */
+	DISABLE_INTERRUPTS(CLBR_NONE)
+	TRACE_IRQS_OFF
+	popq	%rax
+	jmp	entry_SYSCALL64_slow_path
+
+1:
+	/* Called from C */
+	jmp	*%rax				/* called from C */
+END(stub_ptregs_64)
+
+.macro ptregs_stub func
+ENTRY(ptregs_\func)
+	leaq	\func(%rip), %rax
+	jmp	stub_ptregs_64
+END(ptregs_\func)
+.endm
+
+/* Instantiate ptregs_stub for each ptregs-using syscall */
+#define __SYSCALL_64_QUAL_(sym)
+#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_stub sym
+#define __SYSCALL_64(nr, sym, qual) __SYSCALL_64_QUAL_##qual(sym)
+#include <asm/syscalls_64.h>
 
 /*
  * A newly forked process directly context switches into this address.
@@ -439,7 +370,6 @@
  * rdi: prev task we switched from
  */
 ENTRY(ret_from_fork)
-
 	LOCK ; btr $TIF_FORK, TI_flags(%r8)
 
 	pushq	$0x0002
@@ -447,28 +377,32 @@
 
 	call	schedule_tail			/* rdi: 'prev' task parameter */
 
-	RESTORE_EXTRA_REGS
-
 	testb	$3, CS(%rsp)			/* from kernel_thread? */
+	jnz	1f
 
 	/*
-	 * By the time we get here, we have no idea whether our pt_regs,
-	 * ti flags, and ti status came from the 64-bit SYSCALL fast path,
-	 * the slow path, or one of the 32-bit compat paths.
-	 * Use IRET code path to return, since it can safely handle
-	 * all of the above.
+	 * We came from kernel_thread.  This code path is quite twisted, and
+	 * someone should clean it up.
+	 *
+	 * copy_thread_tls stashes the function pointer in RBX and the
+	 * parameter to be passed in RBP.  The called function is permitted
+	 * to call do_execve and thereby jump to user mode.
 	 */
-	jnz	int_ret_from_sys_call
-
-	/*
-	 * We came from kernel_thread
-	 * nb: we depend on RESTORE_EXTRA_REGS above
-	 */
-	movq	%rbp, %rdi
-	call	*%rbx
+	movq	RBP(%rsp), %rdi
+	call	*RBX(%rsp)
 	movl	$0, RAX(%rsp)
-	RESTORE_EXTRA_REGS
-	jmp	int_ret_from_sys_call
+
+	/*
+	 * Fall through as though we're exiting a syscall.  This makes a
+	 * twisted sort of sense if we just called do_execve.
+	 */
+
+1:
+	movq	%rsp, %rdi
+	call	syscall_return_slowpath	/* returns with IRQs disabled */
+	TRACE_IRQS_ON			/* user mode is traced as IRQS on */
+	SWAPGS
+	jmp	restore_regs_and_iret
 END(ret_from_fork)
 
 /*

diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index ff1c6d6..847f2f0 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S

@@ -19,12 +19,21 @@
 	.section .entry.text, "ax"
 
 /*
- * 32-bit SYSENTER instruction entry.
+ * 32-bit SYSENTER entry.
  *
- * SYSENTER loads ss, rsp, cs, and rip from previously programmed MSRs.
- * IF and VM in rflags are cleared (IOW: interrupts are off).
+ * 32-bit system calls through the vDSO's __kernel_vsyscall enter here
+ * on 64-bit kernels running on Intel CPUs.
+ *
+ * The SYSENTER instruction, in principle, should *only* occur in the
+ * vDSO.  In practice, a small number of Android devices were shipped
+ * with a copy of Bionic that inlined a SYSENTER instruction.  This
+ * never happened in any of Google's Bionic versions -- it only happened
+ * in a narrow range of Intel-provided versions.
+ *
+ * SYSENTER loads SS, RSP, CS, and RIP from previously programmed MSRs.
+ * IF and VM in RFLAGS are cleared (IOW: interrupts are off).
  * SYSENTER does not save anything on the stack,
- * and does not save old rip (!!!) and rflags.
+ * and does not save old RIP (!!!), RSP, or RFLAGS.
  *
  * Arguments:
  * eax  system call number
@@ -35,10 +44,6 @@
  * edi  arg5
  * ebp  user stack
  * 0(%ebp) arg6
- *
- * This is purely a fast path. For anything complicated we use the int 0x80
- * path below. We set up a complete hardware stack frame to share code
- * with the int 0x80 path.
  */
 ENTRY(entry_SYSENTER_compat)
 	/* Interrupts are off on entry. */
@@ -66,8 +71,6 @@
 	 */
 	pushfq				/* pt_regs->flags (except IF = 0) */
 	orl	$X86_EFLAGS_IF, (%rsp)	/* Fix saved flags */
-	ASM_CLAC			/* Clear AC after saving FLAGS */
-
 	pushq	$__USER32_CS		/* pt_regs->cs */
 	xorq    %r8,%r8
 	pushq	%r8			/* pt_regs->ip = 0 (placeholder) */
@@ -90,19 +93,25 @@
 	cld
 
 	/*
-	 * Sysenter doesn't filter flags, so we need to clear NT
+	 * SYSENTER doesn't filter flags, so we need to clear NT and AC
 	 * ourselves.  To save a few cycles, we can check whether
-	 * NT was set instead of doing an unconditional popfq.
+	 * either was set instead of doing an unconditional popfq.
 	 * This needs to happen before enabling interrupts so that
 	 * we don't get preempted with NT set.
 	 *
+	 * If TF is set, we will single-step all the way to here -- do_debug
+	 * will ignore all the traps.  (Yes, this is slow, but so is
+	 * single-stepping in general.  This allows us to avoid having
+	 * a more complicated code to handle the case where a user program
+	 * forces us to single-step through the SYSENTER entry code.)
+	 *
 	 * NB.: .Lsysenter_fix_flags is a label with the code under it moved
 	 * out-of-line as an optimization: NT is unlikely to be set in the
 	 * majority of the cases and instead of polluting the I$ unnecessarily,
 	 * we're keeping that code behind a branch which will predict as
 	 * not-taken and therefore its instructions won't be fetched.
 	 */
-	testl	$X86_EFLAGS_NT, EFLAGS(%rsp)
+	testl	$X86_EFLAGS_NT|X86_EFLAGS_AC|X86_EFLAGS_TF, EFLAGS(%rsp)
 	jnz	.Lsysenter_fix_flags
 .Lsysenter_flags_fixed:
 
@@ -123,20 +132,42 @@
 	pushq	$X86_EFLAGS_FIXED
 	popfq
 	jmp	.Lsysenter_flags_fixed
+GLOBAL(__end_entry_SYSENTER_compat)
 ENDPROC(entry_SYSENTER_compat)
 
 /*
- * 32-bit SYSCALL instruction entry.
+ * 32-bit SYSCALL entry.
  *
- * 32-bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11,
- * then loads new ss, cs, and rip from previously programmed MSRs.
- * rflags gets masked by a value from another MSR (so CLD and CLAC
- * are not needed). SYSCALL does not save anything on the stack
- * and does not change rsp.
+ * 32-bit system calls through the vDSO's __kernel_vsyscall enter here
+ * on 64-bit kernels running on AMD CPUs.
  *
- * Note: rflags saving+masking-with-MSR happens only in Long mode
+ * The SYSCALL instruction, in principle, should *only* occur in the
+ * vDSO.  In practice, it appears that this really is the case.
+ * As evidence:
+ *
+ *  - The calling convention for SYSCALL has changed several times without
+ *    anyone noticing.
+ *
+ *  - Prior to the in-kernel X86_BUG_SYSRET_SS_ATTRS fixup, anything
+ *    user task that did SYSCALL without immediately reloading SS
+ *    would randomly crash.
+ *
+ *  - Most programmers do not directly target AMD CPUs, and the 32-bit
+ *    SYSCALL instruction does not exist on Intel CPUs.  Even on AMD
+ *    CPUs, Linux disables the SYSCALL instruction on 32-bit kernels
+ *    because the SYSCALL instruction in legacy/native 32-bit mode (as
+ *    opposed to compat mode) is sufficiently poorly designed as to be
+ *    essentially unusable.
+ *
+ * 32-bit SYSCALL saves RIP to RCX, clears RFLAGS.RF, then saves
+ * RFLAGS to R11, then loads new SS, CS, and RIP from previously
+ * programmed MSRs.  RFLAGS gets masked by a value from another MSR
+ * (so CLD and CLAC are not needed).  SYSCALL does not save anything on
+ * the stack and does not change RSP.
+ *
+ * Note: RFLAGS saving+masking-with-MSR happens only in Long mode
  * (in legacy 32-bit mode, IF, RF and VM bits are cleared and that's it).
- * Don't get confused: rflags saving+masking depends on Long Mode Active bit
+ * Don't get confused: RFLAGS saving+masking depends on Long Mode Active bit
  * (EFER.LMA=1), NOT on bitness of userspace where SYSCALL executes
  * or target CS descriptor's L bit (SYSCALL does not read segment descriptors).
  *
@@ -236,7 +267,21 @@
 END(entry_SYSCALL_compat)
 
 /*
- * Emulated IA32 system calls via int 0x80.
+ * 32-bit legacy system call entry.
+ *
+ * 32-bit x86 Linux system calls traditionally used the INT $0x80
+ * instruction.  INT $0x80 lands here.
+ *
+ * This entry point can be used by 32-bit and 64-bit programs to perform
+ * 32-bit system calls.  Instances of INT $0x80 can be found inline in
+ * various programs and libraries.  It is also used by the vDSO's
+ * __kernel_vsyscall fallback for hardware that doesn't support a faster
+ * entry method.  Restarted 32-bit system calls also fall back to INT
+ * $0x80 regardless of what instruction was originally used to do the
+ * system call.
+ *
+ * This is considered a slow path.  It is not used by most libc
+ * implementations on modern hardware except during process startup.
  *
  * Arguments:
  * eax  system call number
@@ -245,22 +290,14 @@
  * edx  arg3
  * esi  arg4
  * edi  arg5
- * ebp  arg6	(note: not saved in the stack frame, should not be touched)
- *
- * Notes:
- * Uses the same stack frame as the x86-64 version.
- * All registers except eax must be saved (but ptrace may violate that).
- * Arguments are zero extended. For system calls that want sign extension and
- * take long arguments a wrapper is needed. Most calls can just be called
- * directly.
- * Assumes it is only called from user space and entered with interrupts off.
+ * ebp  arg6
  */
-
 ENTRY(entry_INT80_compat)
 	/*
 	 * Interrupts are off on entry.
 	 */
 	PARAVIRT_ADJUST_EXCEPTION_FRAME
+	ASM_CLAC			/* Do this early to minimize exposure */
 	SWAPGS
 
 	/*
@@ -299,7 +336,7 @@
 	TRACE_IRQS_OFF
 
 	movq	%rsp, %rdi
-	call	do_syscall_32_irqs_off
+	call	do_int80_syscall_32
 .Lsyscall_32_done:
 
 	/* Go back to user mode. */

diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c
index 9a66498..8f895ee 100644
--- a/arch/x86/entry/syscall_32.c
+++ b/arch/x86/entry/syscall_32.c

@@ -6,17 +6,11 @@
 #include <asm/asm-offsets.h>
 #include <asm/syscall.h>
 
-#ifdef CONFIG_IA32_EMULATION
-#define SYM(sym, compat) compat
-#else
-#define SYM(sym, compat) sym
-#endif
-
-#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage long SYM(sym, compat)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
+#define __SYSCALL_I386(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
 #include <asm/syscalls_32.h>
 #undef __SYSCALL_I386
 
-#define __SYSCALL_I386(nr, sym, compat) [nr] = SYM(sym, compat),
+#define __SYSCALL_I386(nr, sym, qual) [nr] = sym,
 
 extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
 

diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
index 41283d2..9dbc5ab 100644
--- a/arch/x86/entry/syscall_64.c
+++ b/arch/x86/entry/syscall_64.c

@@ -6,19 +6,14 @@
 #include <asm/asm-offsets.h>
 #include <asm/syscall.h>
 
-#define __SYSCALL_COMMON(nr, sym, compat) __SYSCALL_64(nr, sym, compat)
+#define __SYSCALL_64_QUAL_(sym) sym
+#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_##sym
 
-#ifdef CONFIG_X86_X32_ABI
-# define __SYSCALL_X32(nr, sym, compat) __SYSCALL_64(nr, sym, compat)
-#else
-# define __SYSCALL_X32(nr, sym, compat) /* nothing */
-#endif
-
-#define __SYSCALL_64(nr, sym, compat) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
+#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long __SYSCALL_64_QUAL_##qual(sym)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
 #include <asm/syscalls_64.h>
 #undef __SYSCALL_64
 
-#define __SYSCALL_64(nr, sym, compat) [nr] = sym,
+#define __SYSCALL_64(nr, sym, qual) [nr] = __SYSCALL_64_QUAL_##qual(sym),
 
 extern long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
 

diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index dc1040a..2e5b565 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl

@@ -21,7 +21,7 @@
 12	common	brk			sys_brk
 13	64	rt_sigaction		sys_rt_sigaction
 14	common	rt_sigprocmask		sys_rt_sigprocmask
-15	64	rt_sigreturn		stub_rt_sigreturn
+15	64	rt_sigreturn		sys_rt_sigreturn/ptregs
 16	64	ioctl			sys_ioctl
 17	common	pread64			sys_pread64
 18	common	pwrite64		sys_pwrite64
@@ -62,10 +62,10 @@
 53	common	socketpair		sys_socketpair
 54	64	setsockopt		sys_setsockopt
 55	64	getsockopt		sys_getsockopt
-56	common	clone			stub_clone
-57	common	fork			stub_fork
-58	common	vfork			stub_vfork
-59	64	execve			stub_execve
+56	common	clone			sys_clone/ptregs
+57	common	fork			sys_fork/ptregs
+58	common	vfork			sys_vfork/ptregs
+59	64	execve			sys_execve/ptregs
 60	common	exit			sys_exit
 61	common	wait4			sys_wait4
 62	common	kill			sys_kill
@@ -178,7 +178,7 @@
 169	common	reboot			sys_reboot
 170	common	sethostname		sys_sethostname
 171	common	setdomainname		sys_setdomainname
-172	common	iopl			sys_iopl
+172	common	iopl			sys_iopl/ptregs
 173	common	ioperm			sys_ioperm
 174	64	create_module
 175	common	init_module		sys_init_module
@@ -328,7 +328,7 @@
 319	common	memfd_create		sys_memfd_create
 320	common	kexec_file_load		sys_kexec_file_load
 321	common	bpf			sys_bpf
-322	64	execveat		stub_execveat
+322	64	execveat		sys_execveat/ptregs
 323	common	userfaultfd		sys_userfaultfd
 324	common	membarrier		sys_membarrier
 325	common	mlock2			sys_mlock2
@@ -339,14 +339,14 @@
 # for native 64-bit operation.
 #
 512	x32	rt_sigaction		compat_sys_rt_sigaction
-513	x32	rt_sigreturn		stub_x32_rt_sigreturn
+513	x32	rt_sigreturn		sys32_x32_rt_sigreturn
 514	x32	ioctl			compat_sys_ioctl
 515	x32	readv			compat_sys_readv
 516	x32	writev			compat_sys_writev
 517	x32	recvfrom		compat_sys_recvfrom
 518	x32	sendmsg			compat_sys_sendmsg
 519	x32	recvmsg			compat_sys_recvmsg
-520	x32	execve			stub_x32_execve
+520	x32	execve			compat_sys_execve/ptregs
 521	x32	ptrace			compat_sys_ptrace
 522	x32	rt_sigpending		compat_sys_rt_sigpending
 523	x32	rt_sigtimedwait		compat_sys_rt_sigtimedwait
@@ -371,4 +371,4 @@
 542	x32	getsockopt		compat_sys_getsockopt
 543	x32	io_setup		compat_sys_io_setup
 544	x32	io_submit		compat_sys_io_submit
-545	x32	execveat		stub_x32_execveat
+545	x32	execveat		compat_sys_execveat/ptregs

diff --git a/arch/x86/entry/syscalls/syscalltbl.sh b/arch/x86/entry/syscalls/syscalltbl.sh
index 0e7f8ec..cd3d301 100644
--- a/arch/x86/entry/syscalls/syscalltbl.sh
+++ b/arch/x86/entry/syscalls/syscalltbl.sh

@@ -3,13 +3,63 @@
 in="$1"
 out="$2"
 
+syscall_macro() {
+    abi="$1"
+    nr="$2"
+    entry="$3"
+
+    # Entry can be either just a function name or "function/qualifier"
+    real_entry="${entry%%/*}"
+    qualifier="${entry:${#real_entry}}"		# Strip the function name
+    qualifier="${qualifier:1}"			# Strip the slash, if any
+
+    echo "__SYSCALL_${abi}($nr, $real_entry, $qualifier)"
+}
+
+emit() {
+    abi="$1"
+    nr="$2"
+    entry="$3"
+    compat="$4"
+
+    if [ "$abi" == "64" -a -n "$compat" ]; then
+	echo "a compat entry for a 64-bit syscall makes no sense" >&2
+	exit 1
+    fi
+
+    if [ -z "$compat" ]; then
+	if [ -n "$entry" ]; then
+	    syscall_macro "$abi" "$nr" "$entry"
+	fi
+    else
+	echo "#ifdef CONFIG_X86_32"
+	if [ -n "$entry" ]; then
+	    syscall_macro "$abi" "$nr" "$entry"
+	fi
+	echo "#else"
+	syscall_macro "$abi" "$nr" "$compat"
+	echo "#endif"
+    fi
+}
+
 grep '^[0-9]' "$in" | sort -n | (
     while read nr abi name entry compat; do
 	abi=`echo "$abi" | tr '[a-z]' '[A-Z]'`
-	if [ -n "$compat" ]; then
-	    echo "__SYSCALL_${abi}($nr, $entry, $compat)"
-	elif [ -n "$entry" ]; then
-	    echo "__SYSCALL_${abi}($nr, $entry, $entry)"
+	if [ "$abi" == "COMMON" -o "$abi" == "64" ]; then
+	    # COMMON is the same as 64, except that we don't expect X32
+	    # programs to use it.  Our expectation has nothing to do with
+	    # any generated code, so treat them the same.
+	    emit 64 "$nr" "$entry" "$compat"
+	elif [ "$abi" == "X32" ]; then
+	    # X32 is equivalent to 64 on an X32-compatible kernel.
+	    echo "#ifdef CONFIG_X86_X32_ABI"
+	    emit 64 "$nr" "$entry" "$compat"
+	    echo "#endif"
+	elif [ "$abi" == "I386" ]; then
+	    emit "$abi" "$nr" "$entry" "$compat"
+	else
+	    echo "Unknown abi $abi" >&2
+	    exit 1
 	fi
     done
 ) > "$out"

diff --git a/arch/x86/entry/vdso/vdso2c.h b/arch/x86/entry/vdso/vdso2c.h
index 0224987..63a03bb 100644
--- a/arch/x86/entry/vdso/vdso2c.h
+++ b/arch/x86/entry/vdso/vdso2c.h

@@ -140,7 +140,7 @@
 	fprintf(outfile, "#include <asm/vdso.h>\n");
 	fprintf(outfile, "\n");
 	fprintf(outfile,
-		"static unsigned char raw_data[%lu] __page_aligned_data = {",
+		"static unsigned char raw_data[%lu] __ro_after_init __aligned(PAGE_SIZE) = {",
 		mapping_size);
 	for (j = 0; j < stripped_len; j++) {
 		if (j % 10 == 0)
@@ -150,16 +150,9 @@
 	}
 	fprintf(outfile, "\n};\n\n");
 
-	fprintf(outfile, "static struct page *pages[%lu];\n\n",
-		mapping_size / 4096);
-
 	fprintf(outfile, "const struct vdso_image %s = {\n", name);
 	fprintf(outfile, "\t.data = raw_data,\n");
 	fprintf(outfile, "\t.size = %lu,\n", mapping_size);
-	fprintf(outfile, "\t.text_mapping = {\n");
-	fprintf(outfile, "\t\t.name = \"[vdso]\",\n");
-	fprintf(outfile, "\t\t.pages = pages,\n");
-	fprintf(outfile, "\t},\n");
 	if (alt_sec) {
 		fprintf(outfile, "\t.alt = %lu,\n",
 			(unsigned long)GET_LE(&alt_sec->sh_offset));

diff --git a/arch/x86/entry/vdso/vdso32-setup.c b/arch/x86/entry/vdso/vdso32-setup.c
index 08a317a..7853b53 100644
--- a/arch/x86/entry/vdso/vdso32-setup.c
+++ b/arch/x86/entry/vdso/vdso32-setup.c

@@ -11,7 +11,6 @@
 #include <linux/kernel.h>
 #include <linux/mm_types.h>
 
-#include <asm/cpufeature.h>
 #include <asm/processor.h>
 #include <asm/vdso.h>
 

diff --git a/arch/x86/entry/vdso/vdso32/system_call.S b/arch/x86/entry/vdso/vdso32/system_call.S
index 3a1d929..0109ac6 100644
--- a/arch/x86/entry/vdso/vdso32/system_call.S
+++ b/arch/x86/entry/vdso/vdso32/system_call.S

@@ -3,7 +3,7 @@
 */
 
 #include <asm/dwarf2.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/alternative-asm.h>
 
 /*

diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index b8f69e2..10f7045 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c

@@ -20,6 +20,7 @@
 #include <asm/page.h>
 #include <asm/hpet.h>
 #include <asm/desc.h>
+#include <asm/cpufeature.h>
 
 #if defined(CONFIG_X86_64)
 unsigned int __read_mostly vdso64_enabled = 1;
@@ -27,13 +28,7 @@
 
 void __init init_vdso_image(const struct vdso_image *image)
 {
-	int i;
-	int npages = (image->size) / PAGE_SIZE;
-
 	BUG_ON(image->size % PAGE_SIZE != 0);
-	for (i = 0; i < npages; i++)
-		image->text_mapping.pages[i] =
-			virt_to_page(image->data + i*PAGE_SIZE);
 
 	apply_alternatives((struct alt_instr *)(image->data + image->alt),
 			   (struct alt_instr *)(image->data + image->alt +
@@ -90,18 +85,87 @@
 #endif
 }
 
+static int vdso_fault(const struct vm_special_mapping *sm,
+		      struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	const struct vdso_image *image = vma->vm_mm->context.vdso_image;
+
+	if (!image || (vmf->pgoff << PAGE_SHIFT) >= image->size)
+		return VM_FAULT_SIGBUS;
+
+	vmf->page = virt_to_page(image->data + (vmf->pgoff << PAGE_SHIFT));
+	get_page(vmf->page);
+	return 0;
+}
+
+static const struct vm_special_mapping text_mapping = {
+	.name = "[vdso]",
+	.fault = vdso_fault,
+};
+
+static int vvar_fault(const struct vm_special_mapping *sm,
+		      struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	const struct vdso_image *image = vma->vm_mm->context.vdso_image;
+	long sym_offset;
+	int ret = -EFAULT;
+
+	if (!image)
+		return VM_FAULT_SIGBUS;
+
+	sym_offset = (long)(vmf->pgoff << PAGE_SHIFT) +
+		image->sym_vvar_start;
+
+	/*
+	 * Sanity check: a symbol offset of zero means that the page
+	 * does not exist for this vdso image, not that the page is at
+	 * offset zero relative to the text mapping.  This should be
+	 * impossible here, because sym_offset should only be zero for
+	 * the page past the end of the vvar mapping.
+	 */
+	if (sym_offset == 0)
+		return VM_FAULT_SIGBUS;
+
+	if (sym_offset == image->sym_vvar_page) {
+		ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address,
+				    __pa_symbol(&__vvar_page) >> PAGE_SHIFT);
+	} else if (sym_offset == image->sym_hpet_page) {
+#ifdef CONFIG_HPET_TIMER
+		if (hpet_address && vclock_was_used(VCLOCK_HPET)) {
+			ret = vm_insert_pfn_prot(
+				vma,
+				(unsigned long)vmf->virtual_address,
+				hpet_address >> PAGE_SHIFT,
+				pgprot_noncached(PAGE_READONLY));
+		}
+#endif
+	} else if (sym_offset == image->sym_pvclock_page) {
+		struct pvclock_vsyscall_time_info *pvti =
+			pvclock_pvti_cpu0_va();
+		if (pvti && vclock_was_used(VCLOCK_PVCLOCK)) {
+			ret = vm_insert_pfn(
+				vma,
+				(unsigned long)vmf->virtual_address,
+				__pa(pvti) >> PAGE_SHIFT);
+		}
+	}
+
+	if (ret == 0 || ret == -EBUSY)
+		return VM_FAULT_NOPAGE;
+
+	return VM_FAULT_SIGBUS;
+}
+
 static int map_vdso(const struct vdso_image *image, bool calculate_addr)
 {
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
 	unsigned long addr, text_start;
 	int ret = 0;
-	static struct page *no_pages[] = {NULL};
-	static struct vm_special_mapping vvar_mapping = {
+	static const struct vm_special_mapping vvar_mapping = {
 		.name = "[vvar]",
-		.pages = no_pages,
+		.fault = vvar_fault,
 	};
-	struct pvclock_vsyscall_time_info *pvti;
 
 	if (calculate_addr) {
 		addr = vdso_addr(current->mm->start_stack,
@@ -121,6 +185,7 @@
 
 	text_start = addr - image->sym_vvar_start;
 	current->mm->context.vdso = (void __user *)text_start;
+	current->mm->context.vdso_image = image;
 
 	/*
 	 * MAYWRITE to allow gdb to COW and set breakpoints
@@ -130,7 +195,7 @@
 				       image->size,
 				       VM_READ|VM_EXEC|
 				       VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
-				       &image->text_mapping);
+				       &text_mapping);
 
 	if (IS_ERR(vma)) {
 		ret = PTR_ERR(vma);
@@ -140,7 +205,8 @@
 	vma = _install_special_mapping(mm,
 				       addr,
 				       -image->sym_vvar_start,
-				       VM_READ|VM_MAYREAD,
+				       VM_READ|VM_MAYREAD|VM_IO|VM_DONTDUMP|
+				       VM_PFNMAP,
 				       &vvar_mapping);
 
 	if (IS_ERR(vma)) {
@@ -148,41 +214,6 @@
 		goto up_fail;
 	}
 
-	if (image->sym_vvar_page)
-		ret = remap_pfn_range(vma,
-				      text_start + image->sym_vvar_page,
-				      __pa_symbol(&__vvar_page) >> PAGE_SHIFT,
-				      PAGE_SIZE,
-				      PAGE_READONLY);
-
-	if (ret)
-		goto up_fail;
-
-#ifdef CONFIG_HPET_TIMER
-	if (hpet_address && image->sym_hpet_page) {
-		ret = io_remap_pfn_range(vma,
-			text_start + image->sym_hpet_page,
-			hpet_address >> PAGE_SHIFT,
-			PAGE_SIZE,
-			pgprot_noncached(PAGE_READONLY));
-
-		if (ret)
-			goto up_fail;
-	}
-#endif
-
-	pvti = pvclock_pvti_cpu0_va();
-	if (pvti && image->sym_pvclock_page) {
-		ret = remap_pfn_range(vma,
-				      text_start + image->sym_pvclock_page,
-				      __pa(pvti) >> PAGE_SHIFT,
-				      PAGE_SIZE,
-				      PAGE_READONLY);
-
-		if (ret)
-			goto up_fail;
-	}
-
 up_fail:
 	if (ret)
 		current->mm->context.vdso = NULL;
@@ -254,7 +285,7 @@
 #ifdef CONFIG_NUMA
 	node = cpu_to_node(cpu);
 #endif
-	if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP))
+	if (static_cpu_has(X86_FEATURE_RDTSCP))
 		write_rdtscp_aux((node << 12) | cpu);
 
 	/*

diff --git a/arch/x86/entry/vsyscall/vsyscall_gtod.c b/arch/x86/entry/vsyscall/vsyscall_gtod.c
index 51e3304..0fb3a10 100644
--- a/arch/x86/entry/vsyscall/vsyscall_gtod.c
+++ b/arch/x86/entry/vsyscall/vsyscall_gtod.c

@@ -16,6 +16,8 @@
 #include <asm/vgtod.h>
 #include <asm/vvar.h>
 
+int vclocks_used __read_mostly;
+
 DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data);
 
 void update_vsyscall_tz(void)
@@ -26,12 +28,17 @@
 
 void update_vsyscall(struct timekeeper *tk)
 {
+	int vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode;
 	struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data;
 
+	/* Mark the new vclock used. */
+	BUILD_BUG_ON(VCLOCK_MAX >= 32);
+	WRITE_ONCE(vclocks_used, READ_ONCE(vclocks_used) | (1 << vclock_mode));
+
 	gtod_write_begin(vdata);
 
 	/* copy vsyscall data */
-	vdata->vclock_mode	= tk->tkr_mono.clock->archdata.vclock_mode;
+	vdata->vclock_mode	= vclock_mode;
 	vdata->cycle_last	= tk->tkr_mono.cycle_last;
 	vdata->mask		= tk->tkr_mono.mask;
 	vdata->mult		= tk->tkr_mono.mult;

diff --git a/arch/x86/events/Makefile b/arch/x86/events/Makefile
new file mode 100644
index 0000000..fdfea15
--- /dev/null
+++ b/arch/x86/events/Makefile

@@ -0,0 +1,13 @@
+obj-y			+= core.o
+
+obj-$(CONFIG_CPU_SUP_AMD)               += amd/core.o amd/uncore.o
+obj-$(CONFIG_X86_LOCAL_APIC)            += amd/ibs.o msr.o
+ifdef CONFIG_AMD_IOMMU
+obj-$(CONFIG_CPU_SUP_AMD)               += amd/iommu.o
+endif
+obj-$(CONFIG_CPU_SUP_INTEL)		+= intel/core.o intel/bts.o intel/cqm.o
+obj-$(CONFIG_CPU_SUP_INTEL)		+= intel/cstate.o intel/ds.o intel/knc.o 
+obj-$(CONFIG_CPU_SUP_INTEL)		+= intel/lbr.o intel/p4.o intel/p6.o intel/pt.o
+obj-$(CONFIG_CPU_SUP_INTEL)		+= intel/rapl.o msr.o
+obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE)	+= intel/uncore.o intel/uncore_nhmex.o
+obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE)	+= intel/uncore_snb.o intel/uncore_snbep.o

diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/events/amd/core.c
similarity index 99%
rename from arch/x86/kernel/cpu/perf_event_amd.c
rename to arch/x86/events/amd/core.c
index 5861053..049ada8d 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/events/amd/core.c

@@ -5,7 +5,7 @@
 #include <linux/slab.h>
 #include <asm/apicdef.h>
 
-#include "perf_event.h"
+#include "../perf_event.h"
 
 static __initconst const u64 amd_hw_cache_event_ids
 				[PERF_COUNT_HW_CACHE_MAX]

diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/events/amd/ibs.c
similarity index 97%
rename from arch/x86/kernel/cpu/perf_event_amd_ibs.c
rename to arch/x86/events/amd/ibs.c
index 989d3c2..51087c2 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c
+++ b/arch/x86/events/amd/ibs.c

@@ -14,7 +14,7 @@
 
 #include <asm/apic.h>
 
-#include "perf_event.h"
+#include "../perf_event.h"
 
 static u32 ibs_caps;
 
@@ -670,7 +670,7 @@
 	perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");
 
 	register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
-	printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps);
+	pr_info("perf: AMD IBS detected (0x%08x)\n", ibs_caps);
 
 	return 0;
 }
@@ -774,14 +774,14 @@
 		pci_read_config_dword(cpu_cfg, IBSCTL, &value);
 		if (value != (ibs_eilvt_off | IBSCTL_LVT_OFFSET_VALID)) {
 			pci_dev_put(cpu_cfg);
-			printk(KERN_DEBUG "Failed to setup IBS LVT offset, "
-			       "IBSCTL = 0x%08x\n", value);
+			pr_debug("Failed to setup IBS LVT offset, IBSCTL = 0x%08x\n",
+				 value);
 			return -EINVAL;
 		}
 	} while (1);
 
 	if (!nodes) {
-		printk(KERN_DEBUG "No CPU node configured for IBS\n");
+		pr_debug("No CPU node configured for IBS\n");
 		return -ENODEV;
 	}
 
@@ -810,7 +810,7 @@
 	preempt_enable();
 
 	if (offset == APIC_EILVT_NR_MAX) {
-		printk(KERN_DEBUG "No EILVT entry available\n");
+		pr_debug("No EILVT entry available\n");
 		return;
 	}
 

diff --git a/arch/x86/kernel/cpu/perf_event_amd_iommu.c b/arch/x86/events/amd/iommu.c
similarity index 98%
rename from arch/x86/kernel/cpu/perf_event_amd_iommu.c
rename to arch/x86/events/amd/iommu.c
index 97242a9..635e5eb 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_iommu.c
+++ b/arch/x86/events/amd/iommu.c

@@ -16,8 +16,8 @@
 #include <linux/cpumask.h>
 #include <linux/slab.h>
 
-#include "perf_event.h"
-#include "perf_event_amd_iommu.h"
+#include "../perf_event.h"
+#include "iommu.h"
 
 #define COUNTER_SHIFT		16
 

diff --git a/arch/x86/kernel/cpu/perf_event_amd_iommu.h b/arch/x86/events/amd/iommu.h
similarity index 100%
rename from arch/x86/kernel/cpu/perf_event_amd_iommu.h
rename to arch/x86/events/amd/iommu.h


diff --git a/arch/x86/kernel/cpu/perf_event_amd_uncore.c b/arch/x86/events/amd/uncore.c
similarity index 98%
rename from arch/x86/kernel/cpu/perf_event_amd_uncore.c
rename to arch/x86/events/amd/uncore.c
index 8836fc9..3db9569 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_uncore.c
+++ b/arch/x86/events/amd/uncore.c

@@ -538,7 +538,7 @@
 		if (ret)
 			goto fail_nb;
 
-		printk(KERN_INFO "perf: AMD NB counters detected\n");
+		pr_info("perf: AMD NB counters detected\n");
 		ret = 0;
 	}
 
@@ -552,7 +552,7 @@
 		if (ret)
 			goto fail_l2;
 
-		printk(KERN_INFO "perf: AMD L2I counters detected\n");
+		pr_info("perf: AMD L2I counters detected\n");
 		ret = 0;
 	}
 

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/events/core.c
similarity index 97%
rename from arch/x86/kernel/cpu/perf_event.c
rename to arch/x86/events/core.c
index 1b443db..5e830d0 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/events/core.c

@@ -254,15 +254,16 @@
 	 * We still allow the PMU driver to operate:
 	 */
 	if (bios_fail) {
-		printk(KERN_CONT "Broken BIOS detected, complain to your hardware vendor.\n");
-		printk(KERN_ERR FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", reg_fail, val_fail);
+		pr_cont("Broken BIOS detected, complain to your hardware vendor.\n");
+		pr_err(FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n",
+			      reg_fail, val_fail);
 	}
 
 	return true;
 
 msr_fail:
-	printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n");
-	printk("%sFailed to access perfctr msr (MSR %x is %Lx)\n",
+	pr_cont("Broken PMU hardware detected, using software events only.\n");
+	pr_info("%sFailed to access perfctr msr (MSR %x is %Lx)\n",
 		boot_cpu_has(X86_FEATURE_HYPERVISOR) ? KERN_INFO : KERN_ERR,
 		reg, val_new);
 
@@ -596,6 +597,19 @@
 	}
 }
 
+/*
+ * There may be PMI landing after enabled=0. The PMI hitting could be before or
+ * after disable_all.
+ *
+ * If PMI hits before disable_all, the PMU will be disabled in the NMI handler.
+ * It will not be re-enabled in the NMI handler again, because enabled=0. After
+ * handling the NMI, disable_all will be called, which will not change the
+ * state either. If PMI hits after disable_all, the PMU is already disabled
+ * before entering NMI handler. The NMI handler will not change the state
+ * either.
+ *
+ * So either situation is harmless.
+ */
 static void x86_pmu_disable(struct pmu *pmu)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

diff --git a/arch/x86/kernel/cpu/perf_event_intel_bts.c b/arch/x86/events/intel/bts.c
similarity index 99%
rename from arch/x86/kernel/cpu/perf_event_intel_bts.c
rename to arch/x86/events/intel/bts.c
index 2cad71d..b99dc92 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_bts.c
+++ b/arch/x86/events/intel/bts.c

@@ -26,7 +26,7 @@
 #include <asm-generic/sizes.h>
 #include <asm/perf_event.h>
 
-#include "perf_event.h"
+#include "../perf_event.h"
 
 struct bts_ctx {
 	struct perf_output_handle	handle;

diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/events/intel/core.c
similarity index 98%
rename from arch/x86/kernel/cpu/perf_event_intel.c
rename to arch/x86/events/intel/core.c
index fed2ab1..68fa55b 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/events/intel/core.c

@@ -18,7 +18,7 @@
 #include <asm/hardirq.h>
 #include <asm/apic.h>
 
-#include "perf_event.h"
+#include "../perf_event.h"
 
 /*
  * Intel PerfMon, used on Core and later.
@@ -1502,7 +1502,15 @@
 };
 
 /*
- * Use from PMIs where the LBRs are already disabled.
+ * Used from PMIs where the LBRs are already disabled.
+ *
+ * This function could be called consecutively. It is required to remain in
+ * disabled state if called consecutively.
+ *
+ * During consecutive calls, the same disable value will be written to related
+ * registers, so the PMU state remains unchanged. hw.state in
+ * intel_bts_disable_local will remain PERF_HES_STOPPED too in consecutive
+ * calls.
  */
 static void __intel_pmu_disable_all(void)
 {
@@ -1884,6 +1892,16 @@
 	if (__test_and_clear_bit(62, (unsigned long *)&status)) {
 		handled++;
 		x86_pmu.drain_pebs(regs);
+		/*
+		 * There are cases where, even though, the PEBS ovfl bit is set
+		 * in GLOBAL_OVF_STATUS, the PEBS events may also have their
+		 * overflow bits set for their counters. We must clear them
+		 * here because they have been processed as exact samples in
+		 * the drain_pebs() routine. They must not be processed again
+		 * in the for_each_bit_set() loop for regular samples below.
+		 */
+		status &= ~cpuc->pebs_enabled;
+		status &= x86_pmu.intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI;
 	}
 
 	/*
@@ -1929,7 +1947,10 @@
 		goto again;
 
 done:
-	__intel_pmu_enable_all(0, true);
+	/* Only restore PMU state when it's active. See x86_pmu_disable(). */
+	if (cpuc->enabled)
+		__intel_pmu_enable_all(0, true);
+
 	/*
 	 * Only unmask the NMI after the overflow counters
 	 * have been reset. This avoids spurious NMIs on
@@ -3396,6 +3417,7 @@
 		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
 			X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);
 
+		intel_pmu_pebs_data_source_nhm();
 		x86_add_quirk(intel_nehalem_quirk);
 
 		pr_cont("Nehalem events, ");
@@ -3459,6 +3481,7 @@
 		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
 			X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);
 
+		intel_pmu_pebs_data_source_nhm();
 		pr_cont("Westmere events, ");
 		break;
 
@@ -3581,7 +3604,7 @@
 		intel_pmu_lbr_init_hsw();
 
 		x86_pmu.event_constraints = intel_bdw_event_constraints;
-		x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints;
+		x86_pmu.pebs_constraints = intel_bdw_pebs_event_constraints;
 		x86_pmu.extra_regs = intel_snbep_extra_regs;
 		x86_pmu.pebs_aliases = intel_pebs_aliases_ivb;
 		x86_pmu.pebs_prec_dist = true;

diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/events/intel/cqm.c
similarity index 97%
rename from arch/x86/kernel/cpu/perf_event_intel_cqm.c
rename to arch/x86/events/intel/cqm.c
index a316ca9..93cb412 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c
+++ b/arch/x86/events/intel/cqm.c

@@ -7,7 +7,7 @@
 #include <linux/perf_event.h>
 #include <linux/slab.h>
 #include <asm/cpu_device_id.h>
-#include "perf_event.h"
+#include "../perf_event.h"
 
 #define MSR_IA32_PQR_ASSOC	0x0c8f
 #define MSR_IA32_QM_CTR		0x0c8e
@@ -1244,15 +1244,12 @@
 
 static inline void cqm_pick_event_reader(int cpu)
 {
-	int phys_id = topology_physical_package_id(cpu);
-	int i;
+	int reader;
 
-	for_each_cpu(i, &cqm_cpumask) {
-		if (phys_id == topology_physical_package_id(i))
-			return;	/* already got reader for this socket */
-	}
-
-	cpumask_set_cpu(cpu, &cqm_cpumask);
+	/* First online cpu in package becomes the reader */
+	reader = cpumask_any_and(&cqm_cpumask, topology_core_cpumask(cpu));
+	if (reader >= nr_cpu_ids)
+		cpumask_set_cpu(cpu, &cqm_cpumask);
 }
 
 static void intel_cqm_cpu_starting(unsigned int cpu)
@@ -1270,24 +1267,17 @@
 
 static void intel_cqm_cpu_exit(unsigned int cpu)
 {
-	int phys_id = topology_physical_package_id(cpu);
-	int i;
+	int target;
 
-	/*
-	 * Is @cpu a designated cqm reader?
-	 */
+	/* Is @cpu the current cqm reader for this package ? */
 	if (!cpumask_test_and_clear_cpu(cpu, &cqm_cpumask))
 		return;
 
-	for_each_online_cpu(i) {
-		if (i == cpu)
-			continue;
+	/* Find another online reader in this package */
+	target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
 
-		if (phys_id == topology_physical_package_id(i)) {
-			cpumask_set_cpu(i, &cqm_cpumask);
-			break;
-		}
-	}
+	if (target < nr_cpu_ids)
+		cpumask_set_cpu(target, &cqm_cpumask);
 }
 
 static int intel_cqm_cpu_notifier(struct notifier_block *nb,

diff --git a/arch/x86/kernel/cpu/perf_event_intel_cstate.c b/arch/x86/events/intel/cstate.c
similarity index 99%
rename from arch/x86/kernel/cpu/perf_event_intel_cstate.c
rename to arch/x86/events/intel/cstate.c
index 75a38b5..7946c42 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_cstate.c
+++ b/arch/x86/events/intel/cstate.c

@@ -89,7 +89,7 @@
 #include <linux/slab.h>
 #include <linux/perf_event.h>
 #include <asm/cpu_device_id.h>
-#include "perf_event.h"
+#include "../perf_event.h"
 
 #define DEFINE_CSTATE_FORMAT_ATTR(_var, _name, _format)		\
 static ssize_t __cstate_##_var##_show(struct kobject *kobj,	\

diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/events/intel/ds.c
similarity index 93%
rename from arch/x86/kernel/cpu/perf_event_intel_ds.c
rename to arch/x86/events/intel/ds.c
index 10602f0..ce7211a 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/events/intel/ds.c

@@ -5,7 +5,7 @@
 #include <asm/perf_event.h>
 #include <asm/insn.h>
 
-#include "perf_event.h"
+#include "../perf_event.h"
 
 /* The size of a BTS record in bytes: */
 #define BTS_RECORD_SIZE		24
@@ -51,7 +51,8 @@
 #define OP_LH (P(OP, LOAD) | P(LVL, HIT))
 #define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))
 
-static const u64 pebs_data_source[] = {
+/* Version for Sandy Bridge and later */
+static u64 pebs_data_source[] = {
 	P(OP, LOAD) | P(LVL, MISS) | P(LVL, L3) | P(SNOOP, NA),/* 0x00:ukn L3 */
 	OP_LH | P(LVL, L1)  | P(SNOOP, NONE),	/* 0x01: L1 local */
 	OP_LH | P(LVL, LFB) | P(SNOOP, NONE),	/* 0x02: LFB hit */
@@ -70,6 +71,14 @@
 	OP_LH | P(LVL, UNC) | P(SNOOP, NONE), /* 0x0f: uncached */
 };
 
+/* Patch up minor differences in the bits */
+void __init intel_pmu_pebs_data_source_nhm(void)
+{
+	pebs_data_source[0x05] = OP_LH | P(LVL, L3)  | P(SNOOP, HIT);
+	pebs_data_source[0x06] = OP_LH | P(LVL, L3)  | P(SNOOP, HITM);
+	pebs_data_source[0x07] = OP_LH | P(LVL, L3)  | P(SNOOP, HITM);
+}
+
 static u64 precise_store_data(u64 status)
 {
 	union intel_x86_pebs_dse dse;
@@ -269,7 +278,7 @@
 	if (!x86_pmu.pebs)
 		return 0;
 
-	buffer = kzalloc_node(PEBS_BUFFER_SIZE, GFP_KERNEL, node);
+	buffer = kzalloc_node(x86_pmu.pebs_buffer_size, GFP_KERNEL, node);
 	if (unlikely(!buffer))
 		return -ENOMEM;
 
@@ -286,7 +295,7 @@
 		per_cpu(insn_buffer, cpu) = ibuffer;
 	}
 
-	max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size;
+	max = x86_pmu.pebs_buffer_size / x86_pmu.pebs_record_size;
 
 	ds->pebs_buffer_base = (u64)(unsigned long)buffer;
 	ds->pebs_index = ds->pebs_buffer_base;
@@ -722,6 +731,30 @@
 	EVENT_CONSTRAINT_END
 };
 
+struct event_constraint intel_bdw_pebs_event_constraints[] = {
+	INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
+	INTEL_PLD_CONSTRAINT(0x01cd, 0xf),    /* MEM_TRANS_RETIRED.* */
+	/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
+	INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
+	/* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
+	INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2),
+	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
+	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
+	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
+	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
+	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
+	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
+	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */
+	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
+	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
+	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf),    /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */
+	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf),    /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */
+	/* Allow all events as PEBS with no flags */
+	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
+	EVENT_CONSTRAINT_END
+};
+
+
 struct event_constraint intel_skl_pebs_event_constraints[] = {
 	INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x2),	/* INST_RETIRED.PREC_DIST */
 	/* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
@@ -1319,19 +1352,28 @@
 
 	x86_pmu.bts  = boot_cpu_has(X86_FEATURE_BTS);
 	x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
+	x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE;
 	if (x86_pmu.pebs) {
 		char pebs_type = x86_pmu.intel_cap.pebs_trap ?  '+' : '-';
 		int format = x86_pmu.intel_cap.pebs_format;
 
 		switch (format) {
 		case 0:
-			printk(KERN_CONT "PEBS fmt0%c, ", pebs_type);
+			pr_cont("PEBS fmt0%c, ", pebs_type);
 			x86_pmu.pebs_record_size = sizeof(struct pebs_record_core);
+			/*
+			 * Using >PAGE_SIZE buffers makes the WRMSR to
+			 * PERF_GLOBAL_CTRL in intel_pmu_enable_all()
+			 * mysteriously hang on Core2.
+			 *
+			 * As a workaround, we don't do this.
+			 */
+			x86_pmu.pebs_buffer_size = PAGE_SIZE;
 			x86_pmu.drain_pebs = intel_pmu_drain_pebs_core;
 			break;
 
 		case 1:
-			printk(KERN_CONT "PEBS fmt1%c, ", pebs_type);
+			pr_cont("PEBS fmt1%c, ", pebs_type);
 			x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
 			x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
 			break;
@@ -1351,7 +1393,7 @@
 			break;
 
 		default:
-			printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type);
+			pr_cont("no PEBS fmt%d%c, ", format, pebs_type);
 			x86_pmu.pebs = 0;
 		}
 	}

diff --git a/arch/x86/kernel/cpu/perf_event_knc.c b/arch/x86/events/intel/knc.c
similarity index 97%
rename from arch/x86/kernel/cpu/perf_event_knc.c
rename to arch/x86/events/intel/knc.c
index 5b0c232..548d5f7 100644
--- a/arch/x86/kernel/cpu/perf_event_knc.c
+++ b/arch/x86/events/intel/knc.c

@@ -5,7 +5,7 @@
 
 #include <asm/hardirq.h>
 
-#include "perf_event.h"
+#include "../perf_event.h"
 
 static const u64 knc_perfmon_event_map[] =
 {
@@ -263,7 +263,9 @@
 		goto again;
 
 done:
-	knc_pmu_enable_all(0);
+	/* Only restore PMU state when it's active. See x86_pmu_disable(). */
+	if (cpuc->enabled)
+		knc_pmu_enable_all(0);
 
 	return handled;
 }

diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/events/intel/lbr.c
similarity index 99%
rename from arch/x86/kernel/cpu/perf_event_intel_lbr.c
rename to arch/x86/events/intel/lbr.c
index 653f88d..69dd118 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/events/intel/lbr.c

@@ -5,7 +5,7 @@
 #include <asm/msr.h>
 #include <asm/insn.h>
 
-#include "perf_event.h"
+#include "../perf_event.h"
 
 enum {
 	LBR_FORMAT_32		= 0x00,

diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/events/intel/p4.c
similarity index 99%
rename from arch/x86/kernel/cpu/perf_event_p4.c
rename to arch/x86/events/intel/p4.c
index f2e5678..0a5ede1 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/events/intel/p4.c

@@ -13,7 +13,7 @@
 #include <asm/hardirq.h>
 #include <asm/apic.h>
 
-#include "perf_event.h"
+#include "../perf_event.h"
 
 #define P4_CNTR_LIMIT 3
 /*

diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/events/intel/p6.c
similarity index 98%
rename from arch/x86/kernel/cpu/perf_event_p6.c
rename to arch/x86/events/intel/p6.c
index 7c1a0c0..1f5c47a 100644
--- a/arch/x86/kernel/cpu/perf_event_p6.c
+++ b/arch/x86/events/intel/p6.c

@@ -1,7 +1,7 @@
 #include <linux/perf_event.h>
 #include <linux/types.h>
 
-#include "perf_event.h"
+#include "../perf_event.h"
 
 /*
  * Not sure about some of these

diff --git a/arch/x86/kernel/cpu/perf_event_intel_pt.c b/arch/x86/events/intel/pt.c
similarity index 99%
rename from arch/x86/kernel/cpu/perf_event_intel_pt.c
rename to arch/x86/events/intel/pt.c
index c0bbd10..6af7cf7 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_pt.c
+++ b/arch/x86/events/intel/pt.c

@@ -29,8 +29,8 @@
 #include <asm/io.h>
 #include <asm/intel_pt.h>
 
-#include "perf_event.h"
-#include "intel_pt.h"
+#include "../perf_event.h"
+#include "pt.h"
 
 static DEFINE_PER_CPU(struct pt, pt_ctx);
 

diff --git a/arch/x86/kernel/cpu/intel_pt.h b/arch/x86/events/intel/pt.h
similarity index 100%
rename from arch/x86/kernel/cpu/intel_pt.h
rename to arch/x86/events/intel/pt.h


diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/events/intel/rapl.c
similarity index 73%
rename from arch/x86/kernel/cpu/perf_event_intel_rapl.c
rename to arch/x86/events/intel/rapl.c
index 24a351a..b834a3f 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c
+++ b/arch/x86/events/intel/rapl.c

@@ -44,11 +44,14 @@
  * the duration of the measurement. Tools may use a function such as
  * ldexp(raw_count, -32);
  */
+
+#define pr_fmt(fmt) "RAPL PMU: " fmt
+
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/perf_event.h>
 #include <asm/cpu_device_id.h>
-#include "perf_event.h"
+#include "../perf_event.h"
 
 /*
  * RAPL energy status counters
@@ -107,7 +110,7 @@
 static struct kobj_attribute format_attr_##_var =		\
 	__ATTR(_name, 0444, __rapl_##_var##_show, NULL)
 
-#define RAPL_CNTR_WIDTH 32 /* 32-bit rapl counters */
+#define RAPL_CNTR_WIDTH 32
 
 #define RAPL_EVENT_ATTR_STR(_name, v, str)					\
 static struct perf_pmu_events_attr event_attr_##v = {				\
@@ -117,23 +120,33 @@
 };
 
 struct rapl_pmu {
-	spinlock_t	 lock;
-	int		 n_active; /* number of active events */
-	struct list_head active_list;
-	struct pmu	 *pmu; /* pointer to rapl_pmu_class */
-	ktime_t		 timer_interval; /* in ktime_t unit */
-	struct hrtimer   hrtimer;
+	raw_spinlock_t		lock;
+	int			n_active;
+	int			cpu;
+	struct list_head	active_list;
+	struct pmu		*pmu;
+	ktime_t			timer_interval;
+	struct hrtimer		hrtimer;
 };
 
-static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly;  /* 1/2^hw_unit Joule */
-static struct pmu rapl_pmu_class;
+struct rapl_pmus {
+	struct pmu		pmu;
+	unsigned int		maxpkg;
+	struct rapl_pmu		*pmus[];
+};
+
+ /* 1/2^hw_unit Joule */
+static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly;
+static struct rapl_pmus *rapl_pmus;
 static cpumask_t rapl_cpu_mask;
-static int rapl_cntr_mask;
+static unsigned int rapl_cntr_mask;
+static u64 rapl_timer_ms;
 
-static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu);
-static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu_to_free);
+static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu)
+{
+	return rapl_pmus->pmus[topology_logical_package_id(cpu)];
+}
 
-static struct x86_pmu_quirk *rapl_quirks;
 static inline u64 rapl_read_counter(struct perf_event *event)
 {
 	u64 raw;
@@ -141,19 +154,10 @@
 	return raw;
 }
 
-#define rapl_add_quirk(func_)						\
-do {									\
-	static struct x86_pmu_quirk __quirk __initdata = {		\
-		.func = func_,						\
-	};								\
-	__quirk.next = rapl_quirks;					\
-	rapl_quirks = &__quirk;						\
-} while (0)
-
 static inline u64 rapl_scale(u64 v, int cfg)
 {
 	if (cfg > NR_RAPL_DOMAINS) {
-		pr_warn("invalid domain %d, failed to scale data\n", cfg);
+		pr_warn("Invalid domain %d, failed to scale data\n", cfg);
 		return v;
 	}
 	/*
@@ -206,27 +210,21 @@
 		     HRTIMER_MODE_REL_PINNED);
 }
 
-static void rapl_stop_hrtimer(struct rapl_pmu *pmu)
-{
-	hrtimer_cancel(&pmu->hrtimer);
-}
-
 static enum hrtimer_restart rapl_hrtimer_handle(struct hrtimer *hrtimer)
 {
-	struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu);
+	struct rapl_pmu *pmu = container_of(hrtimer, struct rapl_pmu, hrtimer);
 	struct perf_event *event;
 	unsigned long flags;
 
 	if (!pmu->n_active)
 		return HRTIMER_NORESTART;
 
-	spin_lock_irqsave(&pmu->lock, flags);
+	raw_spin_lock_irqsave(&pmu->lock, flags);
 
-	list_for_each_entry(event, &pmu->active_list, active_entry) {
+	list_for_each_entry(event, &pmu->active_list, active_entry)
 		rapl_event_update(event);
-	}
 
-	spin_unlock_irqrestore(&pmu->lock, flags);
+	raw_spin_unlock_irqrestore(&pmu->lock, flags);
 
 	hrtimer_forward_now(hrtimer, pmu->timer_interval);
 
@@ -260,28 +258,28 @@
 
 static void rapl_pmu_event_start(struct perf_event *event, int mode)
 {
-	struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu);
+	struct rapl_pmu *pmu = event->pmu_private;
 	unsigned long flags;
 
-	spin_lock_irqsave(&pmu->lock, flags);
+	raw_spin_lock_irqsave(&pmu->lock, flags);
 	__rapl_pmu_event_start(pmu, event);
-	spin_unlock_irqrestore(&pmu->lock, flags);
+	raw_spin_unlock_irqrestore(&pmu->lock, flags);
 }
 
 static void rapl_pmu_event_stop(struct perf_event *event, int mode)
 {
-	struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu);
+	struct rapl_pmu *pmu = event->pmu_private;
 	struct hw_perf_event *hwc = &event->hw;
 	unsigned long flags;
 
-	spin_lock_irqsave(&pmu->lock, flags);
+	raw_spin_lock_irqsave(&pmu->lock, flags);
 
 	/* mark event as deactivated and stopped */
 	if (!(hwc->state & PERF_HES_STOPPED)) {
 		WARN_ON_ONCE(pmu->n_active <= 0);
 		pmu->n_active--;
 		if (pmu->n_active == 0)
-			rapl_stop_hrtimer(pmu);
+			hrtimer_cancel(&pmu->hrtimer);
 
 		list_del(&event->active_entry);
 
@@ -299,23 +297,23 @@
 		hwc->state |= PERF_HES_UPTODATE;
 	}
 
-	spin_unlock_irqrestore(&pmu->lock, flags);
+	raw_spin_unlock_irqrestore(&pmu->lock, flags);
 }
 
 static int rapl_pmu_event_add(struct perf_event *event, int mode)
 {
-	struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu);
+	struct rapl_pmu *pmu = event->pmu_private;
 	struct hw_perf_event *hwc = &event->hw;
 	unsigned long flags;
 
-	spin_lock_irqsave(&pmu->lock, flags);
+	raw_spin_lock_irqsave(&pmu->lock, flags);
 
 	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
 
 	if (mode & PERF_EF_START)
 		__rapl_pmu_event_start(pmu, event);
 
-	spin_unlock_irqrestore(&pmu->lock, flags);
+	raw_spin_unlock_irqrestore(&pmu->lock, flags);
 
 	return 0;
 }
@@ -329,15 +327,19 @@
 {
 	u64 cfg = event->attr.config & RAPL_EVENT_MASK;
 	int bit, msr, ret = 0;
+	struct rapl_pmu *pmu;
 
 	/* only look at RAPL events */
-	if (event->attr.type != rapl_pmu_class.type)
+	if (event->attr.type != rapl_pmus->pmu.type)
 		return -ENOENT;
 
 	/* check only supported bits are set */
 	if (event->attr.config & ~RAPL_EVENT_MASK)
 		return -EINVAL;
 
+	if (event->cpu < 0)
+		return -EINVAL;
+
 	/*
 	 * check event is known (determines counter)
 	 */
@@ -376,6 +378,9 @@
 		return -EINVAL;
 
 	/* must be done before validate_group */
+	pmu = cpu_to_rapl_pmu(event->cpu);
+	event->cpu = pmu->cpu;
+	event->pmu_private = pmu;
 	event->hw.event_base = msr;
 	event->hw.config = cfg;
 	event->hw.idx = bit;
@@ -506,139 +511,62 @@
 	NULL,
 };
 
-static struct pmu rapl_pmu_class = {
-	.attr_groups	= rapl_attr_groups,
-	.task_ctx_nr	= perf_invalid_context, /* system-wide only */
-	.event_init	= rapl_pmu_event_init,
-	.add		= rapl_pmu_event_add, /* must have */
-	.del		= rapl_pmu_event_del, /* must have */
-	.start		= rapl_pmu_event_start,
-	.stop		= rapl_pmu_event_stop,
-	.read		= rapl_pmu_event_read,
-};
-
 static void rapl_cpu_exit(int cpu)
 {
-	struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu);
-	int i, phys_id = topology_physical_package_id(cpu);
-	int target = -1;
+	struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
+	int target;
 
-	/* find a new cpu on same package */
-	for_each_online_cpu(i) {
-		if (i == cpu)
-			continue;
-		if (phys_id == topology_physical_package_id(i)) {
-			target = i;
-			break;
-		}
-	}
-	/*
-	 * clear cpu from cpumask
-	 * if was set in cpumask and still some cpu on package,
-	 * then move to new cpu
-	 */
-	if (cpumask_test_and_clear_cpu(cpu, &rapl_cpu_mask) && target >= 0)
+	/* Check if exiting cpu is used for collecting rapl events */
+	if (!cpumask_test_and_clear_cpu(cpu, &rapl_cpu_mask))
+		return;
+
+	pmu->cpu = -1;
+	/* Find a new cpu to collect rapl events */
+	target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
+
+	/* Migrate rapl events to the new target */
+	if (target < nr_cpu_ids) {
 		cpumask_set_cpu(target, &rapl_cpu_mask);
-
-	WARN_ON(cpumask_empty(&rapl_cpu_mask));
-	/*
-	 * migrate events and context to new cpu
-	 */
-	if (target >= 0)
+		pmu->cpu = target;
 		perf_pmu_migrate_context(pmu->pmu, cpu, target);
-
-	/* cancel overflow polling timer for CPU */
-	rapl_stop_hrtimer(pmu);
+	}
 }
 
 static void rapl_cpu_init(int cpu)
 {
-	int i, phys_id = topology_physical_package_id(cpu);
+	struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
+	int target;
 
-	/* check if phys_is is already covered */
-	for_each_cpu(i, &rapl_cpu_mask) {
-		if (phys_id == topology_physical_package_id(i))
-			return;
-	}
-	/* was not found, so add it */
-	cpumask_set_cpu(cpu, &rapl_cpu_mask);
-}
-
-static __init void rapl_hsw_server_quirk(void)
-{
 	/*
-	 * DRAM domain on HSW server has fixed energy unit which can be
-	 * different than the unit from power unit MSR.
-	 * "Intel Xeon Processor E5-1600 and E5-2600 v3 Product Families, V2
-	 * of 2. Datasheet, September 2014, Reference Number: 330784-001 "
+	 * Check if there is an online cpu in the package which collects rapl
+	 * events already.
 	 */
-	rapl_hw_unit[RAPL_IDX_RAM_NRG_STAT] = 16;
+	target = cpumask_any_and(&rapl_cpu_mask, topology_core_cpumask(cpu));
+	if (target < nr_cpu_ids)
+		return;
+
+	cpumask_set_cpu(cpu, &rapl_cpu_mask);
+	pmu->cpu = cpu;
 }
 
 static int rapl_cpu_prepare(int cpu)
 {
-	struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu);
-	int phys_id = topology_physical_package_id(cpu);
-	u64 ms;
+	struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
 
 	if (pmu)
 		return 0;
 
-	if (phys_id < 0)
-		return -1;
-
 	pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
 	if (!pmu)
-		return -1;
-	spin_lock_init(&pmu->lock);
+		return -ENOMEM;
 
+	raw_spin_lock_init(&pmu->lock);
 	INIT_LIST_HEAD(&pmu->active_list);
-
-	pmu->pmu = &rapl_pmu_class;
-
-	/*
-	 * use reference of 200W for scaling the timeout
-	 * to avoid missing counter overflows.
-	 * 200W = 200 Joules/sec
-	 * divide interval by 2 to avoid lockstep (2 * 100)
-	 * if hw unit is 32, then we use 2 ms 1/200/2
-	 */
-	if (rapl_hw_unit[0] < 32)
-		ms = (1000 / (2 * 100)) * (1ULL << (32 - rapl_hw_unit[0] - 1));
-	else
-		ms = 2;
-
-	pmu->timer_interval = ms_to_ktime(ms);
-
+	pmu->pmu = &rapl_pmus->pmu;
+	pmu->timer_interval = ms_to_ktime(rapl_timer_ms);
+	pmu->cpu = -1;
 	rapl_hrtimer_init(pmu);
-
-	/* set RAPL pmu for this cpu for now */
-	per_cpu(rapl_pmu, cpu) = pmu;
-	per_cpu(rapl_pmu_to_free, cpu) = NULL;
-
-	return 0;
-}
-
-static void rapl_cpu_kfree(int cpu)
-{
-	struct rapl_pmu *pmu = per_cpu(rapl_pmu_to_free, cpu);
-
-	kfree(pmu);
-
-	per_cpu(rapl_pmu_to_free, cpu) = NULL;
-}
-
-static int rapl_cpu_dying(int cpu)
-{
-	struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu);
-
-	if (!pmu)
-		return 0;
-
-	per_cpu(rapl_pmu, cpu) = NULL;
-
-	per_cpu(rapl_pmu_to_free, cpu) = pmu;
-
+	rapl_pmus->pmus[topology_logical_package_id(cpu)] = pmu;
 	return 0;
 }
 
@@ -651,28 +579,20 @@
 	case CPU_UP_PREPARE:
 		rapl_cpu_prepare(cpu);
 		break;
-	case CPU_STARTING:
+
+	case CPU_DOWN_FAILED:
+	case CPU_ONLINE:
 		rapl_cpu_init(cpu);
 		break;
-	case CPU_UP_CANCELED:
-	case CPU_DYING:
-		rapl_cpu_dying(cpu);
-		break;
-	case CPU_ONLINE:
-	case CPU_DEAD:
-		rapl_cpu_kfree(cpu);
-		break;
+
 	case CPU_DOWN_PREPARE:
 		rapl_cpu_exit(cpu);
 		break;
-	default:
-		break;
 	}
-
 	return NOTIFY_OK;
 }
 
-static int rapl_check_hw_unit(void)
+static int rapl_check_hw_unit(bool apply_quirk)
 {
 	u64 msr_rapl_power_unit_bits;
 	int i;
@@ -683,28 +603,107 @@
 	for (i = 0; i < NR_RAPL_DOMAINS; i++)
 		rapl_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
 
+	/*
+	 * DRAM domain on HSW server and KNL has fixed energy unit which can be
+	 * different than the unit from power unit MSR. See
+	 * "Intel Xeon Processor E5-1600 and E5-2600 v3 Product Families, V2
+	 * of 2. Datasheet, September 2014, Reference Number: 330784-001 "
+	 */
+	if (apply_quirk)
+		rapl_hw_unit[RAPL_IDX_RAM_NRG_STAT] = 16;
+
+	/*
+	 * Calculate the timer rate:
+	 * Use reference of 200W for scaling the timeout to avoid counter
+	 * overflows. 200W = 200 Joules/sec
+	 * Divide interval by 2 to avoid lockstep (2 * 100)
+	 * if hw unit is 32, then we use 2 ms 1/200/2
+	 */
+	rapl_timer_ms = 2;
+	if (rapl_hw_unit[0] < 32) {
+		rapl_timer_ms = (1000 / (2 * 100));
+		rapl_timer_ms *= (1ULL << (32 - rapl_hw_unit[0] - 1));
+	}
 	return 0;
 }
 
-static const struct x86_cpu_id rapl_cpu_match[] = {
+static void __init rapl_advertise(void)
+{
+	int i;
+
+	pr_info("API unit is 2^-32 Joules, %d fixed counters, %llu ms ovfl timer\n",
+		hweight32(rapl_cntr_mask), rapl_timer_ms);
+
+	for (i = 0; i < NR_RAPL_DOMAINS; i++) {
+		if (rapl_cntr_mask & (1 << i)) {
+			pr_info("hw unit of domain %s 2^-%d Joules\n",
+				rapl_domain_names[i], rapl_hw_unit[i]);
+		}
+	}
+}
+
+static int __init rapl_prepare_cpus(void)
+{
+	unsigned int cpu, pkg;
+	int ret;
+
+	for_each_online_cpu(cpu) {
+		pkg = topology_logical_package_id(cpu);
+		if (rapl_pmus->pmus[pkg])
+			continue;
+
+		ret = rapl_cpu_prepare(cpu);
+		if (ret)
+			return ret;
+		rapl_cpu_init(cpu);
+	}
+	return 0;
+}
+
+static void __init cleanup_rapl_pmus(void)
+{
+	int i;
+
+	for (i = 0; i < rapl_pmus->maxpkg; i++)
+		kfree(rapl_pmus->pmus + i);
+	kfree(rapl_pmus);
+}
+
+static int __init init_rapl_pmus(void)
+{
+	int maxpkg = topology_max_packages();
+	size_t size;
+
+	size = sizeof(*rapl_pmus) + maxpkg * sizeof(struct rapl_pmu *);
+	rapl_pmus = kzalloc(size, GFP_KERNEL);
+	if (!rapl_pmus)
+		return -ENOMEM;
+
+	rapl_pmus->maxpkg		= maxpkg;
+	rapl_pmus->pmu.attr_groups	= rapl_attr_groups;
+	rapl_pmus->pmu.task_ctx_nr	= perf_invalid_context;
+	rapl_pmus->pmu.event_init	= rapl_pmu_event_init;
+	rapl_pmus->pmu.add		= rapl_pmu_event_add;
+	rapl_pmus->pmu.del		= rapl_pmu_event_del;
+	rapl_pmus->pmu.start		= rapl_pmu_event_start;
+	rapl_pmus->pmu.stop		= rapl_pmu_event_stop;
+	rapl_pmus->pmu.read		= rapl_pmu_event_read;
+	return 0;
+}
+
+static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
 	[0] = { .vendor = X86_VENDOR_INTEL, .family = 6 },
 	[1] = {},
 };
 
 static int __init rapl_pmu_init(void)
 {
-	struct rapl_pmu *pmu;
-	int cpu, ret;
-	struct x86_pmu_quirk *quirk;
-	int i;
+	bool apply_quirk = false;
+	int ret;
 
-	/*
-	 * check for Intel processor family 6
-	 */
 	if (!x86_match_cpu(rapl_cpu_match))
-		return 0;
+		return -ENODEV;
 
-	/* check supported CPU */
 	switch (boot_cpu_data.x86_model) {
 	case 42: /* Sandy Bridge */
 	case 58: /* Ivy Bridge */
@@ -712,7 +711,7 @@
 		rapl_pmu_events_group.attrs = rapl_events_cln_attr;
 		break;
 	case 63: /* Haswell-Server */
-		rapl_add_quirk(rapl_hsw_server_quirk);
+		apply_quirk = true;
 		rapl_cntr_mask = RAPL_IDX_SRV;
 		rapl_pmu_events_group.attrs = rapl_events_srv_attr;
 		break;
@@ -728,56 +727,41 @@
 		rapl_pmu_events_group.attrs = rapl_events_srv_attr;
 		break;
 	case 87: /* Knights Landing */
-		rapl_add_quirk(rapl_hsw_server_quirk);
+		apply_quirk = true;
 		rapl_cntr_mask = RAPL_IDX_KNL;
 		rapl_pmu_events_group.attrs = rapl_events_knl_attr;
-
+		break;
 	default:
-		/* unsupported */
-		return 0;
+		return -ENODEV;
 	}
-	ret = rapl_check_hw_unit();
+
+	ret = rapl_check_hw_unit(apply_quirk);
 	if (ret)
 		return ret;
 
-	/* run cpu model quirks */
-	for (quirk = rapl_quirks; quirk; quirk = quirk->next)
-		quirk->func();
+	ret = init_rapl_pmus();
+	if (ret)
+		return ret;
+
 	cpu_notifier_register_begin();
 
-	for_each_online_cpu(cpu) {
-		ret = rapl_cpu_prepare(cpu);
-		if (ret)
-			goto out;
-		rapl_cpu_init(cpu);
-	}
+	ret = rapl_prepare_cpus();
+	if (ret)
+		goto out;
+
+	ret = perf_pmu_register(&rapl_pmus->pmu, "power", -1);
+	if (ret)
+		goto out;
 
 	__perf_cpu_notifier(rapl_cpu_notifier);
-
-	ret = perf_pmu_register(&rapl_pmu_class, "power", -1);
-	if (WARN_ON(ret)) {
-		pr_info("RAPL PMU detected, registration failed (%d), RAPL PMU disabled\n", ret);
-		cpu_notifier_register_done();
-		return -1;
-	}
-
-	pmu = __this_cpu_read(rapl_pmu);
-
-	pr_info("RAPL PMU detected,"
-		" API unit is 2^-32 Joules,"
-		" %d fixed counters"
-		" %llu ms ovfl timer\n",
-		hweight32(rapl_cntr_mask),
-		ktime_to_ms(pmu->timer_interval));
-	for (i = 0; i < NR_RAPL_DOMAINS; i++) {
-		if (rapl_cntr_mask & (1 << i)) {
-			pr_info("hw unit of domain %s 2^-%d Joules\n",
-				rapl_domain_names[i], rapl_hw_unit[i]);
-		}
-	}
-out:
 	cpu_notifier_register_done();
-
+	rapl_advertise();
 	return 0;
+
+out:
+	pr_warn("Initialization failed (%d), disabled\n", ret);
+	cleanup_rapl_pmus();
+	cpu_notifier_register_done();
+	return ret;
 }
 device_initcall(rapl_pmu_init);

diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/events/intel/uncore.c
similarity index 74%
rename from arch/x86/kernel/cpu/perf_event_intel_uncore.c
rename to arch/x86/events/intel/uncore.c
index 3bf41d4..7012d18 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/events/intel/uncore.c

@@ -1,4 +1,4 @@
-#include "perf_event_intel_uncore.h"
+#include "uncore.h"
 
 static struct intel_uncore_type *empty_uncore[] = { NULL, };
 struct intel_uncore_type **uncore_msr_uncores = empty_uncore;
@@ -9,9 +9,9 @@
 /* pci bus to socket mapping */
 DEFINE_RAW_SPINLOCK(pci2phy_map_lock);
 struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head);
-struct pci_dev *uncore_extra_pci_dev[UNCORE_SOCKET_MAX][UNCORE_EXTRA_PCI_DEV_MAX];
+struct pci_extra_dev *uncore_extra_pci_dev;
+static int max_packages;
 
-static DEFINE_RAW_SPINLOCK(uncore_box_lock);
 /* mask of cpus that collect uncore events */
 static cpumask_t uncore_cpu_mask;
 
@@ -21,7 +21,7 @@
 struct event_constraint uncore_constraint_empty =
 	EVENT_CONSTRAINT(0, 0, 0);
 
-int uncore_pcibus_to_physid(struct pci_bus *bus)
+static int uncore_pcibus_to_physid(struct pci_bus *bus)
 {
 	struct pci2phy_map *map;
 	int phys_id = -1;
@@ -38,6 +38,16 @@
 	return phys_id;
 }
 
+static void uncore_free_pcibus_map(void)
+{
+	struct pci2phy_map *map, *tmp;
+
+	list_for_each_entry_safe(map, tmp, &pci2phy_map_head, list) {
+		list_del(&map->list);
+		kfree(map);
+	}
+}
+
 struct pci2phy_map *__find_pci2phy_map(int segment)
 {
 	struct pci2phy_map *map, *alloc = NULL;
@@ -82,43 +92,9 @@
 	return sprintf(buf, "%s", event->config);
 }
 
-struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event)
-{
-	return container_of(event->pmu, struct intel_uncore_pmu, pmu);
-}
-
 struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
 {
-	struct intel_uncore_box *box;
-
-	box = *per_cpu_ptr(pmu->box, cpu);
-	if (box)
-		return box;
-
-	raw_spin_lock(&uncore_box_lock);
-	/* Recheck in lock to handle races. */
-	if (*per_cpu_ptr(pmu->box, cpu))
-		goto out;
-	list_for_each_entry(box, &pmu->box_list, list) {
-		if (box->phys_id == topology_physical_package_id(cpu)) {
-			atomic_inc(&box->refcnt);
-			*per_cpu_ptr(pmu->box, cpu) = box;
-			break;
-		}
-	}
-out:
-	raw_spin_unlock(&uncore_box_lock);
-
-	return *per_cpu_ptr(pmu->box, cpu);
-}
-
-struct intel_uncore_box *uncore_event_to_box(struct perf_event *event)
-{
-	/*
-	 * perf core schedules event on the basis of cpu, uncore events are
-	 * collected by one of the cpus inside a physical package.
-	 */
-	return uncore_pmu_to_box(uncore_event_to_pmu(event), smp_processor_id());
+	return pmu->boxes[topology_logical_package_id(cpu)];
 }
 
 u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
@@ -207,7 +183,8 @@
 	return config;
 }
 
-static void uncore_assign_hw_event(struct intel_uncore_box *box, struct perf_event *event, int idx)
+static void uncore_assign_hw_event(struct intel_uncore_box *box,
+				   struct perf_event *event, int idx)
 {
 	struct hw_perf_event *hwc = &event->hw;
 
@@ -302,24 +279,25 @@
 	box->hrtimer.function = uncore_pmu_hrtimer;
 }
 
-static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int node)
+static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type,
+						 int node)
 {
+	int i, size, numshared = type->num_shared_regs ;
 	struct intel_uncore_box *box;
-	int i, size;
 
-	size = sizeof(*box) + type->num_shared_regs * sizeof(struct intel_uncore_extra_reg);
+	size = sizeof(*box) + numshared * sizeof(struct intel_uncore_extra_reg);
 
 	box = kzalloc_node(size, GFP_KERNEL, node);
 	if (!box)
 		return NULL;
 
-	for (i = 0; i < type->num_shared_regs; i++)
+	for (i = 0; i < numshared; i++)
 		raw_spin_lock_init(&box->shared_regs[i].lock);
 
 	uncore_pmu_init_hrtimer(box);
-	atomic_set(&box->refcnt, 1);
 	box->cpu = -1;
-	box->phys_id = -1;
+	box->pci_phys_id = -1;
+	box->pkgid = -1;
 
 	/* set default hrtimer timeout */
 	box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL;
@@ -341,7 +319,8 @@
 }
 
 static int
-uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, bool dogrp)
+uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader,
+		      bool dogrp)
 {
 	struct perf_event *event;
 	int n, max_count;
@@ -402,7 +381,8 @@
 	return &type->unconstrainted;
 }
 
-static void uncore_put_event_constraint(struct intel_uncore_box *box, struct perf_event *event)
+static void uncore_put_event_constraint(struct intel_uncore_box *box,
+					struct perf_event *event)
 {
 	if (box->pmu->type->ops->put_constraint)
 		box->pmu->type->ops->put_constraint(box, event);
@@ -582,7 +562,7 @@
 		if (event == box->event_list[i]) {
 			uncore_put_event_constraint(box, event);
 
-			while (++i < box->n_events)
+			for (++i; i < box->n_events; i++)
 				box->event_list[i - 1] = box->event_list[i];
 
 			--box->n_events;
@@ -676,6 +656,7 @@
 	if (!box || box->cpu < 0)
 		return -EINVAL;
 	event->cpu = box->cpu;
+	event->pmu_private = box;
 
 	event->hw.idx = -1;
 	event->hw.last_tag = ~0ULL;
@@ -760,64 +741,110 @@
 	}
 
 	ret = perf_pmu_register(&pmu->pmu, pmu->name, -1);
+	if (!ret)
+		pmu->registered = true;
 	return ret;
 }
 
+static void uncore_pmu_unregister(struct intel_uncore_pmu *pmu)
+{
+	if (!pmu->registered)
+		return;
+	perf_pmu_unregister(&pmu->pmu);
+	pmu->registered = false;
+}
+
+static void __init __uncore_exit_boxes(struct intel_uncore_type *type, int cpu)
+{
+	struct intel_uncore_pmu *pmu = type->pmus;
+	struct intel_uncore_box *box;
+	int i, pkg;
+
+	if (pmu) {
+		pkg = topology_physical_package_id(cpu);
+		for (i = 0; i < type->num_boxes; i++, pmu++) {
+			box = pmu->boxes[pkg];
+			if (box)
+				uncore_box_exit(box);
+		}
+	}
+}
+
+static void __init uncore_exit_boxes(void *dummy)
+{
+	struct intel_uncore_type **types;
+
+	for (types = uncore_msr_uncores; *types; types++)
+		__uncore_exit_boxes(*types++, smp_processor_id());
+}
+
+static void uncore_free_boxes(struct intel_uncore_pmu *pmu)
+{
+	int pkg;
+
+	for (pkg = 0; pkg < max_packages; pkg++)
+		kfree(pmu->boxes[pkg]);
+	kfree(pmu->boxes);
+}
+
 static void __init uncore_type_exit(struct intel_uncore_type *type)
 {
+	struct intel_uncore_pmu *pmu = type->pmus;
 	int i;
 
-	for (i = 0; i < type->num_boxes; i++)
-		free_percpu(type->pmus[i].box);
-	kfree(type->pmus);
-	type->pmus = NULL;
+	if (pmu) {
+		for (i = 0; i < type->num_boxes; i++, pmu++) {
+			uncore_pmu_unregister(pmu);
+			uncore_free_boxes(pmu);
+		}
+		kfree(type->pmus);
+		type->pmus = NULL;
+	}
 	kfree(type->events_group);
 	type->events_group = NULL;
 }
 
 static void __init uncore_types_exit(struct intel_uncore_type **types)
 {
-	int i;
-	for (i = 0; types[i]; i++)
-		uncore_type_exit(types[i]);
+	for (; *types; types++)
+		uncore_type_exit(*types);
 }
 
-static int __init uncore_type_init(struct intel_uncore_type *type)
+static int __init uncore_type_init(struct intel_uncore_type *type, bool setid)
 {
 	struct intel_uncore_pmu *pmus;
 	struct attribute_group *attr_group;
 	struct attribute **attrs;
+	size_t size;
 	int i, j;
 
 	pmus = kzalloc(sizeof(*pmus) * type->num_boxes, GFP_KERNEL);
 	if (!pmus)
 		return -ENOMEM;
 
-	type->pmus = pmus;
+	size = max_packages * sizeof(struct intel_uncore_box *);
 
+	for (i = 0; i < type->num_boxes; i++) {
+		pmus[i].func_id	= setid ? i : -1;
+		pmus[i].pmu_idx	= i;
+		pmus[i].type	= type;
+		pmus[i].boxes	= kzalloc(size, GFP_KERNEL);
+		if (!pmus[i].boxes)
+			return -ENOMEM;
+	}
+
+	type->pmus = pmus;
 	type->unconstrainted = (struct event_constraint)
 		__EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1,
 				0, type->num_counters, 0, 0);
 
-	for (i = 0; i < type->num_boxes; i++) {
-		pmus[i].func_id = -1;
-		pmus[i].pmu_idx = i;
-		pmus[i].type = type;
-		INIT_LIST_HEAD(&pmus[i].box_list);
-		pmus[i].box = alloc_percpu(struct intel_uncore_box *);
-		if (!pmus[i].box)
-			goto fail;
-	}
-
 	if (type->event_descs) {
-		i = 0;
-		while (type->event_descs[i].attr.attr.name)
-			i++;
+		for (i = 0; type->event_descs[i].attr.attr.name; i++);
 
 		attr_group = kzalloc(sizeof(struct attribute *) * (i + 1) +
 					sizeof(*attr_group), GFP_KERNEL);
 		if (!attr_group)
-			goto fail;
+			return -ENOMEM;
 
 		attrs = (struct attribute **)(attr_group + 1);
 		attr_group->name = "events";
@@ -831,25 +858,19 @@
 
 	type->pmu_group = &uncore_pmu_attr_group;
 	return 0;
-fail:
-	uncore_type_exit(type);
-	return -ENOMEM;
 }
 
-static int __init uncore_types_init(struct intel_uncore_type **types)
+static int __init
+uncore_types_init(struct intel_uncore_type **types, bool setid)
 {
-	int i, ret;
+	int ret;
 
-	for (i = 0; types[i]; i++) {
-		ret = uncore_type_init(types[i]);
+	for (; *types; types++) {
+		ret = uncore_type_init(*types, setid);
 		if (ret)
-			goto fail;
+			return ret;
 	}
 	return 0;
-fail:
-	while (--i >= 0)
-		uncore_type_exit(types[i]);
-	return ret;
 }
 
 /*
@@ -857,28 +878,28 @@
  */
 static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
+	struct intel_uncore_type *type;
 	struct intel_uncore_pmu *pmu;
 	struct intel_uncore_box *box;
-	struct intel_uncore_type *type;
-	int phys_id;
-	bool first_box = false;
+	int phys_id, pkg, ret;
 
 	phys_id = uncore_pcibus_to_physid(pdev->bus);
 	if (phys_id < 0)
 		return -ENODEV;
 
+	pkg = topology_phys_to_logical_pkg(phys_id);
+	if (WARN_ON_ONCE(pkg < 0))
+		return -EINVAL;
+
 	if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) {
 		int idx = UNCORE_PCI_DEV_IDX(id->driver_data);
-		uncore_extra_pci_dev[phys_id][idx] = pdev;
+
+		uncore_extra_pci_dev[pkg].dev[idx] = pdev;
 		pci_set_drvdata(pdev, NULL);
 		return 0;
 	}
 
 	type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
-	box = uncore_alloc_box(type, NUMA_NO_NODE);
-	if (!box)
-		return -ENOMEM;
-
 	/*
 	 * for performance monitoring unit with multiple boxes,
 	 * each box has a different function id.
@@ -890,44 +911,60 @@
 	 * some device types. Hence PCI device idx would be 0 for all devices.
 	 * So increment pmu pointer to point to an unused array element.
 	 */
-	if (boot_cpu_data.x86_model == 87)
+	if (boot_cpu_data.x86_model == 87) {
 		while (pmu->func_id >= 0)
 			pmu++;
+	}
+
+	if (WARN_ON_ONCE(pmu->boxes[pkg] != NULL))
+		return -EINVAL;
+
+	box = uncore_alloc_box(type, NUMA_NO_NODE);
+	if (!box)
+		return -ENOMEM;
+
 	if (pmu->func_id < 0)
 		pmu->func_id = pdev->devfn;
 	else
 		WARN_ON_ONCE(pmu->func_id != pdev->devfn);
 
-	box->phys_id = phys_id;
+	atomic_inc(&box->refcnt);
+	box->pci_phys_id = phys_id;
+	box->pkgid = pkg;
 	box->pci_dev = pdev;
 	box->pmu = pmu;
 	uncore_box_init(box);
 	pci_set_drvdata(pdev, box);
 
-	raw_spin_lock(&uncore_box_lock);
-	if (list_empty(&pmu->box_list))
-		first_box = true;
-	list_add_tail(&box->list, &pmu->box_list);
-	raw_spin_unlock(&uncore_box_lock);
+	pmu->boxes[pkg] = box;
+	if (atomic_inc_return(&pmu->activeboxes) > 1)
+		return 0;
 
-	if (first_box)
-		uncore_pmu_register(pmu);
-	return 0;
+	/* First active box registers the pmu */
+	ret = uncore_pmu_register(pmu);
+	if (ret) {
+		pci_set_drvdata(pdev, NULL);
+		pmu->boxes[pkg] = NULL;
+		uncore_box_exit(box);
+		kfree(box);
+	}
+	return ret;
 }
 
 static void uncore_pci_remove(struct pci_dev *pdev)
 {
 	struct intel_uncore_box *box = pci_get_drvdata(pdev);
 	struct intel_uncore_pmu *pmu;
-	int i, cpu, phys_id;
-	bool last_box = false;
+	int i, phys_id, pkg;
 
 	phys_id = uncore_pcibus_to_physid(pdev->bus);
+	pkg = topology_phys_to_logical_pkg(phys_id);
+
 	box = pci_get_drvdata(pdev);
 	if (!box) {
 		for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) {
-			if (uncore_extra_pci_dev[phys_id][i] == pdev) {
-				uncore_extra_pci_dev[phys_id][i] = NULL;
+			if (uncore_extra_pci_dev[pkg].dev[i] == pdev) {
+				uncore_extra_pci_dev[pkg].dev[i] = NULL;
 				break;
 			}
 		}
@@ -936,33 +973,20 @@
 	}
 
 	pmu = box->pmu;
-	if (WARN_ON_ONCE(phys_id != box->phys_id))
+	if (WARN_ON_ONCE(phys_id != box->pci_phys_id))
 		return;
 
 	pci_set_drvdata(pdev, NULL);
-
-	raw_spin_lock(&uncore_box_lock);
-	list_del(&box->list);
-	if (list_empty(&pmu->box_list))
-		last_box = true;
-	raw_spin_unlock(&uncore_box_lock);
-
-	for_each_possible_cpu(cpu) {
-		if (*per_cpu_ptr(pmu->box, cpu) == box) {
-			*per_cpu_ptr(pmu->box, cpu) = NULL;
-			atomic_dec(&box->refcnt);
-		}
-	}
-
-	WARN_ON_ONCE(atomic_read(&box->refcnt) != 1);
+	pmu->boxes[pkg] = NULL;
+	if (atomic_dec_return(&pmu->activeboxes) == 0)
+		uncore_pmu_unregister(pmu);
+	uncore_box_exit(box);
 	kfree(box);
-
-	if (last_box)
-		perf_pmu_unregister(&pmu->pmu);
 }
 
 static int __init uncore_pci_init(void)
 {
+	size_t size;
 	int ret;
 
 	switch (boot_cpu_data.x86_model) {
@@ -999,25 +1023,40 @@
 		ret = skl_uncore_pci_init();
 		break;
 	default:
-		return 0;
+		return -ENODEV;
 	}
 
 	if (ret)
 		return ret;
 
-	ret = uncore_types_init(uncore_pci_uncores);
+	size = max_packages * sizeof(struct pci_extra_dev);
+	uncore_extra_pci_dev = kzalloc(size, GFP_KERNEL);
+	if (!uncore_extra_pci_dev) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	ret = uncore_types_init(uncore_pci_uncores, false);
 	if (ret)
-		return ret;
+		goto errtype;
 
 	uncore_pci_driver->probe = uncore_pci_probe;
 	uncore_pci_driver->remove = uncore_pci_remove;
 
 	ret = pci_register_driver(uncore_pci_driver);
-	if (ret == 0)
-		pcidrv_registered = true;
-	else
-		uncore_types_exit(uncore_pci_uncores);
+	if (ret)
+		goto errtype;
 
+	pcidrv_registered = true;
+	return 0;
+
+errtype:
+	uncore_types_exit(uncore_pci_uncores);
+	kfree(uncore_extra_pci_dev);
+	uncore_extra_pci_dev = NULL;
+	uncore_free_pcibus_map();
+err:
+	uncore_pci_uncores = empty_uncore;
 	return ret;
 }
 
@@ -1027,173 +1066,139 @@
 		pcidrv_registered = false;
 		pci_unregister_driver(uncore_pci_driver);
 		uncore_types_exit(uncore_pci_uncores);
-	}
-}
-
-/* CPU hot plug/unplug are serialized by cpu_add_remove_lock mutex */
-static LIST_HEAD(boxes_to_free);
-
-static void uncore_kfree_boxes(void)
-{
-	struct intel_uncore_box *box;
-
-	while (!list_empty(&boxes_to_free)) {
-		box = list_entry(boxes_to_free.next,
-				 struct intel_uncore_box, list);
-		list_del(&box->list);
-		kfree(box);
+		kfree(uncore_extra_pci_dev);
+		uncore_free_pcibus_map();
 	}
 }
 
 static void uncore_cpu_dying(int cpu)
 {
-	struct intel_uncore_type *type;
+	struct intel_uncore_type *type, **types = uncore_msr_uncores;
 	struct intel_uncore_pmu *pmu;
 	struct intel_uncore_box *box;
-	int i, j;
+	int i, pkg;
 
-	for (i = 0; uncore_msr_uncores[i]; i++) {
-		type = uncore_msr_uncores[i];
-		for (j = 0; j < type->num_boxes; j++) {
-			pmu = &type->pmus[j];
-			box = *per_cpu_ptr(pmu->box, cpu);
-			*per_cpu_ptr(pmu->box, cpu) = NULL;
-			if (box && atomic_dec_and_test(&box->refcnt))
-				list_add(&box->list, &boxes_to_free);
+	pkg = topology_logical_package_id(cpu);
+	for (; *types; types++) {
+		type = *types;
+		pmu = type->pmus;
+		for (i = 0; i < type->num_boxes; i++, pmu++) {
+			box = pmu->boxes[pkg];
+			if (box && atomic_dec_return(&box->refcnt) == 0)
+				uncore_box_exit(box);
 		}
 	}
 }
 
-static int uncore_cpu_starting(int cpu)
+static void uncore_cpu_starting(int cpu, bool init)
 {
-	struct intel_uncore_type *type;
+	struct intel_uncore_type *type, **types = uncore_msr_uncores;
 	struct intel_uncore_pmu *pmu;
-	struct intel_uncore_box *box, *exist;
-	int i, j, k, phys_id;
+	struct intel_uncore_box *box;
+	int i, pkg, ncpus = 1;
 
-	phys_id = topology_physical_package_id(cpu);
+	if (init) {
+		/*
+		 * On init we get the number of online cpus in the package
+		 * and set refcount for all of them.
+		 */
+		ncpus = cpumask_weight(topology_core_cpumask(cpu));
+	}
 
-	for (i = 0; uncore_msr_uncores[i]; i++) {
-		type = uncore_msr_uncores[i];
-		for (j = 0; j < type->num_boxes; j++) {
-			pmu = &type->pmus[j];
-			box = *per_cpu_ptr(pmu->box, cpu);
-			/* called by uncore_cpu_init? */
-			if (box && box->phys_id >= 0) {
-				uncore_box_init(box);
+	pkg = topology_logical_package_id(cpu);
+	for (; *types; types++) {
+		type = *types;
+		pmu = type->pmus;
+		for (i = 0; i < type->num_boxes; i++, pmu++) {
+			box = pmu->boxes[pkg];
+			if (!box)
 				continue;
-			}
-
-			for_each_online_cpu(k) {
-				exist = *per_cpu_ptr(pmu->box, k);
-				if (exist && exist->phys_id == phys_id) {
-					atomic_inc(&exist->refcnt);
-					*per_cpu_ptr(pmu->box, cpu) = exist;
-					if (box) {
-						list_add(&box->list,
-							 &boxes_to_free);
-						box = NULL;
-					}
-					break;
-				}
-			}
-
-			if (box) {
-				box->phys_id = phys_id;
+			/* The first cpu on a package activates the box */
+			if (atomic_add_return(ncpus, &box->refcnt) == ncpus)
 				uncore_box_init(box);
-			}
 		}
 	}
-	return 0;
 }
 
-static int uncore_cpu_prepare(int cpu, int phys_id)
+static int uncore_cpu_prepare(int cpu)
 {
-	struct intel_uncore_type *type;
+	struct intel_uncore_type *type, **types = uncore_msr_uncores;
 	struct intel_uncore_pmu *pmu;
 	struct intel_uncore_box *box;
-	int i, j;
+	int i, pkg;
 
-	for (i = 0; uncore_msr_uncores[i]; i++) {
-		type = uncore_msr_uncores[i];
-		for (j = 0; j < type->num_boxes; j++) {
-			pmu = &type->pmus[j];
-			if (pmu->func_id < 0)
-				pmu->func_id = j;
-
+	pkg = topology_logical_package_id(cpu);
+	for (; *types; types++) {
+		type = *types;
+		pmu = type->pmus;
+		for (i = 0; i < type->num_boxes; i++, pmu++) {
+			if (pmu->boxes[pkg])
+				continue;
+			/* First cpu of a package allocates the box */
 			box = uncore_alloc_box(type, cpu_to_node(cpu));
 			if (!box)
 				return -ENOMEM;
-
 			box->pmu = pmu;
-			box->phys_id = phys_id;
-			*per_cpu_ptr(pmu->box, cpu) = box;
+			box->pkgid = pkg;
+			pmu->boxes[pkg] = box;
 		}
 	}
 	return 0;
 }
 
-static void
-uncore_change_context(struct intel_uncore_type **uncores, int old_cpu, int new_cpu)
+static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu,
+				   int new_cpu)
 {
-	struct intel_uncore_type *type;
-	struct intel_uncore_pmu *pmu;
+	struct intel_uncore_pmu *pmu = type->pmus;
 	struct intel_uncore_box *box;
-	int i, j;
+	int i, pkg;
 
-	for (i = 0; uncores[i]; i++) {
-		type = uncores[i];
-		for (j = 0; j < type->num_boxes; j++) {
-			pmu = &type->pmus[j];
-			if (old_cpu < 0)
-				box = uncore_pmu_to_box(pmu, new_cpu);
-			else
-				box = uncore_pmu_to_box(pmu, old_cpu);
-			if (!box)
-				continue;
+	pkg = topology_logical_package_id(old_cpu < 0 ? new_cpu : old_cpu);
+	for (i = 0; i < type->num_boxes; i++, pmu++) {
+		box = pmu->boxes[pkg];
+		if (!box)
+			continue;
 
-			if (old_cpu < 0) {
-				WARN_ON_ONCE(box->cpu != -1);
-				box->cpu = new_cpu;
-				continue;
-			}
-
-			WARN_ON_ONCE(box->cpu != old_cpu);
-			if (new_cpu >= 0) {
-				uncore_pmu_cancel_hrtimer(box);
-				perf_pmu_migrate_context(&pmu->pmu,
-						old_cpu, new_cpu);
-				box->cpu = new_cpu;
-			} else {
-				box->cpu = -1;
-			}
+		if (old_cpu < 0) {
+			WARN_ON_ONCE(box->cpu != -1);
+			box->cpu = new_cpu;
+			continue;
 		}
+
+		WARN_ON_ONCE(box->cpu != old_cpu);
+		box->cpu = -1;
+		if (new_cpu < 0)
+			continue;
+
+		uncore_pmu_cancel_hrtimer(box);
+		perf_pmu_migrate_context(&pmu->pmu, old_cpu, new_cpu);
+		box->cpu = new_cpu;
 	}
 }
 
+static void uncore_change_context(struct intel_uncore_type **uncores,
+				  int old_cpu, int new_cpu)
+{
+	for (; *uncores; uncores++)
+		uncore_change_type_ctx(*uncores, old_cpu, new_cpu);
+}
+
 static void uncore_event_exit_cpu(int cpu)
 {
-	int i, phys_id, target;
+	int target;
 
-	/* if exiting cpu is used for collecting uncore events */
+	/* Check if exiting cpu is used for collecting uncore events */
 	if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
 		return;
 
-	/* find a new cpu to collect uncore events */
-	phys_id = topology_physical_package_id(cpu);
-	target = -1;
-	for_each_online_cpu(i) {
-		if (i == cpu)
-			continue;
-		if (phys_id == topology_physical_package_id(i)) {
-			target = i;
-			break;
-		}
-	}
+	/* Find a new cpu to collect uncore events */
+	target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
 
-	/* migrate uncore events to the new cpu */
-	if (target >= 0)
+	/* Migrate uncore events to the new target */
+	if (target < nr_cpu_ids)
 		cpumask_set_cpu(target, &uncore_cpu_mask);
+	else
+		target = -1;
 
 	uncore_change_context(uncore_msr_uncores, cpu, target);
 	uncore_change_context(uncore_pci_uncores, cpu, target);
@@ -1201,13 +1206,15 @@
 
 static void uncore_event_init_cpu(int cpu)
 {
-	int i, phys_id;
+	int target;
 
-	phys_id = topology_physical_package_id(cpu);
-	for_each_cpu(i, &uncore_cpu_mask) {
-		if (phys_id == topology_physical_package_id(i))
-			return;
-	}
+	/*
+	 * Check if there is an online cpu in the package
+	 * which collects uncore events already.
+	 */
+	target = cpumask_any_and(&uncore_cpu_mask, topology_core_cpumask(cpu));
+	if (target < nr_cpu_ids)
+		return;
 
 	cpumask_set_cpu(cpu, &uncore_cpu_mask);
 
@@ -1220,39 +1227,25 @@
 {
 	unsigned int cpu = (long)hcpu;
 
-	/* allocate/free data structure for uncore box */
 	switch (action & ~CPU_TASKS_FROZEN) {
 	case CPU_UP_PREPARE:
-		uncore_cpu_prepare(cpu, -1);
-		break;
+		return notifier_from_errno(uncore_cpu_prepare(cpu));
+
 	case CPU_STARTING:
-		uncore_cpu_starting(cpu);
+		uncore_cpu_starting(cpu, false);
+	case CPU_DOWN_FAILED:
+		uncore_event_init_cpu(cpu);
 		break;
+
 	case CPU_UP_CANCELED:
 	case CPU_DYING:
 		uncore_cpu_dying(cpu);
 		break;
-	case CPU_ONLINE:
-	case CPU_DEAD:
-		uncore_kfree_boxes();
-		break;
-	default:
-		break;
-	}
 
-	/* select the cpu that collects uncore events */
-	switch (action & ~CPU_TASKS_FROZEN) {
-	case CPU_DOWN_FAILED:
-	case CPU_STARTING:
-		uncore_event_init_cpu(cpu);
-		break;
 	case CPU_DOWN_PREPARE:
 		uncore_event_exit_cpu(cpu);
 		break;
-	default:
-		break;
 	}
-
 	return NOTIFY_OK;
 }
 
@@ -1265,9 +1258,29 @@
 	.priority	= CPU_PRI_PERF + 1,
 };
 
-static void __init uncore_cpu_setup(void *dummy)
+static int __init type_pmu_register(struct intel_uncore_type *type)
 {
-	uncore_cpu_starting(smp_processor_id());
+	int i, ret;
+
+	for (i = 0; i < type->num_boxes; i++) {
+		ret = uncore_pmu_register(&type->pmus[i]);
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+
+static int __init uncore_msr_pmus_register(void)
+{
+	struct intel_uncore_type **types = uncore_msr_uncores;
+	int ret;
+
+	for (; *types; types++) {
+		ret = type_pmu_register(*types);
+		if (ret)
+			return ret;
+	}
+	return 0;
 }
 
 static int __init uncore_cpu_init(void)
@@ -1311,71 +1324,61 @@
 		knl_uncore_cpu_init();
 		break;
 	default:
-		return 0;
+		return -ENODEV;
 	}
 
-	ret = uncore_types_init(uncore_msr_uncores);
+	ret = uncore_types_init(uncore_msr_uncores, true);
 	if (ret)
-		return ret;
+		goto err;
 
+	ret = uncore_msr_pmus_register();
+	if (ret)
+		goto err;
 	return 0;
+err:
+	uncore_types_exit(uncore_msr_uncores);
+	uncore_msr_uncores = empty_uncore;
+	return ret;
 }
 
-static int __init uncore_pmus_register(void)
+static void __init uncore_cpu_setup(void *dummy)
 {
-	struct intel_uncore_pmu *pmu;
-	struct intel_uncore_type *type;
-	int i, j;
-
-	for (i = 0; uncore_msr_uncores[i]; i++) {
-		type = uncore_msr_uncores[i];
-		for (j = 0; j < type->num_boxes; j++) {
-			pmu = &type->pmus[j];
-			uncore_pmu_register(pmu);
-		}
-	}
-
-	return 0;
+	uncore_cpu_starting(smp_processor_id(), true);
 }
 
-static void __init uncore_cpumask_init(void)
+/* Lazy to avoid allocation of a few bytes for the normal case */
+static __initdata DECLARE_BITMAP(packages, MAX_LOCAL_APIC);
+
+static int __init uncore_cpumask_init(bool msr)
 {
-	int cpu;
-
-	/*
-	 * ony invoke once from msr or pci init code
-	 */
-	if (!cpumask_empty(&uncore_cpu_mask))
-		return;
-
-	cpu_notifier_register_begin();
+	unsigned int cpu;
 
 	for_each_online_cpu(cpu) {
-		int i, phys_id = topology_physical_package_id(cpu);
+		unsigned int pkg = topology_logical_package_id(cpu);
+		int ret;
 
-		for_each_cpu(i, &uncore_cpu_mask) {
-			if (phys_id == topology_physical_package_id(i)) {
-				phys_id = -1;
-				break;
-			}
-		}
-		if (phys_id < 0)
+		if (test_and_set_bit(pkg, packages))
 			continue;
-
-		uncore_cpu_prepare(cpu, phys_id);
+		/*
+		 * The first online cpu of each package allocates and takes
+		 * the refcounts for all other online cpus in that package.
+		 * If msrs are not enabled no allocation is required.
+		 */
+		if (msr) {
+			ret = uncore_cpu_prepare(cpu);
+			if (ret)
+				return ret;
+		}
 		uncore_event_init_cpu(cpu);
+		smp_call_function_single(cpu, uncore_cpu_setup, NULL, 1);
 	}
-	on_each_cpu(uncore_cpu_setup, NULL, 1);
-
 	__register_cpu_notifier(&uncore_cpu_nb);
-
-	cpu_notifier_register_done();
+	return 0;
 }
 
-
 static int __init intel_uncore_init(void)
 {
-	int ret;
+	int pret, cret, ret;
 
 	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
 		return -ENODEV;
@@ -1383,19 +1386,27 @@
 	if (cpu_has_hypervisor)
 		return -ENODEV;
 
-	ret = uncore_pci_init();
-	if (ret)
-		goto fail;
-	ret = uncore_cpu_init();
-	if (ret) {
-		uncore_pci_exit();
-		goto fail;
-	}
-	uncore_cpumask_init();
+	max_packages = topology_max_packages();
 
-	uncore_pmus_register();
+	pret = uncore_pci_init();
+	cret = uncore_cpu_init();
+
+	if (cret && pret)
+		return -ENODEV;
+
+	cpu_notifier_register_begin();
+	ret = uncore_cpumask_init(!cret);
+	if (ret)
+		goto err;
+	cpu_notifier_register_done();
 	return 0;
-fail:
+
+err:
+	/* Undo box->init_box() */
+	on_each_cpu_mask(&uncore_cpu_mask, uncore_exit_boxes, NULL, 1);
+	uncore_types_exit(uncore_msr_uncores);
+	uncore_pci_exit();
+	cpu_notifier_register_done();
 	return ret;
 }
 device_initcall(intel_uncore_init);

diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/events/intel/uncore.h
similarity index 90%
rename from arch/x86/kernel/cpu/perf_event_intel_uncore.h
rename to arch/x86/events/intel/uncore.h
index a7086b8..79766b9 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/events/intel/uncore.h

@@ -1,8 +1,10 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/pci.h>
+#include <asm/apicdef.h>
+
 #include <linux/perf_event.h>
-#include "perf_event.h"
+#include "../perf_event.h"
 
 #define UNCORE_PMU_NAME_LEN		32
 #define UNCORE_PMU_HRTIMER_INTERVAL	(60LL * NSEC_PER_SEC)
@@ -19,11 +21,12 @@
 #define UNCORE_EXTRA_PCI_DEV		0xff
 #define UNCORE_EXTRA_PCI_DEV_MAX	3
 
-/* support up to 8 sockets */
-#define UNCORE_SOCKET_MAX		8
-
 #define UNCORE_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, 0xff)
 
+struct pci_extra_dev {
+	struct pci_dev *dev[UNCORE_EXTRA_PCI_DEV_MAX];
+};
+
 struct intel_uncore_ops;
 struct intel_uncore_pmu;
 struct intel_uncore_box;
@@ -61,6 +64,7 @@
 
 struct intel_uncore_ops {
 	void (*init_box)(struct intel_uncore_box *);
+	void (*exit_box)(struct intel_uncore_box *);
 	void (*disable_box)(struct intel_uncore_box *);
 	void (*enable_box)(struct intel_uncore_box *);
 	void (*disable_event)(struct intel_uncore_box *, struct perf_event *);
@@ -73,13 +77,14 @@
 };
 
 struct intel_uncore_pmu {
-	struct pmu pmu;
-	char name[UNCORE_PMU_NAME_LEN];
-	int pmu_idx;
-	int func_id;
-	struct intel_uncore_type *type;
-	struct intel_uncore_box ** __percpu box;
-	struct list_head box_list;
+	struct pmu			pmu;
+	char				name[UNCORE_PMU_NAME_LEN];
+	int				pmu_idx;
+	int				func_id;
+	bool				registered;
+	atomic_t			activeboxes;
+	struct intel_uncore_type	*type;
+	struct intel_uncore_box		**boxes;
 };
 
 struct intel_uncore_extra_reg {
@@ -89,7 +94,8 @@
 };
 
 struct intel_uncore_box {
-	int phys_id;
+	int pci_phys_id;
+	int pkgid;
 	int n_active;	/* number of active events */
 	int n_events;
 	int cpu;	/* cpu to collect events */
@@ -123,7 +129,6 @@
 	int pbus_to_physid[256];
 };
 
-int uncore_pcibus_to_physid(struct pci_bus *bus);
 struct pci2phy_map *__find_pci2phy_map(int segment);
 
 ssize_t uncore_event_show(struct kobject *kobj,
@@ -305,14 +310,30 @@
 	}
 }
 
-static inline bool uncore_box_is_fake(struct intel_uncore_box *box)
+static inline void uncore_box_exit(struct intel_uncore_box *box)
 {
-	return (box->phys_id < 0);
+	if (test_and_clear_bit(UNCORE_BOX_FLAG_INITIATED, &box->flags)) {
+		if (box->pmu->type->ops->exit_box)
+			box->pmu->type->ops->exit_box(box);
+	}
 }
 
-struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event);
+static inline bool uncore_box_is_fake(struct intel_uncore_box *box)
+{
+	return (box->pkgid < 0);
+}
+
+static inline struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event)
+{
+	return container_of(event->pmu, struct intel_uncore_pmu, pmu);
+}
+
+static inline struct intel_uncore_box *uncore_event_to_box(struct perf_event *event)
+{
+	return event->pmu_private;
+}
+
 struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu);
-struct intel_uncore_box *uncore_event_to_box(struct perf_event *event);
 u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event);
 void uncore_pmu_start_hrtimer(struct intel_uncore_box *box);
 void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box);
@@ -328,7 +349,7 @@
 extern struct pci_driver *uncore_pci_driver;
 extern raw_spinlock_t pci2phy_map_lock;
 extern struct list_head pci2phy_map_head;
-extern struct pci_dev *uncore_extra_pci_dev[UNCORE_SOCKET_MAX][UNCORE_EXTRA_PCI_DEV_MAX];
+extern struct pci_extra_dev *uncore_extra_pci_dev;
 extern struct event_constraint uncore_constraint_empty;
 
 /* perf_event_intel_uncore_snb.c */

diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_nhmex.c b/arch/x86/events/intel/uncore_nhmex.c
similarity index 98%
rename from arch/x86/kernel/cpu/perf_event_intel_uncore_nhmex.c
rename to arch/x86/events/intel/uncore_nhmex.c
index 2749965..cda5693 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore_nhmex.c
+++ b/arch/x86/events/intel/uncore_nhmex.c

@@ -1,5 +1,5 @@
 /* Nehalem-EX/Westmere-EX uncore support */
-#include "perf_event_intel_uncore.h"
+#include "uncore.h"
 
 /* NHM-EX event control */
 #define NHMEX_PMON_CTL_EV_SEL_MASK	0x000000ff
@@ -201,6 +201,11 @@
 	wrmsrl(NHMEX_U_MSR_PMON_GLOBAL_CTL, NHMEX_U_PMON_GLOBAL_EN_ALL);
 }
 
+static void nhmex_uncore_msr_exit_box(struct intel_uncore_box *box)
+{
+	wrmsrl(NHMEX_U_MSR_PMON_GLOBAL_CTL, 0);
+}
+
 static void nhmex_uncore_msr_disable_box(struct intel_uncore_box *box)
 {
 	unsigned msr = uncore_msr_box_ctl(box);
@@ -250,6 +255,7 @@
 
 #define NHMEX_UNCORE_OPS_COMMON_INIT()				\
 	.init_box	= nhmex_uncore_msr_init_box,		\
+	.exit_box	= nhmex_uncore_msr_exit_box,		\
 	.disable_box	= nhmex_uncore_msr_disable_box,		\
 	.enable_box	= nhmex_uncore_msr_enable_box,		\
 	.disable_event	= nhmex_uncore_msr_disable_event,	\

diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/events/intel/uncore_snb.c
similarity index 97%
rename from arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
rename to arch/x86/events/intel/uncore_snb.c
index 2bd030d..96531d2 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
+++ b/arch/x86/events/intel/uncore_snb.c

@@ -1,5 +1,5 @@
 /* Nehalem/SandBridge/Haswell uncore support */
-#include "perf_event_intel_uncore.h"
+#include "uncore.h"
 
 /* Uncore IMC PCI IDs */
 #define PCI_DEVICE_ID_INTEL_SNB_IMC	0x0100
@@ -95,6 +95,12 @@
 	}
 }
 
+static void snb_uncore_msr_exit_box(struct intel_uncore_box *box)
+{
+	if (box->pmu->pmu_idx == 0)
+		wrmsrl(SNB_UNC_PERF_GLOBAL_CTL, 0);
+}
+
 static struct uncore_event_desc snb_uncore_events[] = {
 	INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"),
 	{ /* end: all zeroes */ },
@@ -116,6 +122,7 @@
 
 static struct intel_uncore_ops snb_uncore_msr_ops = {
 	.init_box	= snb_uncore_msr_init_box,
+	.exit_box	= snb_uncore_msr_exit_box,
 	.disable_event	= snb_uncore_msr_disable_event,
 	.enable_event	= snb_uncore_msr_enable_event,
 	.read_counter	= uncore_msr_read_counter,
@@ -231,6 +238,11 @@
 	box->hrtimer_duration = UNCORE_SNB_IMC_HRTIMER_INTERVAL;
 }
 
+static void snb_uncore_imc_exit_box(struct intel_uncore_box *box)
+{
+	iounmap(box->io_addr);
+}
+
 static void snb_uncore_imc_enable_box(struct intel_uncore_box *box)
 {}
 
@@ -301,6 +313,7 @@
 		return -EINVAL;
 
 	event->cpu = box->cpu;
+	event->pmu_private = box;
 
 	event->hw.idx = -1;
 	event->hw.last_tag = ~0ULL;
@@ -458,6 +471,7 @@
 
 static struct intel_uncore_ops snb_uncore_imc_ops = {
 	.init_box	= snb_uncore_imc_init_box,
+	.exit_box	= snb_uncore_imc_exit_box,
 	.enable_box	= snb_uncore_imc_enable_box,
 	.disable_box	= snb_uncore_imc_disable_box,
 	.disable_event	= snb_uncore_imc_disable_event,

diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
similarity index 98%
rename from arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
rename to arch/x86/events/intel/uncore_snbep.c
index 33acb88..93f6bd9 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c

@@ -1,6 +1,5 @@
 /* SandyBridge-EP/IvyTown uncore support */
-#include "perf_event_intel_uncore.h"
-
+#include "uncore.h"
 
 /* SNB-EP Box level control */
 #define SNBEP_PMON_BOX_CTL_RST_CTRL	(1 << 0)
@@ -987,7 +986,9 @@
 
 	if (reg1->idx != EXTRA_REG_NONE) {
 		int idx = box->pmu->pmu_idx + SNBEP_PCI_QPI_PORT0_FILTER;
-		struct pci_dev *filter_pdev = uncore_extra_pci_dev[box->phys_id][idx];
+		int pkg = topology_phys_to_logical_pkg(box->pci_phys_id);
+		struct pci_dev *filter_pdev = uncore_extra_pci_dev[pkg].dev[idx];
+
 		if (filter_pdev) {
 			pci_write_config_dword(filter_pdev, reg1->reg,
 						(u32)reg1->config);
@@ -2521,14 +2522,16 @@
 
 void hswep_uncore_cpu_init(void)
 {
+	int pkg = topology_phys_to_logical_pkg(0);
+
 	if (hswep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
 		hswep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
 
 	/* Detect 6-8 core systems with only two SBOXes */
-	if (uncore_extra_pci_dev[0][HSWEP_PCI_PCU_3]) {
+	if (uncore_extra_pci_dev[pkg].dev[HSWEP_PCI_PCU_3]) {
 		u32 capid4;
 
-		pci_read_config_dword(uncore_extra_pci_dev[0][HSWEP_PCI_PCU_3],
+		pci_read_config_dword(uncore_extra_pci_dev[pkg].dev[HSWEP_PCI_PCU_3],
 				      0x94, &capid4);
 		if (((capid4 >> 6) & 0x3) == 0)
 			hswep_uncore_sbox.num_boxes = 2;
@@ -2875,11 +2878,13 @@
 	.format_group		= &hswep_uncore_sbox_format_group,
 };
 
+#define BDX_MSR_UNCORE_SBOX	3
+
 static struct intel_uncore_type *bdx_msr_uncores[] = {
 	&bdx_uncore_ubox,
 	&bdx_uncore_cbox,
-	&bdx_uncore_sbox,
 	&hswep_uncore_pcu,
+	&bdx_uncore_sbox,
 	NULL,
 };
 
@@ -2888,6 +2893,10 @@
 	if (bdx_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
 		bdx_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
 	uncore_msr_uncores = bdx_msr_uncores;
+
+	/* BDX-DE doesn't have SBOX */
+	if (boot_cpu_data.x86_model == 86)
+		uncore_msr_uncores[BDX_MSR_UNCORE_SBOX] = NULL;
 }
 
 static struct intel_uncore_type bdx_uncore_ha = {

diff --git a/arch/x86/kernel/cpu/perf_event_msr.c b/arch/x86/events/msr.c
similarity index 100%
rename from arch/x86/kernel/cpu/perf_event_msr.c
rename to arch/x86/events/msr.c


diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/events/perf_event.h
similarity index 99%
rename from arch/x86/kernel/cpu/perf_event.h
rename to arch/x86/events/perf_event.h
index 7bb61e3..68155ca 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/events/perf_event.h

@@ -586,6 +586,7 @@
 			pebs_broken	:1,
 			pebs_prec_dist	:1;
 	int		pebs_record_size;
+	int		pebs_buffer_size;
 	void		(*drain_pebs)(struct pt_regs *regs);
 	struct event_constraint *pebs_constraints;
 	void		(*pebs_aliases)(struct perf_event *event);
@@ -860,6 +861,8 @@
 
 extern struct event_constraint intel_hsw_pebs_event_constraints[];
 
+extern struct event_constraint intel_bdw_pebs_event_constraints[];
+
 extern struct event_constraint intel_skl_pebs_event_constraints[];
 
 struct event_constraint *intel_pebs_constraints(struct perf_event *event);
@@ -904,6 +907,8 @@
 
 void intel_pmu_lbr_init_knl(void);
 
+void intel_pmu_pebs_data_source_nhm(void);
+
 int intel_pmu_setup_lbr_filter(struct perf_event *event);
 
 void intel_pt_interrupt(void);

diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 7bfc85b..99afb66 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h

@@ -152,12 +152,6 @@
 	".popsection"
 
 /*
- * This must be included *after* the definition of ALTERNATIVE due to
- * <asm/arch_hweight.h>
- */
-#include <asm/cpufeature.h>
-
-/*
  * Alternative instructions for different CPU types or capabilities.
  *
  * This allows to use optimized instructions even on generic binary

diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index 3c56ef1..5e828da 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h

@@ -27,15 +27,23 @@
 };
 
 struct threshold_block {
-	unsigned int		block;
-	unsigned int		bank;
-	unsigned int		cpu;
-	u32			address;
-	u16			interrupt_enable;
-	bool			interrupt_capable;
-	u16			threshold_limit;
-	struct kobject		kobj;
-	struct list_head	miscj;
+	unsigned int	 block;			/* Number within bank */
+	unsigned int	 bank;			/* MCA bank the block belongs to */
+	unsigned int	 cpu;			/* CPU which controls MCA bank */
+	u32		 address;		/* MSR address for the block */
+	u16		 interrupt_enable;	/* Enable/Disable APIC interrupt */
+	bool		 interrupt_capable;	/* Bank can generate an interrupt. */
+
+	u16		 threshold_limit;	/*
+						 * Value upon which threshold
+						 * interrupt is generated.
+						 */
+
+	struct kobject	 kobj;			/* sysfs object */
+	struct list_head miscj;			/*
+						 * List of threshold blocks
+						 * within a bank.
+						 */
 };
 
 struct threshold_bank {

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index c80f6b6..0899cfc 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h

@@ -6,7 +6,6 @@
 
 #include <asm/alternative.h>
 #include <asm/cpufeature.h>
-#include <asm/processor.h>
 #include <asm/apicdef.h>
 #include <linux/atomic.h>
 #include <asm/fixmap.h>

diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h
index 259a7c1..02e799f 100644
--- a/arch/x86/include/asm/arch_hweight.h
+++ b/arch/x86/include/asm/arch_hweight.h

@@ -1,6 +1,8 @@
 #ifndef _ASM_X86_HWEIGHT_H
 #define _ASM_X86_HWEIGHT_H
 
+#include <asm/cpufeatures.h>
+
 #ifdef CONFIG_64BIT
 /* popcnt %edi, %eax -- redundant REX prefix for alignment */
 #define POPCNT32 ".byte 0xf3,0x40,0x0f,0xb8,0xc7"

diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 189679a..f5063b6 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h

@@ -44,19 +44,22 @@
 
 /* Exception table entry */
 #ifdef __ASSEMBLY__
-# define _ASM_EXTABLE(from,to)					\
+# define _ASM_EXTABLE_HANDLE(from, to, handler)			\
 	.pushsection "__ex_table","a" ;				\
-	.balign 8 ;						\
+	.balign 4 ;						\
 	.long (from) - . ;					\
 	.long (to) - . ;					\
+	.long (handler) - . ;					\
 	.popsection
 
-# define _ASM_EXTABLE_EX(from,to)				\
-	.pushsection "__ex_table","a" ;				\
-	.balign 8 ;						\
-	.long (from) - . ;					\
-	.long (to) - . + 0x7ffffff0 ;				\
-	.popsection
+# define _ASM_EXTABLE(from, to)					\
+	_ASM_EXTABLE_HANDLE(from, to, ex_handler_default)
+
+# define _ASM_EXTABLE_FAULT(from, to)				\
+	_ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)
+
+# define _ASM_EXTABLE_EX(from, to)				\
+	_ASM_EXTABLE_HANDLE(from, to, ex_handler_ext)
 
 # define _ASM_NOKPROBE(entry)					\
 	.pushsection "_kprobe_blacklist","aw" ;			\
@@ -89,19 +92,24 @@
 	.endm
 
 #else
-# define _ASM_EXTABLE(from,to)					\
+# define _EXPAND_EXTABLE_HANDLE(x) #x
+# define _ASM_EXTABLE_HANDLE(from, to, handler)			\
 	" .pushsection \"__ex_table\",\"a\"\n"			\
-	" .balign 8\n"						\
+	" .balign 4\n"						\
 	" .long (" #from ") - .\n"				\
 	" .long (" #to ") - .\n"				\
+	" .long (" _EXPAND_EXTABLE_HANDLE(handler) ") - .\n"	\
 	" .popsection\n"
 
-# define _ASM_EXTABLE_EX(from,to)				\
-	" .pushsection \"__ex_table\",\"a\"\n"			\
-	" .balign 8\n"						\
-	" .long (" #from ") - .\n"				\
-	" .long (" #to ") - . + 0x7ffffff0\n"			\
-	" .popsection\n"
+# define _ASM_EXTABLE(from, to)					\
+	_ASM_EXTABLE_HANDLE(from, to, ex_handler_default)
+
+# define _ASM_EXTABLE_FAULT(from, to)				\
+	_ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)
+
+# define _ASM_EXTABLE_EX(from, to)				\
+	_ASM_EXTABLE_HANDLE(from, to, ex_handler_ext)
+
 /* For C file, we already have NOKPROBE_SYMBOL macro */
 #endif
 

diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index a584e1c..bfb28ca 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h

@@ -6,18 +6,17 @@
 
 /*
  * Force strict CPU ordering.
- * And yes, this is required on UP too when we're talking
+ * And yes, this might be required on UP too when we're talking
  * to devices.
  */
 
 #ifdef CONFIG_X86_32
-/*
- * Some non-Intel clones support out of order store. wmb() ceases to be a
- * nop for these.
- */
-#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2)
-#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2)
-#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM)
+#define mb() asm volatile(ALTERNATIVE("lock; addl $0,0(%%esp)", "mfence", \
+				      X86_FEATURE_XMM2) ::: "memory", "cc")
+#define rmb() asm volatile(ALTERNATIVE("lock; addl $0,0(%%esp)", "lfence", \
+				       X86_FEATURE_XMM2) ::: "memory", "cc")
+#define wmb() asm volatile(ALTERNATIVE("lock; addl $0,0(%%esp)", "sfence", \
+				       X86_FEATURE_XMM2) ::: "memory", "cc")
 #else
 #define mb() 	asm volatile("mfence":::"memory")
 #define rmb()	asm volatile("lfence":::"memory")

diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index cfe3b95..7766d1c 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h

@@ -91,7 +91,7 @@
  * If it's called on the same region of memory simultaneously, the effect
  * may be that only one operation succeeds.
  */
-static inline void __set_bit(long nr, volatile unsigned long *addr)
+static __always_inline void __set_bit(long nr, volatile unsigned long *addr)
 {
 	asm volatile("bts %1,%0" : ADDR : "Ir" (nr) : "memory");
 }
@@ -128,13 +128,13 @@
  * clear_bit() is atomic and implies release semantics before the memory
  * operation. It can be used for an unlock.
  */
-static inline void clear_bit_unlock(long nr, volatile unsigned long *addr)
+static __always_inline void clear_bit_unlock(long nr, volatile unsigned long *addr)
 {
 	barrier();
 	clear_bit(nr, addr);
 }
 
-static inline void __clear_bit(long nr, volatile unsigned long *addr)
+static __always_inline void __clear_bit(long nr, volatile unsigned long *addr)
 {
 	asm volatile("btr %1,%0" : ADDR : "Ir" (nr));
 }
@@ -151,7 +151,7 @@
  * No memory barrier is required here, because x86 cannot reorder stores past
  * older loads. Same principle as spin_unlock.
  */
-static inline void __clear_bit_unlock(long nr, volatile unsigned long *addr)
+static __always_inline void __clear_bit_unlock(long nr, volatile unsigned long *addr)
 {
 	barrier();
 	__clear_bit(nr, addr);
@@ -166,7 +166,7 @@
  * If it's called on the same region of memory simultaneously, the effect
  * may be that only one operation succeeds.
  */
-static inline void __change_bit(long nr, volatile unsigned long *addr)
+static __always_inline void __change_bit(long nr, volatile unsigned long *addr)
 {
 	asm volatile("btc %1,%0" : ADDR : "Ir" (nr));
 }
@@ -180,7 +180,7 @@
  * Note that @nr may be almost arbitrarily large; this function is not
  * restricted to acting on a single-word quantity.
  */
-static inline void change_bit(long nr, volatile unsigned long *addr)
+static __always_inline void change_bit(long nr, volatile unsigned long *addr)
 {
 	if (IS_IMMEDIATE(nr)) {
 		asm volatile(LOCK_PREFIX "xorb %1,%0"
@@ -201,7 +201,7 @@
  * This operation is atomic and cannot be reordered.
  * It also implies a memory barrier.
  */
-static inline int test_and_set_bit(long nr, volatile unsigned long *addr)
+static __always_inline int test_and_set_bit(long nr, volatile unsigned long *addr)
 {
 	GEN_BINARY_RMWcc(LOCK_PREFIX "bts", *addr, "Ir", nr, "%0", "c");
 }
@@ -228,7 +228,7 @@
  * If two examples of this operation race, one can appear to succeed
  * but actually fail.  You must protect multiple accesses with a lock.
  */
-static inline int __test_and_set_bit(long nr, volatile unsigned long *addr)
+static __always_inline int __test_and_set_bit(long nr, volatile unsigned long *addr)
 {
 	int oldbit;
 
@@ -247,7 +247,7 @@
  * This operation is atomic and cannot be reordered.
  * It also implies a memory barrier.
  */
-static inline int test_and_clear_bit(long nr, volatile unsigned long *addr)
+static __always_inline int test_and_clear_bit(long nr, volatile unsigned long *addr)
 {
 	GEN_BINARY_RMWcc(LOCK_PREFIX "btr", *addr, "Ir", nr, "%0", "c");
 }
@@ -268,7 +268,7 @@
  * accessed from a hypervisor on the same CPU if running in a VM: don't change
  * this without also updating arch/x86/kernel/kvm.c
  */
-static inline int __test_and_clear_bit(long nr, volatile unsigned long *addr)
+static __always_inline int __test_and_clear_bit(long nr, volatile unsigned long *addr)
 {
 	int oldbit;
 
@@ -280,7 +280,7 @@
 }
 
 /* WARNING: non atomic and it can be reordered! */
-static inline int __test_and_change_bit(long nr, volatile unsigned long *addr)
+static __always_inline int __test_and_change_bit(long nr, volatile unsigned long *addr)
 {
 	int oldbit;
 
@@ -300,7 +300,7 @@
  * This operation is atomic and cannot be reordered.
  * It also implies a memory barrier.
  */
-static inline int test_and_change_bit(long nr, volatile unsigned long *addr)
+static __always_inline int test_and_change_bit(long nr, volatile unsigned long *addr)
 {
 	GEN_BINARY_RMWcc(LOCK_PREFIX "btc", *addr, "Ir", nr, "%0", "c");
 }
@@ -311,7 +311,7 @@
 		(addr[nr >> _BITOPS_LONG_SHIFT])) != 0;
 }
 
-static inline int variable_test_bit(long nr, volatile const unsigned long *addr)
+static __always_inline int variable_test_bit(long nr, volatile const unsigned long *addr)
 {
 	int oldbit;
 
@@ -343,7 +343,7 @@
  *
  * Undefined if no bit exists, so code should check against 0 first.
  */
-static inline unsigned long __ffs(unsigned long word)
+static __always_inline unsigned long __ffs(unsigned long word)
 {
 	asm("rep; bsf %1,%0"
 		: "=r" (word)
@@ -357,7 +357,7 @@
  *
  * Undefined if no zero exists, so code should check against ~0UL first.
  */
-static inline unsigned long ffz(unsigned long word)
+static __always_inline unsigned long ffz(unsigned long word)
 {
 	asm("rep; bsf %1,%0"
 		: "=r" (word)
@@ -371,7 +371,7 @@
  *
  * Undefined if no set bit exists, so code should check against 0 first.
  */
-static inline unsigned long __fls(unsigned long word)
+static __always_inline unsigned long __fls(unsigned long word)
 {
 	asm("bsr %1,%0"
 	    : "=r" (word)
@@ -393,7 +393,7 @@
  * set bit if value is nonzero. The first (least significant) bit
  * is at position 1.
  */
-static inline int ffs(int x)
+static __always_inline int ffs(int x)
 {
 	int r;
 
@@ -434,7 +434,7 @@
  * set bit if value is nonzero. The last (most significant) bit is
  * at position 32.
  */
-static inline int fls(int x)
+static __always_inline int fls(int x)
 {
 	int r;
 

diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h
index e63aa38..61518cf 100644
--- a/arch/x86/include/asm/cacheflush.h
+++ b/arch/x86/include/asm/cacheflush.h

@@ -91,16 +91,10 @@
 
 #define mmio_flush_range(addr, size) clflush_cache_range(addr, size)
 
-#ifdef CONFIG_DEBUG_RODATA
-void mark_rodata_ro(void);
 extern const int rodata_test_data;
 extern int kernel_set_to_readonly;
 void set_kernel_text_rw(void);
 void set_kernel_text_ro(void);
-#else
-static inline void set_kernel_text_rw(void) { }
-static inline void set_kernel_text_ro(void) { }
-#endif
 
 #ifdef CONFIG_DEBUG_RODATA_TEST
 int rodata_test(void);

diff --git a/arch/x86/include/asm/clocksource.h b/arch/x86/include/asm/clocksource.h
index eda81dc..d194266 100644
--- a/arch/x86/include/asm/clocksource.h
+++ b/arch/x86/include/asm/clocksource.h

@@ -3,10 +3,11 @@
 #ifndef _ASM_X86_CLOCKSOURCE_H
 #define _ASM_X86_CLOCKSOURCE_H
 
-#define VCLOCK_NONE 0  /* No vDSO clock available.	*/
-#define VCLOCK_TSC  1  /* vDSO should use vread_tsc.	*/
-#define VCLOCK_HPET 2  /* vDSO should use vread_hpet.	*/
-#define VCLOCK_PVCLOCK 3 /* vDSO should use vread_pvclock. */
+#define VCLOCK_NONE	0  /* No vDSO clock available.	*/
+#define VCLOCK_TSC	1  /* vDSO should use vread_tsc.	*/
+#define VCLOCK_HPET	2  /* vDSO should use vread_hpet.	*/
+#define VCLOCK_PVCLOCK	3 /* vDSO should use vread_pvclock. */
+#define VCLOCK_MAX	3
 
 struct arch_clocksource_data {
 	int vclock_mode;

diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index ad19841..9733361 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h

@@ -2,6 +2,7 @@
 #define ASM_X86_CMPXCHG_H
 
 #include <linux/compiler.h>
+#include <asm/cpufeatures.h>
 #include <asm/alternative.h> /* Provides LOCK_PREFIX */
 
 /*

diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 7ad8c94..68e4e82 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h

@@ -1,288 +1,7 @@
-/*
- * Defines x86 CPU feature bits
- */
 #ifndef _ASM_X86_CPUFEATURE_H
 #define _ASM_X86_CPUFEATURE_H
 
-#ifndef _ASM_X86_REQUIRED_FEATURES_H
-#include <asm/required-features.h>
-#endif
-
-#ifndef _ASM_X86_DISABLED_FEATURES_H
-#include <asm/disabled-features.h>
-#endif
-
-#define NCAPINTS	16	/* N 32-bit words worth of info */
-#define NBUGINTS	1	/* N 32-bit bug flags */
-
-/*
- * Note: If the comment begins with a quoted string, that string is used
- * in /proc/cpuinfo instead of the macro name.  If the string is "",
- * this feature bit is not displayed in /proc/cpuinfo at all.
- */
-
-/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */
-#define X86_FEATURE_FPU		( 0*32+ 0) /* Onboard FPU */
-#define X86_FEATURE_VME		( 0*32+ 1) /* Virtual Mode Extensions */
-#define X86_FEATURE_DE		( 0*32+ 2) /* Debugging Extensions */
-#define X86_FEATURE_PSE		( 0*32+ 3) /* Page Size Extensions */
-#define X86_FEATURE_TSC		( 0*32+ 4) /* Time Stamp Counter */
-#define X86_FEATURE_MSR		( 0*32+ 5) /* Model-Specific Registers */
-#define X86_FEATURE_PAE		( 0*32+ 6) /* Physical Address Extensions */
-#define X86_FEATURE_MCE		( 0*32+ 7) /* Machine Check Exception */
-#define X86_FEATURE_CX8		( 0*32+ 8) /* CMPXCHG8 instruction */
-#define X86_FEATURE_APIC	( 0*32+ 9) /* Onboard APIC */
-#define X86_FEATURE_SEP		( 0*32+11) /* SYSENTER/SYSEXIT */
-#define X86_FEATURE_MTRR	( 0*32+12) /* Memory Type Range Registers */
-#define X86_FEATURE_PGE		( 0*32+13) /* Page Global Enable */
-#define X86_FEATURE_MCA		( 0*32+14) /* Machine Check Architecture */
-#define X86_FEATURE_CMOV	( 0*32+15) /* CMOV instructions */
-					  /* (plus FCMOVcc, FCOMI with FPU) */
-#define X86_FEATURE_PAT		( 0*32+16) /* Page Attribute Table */
-#define X86_FEATURE_PSE36	( 0*32+17) /* 36-bit PSEs */
-#define X86_FEATURE_PN		( 0*32+18) /* Processor serial number */
-#define X86_FEATURE_CLFLUSH	( 0*32+19) /* CLFLUSH instruction */
-#define X86_FEATURE_DS		( 0*32+21) /* "dts" Debug Store */
-#define X86_FEATURE_ACPI	( 0*32+22) /* ACPI via MSR */
-#define X86_FEATURE_MMX		( 0*32+23) /* Multimedia Extensions */
-#define X86_FEATURE_FXSR	( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */
-#define X86_FEATURE_XMM		( 0*32+25) /* "sse" */
-#define X86_FEATURE_XMM2	( 0*32+26) /* "sse2" */
-#define X86_FEATURE_SELFSNOOP	( 0*32+27) /* "ss" CPU self snoop */
-#define X86_FEATURE_HT		( 0*32+28) /* Hyper-Threading */
-#define X86_FEATURE_ACC		( 0*32+29) /* "tm" Automatic clock control */
-#define X86_FEATURE_IA64	( 0*32+30) /* IA-64 processor */
-#define X86_FEATURE_PBE		( 0*32+31) /* Pending Break Enable */
-
-/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
-/* Don't duplicate feature flags which are redundant with Intel! */
-#define X86_FEATURE_SYSCALL	( 1*32+11) /* SYSCALL/SYSRET */
-#define X86_FEATURE_MP		( 1*32+19) /* MP Capable. */
-#define X86_FEATURE_NX		( 1*32+20) /* Execute Disable */
-#define X86_FEATURE_MMXEXT	( 1*32+22) /* AMD MMX extensions */
-#define X86_FEATURE_FXSR_OPT	( 1*32+25) /* FXSAVE/FXRSTOR optimizations */
-#define X86_FEATURE_GBPAGES	( 1*32+26) /* "pdpe1gb" GB pages */
-#define X86_FEATURE_RDTSCP	( 1*32+27) /* RDTSCP */
-#define X86_FEATURE_LM		( 1*32+29) /* Long Mode (x86-64) */
-#define X86_FEATURE_3DNOWEXT	( 1*32+30) /* AMD 3DNow! extensions */
-#define X86_FEATURE_3DNOW	( 1*32+31) /* 3DNow! */
-
-/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
-#define X86_FEATURE_RECOVERY	( 2*32+ 0) /* CPU in recovery mode */
-#define X86_FEATURE_LONGRUN	( 2*32+ 1) /* Longrun power control */
-#define X86_FEATURE_LRTI	( 2*32+ 3) /* LongRun table interface */
-
-/* Other features, Linux-defined mapping, word 3 */
-/* This range is used for feature bits which conflict or are synthesized */
-#define X86_FEATURE_CXMMX	( 3*32+ 0) /* Cyrix MMX extensions */
-#define X86_FEATURE_K6_MTRR	( 3*32+ 1) /* AMD K6 nonstandard MTRRs */
-#define X86_FEATURE_CYRIX_ARR	( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */
-#define X86_FEATURE_CENTAUR_MCR	( 3*32+ 3) /* Centaur MCRs (= MTRRs) */
-/* cpu types for specific tunings: */
-#define X86_FEATURE_K8		( 3*32+ 4) /* "" Opteron, Athlon64 */
-#define X86_FEATURE_K7		( 3*32+ 5) /* "" Athlon */
-#define X86_FEATURE_P3		( 3*32+ 6) /* "" P3 */
-#define X86_FEATURE_P4		( 3*32+ 7) /* "" P4 */
-#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */
-#define X86_FEATURE_UP		( 3*32+ 9) /* smp kernel running on up */
-/* free, was #define X86_FEATURE_FXSAVE_LEAK ( 3*32+10) * "" FXSAVE leaks FOP/FIP/FOP */
-#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */
-#define X86_FEATURE_PEBS	( 3*32+12) /* Precise-Event Based Sampling */
-#define X86_FEATURE_BTS		( 3*32+13) /* Branch Trace Store */
-#define X86_FEATURE_SYSCALL32	( 3*32+14) /* "" syscall in ia32 userspace */
-#define X86_FEATURE_SYSENTER32	( 3*32+15) /* "" sysenter in ia32 userspace */
-#define X86_FEATURE_REP_GOOD	( 3*32+16) /* rep microcode works well */
-#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" Mfence synchronizes RDTSC */
-#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" Lfence synchronizes RDTSC */
-/* free, was #define X86_FEATURE_11AP	( 3*32+19) * "" Bad local APIC aka 11AP */
-#define X86_FEATURE_NOPL	( 3*32+20) /* The NOPL (0F 1F) instructions */
-#define X86_FEATURE_ALWAYS	( 3*32+21) /* "" Always-present feature */
-#define X86_FEATURE_XTOPOLOGY	( 3*32+22) /* cpu topology enum extensions */
-#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */
-#define X86_FEATURE_NONSTOP_TSC	( 3*32+24) /* TSC does not stop in C states */
-/* free, was #define X86_FEATURE_CLFLUSH_MONITOR ( 3*32+25) * "" clflush reqd with monitor */
-#define X86_FEATURE_EXTD_APICID	( 3*32+26) /* has extended APICID (8 bits) */
-#define X86_FEATURE_AMD_DCM     ( 3*32+27) /* multi-node processor */
-#define X86_FEATURE_APERFMPERF	( 3*32+28) /* APERFMPERF */
-#define X86_FEATURE_EAGER_FPU	( 3*32+29) /* "eagerfpu" Non lazy FPU restore */
-#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
-
-/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
-#define X86_FEATURE_XMM3	( 4*32+ 0) /* "pni" SSE-3 */
-#define X86_FEATURE_PCLMULQDQ	( 4*32+ 1) /* PCLMULQDQ instruction */
-#define X86_FEATURE_DTES64	( 4*32+ 2) /* 64-bit Debug Store */
-#define X86_FEATURE_MWAIT	( 4*32+ 3) /* "monitor" Monitor/Mwait support */
-#define X86_FEATURE_DSCPL	( 4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */
-#define X86_FEATURE_VMX		( 4*32+ 5) /* Hardware virtualization */
-#define X86_FEATURE_SMX		( 4*32+ 6) /* Safer mode */
-#define X86_FEATURE_EST		( 4*32+ 7) /* Enhanced SpeedStep */
-#define X86_FEATURE_TM2		( 4*32+ 8) /* Thermal Monitor 2 */
-#define X86_FEATURE_SSSE3	( 4*32+ 9) /* Supplemental SSE-3 */
-#define X86_FEATURE_CID		( 4*32+10) /* Context ID */
-#define X86_FEATURE_SDBG	( 4*32+11) /* Silicon Debug */
-#define X86_FEATURE_FMA		( 4*32+12) /* Fused multiply-add */
-#define X86_FEATURE_CX16	( 4*32+13) /* CMPXCHG16B */
-#define X86_FEATURE_XTPR	( 4*32+14) /* Send Task Priority Messages */
-#define X86_FEATURE_PDCM	( 4*32+15) /* Performance Capabilities */
-#define X86_FEATURE_PCID	( 4*32+17) /* Process Context Identifiers */
-#define X86_FEATURE_DCA		( 4*32+18) /* Direct Cache Access */
-#define X86_FEATURE_XMM4_1	( 4*32+19) /* "sse4_1" SSE-4.1 */
-#define X86_FEATURE_XMM4_2	( 4*32+20) /* "sse4_2" SSE-4.2 */
-#define X86_FEATURE_X2APIC	( 4*32+21) /* x2APIC */
-#define X86_FEATURE_MOVBE	( 4*32+22) /* MOVBE instruction */
-#define X86_FEATURE_POPCNT      ( 4*32+23) /* POPCNT instruction */
-#define X86_FEATURE_TSC_DEADLINE_TIMER	( 4*32+24) /* Tsc deadline timer */
-#define X86_FEATURE_AES		( 4*32+25) /* AES instructions */
-#define X86_FEATURE_XSAVE	( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
-#define X86_FEATURE_OSXSAVE	( 4*32+27) /* "" XSAVE enabled in the OS */
-#define X86_FEATURE_AVX		( 4*32+28) /* Advanced Vector Extensions */
-#define X86_FEATURE_F16C	( 4*32+29) /* 16-bit fp conversions */
-#define X86_FEATURE_RDRAND	( 4*32+30) /* The RDRAND instruction */
-#define X86_FEATURE_HYPERVISOR	( 4*32+31) /* Running on a hypervisor */
-
-/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
-#define X86_FEATURE_XSTORE	( 5*32+ 2) /* "rng" RNG present (xstore) */
-#define X86_FEATURE_XSTORE_EN	( 5*32+ 3) /* "rng_en" RNG enabled */
-#define X86_FEATURE_XCRYPT	( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */
-#define X86_FEATURE_XCRYPT_EN	( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */
-#define X86_FEATURE_ACE2	( 5*32+ 8) /* Advanced Cryptography Engine v2 */
-#define X86_FEATURE_ACE2_EN	( 5*32+ 9) /* ACE v2 enabled */
-#define X86_FEATURE_PHE		( 5*32+10) /* PadLock Hash Engine */
-#define X86_FEATURE_PHE_EN	( 5*32+11) /* PHE enabled */
-#define X86_FEATURE_PMM		( 5*32+12) /* PadLock Montgomery Multiplier */
-#define X86_FEATURE_PMM_EN	( 5*32+13) /* PMM enabled */
-
-/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */
-#define X86_FEATURE_LAHF_LM	( 6*32+ 0) /* LAHF/SAHF in long mode */
-#define X86_FEATURE_CMP_LEGACY	( 6*32+ 1) /* If yes HyperThreading not valid */
-#define X86_FEATURE_SVM		( 6*32+ 2) /* Secure virtual machine */
-#define X86_FEATURE_EXTAPIC	( 6*32+ 3) /* Extended APIC space */
-#define X86_FEATURE_CR8_LEGACY	( 6*32+ 4) /* CR8 in 32-bit mode */
-#define X86_FEATURE_ABM		( 6*32+ 5) /* Advanced bit manipulation */
-#define X86_FEATURE_SSE4A	( 6*32+ 6) /* SSE-4A */
-#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */
-#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */
-#define X86_FEATURE_OSVW	( 6*32+ 9) /* OS Visible Workaround */
-#define X86_FEATURE_IBS		( 6*32+10) /* Instruction Based Sampling */
-#define X86_FEATURE_XOP		( 6*32+11) /* extended AVX instructions */
-#define X86_FEATURE_SKINIT	( 6*32+12) /* SKINIT/STGI instructions */
-#define X86_FEATURE_WDT		( 6*32+13) /* Watchdog timer */
-#define X86_FEATURE_LWP		( 6*32+15) /* Light Weight Profiling */
-#define X86_FEATURE_FMA4	( 6*32+16) /* 4 operands MAC instructions */
-#define X86_FEATURE_TCE		( 6*32+17) /* translation cache extension */
-#define X86_FEATURE_NODEID_MSR	( 6*32+19) /* NodeId MSR */
-#define X86_FEATURE_TBM		( 6*32+21) /* trailing bit manipulations */
-#define X86_FEATURE_TOPOEXT	( 6*32+22) /* topology extensions CPUID leafs */
-#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */
-#define X86_FEATURE_PERFCTR_NB  ( 6*32+24) /* NB performance counter extensions */
-#define X86_FEATURE_BPEXT	(6*32+26) /* data breakpoint extension */
-#define X86_FEATURE_PERFCTR_L2	( 6*32+28) /* L2 performance counter extensions */
-#define X86_FEATURE_MWAITX	( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */
-
-/*
- * Auxiliary flags: Linux defined - For features scattered in various
- * CPUID levels like 0x6, 0xA etc, word 7.
- *
- * Reuse free bits when adding new feature flags!
- */
-
-#define X86_FEATURE_CPB		( 7*32+ 2) /* AMD Core Performance Boost */
-#define X86_FEATURE_EPB		( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
-
-#define X86_FEATURE_HW_PSTATE	( 7*32+ 8) /* AMD HW-PState */
-#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
-
-#define X86_FEATURE_INTEL_PT	( 7*32+15) /* Intel Processor Trace */
-
-/* Virtualization flags: Linux defined, word 8 */
-#define X86_FEATURE_TPR_SHADOW  ( 8*32+ 0) /* Intel TPR Shadow */
-#define X86_FEATURE_VNMI        ( 8*32+ 1) /* Intel Virtual NMI */
-#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */
-#define X86_FEATURE_EPT         ( 8*32+ 3) /* Intel Extended Page Table */
-#define X86_FEATURE_VPID        ( 8*32+ 4) /* Intel Virtual Processor ID */
-
-#define X86_FEATURE_VMMCALL     ( 8*32+15) /* Prefer vmmcall to vmcall */
-#define X86_FEATURE_XENPV       ( 8*32+16) /* "" Xen paravirtual guest */
-
-
-/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
-#define X86_FEATURE_FSGSBASE	( 9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
-#define X86_FEATURE_TSC_ADJUST	( 9*32+ 1) /* TSC adjustment MSR 0x3b */
-#define X86_FEATURE_BMI1	( 9*32+ 3) /* 1st group bit manipulation extensions */
-#define X86_FEATURE_HLE		( 9*32+ 4) /* Hardware Lock Elision */
-#define X86_FEATURE_AVX2	( 9*32+ 5) /* AVX2 instructions */
-#define X86_FEATURE_SMEP	( 9*32+ 7) /* Supervisor Mode Execution Protection */
-#define X86_FEATURE_BMI2	( 9*32+ 8) /* 2nd group bit manipulation extensions */
-#define X86_FEATURE_ERMS	( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */
-#define X86_FEATURE_INVPCID	( 9*32+10) /* Invalidate Processor Context ID */
-#define X86_FEATURE_RTM		( 9*32+11) /* Restricted Transactional Memory */
-#define X86_FEATURE_CQM		( 9*32+12) /* Cache QoS Monitoring */
-#define X86_FEATURE_MPX		( 9*32+14) /* Memory Protection Extension */
-#define X86_FEATURE_AVX512F	( 9*32+16) /* AVX-512 Foundation */
-#define X86_FEATURE_RDSEED	( 9*32+18) /* The RDSEED instruction */
-#define X86_FEATURE_ADX		( 9*32+19) /* The ADCX and ADOX instructions */
-#define X86_FEATURE_SMAP	( 9*32+20) /* Supervisor Mode Access Prevention */
-#define X86_FEATURE_PCOMMIT	( 9*32+22) /* PCOMMIT instruction */
-#define X86_FEATURE_CLFLUSHOPT	( 9*32+23) /* CLFLUSHOPT instruction */
-#define X86_FEATURE_CLWB	( 9*32+24) /* CLWB instruction */
-#define X86_FEATURE_AVX512PF	( 9*32+26) /* AVX-512 Prefetch */
-#define X86_FEATURE_AVX512ER	( 9*32+27) /* AVX-512 Exponential and Reciprocal */
-#define X86_FEATURE_AVX512CD	( 9*32+28) /* AVX-512 Conflict Detection */
-#define X86_FEATURE_SHA_NI	( 9*32+29) /* SHA1/SHA256 Instruction Extensions */
-
-/* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */
-#define X86_FEATURE_XSAVEOPT	(10*32+ 0) /* XSAVEOPT */
-#define X86_FEATURE_XSAVEC	(10*32+ 1) /* XSAVEC */
-#define X86_FEATURE_XGETBV1	(10*32+ 2) /* XGETBV with ECX = 1 */
-#define X86_FEATURE_XSAVES	(10*32+ 3) /* XSAVES/XRSTORS */
-
-/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */
-#define X86_FEATURE_CQM_LLC	(11*32+ 1) /* LLC QoS if 1 */
-
-/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */
-#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */
-
-/* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
-#define X86_FEATURE_CLZERO	(13*32+0) /* CLZERO instruction */
-
-/* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */
-#define X86_FEATURE_DTHERM	(14*32+ 0) /* Digital Thermal Sensor */
-#define X86_FEATURE_IDA		(14*32+ 1) /* Intel Dynamic Acceleration */
-#define X86_FEATURE_ARAT	(14*32+ 2) /* Always Running APIC Timer */
-#define X86_FEATURE_PLN		(14*32+ 4) /* Intel Power Limit Notification */
-#define X86_FEATURE_PTS		(14*32+ 6) /* Intel Package Thermal Status */
-#define X86_FEATURE_HWP		(14*32+ 7) /* Intel Hardware P-states */
-#define X86_FEATURE_HWP_NOTIFY	(14*32+ 8) /* HWP Notification */
-#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */
-#define X86_FEATURE_HWP_EPP	(14*32+10) /* HWP Energy Perf. Preference */
-#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */
-
-/* AMD SVM Feature Identification, CPUID level 0x8000000a (edx), word 15 */
-#define X86_FEATURE_NPT		(15*32+ 0) /* Nested Page Table support */
-#define X86_FEATURE_LBRV	(15*32+ 1) /* LBR Virtualization support */
-#define X86_FEATURE_SVML	(15*32+ 2) /* "svm_lock" SVM locking MSR */
-#define X86_FEATURE_NRIPS	(15*32+ 3) /* "nrip_save" SVM next_rip save */
-#define X86_FEATURE_TSCRATEMSR  (15*32+ 4) /* "tsc_scale" TSC scaling support */
-#define X86_FEATURE_VMCBCLEAN   (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */
-#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */
-#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */
-#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */
-#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
-
-/*
- * BUG word(s)
- */
-#define X86_BUG(x)		(NCAPINTS*32 + (x))
-
-#define X86_BUG_F00F		X86_BUG(0) /* Intel F00F */
-#define X86_BUG_FDIV		X86_BUG(1) /* FPU FDIV */
-#define X86_BUG_COMA		X86_BUG(2) /* Cyrix 6x86 coma */
-#define X86_BUG_AMD_TLB_MMATCH	X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */
-#define X86_BUG_AMD_APIC_C1E	X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */
-#define X86_BUG_11AP		X86_BUG(5) /* Bad local APIC aka 11AP */
-#define X86_BUG_FXSAVE_LEAK	X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
-#define X86_BUG_CLFLUSH_MONITOR	X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
-#define X86_BUG_SYSRET_SS_ATTRS	X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
+#include <asm/processor.h>
 
 #if defined(__KERNEL__) && !defined(__ASSEMBLY__)
 
@@ -369,8 +88,7 @@
  * is not relevant.
  */
 #define cpu_feature_enabled(bit)	\
-	(__builtin_constant_p(bit) && DISABLED_MASK_BIT_SET(bit) ? 0 :	\
-	 cpu_has(&boot_cpu_data, bit))
+	(__builtin_constant_p(bit) && DISABLED_MASK_BIT_SET(bit) ? 0 : static_cpu_has(bit))
 
 #define boot_cpu_has(bit)	cpu_has(&boot_cpu_data, bit)
 
@@ -406,106 +124,19 @@
 #define cpu_has_osxsave		boot_cpu_has(X86_FEATURE_OSXSAVE)
 #define cpu_has_hypervisor	boot_cpu_has(X86_FEATURE_HYPERVISOR)
 /*
- * Do not add any more of those clumsy macros - use static_cpu_has_safe() for
+ * Do not add any more of those clumsy macros - use static_cpu_has() for
  * fast paths and boot_cpu_has() otherwise!
  */
 
-#if __GNUC__ >= 4 && defined(CONFIG_X86_FAST_FEATURE_TESTS)
-extern void warn_pre_alternatives(void);
-extern bool __static_cpu_has_safe(u16 bit);
-
+#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS)
 /*
  * Static testing of CPU features.  Used the same as boot_cpu_has().
- * These are only valid after alternatives have run, but will statically
- * patch the target code for additional performance.
+ * These will statically patch the target code for additional
+ * performance.
  */
-static __always_inline __pure bool __static_cpu_has(u16 bit)
+static __always_inline __pure bool _static_cpu_has(u16 bit)
 {
-#ifdef CC_HAVE_ASM_GOTO
-
-#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS
-
-		/*
-		 * Catch too early usage of this before alternatives
-		 * have run.
-		 */
-		asm_volatile_goto("1: jmp %l[t_warn]\n"
-			 "2:\n"
-			 ".section .altinstructions,\"a\"\n"
-			 " .long 1b - .\n"
-			 " .long 0\n"		/* no replacement */
-			 " .word %P0\n"		/* 1: do replace */
-			 " .byte 2b - 1b\n"	/* source len */
-			 " .byte 0\n"		/* replacement len */
-			 " .byte 0\n"		/* pad len */
-			 ".previous\n"
-			 /* skipping size check since replacement size = 0 */
-			 : : "i" (X86_FEATURE_ALWAYS) : : t_warn);
-
-#endif
-
-		asm_volatile_goto("1: jmp %l[t_no]\n"
-			 "2:\n"
-			 ".section .altinstructions,\"a\"\n"
-			 " .long 1b - .\n"
-			 " .long 0\n"		/* no replacement */
-			 " .word %P0\n"		/* feature bit */
-			 " .byte 2b - 1b\n"	/* source len */
-			 " .byte 0\n"		/* replacement len */
-			 " .byte 0\n"		/* pad len */
-			 ".previous\n"
-			 /* skipping size check since replacement size = 0 */
-			 : : "i" (bit) : : t_no);
-		return true;
-	t_no:
-		return false;
-
-#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS
-	t_warn:
-		warn_pre_alternatives();
-		return false;
-#endif
-
-#else /* CC_HAVE_ASM_GOTO */
-
-		u8 flag;
-		/* Open-coded due to __stringify() in ALTERNATIVE() */
-		asm volatile("1: movb $0,%0\n"
-			     "2:\n"
-			     ".section .altinstructions,\"a\"\n"
-			     " .long 1b - .\n"
-			     " .long 3f - .\n"
-			     " .word %P1\n"		/* feature bit */
-			     " .byte 2b - 1b\n"		/* source len */
-			     " .byte 4f - 3f\n"		/* replacement len */
-			     " .byte 0\n"		/* pad len */
-			     ".previous\n"
-			     ".section .discard,\"aw\",@progbits\n"
-			     " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
-			     ".previous\n"
-			     ".section .altinstr_replacement,\"ax\"\n"
-			     "3: movb $1,%0\n"
-			     "4:\n"
-			     ".previous\n"
-			     : "=qm" (flag) : "i" (bit));
-		return flag;
-
-#endif /* CC_HAVE_ASM_GOTO */
-}
-
-#define static_cpu_has(bit)					\
-(								\
-	__builtin_constant_p(boot_cpu_has(bit)) ?		\
-		boot_cpu_has(bit) :				\
-	__builtin_constant_p(bit) ?				\
-		__static_cpu_has(bit) :				\
-		boot_cpu_has(bit)				\
-)
-
-static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
-{
-#ifdef CC_HAVE_ASM_GOTO
-		asm_volatile_goto("1: jmp %l[t_dynamic]\n"
+		asm_volatile_goto("1: jmp 6f\n"
 			 "2:\n"
 			 ".skip -(((5f-4f) - (2b-1b)) > 0) * "
 			         "((5f-4f) - (2b-1b)),0x90\n"
@@ -530,66 +161,34 @@
 			 " .byte 0\n"			/* repl len */
 			 " .byte 0\n"			/* pad len */
 			 ".previous\n"
-			 : : "i" (bit), "i" (X86_FEATURE_ALWAYS)
-			 : : t_dynamic, t_no);
+			 ".section .altinstr_aux,\"ax\"\n"
+			 "6:\n"
+			 " testb %[bitnum],%[cap_byte]\n"
+			 " jnz %l[t_yes]\n"
+			 " jmp %l[t_no]\n"
+			 ".previous\n"
+			 : : "i" (bit), "i" (X86_FEATURE_ALWAYS),
+			     [bitnum] "i" (1 << (bit & 7)),
+			     [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3])
+			 : : t_yes, t_no);
+	t_yes:
 		return true;
 	t_no:
 		return false;
-	t_dynamic:
-		return __static_cpu_has_safe(bit);
-#else
-		u8 flag;
-		/* Open-coded due to __stringify() in ALTERNATIVE() */
-		asm volatile("1: movb $2,%0\n"
-			     "2:\n"
-			     ".section .altinstructions,\"a\"\n"
-			     " .long 1b - .\n"		/* src offset */
-			     " .long 3f - .\n"		/* repl offset */
-			     " .word %P2\n"		/* always replace */
-			     " .byte 2b - 1b\n"		/* source len */
-			     " .byte 4f - 3f\n"		/* replacement len */
-			     " .byte 0\n"		/* pad len */
-			     ".previous\n"
-			     ".section .discard,\"aw\",@progbits\n"
-			     " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
-			     ".previous\n"
-			     ".section .altinstr_replacement,\"ax\"\n"
-			     "3: movb $0,%0\n"
-			     "4:\n"
-			     ".previous\n"
-			     ".section .altinstructions,\"a\"\n"
-			     " .long 1b - .\n"		/* src offset */
-			     " .long 5f - .\n"		/* repl offset */
-			     " .word %P1\n"		/* feature bit */
-			     " .byte 4b - 3b\n"		/* src len */
-			     " .byte 6f - 5f\n"		/* repl len */
-			     " .byte 0\n"		/* pad len */
-			     ".previous\n"
-			     ".section .discard,\"aw\",@progbits\n"
-			     " .byte 0xff + (6f-5f) - (4b-3b)\n" /* size check */
-			     ".previous\n"
-			     ".section .altinstr_replacement,\"ax\"\n"
-			     "5: movb $1,%0\n"
-			     "6:\n"
-			     ".previous\n"
-			     : "=qm" (flag)
-			     : "i" (bit), "i" (X86_FEATURE_ALWAYS));
-		return (flag == 2 ? __static_cpu_has_safe(bit) : flag);
-#endif /* CC_HAVE_ASM_GOTO */
 }
 
-#define static_cpu_has_safe(bit)				\
+#define static_cpu_has(bit)					\
 (								\
 	__builtin_constant_p(boot_cpu_has(bit)) ?		\
 		boot_cpu_has(bit) :				\
-		_static_cpu_has_safe(bit)			\
+		_static_cpu_has(bit)				\
 )
 #else
 /*
- * gcc 3.x is too stupid to do the static test; fall back to dynamic.
+ * Fall back to dynamic for gcc versions which don't support asm goto. Should be
+ * a minority now anyway.
  */
 #define static_cpu_has(bit)		boot_cpu_has(bit)
-#define static_cpu_has_safe(bit)	boot_cpu_has(bit)
 #endif
 
 #define cpu_has_bug(c, bit)		cpu_has(c, (bit))
@@ -597,7 +196,6 @@
 #define clear_cpu_bug(c, bit)		clear_cpu_cap(c, (bit))
 
 #define static_cpu_has_bug(bit)		static_cpu_has((bit))
-#define static_cpu_has_bug_safe(bit)	static_cpu_has_safe((bit))
 #define boot_cpu_has_bug(bit)		cpu_has_bug(&boot_cpu_data, (bit))
 
 #define MAX_CPU_FEATURES		(NCAPINTS * 32)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
new file mode 100644
index 0000000..074b760
--- /dev/null
+++ b/arch/x86/include/asm/cpufeatures.h

@@ -0,0 +1,300 @@
+#ifndef _ASM_X86_CPUFEATURES_H
+#define _ASM_X86_CPUFEATURES_H
+
+#ifndef _ASM_X86_REQUIRED_FEATURES_H
+#include <asm/required-features.h>
+#endif
+
+#ifndef _ASM_X86_DISABLED_FEATURES_H
+#include <asm/disabled-features.h>
+#endif
+
+/*
+ * Defines x86 CPU feature bits
+ */
+#define NCAPINTS	16	/* N 32-bit words worth of info */
+#define NBUGINTS	1	/* N 32-bit bug flags */
+
+/*
+ * Note: If the comment begins with a quoted string, that string is used
+ * in /proc/cpuinfo instead of the macro name.  If the string is "",
+ * this feature bit is not displayed in /proc/cpuinfo at all.
+ */
+
+/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */
+#define X86_FEATURE_FPU		( 0*32+ 0) /* Onboard FPU */
+#define X86_FEATURE_VME		( 0*32+ 1) /* Virtual Mode Extensions */
+#define X86_FEATURE_DE		( 0*32+ 2) /* Debugging Extensions */
+#define X86_FEATURE_PSE		( 0*32+ 3) /* Page Size Extensions */
+#define X86_FEATURE_TSC		( 0*32+ 4) /* Time Stamp Counter */
+#define X86_FEATURE_MSR		( 0*32+ 5) /* Model-Specific Registers */
+#define X86_FEATURE_PAE		( 0*32+ 6) /* Physical Address Extensions */
+#define X86_FEATURE_MCE		( 0*32+ 7) /* Machine Check Exception */
+#define X86_FEATURE_CX8		( 0*32+ 8) /* CMPXCHG8 instruction */
+#define X86_FEATURE_APIC	( 0*32+ 9) /* Onboard APIC */
+#define X86_FEATURE_SEP		( 0*32+11) /* SYSENTER/SYSEXIT */
+#define X86_FEATURE_MTRR	( 0*32+12) /* Memory Type Range Registers */
+#define X86_FEATURE_PGE		( 0*32+13) /* Page Global Enable */
+#define X86_FEATURE_MCA		( 0*32+14) /* Machine Check Architecture */
+#define X86_FEATURE_CMOV	( 0*32+15) /* CMOV instructions */
+					  /* (plus FCMOVcc, FCOMI with FPU) */
+#define X86_FEATURE_PAT		( 0*32+16) /* Page Attribute Table */
+#define X86_FEATURE_PSE36	( 0*32+17) /* 36-bit PSEs */
+#define X86_FEATURE_PN		( 0*32+18) /* Processor serial number */
+#define X86_FEATURE_CLFLUSH	( 0*32+19) /* CLFLUSH instruction */
+#define X86_FEATURE_DS		( 0*32+21) /* "dts" Debug Store */
+#define X86_FEATURE_ACPI	( 0*32+22) /* ACPI via MSR */
+#define X86_FEATURE_MMX		( 0*32+23) /* Multimedia Extensions */
+#define X86_FEATURE_FXSR	( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */
+#define X86_FEATURE_XMM		( 0*32+25) /* "sse" */
+#define X86_FEATURE_XMM2	( 0*32+26) /* "sse2" */
+#define X86_FEATURE_SELFSNOOP	( 0*32+27) /* "ss" CPU self snoop */
+#define X86_FEATURE_HT		( 0*32+28) /* Hyper-Threading */
+#define X86_FEATURE_ACC		( 0*32+29) /* "tm" Automatic clock control */
+#define X86_FEATURE_IA64	( 0*32+30) /* IA-64 processor */
+#define X86_FEATURE_PBE		( 0*32+31) /* Pending Break Enable */
+
+/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
+/* Don't duplicate feature flags which are redundant with Intel! */
+#define X86_FEATURE_SYSCALL	( 1*32+11) /* SYSCALL/SYSRET */
+#define X86_FEATURE_MP		( 1*32+19) /* MP Capable. */
+#define X86_FEATURE_NX		( 1*32+20) /* Execute Disable */
+#define X86_FEATURE_MMXEXT	( 1*32+22) /* AMD MMX extensions */
+#define X86_FEATURE_FXSR_OPT	( 1*32+25) /* FXSAVE/FXRSTOR optimizations */
+#define X86_FEATURE_GBPAGES	( 1*32+26) /* "pdpe1gb" GB pages */
+#define X86_FEATURE_RDTSCP	( 1*32+27) /* RDTSCP */
+#define X86_FEATURE_LM		( 1*32+29) /* Long Mode (x86-64) */
+#define X86_FEATURE_3DNOWEXT	( 1*32+30) /* AMD 3DNow! extensions */
+#define X86_FEATURE_3DNOW	( 1*32+31) /* 3DNow! */
+
+/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
+#define X86_FEATURE_RECOVERY	( 2*32+ 0) /* CPU in recovery mode */
+#define X86_FEATURE_LONGRUN	( 2*32+ 1) /* Longrun power control */
+#define X86_FEATURE_LRTI	( 2*32+ 3) /* LongRun table interface */
+
+/* Other features, Linux-defined mapping, word 3 */
+/* This range is used for feature bits which conflict or are synthesized */
+#define X86_FEATURE_CXMMX	( 3*32+ 0) /* Cyrix MMX extensions */
+#define X86_FEATURE_K6_MTRR	( 3*32+ 1) /* AMD K6 nonstandard MTRRs */
+#define X86_FEATURE_CYRIX_ARR	( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */
+#define X86_FEATURE_CENTAUR_MCR	( 3*32+ 3) /* Centaur MCRs (= MTRRs) */
+/* cpu types for specific tunings: */
+#define X86_FEATURE_K8		( 3*32+ 4) /* "" Opteron, Athlon64 */
+#define X86_FEATURE_K7		( 3*32+ 5) /* "" Athlon */
+#define X86_FEATURE_P3		( 3*32+ 6) /* "" P3 */
+#define X86_FEATURE_P4		( 3*32+ 7) /* "" P4 */
+#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */
+#define X86_FEATURE_UP		( 3*32+ 9) /* smp kernel running on up */
+#define X86_FEATURE_ART		( 3*32+10) /* Platform has always running timer (ART) */
+#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */
+#define X86_FEATURE_PEBS	( 3*32+12) /* Precise-Event Based Sampling */
+#define X86_FEATURE_BTS		( 3*32+13) /* Branch Trace Store */
+#define X86_FEATURE_SYSCALL32	( 3*32+14) /* "" syscall in ia32 userspace */
+#define X86_FEATURE_SYSENTER32	( 3*32+15) /* "" sysenter in ia32 userspace */
+#define X86_FEATURE_REP_GOOD	( 3*32+16) /* rep microcode works well */
+#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" Mfence synchronizes RDTSC */
+#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" Lfence synchronizes RDTSC */
+/* free, was #define X86_FEATURE_11AP	( 3*32+19) * "" Bad local APIC aka 11AP */
+#define X86_FEATURE_NOPL	( 3*32+20) /* The NOPL (0F 1F) instructions */
+#define X86_FEATURE_ALWAYS	( 3*32+21) /* "" Always-present feature */
+#define X86_FEATURE_XTOPOLOGY	( 3*32+22) /* cpu topology enum extensions */
+#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */
+#define X86_FEATURE_NONSTOP_TSC	( 3*32+24) /* TSC does not stop in C states */
+/* free, was #define X86_FEATURE_CLFLUSH_MONITOR ( 3*32+25) * "" clflush reqd with monitor */
+#define X86_FEATURE_EXTD_APICID	( 3*32+26) /* has extended APICID (8 bits) */
+#define X86_FEATURE_AMD_DCM     ( 3*32+27) /* multi-node processor */
+#define X86_FEATURE_APERFMPERF	( 3*32+28) /* APERFMPERF */
+#define X86_FEATURE_EAGER_FPU	( 3*32+29) /* "eagerfpu" Non lazy FPU restore */
+#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
+#define X86_FEATURE_MCE_RECOVERY ( 3*32+31) /* cpu has recoverable machine checks */
+
+/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
+#define X86_FEATURE_XMM3	( 4*32+ 0) /* "pni" SSE-3 */
+#define X86_FEATURE_PCLMULQDQ	( 4*32+ 1) /* PCLMULQDQ instruction */
+#define X86_FEATURE_DTES64	( 4*32+ 2) /* 64-bit Debug Store */
+#define X86_FEATURE_MWAIT	( 4*32+ 3) /* "monitor" Monitor/Mwait support */
+#define X86_FEATURE_DSCPL	( 4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */
+#define X86_FEATURE_VMX		( 4*32+ 5) /* Hardware virtualization */
+#define X86_FEATURE_SMX		( 4*32+ 6) /* Safer mode */
+#define X86_FEATURE_EST		( 4*32+ 7) /* Enhanced SpeedStep */
+#define X86_FEATURE_TM2		( 4*32+ 8) /* Thermal Monitor 2 */
+#define X86_FEATURE_SSSE3	( 4*32+ 9) /* Supplemental SSE-3 */
+#define X86_FEATURE_CID		( 4*32+10) /* Context ID */
+#define X86_FEATURE_SDBG	( 4*32+11) /* Silicon Debug */
+#define X86_FEATURE_FMA		( 4*32+12) /* Fused multiply-add */
+#define X86_FEATURE_CX16	( 4*32+13) /* CMPXCHG16B */
+#define X86_FEATURE_XTPR	( 4*32+14) /* Send Task Priority Messages */
+#define X86_FEATURE_PDCM	( 4*32+15) /* Performance Capabilities */
+#define X86_FEATURE_PCID	( 4*32+17) /* Process Context Identifiers */
+#define X86_FEATURE_DCA		( 4*32+18) /* Direct Cache Access */
+#define X86_FEATURE_XMM4_1	( 4*32+19) /* "sse4_1" SSE-4.1 */
+#define X86_FEATURE_XMM4_2	( 4*32+20) /* "sse4_2" SSE-4.2 */
+#define X86_FEATURE_X2APIC	( 4*32+21) /* x2APIC */
+#define X86_FEATURE_MOVBE	( 4*32+22) /* MOVBE instruction */
+#define X86_FEATURE_POPCNT      ( 4*32+23) /* POPCNT instruction */
+#define X86_FEATURE_TSC_DEADLINE_TIMER	( 4*32+24) /* Tsc deadline timer */
+#define X86_FEATURE_AES		( 4*32+25) /* AES instructions */
+#define X86_FEATURE_XSAVE	( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
+#define X86_FEATURE_OSXSAVE	( 4*32+27) /* "" XSAVE enabled in the OS */
+#define X86_FEATURE_AVX		( 4*32+28) /* Advanced Vector Extensions */
+#define X86_FEATURE_F16C	( 4*32+29) /* 16-bit fp conversions */
+#define X86_FEATURE_RDRAND	( 4*32+30) /* The RDRAND instruction */
+#define X86_FEATURE_HYPERVISOR	( 4*32+31) /* Running on a hypervisor */
+
+/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
+#define X86_FEATURE_XSTORE	( 5*32+ 2) /* "rng" RNG present (xstore) */
+#define X86_FEATURE_XSTORE_EN	( 5*32+ 3) /* "rng_en" RNG enabled */
+#define X86_FEATURE_XCRYPT	( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */
+#define X86_FEATURE_XCRYPT_EN	( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */
+#define X86_FEATURE_ACE2	( 5*32+ 8) /* Advanced Cryptography Engine v2 */
+#define X86_FEATURE_ACE2_EN	( 5*32+ 9) /* ACE v2 enabled */
+#define X86_FEATURE_PHE		( 5*32+10) /* PadLock Hash Engine */
+#define X86_FEATURE_PHE_EN	( 5*32+11) /* PHE enabled */
+#define X86_FEATURE_PMM		( 5*32+12) /* PadLock Montgomery Multiplier */
+#define X86_FEATURE_PMM_EN	( 5*32+13) /* PMM enabled */
+
+/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */
+#define X86_FEATURE_LAHF_LM	( 6*32+ 0) /* LAHF/SAHF in long mode */
+#define X86_FEATURE_CMP_LEGACY	( 6*32+ 1) /* If yes HyperThreading not valid */
+#define X86_FEATURE_SVM		( 6*32+ 2) /* Secure virtual machine */
+#define X86_FEATURE_EXTAPIC	( 6*32+ 3) /* Extended APIC space */
+#define X86_FEATURE_CR8_LEGACY	( 6*32+ 4) /* CR8 in 32-bit mode */
+#define X86_FEATURE_ABM		( 6*32+ 5) /* Advanced bit manipulation */
+#define X86_FEATURE_SSE4A	( 6*32+ 6) /* SSE-4A */
+#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */
+#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */
+#define X86_FEATURE_OSVW	( 6*32+ 9) /* OS Visible Workaround */
+#define X86_FEATURE_IBS		( 6*32+10) /* Instruction Based Sampling */
+#define X86_FEATURE_XOP		( 6*32+11) /* extended AVX instructions */
+#define X86_FEATURE_SKINIT	( 6*32+12) /* SKINIT/STGI instructions */
+#define X86_FEATURE_WDT		( 6*32+13) /* Watchdog timer */
+#define X86_FEATURE_LWP		( 6*32+15) /* Light Weight Profiling */
+#define X86_FEATURE_FMA4	( 6*32+16) /* 4 operands MAC instructions */
+#define X86_FEATURE_TCE		( 6*32+17) /* translation cache extension */
+#define X86_FEATURE_NODEID_MSR	( 6*32+19) /* NodeId MSR */
+#define X86_FEATURE_TBM		( 6*32+21) /* trailing bit manipulations */
+#define X86_FEATURE_TOPOEXT	( 6*32+22) /* topology extensions CPUID leafs */
+#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */
+#define X86_FEATURE_PERFCTR_NB  ( 6*32+24) /* NB performance counter extensions */
+#define X86_FEATURE_BPEXT	(6*32+26) /* data breakpoint extension */
+#define X86_FEATURE_PERFCTR_L2	( 6*32+28) /* L2 performance counter extensions */
+#define X86_FEATURE_MWAITX	( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */
+
+/*
+ * Auxiliary flags: Linux defined - For features scattered in various
+ * CPUID levels like 0x6, 0xA etc, word 7.
+ *
+ * Reuse free bits when adding new feature flags!
+ */
+
+#define X86_FEATURE_CPB		( 7*32+ 2) /* AMD Core Performance Boost */
+#define X86_FEATURE_EPB		( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
+
+#define X86_FEATURE_HW_PSTATE	( 7*32+ 8) /* AMD HW-PState */
+#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
+
+#define X86_FEATURE_INTEL_PT	( 7*32+15) /* Intel Processor Trace */
+
+/* Virtualization flags: Linux defined, word 8 */
+#define X86_FEATURE_TPR_SHADOW  ( 8*32+ 0) /* Intel TPR Shadow */
+#define X86_FEATURE_VNMI        ( 8*32+ 1) /* Intel Virtual NMI */
+#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */
+#define X86_FEATURE_EPT         ( 8*32+ 3) /* Intel Extended Page Table */
+#define X86_FEATURE_VPID        ( 8*32+ 4) /* Intel Virtual Processor ID */
+
+#define X86_FEATURE_VMMCALL     ( 8*32+15) /* Prefer vmmcall to vmcall */
+#define X86_FEATURE_XENPV       ( 8*32+16) /* "" Xen paravirtual guest */
+
+
+/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
+#define X86_FEATURE_FSGSBASE	( 9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
+#define X86_FEATURE_TSC_ADJUST	( 9*32+ 1) /* TSC adjustment MSR 0x3b */
+#define X86_FEATURE_BMI1	( 9*32+ 3) /* 1st group bit manipulation extensions */
+#define X86_FEATURE_HLE		( 9*32+ 4) /* Hardware Lock Elision */
+#define X86_FEATURE_AVX2	( 9*32+ 5) /* AVX2 instructions */
+#define X86_FEATURE_SMEP	( 9*32+ 7) /* Supervisor Mode Execution Protection */
+#define X86_FEATURE_BMI2	( 9*32+ 8) /* 2nd group bit manipulation extensions */
+#define X86_FEATURE_ERMS	( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */
+#define X86_FEATURE_INVPCID	( 9*32+10) /* Invalidate Processor Context ID */
+#define X86_FEATURE_RTM		( 9*32+11) /* Restricted Transactional Memory */
+#define X86_FEATURE_CQM		( 9*32+12) /* Cache QoS Monitoring */
+#define X86_FEATURE_MPX		( 9*32+14) /* Memory Protection Extension */
+#define X86_FEATURE_AVX512F	( 9*32+16) /* AVX-512 Foundation */
+#define X86_FEATURE_AVX512DQ	( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */
+#define X86_FEATURE_RDSEED	( 9*32+18) /* The RDSEED instruction */
+#define X86_FEATURE_ADX		( 9*32+19) /* The ADCX and ADOX instructions */
+#define X86_FEATURE_SMAP	( 9*32+20) /* Supervisor Mode Access Prevention */
+#define X86_FEATURE_PCOMMIT	( 9*32+22) /* PCOMMIT instruction */
+#define X86_FEATURE_CLFLUSHOPT	( 9*32+23) /* CLFLUSHOPT instruction */
+#define X86_FEATURE_CLWB	( 9*32+24) /* CLWB instruction */
+#define X86_FEATURE_AVX512PF	( 9*32+26) /* AVX-512 Prefetch */
+#define X86_FEATURE_AVX512ER	( 9*32+27) /* AVX-512 Exponential and Reciprocal */
+#define X86_FEATURE_AVX512CD	( 9*32+28) /* AVX-512 Conflict Detection */
+#define X86_FEATURE_SHA_NI	( 9*32+29) /* SHA1/SHA256 Instruction Extensions */
+#define X86_FEATURE_AVX512BW	( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */
+#define X86_FEATURE_AVX512VL	( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */
+
+/* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */
+#define X86_FEATURE_XSAVEOPT	(10*32+ 0) /* XSAVEOPT */
+#define X86_FEATURE_XSAVEC	(10*32+ 1) /* XSAVEC */
+#define X86_FEATURE_XGETBV1	(10*32+ 2) /* XGETBV with ECX = 1 */
+#define X86_FEATURE_XSAVES	(10*32+ 3) /* XSAVES/XRSTORS */
+
+/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */
+#define X86_FEATURE_CQM_LLC	(11*32+ 1) /* LLC QoS if 1 */
+
+/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */
+#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */
+
+/* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
+#define X86_FEATURE_CLZERO	(13*32+0) /* CLZERO instruction */
+
+/* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */
+#define X86_FEATURE_DTHERM	(14*32+ 0) /* Digital Thermal Sensor */
+#define X86_FEATURE_IDA		(14*32+ 1) /* Intel Dynamic Acceleration */
+#define X86_FEATURE_ARAT	(14*32+ 2) /* Always Running APIC Timer */
+#define X86_FEATURE_PLN		(14*32+ 4) /* Intel Power Limit Notification */
+#define X86_FEATURE_PTS		(14*32+ 6) /* Intel Package Thermal Status */
+#define X86_FEATURE_HWP		(14*32+ 7) /* Intel Hardware P-states */
+#define X86_FEATURE_HWP_NOTIFY	(14*32+ 8) /* HWP Notification */
+#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */
+#define X86_FEATURE_HWP_EPP	(14*32+10) /* HWP Energy Perf. Preference */
+#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */
+
+/* AMD SVM Feature Identification, CPUID level 0x8000000a (edx), word 15 */
+#define X86_FEATURE_NPT		(15*32+ 0) /* Nested Page Table support */
+#define X86_FEATURE_LBRV	(15*32+ 1) /* LBR Virtualization support */
+#define X86_FEATURE_SVML	(15*32+ 2) /* "svm_lock" SVM locking MSR */
+#define X86_FEATURE_NRIPS	(15*32+ 3) /* "nrip_save" SVM next_rip save */
+#define X86_FEATURE_TSCRATEMSR  (15*32+ 4) /* "tsc_scale" TSC scaling support */
+#define X86_FEATURE_VMCBCLEAN   (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */
+#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */
+#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */
+#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */
+#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
+#define X86_FEATURE_AVIC	(15*32+13) /* Virtual Interrupt Controller */
+
+/*
+ * BUG word(s)
+ */
+#define X86_BUG(x)		(NCAPINTS*32 + (x))
+
+#define X86_BUG_F00F		X86_BUG(0) /* Intel F00F */
+#define X86_BUG_FDIV		X86_BUG(1) /* FPU FDIV */
+#define X86_BUG_COMA		X86_BUG(2) /* Cyrix 6x86 coma */
+#define X86_BUG_AMD_TLB_MMATCH	X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */
+#define X86_BUG_AMD_APIC_C1E	X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */
+#define X86_BUG_11AP		X86_BUG(5) /* Bad local APIC aka 11AP */
+#define X86_BUG_FXSAVE_LEAK	X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
+#define X86_BUG_CLFLUSH_MONITOR	X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
+#define X86_BUG_SYSRET_SS_ATTRS	X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
+
+#ifdef CONFIG_X86_32
+/*
+ * 64-bit kernels don't use X86_BUG_ESPFIX.  Make the define conditional
+ * to avoid confusion.
+ */
+#define X86_BUG_ESPFIX		X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */
+#endif
+
+#endif /* _ASM_X86_CPUFEATURES_H */

diff --git a/arch/x86/include/asm/desc_defs.h b/arch/x86/include/asm/desc_defs.h
index 278441f..eb5deb4 100644
--- a/arch/x86/include/asm/desc_defs.h
+++ b/arch/x86/include/asm/desc_defs.h

@@ -98,4 +98,27 @@
 
 #endif /* !__ASSEMBLY__ */
 
+/* Access rights as returned by LAR */
+#define AR_TYPE_RODATA		(0 * (1 << 9))
+#define AR_TYPE_RWDATA		(1 * (1 << 9))
+#define AR_TYPE_RODATA_EXPDOWN	(2 * (1 << 9))
+#define AR_TYPE_RWDATA_EXPDOWN	(3 * (1 << 9))
+#define AR_TYPE_XOCODE		(4 * (1 << 9))
+#define AR_TYPE_XRCODE		(5 * (1 << 9))
+#define AR_TYPE_XOCODE_CONF	(6 * (1 << 9))
+#define AR_TYPE_XRCODE_CONF	(7 * (1 << 9))
+#define AR_TYPE_MASK		(7 * (1 << 9))
+
+#define AR_DPL0			(0 * (1 << 13))
+#define AR_DPL3			(3 * (1 << 13))
+#define AR_DPL_MASK		(3 * (1 << 13))
+
+#define AR_A			(1 << 8)   /* "Accessed" */
+#define AR_S			(1 << 12)  /* If clear, "System" segment */
+#define AR_P			(1 << 15)  /* "Present" */
+#define AR_AVL			(1 << 20)  /* "AVaiLable" (no HW effect) */
+#define AR_L			(1 << 21)  /* "Long mode" for code segments */
+#define AR_DB			(1 << 22)  /* D/B, effect depends on type */
+#define AR_G			(1 << 23)  /* "Granularity" (limit in pages) */
+
 #endif /* _ASM_X86_DESC_DEFS_H */

diff --git a/arch/x86/include/asm/dmi.h b/arch/x86/include/asm/dmi.h
index 535192f..3c69fed 100644
--- a/arch/x86/include/asm/dmi.h
+++ b/arch/x86/include/asm/dmi.h

@@ -15,7 +15,7 @@
 /* Use early IO mappings for DMI because it's initialized early */
 #define dmi_early_remap		early_ioremap
 #define dmi_early_unmap		early_iounmap
-#define dmi_remap		ioremap
+#define dmi_remap		ioremap_cache
 #define dmi_unmap		iounmap
 
 #endif /* _ASM_X86_DMI_H */

diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 1514753..15340e3 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h

@@ -256,7 +256,7 @@
    instruction set this CPU supports.  This could be done in user space,
    but it's not easy, and we've already done it here.  */
 
-#define ELF_HWCAP		(boot_cpu_data.x86_capability[0])
+#define ELF_HWCAP		(boot_cpu_data.x86_capability[CPUID_1_EDX])
 
 /* This yields a string that ld.so will use to load implementation
    specific libraries for optimization.  This is more specific in

diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 6d7d0e5..8554f96 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h

@@ -138,7 +138,7 @@
 extern int fixmaps_set;
 
 extern pte_t *kmap_pte;
-extern pgprot_t kmap_prot;
+#define kmap_prot PAGE_KERNEL
 extern pte_t *pkmap_page_table;
 
 void __native_set_fixmap(enum fixed_addresses idx, pte_t pte);

diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 0fd440d..a212434 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h

@@ -17,6 +17,7 @@
 #include <asm/user.h>
 #include <asm/fpu/api.h>
 #include <asm/fpu/xstate.h>
+#include <asm/cpufeature.h>
 
 /*
  * High level FPU state handling functions:
@@ -58,22 +59,22 @@
  */
 static __always_inline __pure bool use_eager_fpu(void)
 {
-	return static_cpu_has_safe(X86_FEATURE_EAGER_FPU);
+	return static_cpu_has(X86_FEATURE_EAGER_FPU);
 }
 
 static __always_inline __pure bool use_xsaveopt(void)
 {
-	return static_cpu_has_safe(X86_FEATURE_XSAVEOPT);
+	return static_cpu_has(X86_FEATURE_XSAVEOPT);
 }
 
 static __always_inline __pure bool use_xsave(void)
 {
-	return static_cpu_has_safe(X86_FEATURE_XSAVE);
+	return static_cpu_has(X86_FEATURE_XSAVE);
 }
 
 static __always_inline __pure bool use_fxsr(void)
 {
-	return static_cpu_has_safe(X86_FEATURE_FXSR);
+	return static_cpu_has(X86_FEATURE_FXSR);
 }
 
 /*
@@ -300,7 +301,7 @@
 
 	WARN_ON(system_state != SYSTEM_BOOTING);
 
-	if (static_cpu_has_safe(X86_FEATURE_XSAVES))
+	if (static_cpu_has(X86_FEATURE_XSAVES))
 		XSTATE_OP(XSAVES, xstate, lmask, hmask, err);
 	else
 		XSTATE_OP(XSAVE, xstate, lmask, hmask, err);
@@ -322,7 +323,7 @@
 
 	WARN_ON(system_state != SYSTEM_BOOTING);
 
-	if (static_cpu_has_safe(X86_FEATURE_XSAVES))
+	if (static_cpu_has(X86_FEATURE_XSAVES))
 		XSTATE_OP(XRSTORS, xstate, lmask, hmask, err);
 	else
 		XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
@@ -460,7 +461,7 @@
 	 * pending. Clear the x87 state here by setting it to fixed values.
 	 * "m" is a random variable that should be in L1.
 	 */
-	if (unlikely(static_cpu_has_bug_safe(X86_BUG_FXSAVE_LEAK))) {
+	if (unlikely(static_cpu_has_bug(X86_BUG_FXSAVE_LEAK))) {
 		asm volatile(
 			"fnclex\n\t"
 			"emms\n\t"
@@ -589,7 +590,8 @@
 	 * If the task has used the math, pre-load the FPU on xsave processors
 	 * or if the past 5 consecutive context-switches used math.
 	 */
-	fpu.preload = new_fpu->fpstate_active &&
+	fpu.preload = static_cpu_has(X86_FEATURE_FPU) &&
+		      new_fpu->fpstate_active &&
 		      (use_eager_fpu() || new_fpu->counter > 5);
 
 	if (old_fpu->fpregs_active) {

diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index af30fde..f23cd8c 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h

@@ -20,16 +20,15 @@
 
 /* Supported features which support lazy state saving */
 #define XFEATURE_MASK_LAZY	(XFEATURE_MASK_FP | \
-				 XFEATURE_MASK_SSE)
-
-/* Supported features which require eager state saving */
-#define XFEATURE_MASK_EAGER	(XFEATURE_MASK_BNDREGS | \
-				 XFEATURE_MASK_BNDCSR | \
+				 XFEATURE_MASK_SSE | \
 				 XFEATURE_MASK_YMM | \
 				 XFEATURE_MASK_OPMASK | \
 				 XFEATURE_MASK_ZMM_Hi256 | \
 				 XFEATURE_MASK_Hi16_ZMM)
 
+/* Supported features which require eager state saving */
+#define XFEATURE_MASK_EAGER	(XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR)
+
 /* All currently supported features */
 #define XCNTXT_MASK	(XFEATURE_MASK_LAZY | XFEATURE_MASK_EAGER)
 

diff --git a/arch/x86/include/asm/frame.h b/arch/x86/include/asm/frame.h
index 793179c..6e4d170 100644
--- a/arch/x86/include/asm/frame.h
+++ b/arch/x86/include/asm/frame.h

@@ -1,23 +1,44 @@
-#ifdef __ASSEMBLY__
+#ifndef _ASM_X86_FRAME_H
+#define _ASM_X86_FRAME_H
 
 #include <asm/asm.h>
 
-/* The annotation hides the frame from the unwinder and makes it look
-   like a ordinary ebp save/restore. This avoids some special cases for
-   frame pointer later */
-#ifdef CONFIG_FRAME_POINTER
-	.macro FRAME
-	__ASM_SIZE(push,)	%__ASM_REG(bp)
-	__ASM_SIZE(mov)		%__ASM_REG(sp), %__ASM_REG(bp)
-	.endm
-	.macro ENDFRAME
-	__ASM_SIZE(pop,)	%__ASM_REG(bp)
-	.endm
-#else
-	.macro FRAME
-	.endm
-	.macro ENDFRAME
-	.endm
-#endif
+/*
+ * These are stack frame creation macros.  They should be used by every
+ * callable non-leaf asm function to make kernel stack traces more reliable.
+ */
 
-#endif  /*  __ASSEMBLY__  */
+#ifdef CONFIG_FRAME_POINTER
+
+#ifdef __ASSEMBLY__
+
+.macro FRAME_BEGIN
+	push %_ASM_BP
+	_ASM_MOV %_ASM_SP, %_ASM_BP
+.endm
+
+.macro FRAME_END
+	pop %_ASM_BP
+.endm
+
+#else /* !__ASSEMBLY__ */
+
+#define FRAME_BEGIN				\
+	"push %" _ASM_BP "\n"			\
+	_ASM_MOV "%" _ASM_SP ", %" _ASM_BP "\n"
+
+#define FRAME_END "pop %" _ASM_BP "\n"
+
+#endif /* __ASSEMBLY__ */
+
+#define FRAME_OFFSET __ASM_SEL(4, 8)
+
+#else /* !CONFIG_FRAME_POINTER */
+
+#define FRAME_BEGIN
+#define FRAME_END
+#define FRAME_OFFSET 0
+
+#endif /* CONFIG_FRAME_POINTER */
+
+#endif /* _ASM_X86_FRAME_H */

diff --git a/arch/x86/include/asm/imr.h b/arch/x86/include/asm/imr.h
index cd2ce40..ebea2c9 100644
--- a/arch/x86/include/asm/imr.h
+++ b/arch/x86/include/asm/imr.h

@@ -53,7 +53,7 @@
 #define IMR_MASK		(IMR_ALIGN - 1)
 
 int imr_add_range(phys_addr_t base, size_t size,
-		  unsigned int rmask, unsigned int wmask, bool lock);
+		  unsigned int rmask, unsigned int wmask);
 
 int imr_remove_range(phys_addr_t base, size_t size);
 

diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h
index cfc9a0d..a4fe16e 100644
--- a/arch/x86/include/asm/ipi.h
+++ b/arch/x86/include/asm/ipi.h

@@ -57,67 +57,13 @@
 		cpu_relax();
 }
 
-static inline void
-__default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest)
-{
-	/*
-	 * Subtle. In the case of the 'never do double writes' workaround
-	 * we have to lock out interrupts to be safe.  As we don't care
-	 * of the value read we use an atomic rmw access to avoid costly
-	 * cli/sti.  Otherwise we use an even cheaper single atomic write
-	 * to the APIC.
-	 */
-	unsigned int cfg;
-
-	/*
-	 * Wait for idle.
-	 */
-	__xapic_wait_icr_idle();
-
-	/*
-	 * No need to touch the target chip field
-	 */
-	cfg = __prepare_ICR(shortcut, vector, dest);
-
-	/*
-	 * Send the IPI. The write to APIC_ICR fires this off.
-	 */
-	native_apic_mem_write(APIC_ICR, cfg);
-}
+void __default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest);
 
 /*
  * This is used to send an IPI with no shorthand notation (the destination is
  * specified in bits 56 to 63 of the ICR).
  */
-static inline void
- __default_send_IPI_dest_field(unsigned int mask, int vector, unsigned int dest)
-{
-	unsigned long cfg;
-
-	/*
-	 * Wait for idle.
-	 */
-	if (unlikely(vector == NMI_VECTOR))
-		safe_apic_wait_icr_idle();
-	else
-		__xapic_wait_icr_idle();
-
-	/*
-	 * prepare target chip field
-	 */
-	cfg = __prepare_ICR2(mask);
-	native_apic_mem_write(APIC_ICR2, cfg);
-
-	/*
-	 * program the ICR
-	 */
-	cfg = __prepare_ICR(0, vector, dest);
-
-	/*
-	 * Send the IPI. The write to APIC_ICR fires this off.
-	 */
-	native_apic_mem_write(APIC_ICR, cfg);
-}
+void __default_send_IPI_dest_field(unsigned int mask, int vector, unsigned int dest);
 
 extern void default_send_IPI_single(int cpu, int vector);
 extern void default_send_IPI_single_phys(int cpu, int vector);

diff --git a/arch/x86/include/asm/irq_work.h b/arch/x86/include/asm/irq_work.h
index 78162f8..d0afb05 100644
--- a/arch/x86/include/asm/irq_work.h
+++ b/arch/x86/include/asm/irq_work.h

@@ -1,7 +1,7 @@
 #ifndef _ASM_IRQ_WORK_H
 #define _ASM_IRQ_WORK_H
 
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
 
 static inline bool arch_irq_work_has_interrupt(void)
 {

diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index c1adf33..bc62e7c 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h

@@ -17,15 +17,8 @@
 }
 #endif /* CONFIG_KVM_GUEST */
 
-#ifdef CONFIG_DEBUG_RODATA
 #define KVM_HYPERCALL \
         ALTERNATIVE(".byte 0x0f,0x01,0xc1", ".byte 0x0f,0x01,0xd9", X86_FEATURE_VMMCALL)
-#else
-/* On AMD processors, vmcall will generate a trap that we will
- * then rewrite to the appropriate instruction.
- */
-#define KVM_HYPERCALL ".byte 0x0f,0x01,0xc1"
-#endif
 
 /* For KVM hypercalls, a three-byte sequence of either the vmcall or the vmmcall
  * instruction.  The hypervisor may replace it with something else but only the

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 2ea4527..92b6f65 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h

@@ -40,8 +40,20 @@
 #define MCI_STATUS_AR	 (1ULL<<55)  /* Action required */
 
 /* AMD-specific bits */
-#define MCI_STATUS_DEFERRED	(1ULL<<44)  /* declare an uncorrected error */
+#define MCI_STATUS_DEFERRED	(1ULL<<44)  /* uncorrected error, deferred exception */
 #define MCI_STATUS_POISON	(1ULL<<43)  /* access poisonous data */
+#define MCI_STATUS_TCC		(1ULL<<55)  /* Task context corrupt */
+
+/*
+ * McaX field if set indicates a given bank supports MCA extensions:
+ *  - Deferred error interrupt type is specifiable by bank.
+ *  - MCx_MISC0[BlkPtr] field indicates presence of extended MISC registers,
+ *    But should not be used to determine MSR numbers.
+ *  - TCC bit is present in MCx_STATUS.
+ */
+#define MCI_CONFIG_MCAX		0x1
+#define MCI_IPID_MCATYPE	0xFFFF0000
+#define MCI_IPID_HWID		0xFFF
 
 /*
  * Note that the full MCACOD field of IA32_MCi_STATUS MSR is
@@ -91,6 +103,16 @@
 #define MCE_LOG_LEN 32
 #define MCE_LOG_SIGNATURE	"MACHINECHECK"
 
+/* AMD Scalable MCA */
+#define MSR_AMD64_SMCA_MC0_MISC0	0xc0002003
+#define MSR_AMD64_SMCA_MC0_CONFIG	0xc0002004
+#define MSR_AMD64_SMCA_MC0_IPID		0xc0002005
+#define MSR_AMD64_SMCA_MC0_MISC1	0xc000200a
+#define MSR_AMD64_SMCA_MCx_MISC(x)	(MSR_AMD64_SMCA_MC0_MISC0 + 0x10*(x))
+#define MSR_AMD64_SMCA_MCx_CONFIG(x)	(MSR_AMD64_SMCA_MC0_CONFIG + 0x10*(x))
+#define MSR_AMD64_SMCA_MCx_IPID(x)	(MSR_AMD64_SMCA_MC0_IPID + 0x10*(x))
+#define MSR_AMD64_SMCA_MCx_MISCy(x, y)	((MSR_AMD64_SMCA_MC0_MISC1 + y) + (0x10*(x)))
+
 /*
  * This structure contains all data related to the MCE log.  Also
  * carries a signature to make it easier to find from external
@@ -113,6 +135,7 @@
 	bool ignore_ce;
 	bool disabled;
 	bool ser;
+	bool recovery;
 	bool bios_cmci_threshold;
 	u8 banks;
 	s8 bootlog;
@@ -287,4 +310,49 @@
 extern void apei_mce_report_mem_error(int corrected,
 				      struct cper_sec_mem_err *mem_err);
 
+/*
+ * Enumerate new IP types and HWID values in AMD processors which support
+ * Scalable MCA.
+ */
+#ifdef CONFIG_X86_MCE_AMD
+enum amd_ip_types {
+	SMCA_F17H_CORE = 0,	/* Core errors */
+	SMCA_DF,		/* Data Fabric */
+	SMCA_UMC,		/* Unified Memory Controller */
+	SMCA_PB,		/* Parameter Block */
+	SMCA_PSP,		/* Platform Security Processor */
+	SMCA_SMU,		/* System Management Unit */
+	N_AMD_IP_TYPES
+};
+
+struct amd_hwid {
+	const char *name;
+	unsigned int hwid;
+};
+
+extern struct amd_hwid amd_hwids[N_AMD_IP_TYPES];
+
+enum amd_core_mca_blocks {
+	SMCA_LS = 0,	/* Load Store */
+	SMCA_IF,	/* Instruction Fetch */
+	SMCA_L2_CACHE,	/* L2 cache */
+	SMCA_DE,	/* Decoder unit */
+	RES,		/* Reserved */
+	SMCA_EX,	/* Execution unit */
+	SMCA_FP,	/* Floating Point */
+	SMCA_L3_CACHE,	/* L3 cache */
+	N_CORE_MCA_BLOCKS
+};
+
+extern const char * const amd_core_mcablock_names[N_CORE_MCA_BLOCKS];
+
+enum amd_df_mca_blocks {
+	SMCA_CS = 0,	/* Coherent Slave */
+	SMCA_PIE,	/* Power management, Interrupts, etc */
+	N_DF_BLOCKS
+};
+
+extern const char * const amd_df_mcablock_names[N_DF_BLOCKS];
+#endif
+
 #endif /* _ASM_X86_MCE_H */

diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index 1e1b07a..9d3a96c 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h

@@ -3,6 +3,7 @@
 
 #include <asm/cpu.h>
 #include <linux/earlycpio.h>
+#include <linux/initrd.h>
 
 #define native_rdmsr(msr, val1, val2)			\
 do {							\
@@ -143,4 +144,29 @@
 static inline bool
 get_builtin_firmware(struct cpio_data *cd, const char *name)	{ return false; }
 #endif
+
+static inline unsigned long get_initrd_start(void)
+{
+#ifdef CONFIG_BLK_DEV_INITRD
+	return initrd_start;
+#else
+	return 0;
+#endif
+}
+
+static inline unsigned long get_initrd_start_addr(void)
+{
+#ifdef CONFIG_BLK_DEV_INITRD
+#ifdef CONFIG_X86_32
+	unsigned long *initrd_start_p = (unsigned long *)__pa_nodebug(&initrd_start);
+
+	return (unsigned long)__pa_nodebug(*initrd_start_p);
+#else
+	return get_initrd_start();
+#endif
+#else /* CONFIG_BLK_DEV_INITRD */
+	return 0;
+#endif
+}
+
 #endif /* _ASM_X86_MICROCODE_H */

diff --git a/arch/x86/include/asm/microcode_intel.h b/arch/x86/include/asm/microcode_intel.h
index 8559b01..603417f 100644
--- a/arch/x86/include/asm/microcode_intel.h
+++ b/arch/x86/include/asm/microcode_intel.h

@@ -40,7 +40,6 @@
 #define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE)
 #define EXT_HEADER_SIZE		(sizeof(struct extended_sigtable))
 #define EXT_SIGNATURE_SIZE	(sizeof(struct extended_signature))
-#define DWSIZE			(sizeof(u32))
 
 #define get_totalsize(mc) \
 	(((struct microcode_intel *)mc)->hdr.datasize ? \

diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 55234d5..1ea0bae 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h

@@ -19,7 +19,8 @@
 #endif
 
 	struct mutex lock;
-	void __user *vdso;
+	void __user *vdso;			/* vdso base address */
+	const struct vdso_image *vdso_image;	/* vdso image in use */
 
 	atomic_t perf_rdpmc_allowed;	/* nonzero if rdpmc is allowed */
 } mm_context_t;

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index b05402e..984ab75 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h

@@ -1,7 +1,12 @@
 #ifndef _ASM_X86_MSR_INDEX_H
 #define _ASM_X86_MSR_INDEX_H
 
-/* CPU model specific register (MSR) numbers */
+/*
+ * CPU model specific register (MSR) numbers.
+ *
+ * Do not add new entries to this file unless the definitions are shared
+ * between multiple compilation units.
+ */
 
 /* x86-64 specific MSRs */
 #define MSR_EFER		0xc0000080 /* extended feature register */

diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index c70689b..0deeb2d 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h

@@ -3,6 +3,8 @@
 
 #include <linux/sched.h>
 
+#include <asm/cpufeature.h>
+
 #define MWAIT_SUBSTATE_MASK		0xf
 #define MWAIT_CSTATE_MASK		0xf
 #define MWAIT_SUBSTATE_SIZE		4

diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index 46873fb..d08eacd2 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h

@@ -93,6 +93,8 @@
 extern int (*pcibios_enable_irq)(struct pci_dev *dev);
 extern void (*pcibios_disable_irq)(struct pci_dev *dev);
 
+extern bool mp_should_keep_irq(struct device *dev);
+
 struct pci_raw_ops {
 	int (*read)(unsigned int domain, unsigned int bus, unsigned int devfn,
 						int reg, int len, u32 *val);

diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 7bcb861..5a2ed3e 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h

@@ -165,6 +165,7 @@
 #define GLOBAL_STATUS_ASIF				BIT_ULL(60)
 #define GLOBAL_STATUS_COUNTERS_FROZEN			BIT_ULL(59)
 #define GLOBAL_STATUS_LBRS_FROZEN			BIT_ULL(58)
+#define GLOBAL_STATUS_TRACE_TOPAPMI			BIT_ULL(55)
 
 /*
  * IBS cpuid feature detection

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 20c11d1..983738a 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h

@@ -13,7 +13,7 @@
 #include <asm/types.h>
 #include <uapi/asm/sigcontext.h>
 #include <asm/current.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/page.h>
 #include <asm/pgtable_types.h>
 #include <asm/percpu.h>
@@ -24,7 +24,6 @@
 #include <asm/fpu/types.h>
 
 #include <linux/personality.h>
-#include <linux/cpumask.h>
 #include <linux/cache.h>
 #include <linux/threads.h>
 #include <linux/math64.h>
@@ -129,6 +128,8 @@
 	u16			booted_cores;
 	/* Physical processor id: */
 	u16			phys_proc_id;
+	/* Logical processor id: */
+	u16			logical_proc_id;
 	/* Core id: */
 	u16			cpu_core_id;
 	/* Compute unit id */
@@ -298,10 +299,13 @@
 	 */
 	unsigned long		io_bitmap[IO_BITMAP_LONGS + 1];
 
+#ifdef CONFIG_X86_32
 	/*
-	 * Space for the temporary SYSENTER stack:
+	 * Space for the temporary SYSENTER stack.
 	 */
+	unsigned long		SYSENTER_stack_canary;
 	unsigned long		SYSENTER_stack[64];
+#endif
 
 } ____cacheline_aligned;
 

diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index a4a7728..9b9b30b1 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h

@@ -7,12 +7,23 @@
 
 void syscall_init(void);
 
+#ifdef CONFIG_X86_64
 void entry_SYSCALL_64(void);
-void entry_SYSCALL_compat(void);
+#endif
+
+#ifdef CONFIG_X86_32
 void entry_INT80_32(void);
-void entry_INT80_compat(void);
 void entry_SYSENTER_32(void);
+void __begin_SYSENTER_singlestep_region(void);
+void __end_SYSENTER_singlestep_region(void);
+#endif
+
+#ifdef CONFIG_IA32_EMULATION
 void entry_SYSENTER_compat(void);
+void __end_entry_SYSENTER_compat(void);
+void entry_SYSCALL_compat(void);
+void entry_INT80_compat(void);
+#endif
 
 void x86_configure_nx(void);
 void x86_report_nx(void);

diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h
index 0a52424..13b6cdd 100644
--- a/arch/x86/include/asm/sections.h
+++ b/arch/x86/include/asm/sections.h

@@ -7,7 +7,7 @@
 extern char __brk_base[], __brk_limit[];
 extern struct exception_table_entry __stop___ex_table[];
 
-#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA)
+#if defined(CONFIG_X86_64)
 extern char __end_rodata_hpage_align[];
 #endif
 

diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h
index 89db467..452c88b 100644
--- a/arch/x86/include/asm/sighandling.h
+++ b/arch/x86/include/asm/sighandling.h

@@ -13,7 +13,6 @@
 			 X86_EFLAGS_CF | X86_EFLAGS_RF)
 
 void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
-int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc);
 int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
 		     struct pt_regs *regs, unsigned long mask);
 

diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h
index ba665eb..db33330 100644
--- a/arch/x86/include/asm/smap.h
+++ b/arch/x86/include/asm/smap.h

@@ -15,7 +15,7 @@
 
 #include <linux/stringify.h>
 #include <asm/nops.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 
 /* "Raw" instruction opcodes */
 #define __ASM_CLAC	.byte 0x0f,0x01,0xca

diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index dfcf072..20a3de5 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h

@@ -16,7 +16,6 @@
 #endif
 #include <asm/thread_info.h>
 #include <asm/cpumask.h>
-#include <asm/cpufeature.h>
 
 extern int smp_num_siblings;
 extern unsigned int num_processors;

diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
index ff8b9a1..ca6ba36 100644
--- a/arch/x86/include/asm/string_64.h
+++ b/arch/x86/include/asm/string_64.h

@@ -78,6 +78,19 @@
 #define memset(s, c, n) __memset(s, c, n)
 #endif
 
+/**
+ * memcpy_mcsafe - copy memory with indication if a machine check happened
+ *
+ * @dst:	destination address
+ * @src:	source address
+ * @cnt:	number of bytes to copy
+ *
+ * Low level memory copy function that catches machine checks
+ *
+ * Return true for success, false for fail
+ */
+bool memcpy_mcsafe(void *dst, const void *src, size_t cnt);
+
 #endif /* __KERNEL__ */
 
 #endif /* _ASM_X86_STRING_64_H */

diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index c7b5510..8286669 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h

@@ -49,7 +49,7 @@
  */
 #ifndef __ASSEMBLY__
 struct task_struct;
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
 #include <linux/atomic.h>
 
 struct thread_info {
@@ -134,10 +134,13 @@
 #define _TIF_ADDR32		(1 << TIF_ADDR32)
 #define _TIF_X32		(1 << TIF_X32)
 
-/* work to do in syscall_trace_enter() */
+/*
+ * work to do in syscall_trace_enter().  Also includes TIF_NOHZ for
+ * enter_from_user_mode()
+ */
 #define _TIF_WORK_SYSCALL_ENTRY	\
 	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_AUDIT |	\
-	 _TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT |	\
+	 _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT |	\
 	 _TIF_NOHZ)
 
 /* work to do on any return to user space */

diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 6df2029..c24b422 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h

@@ -5,8 +5,57 @@
 #include <linux/sched.h>
 
 #include <asm/processor.h>
+#include <asm/cpufeature.h>
 #include <asm/special_insns.h>
 
+static inline void __invpcid(unsigned long pcid, unsigned long addr,
+			     unsigned long type)
+{
+	struct { u64 d[2]; } desc = { { pcid, addr } };
+
+	/*
+	 * The memory clobber is because the whole point is to invalidate
+	 * stale TLB entries and, especially if we're flushing global
+	 * mappings, we don't want the compiler to reorder any subsequent
+	 * memory accesses before the TLB flush.
+	 *
+	 * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and
+	 * invpcid (%rcx), %rax in long mode.
+	 */
+	asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01"
+		      : : "m" (desc), "a" (type), "c" (&desc) : "memory");
+}
+
+#define INVPCID_TYPE_INDIV_ADDR		0
+#define INVPCID_TYPE_SINGLE_CTXT	1
+#define INVPCID_TYPE_ALL_INCL_GLOBAL	2
+#define INVPCID_TYPE_ALL_NON_GLOBAL	3
+
+/* Flush all mappings for a given pcid and addr, not including globals. */
+static inline void invpcid_flush_one(unsigned long pcid,
+				     unsigned long addr)
+{
+	__invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR);
+}
+
+/* Flush all mappings for a given PCID, not including globals. */
+static inline void invpcid_flush_single_context(unsigned long pcid)
+{
+	__invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT);
+}
+
+/* Flush all mappings, including globals, for all PCIDs. */
+static inline void invpcid_flush_all(void)
+{
+	__invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL);
+}
+
+/* Flush all mappings for all PCIDs except globals. */
+static inline void invpcid_flush_all_nonglobals(void)
+{
+	__invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL);
+}
+
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
 #else
@@ -104,6 +153,15 @@
 {
 	unsigned long flags;
 
+	if (static_cpu_has(X86_FEATURE_INVPCID)) {
+		/*
+		 * Using INVPCID is considerably faster than a pair of writes
+		 * to CR4 sandwiched inside an IRQ flag save/restore.
+		 */
+		invpcid_flush_all();
+		return;
+	}
+
 	/*
 	 * Read-modify-write to CR4 - protect it from preemption and
 	 * from interrupts. (Use the raw variant because this code can

diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 0fb4648..7f991bd5 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h

@@ -119,12 +119,23 @@
 
 extern const struct cpumask *cpu_coregroup_mask(int cpu);
 
+#define topology_logical_package_id(cpu)	(cpu_data(cpu).logical_proc_id)
 #define topology_physical_package_id(cpu)	(cpu_data(cpu).phys_proc_id)
 #define topology_core_id(cpu)			(cpu_data(cpu).cpu_core_id)
 
 #ifdef ENABLE_TOPO_DEFINES
 #define topology_core_cpumask(cpu)		(per_cpu(cpu_core_map, cpu))
 #define topology_sibling_cpumask(cpu)		(per_cpu(cpu_sibling_map, cpu))
+
+extern unsigned int __max_logical_packages;
+#define topology_max_packages()			(__max_logical_packages)
+int topology_update_package_map(unsigned int apicid, unsigned int cpu);
+extern int topology_phys_to_logical_pkg(unsigned int pkg);
+#else
+#define topology_max_packages()			(1)
+static inline int
+topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; }
+static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; }
 #endif
 
 static inline void arch_fix_phys_package_id(int num, u32 slot)

diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index 6d7c547..174c421 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h

@@ -29,6 +29,8 @@
 	return rdtsc();
 }
 
+extern struct system_counterval_t convert_art_to_tsc(cycle_t art);
+
 extern void tsc_init(void);
 extern void mark_tsc_unstable(char *reason);
 extern int unsynchronized_tsc(void);

diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index a4a30e4..c0f27d7 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h

@@ -90,12 +90,11 @@
 	likely(!__range_not_ok(addr, size, user_addr_max()))
 
 /*
- * The exception table consists of pairs of addresses relative to the
- * exception table enty itself: the first is the address of an
- * instruction that is allowed to fault, and the second is the address
- * at which the program should continue.  No registers are modified,
- * so it is entirely up to the continuation code to figure out what to
- * do.
+ * The exception table consists of triples of addresses relative to the
+ * exception table entry itself. The first address is of an instruction
+ * that is allowed to fault, the second is the target at which the program
+ * should continue. The third is a handler function to deal with the fault
+ * caused by the instruction in the first field.
  *
  * All the routines below use bits of fixup code that are out of line
  * with the main instruction path.  This means when everything is well,
@@ -104,13 +103,14 @@
  */
 
 struct exception_table_entry {
-	int insn, fixup;
+	int insn, fixup, handler;
 };
 /* This is not the generic standard exception_table_entry format */
 #define ARCH_HAS_SORT_EXTABLE
 #define ARCH_HAS_SEARCH_EXTABLE
 
-extern int fixup_exception(struct pt_regs *regs);
+extern int fixup_exception(struct pt_regs *regs, int trapnr);
+extern bool ex_has_fault_handler(unsigned long ip);
 extern int early_fixup_exception(unsigned long *ip);
 
 /*

diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index f5dcb52..3fe0eac 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h

@@ -48,20 +48,28 @@
 
 		switch (n) {
 		case 1:
+			__uaccess_begin();
 			__put_user_size(*(u8 *)from, (u8 __user *)to,
 					1, ret, 1);
+			__uaccess_end();
 			return ret;
 		case 2:
+			__uaccess_begin();
 			__put_user_size(*(u16 *)from, (u16 __user *)to,
 					2, ret, 2);
+			__uaccess_end();
 			return ret;
 		case 4:
+			__uaccess_begin();
 			__put_user_size(*(u32 *)from, (u32 __user *)to,
 					4, ret, 4);
+			__uaccess_end();
 			return ret;
 		case 8:
+			__uaccess_begin();
 			__put_user_size(*(u64 *)from, (u64 __user *)to,
 					8, ret, 8);
+			__uaccess_end();
 			return ret;
 		}
 	}
@@ -103,13 +111,19 @@
 
 		switch (n) {
 		case 1:
+			__uaccess_begin();
 			__get_user_size(*(u8 *)to, from, 1, ret, 1);
+			__uaccess_end();
 			return ret;
 		case 2:
+			__uaccess_begin();
 			__get_user_size(*(u16 *)to, from, 2, ret, 2);
+			__uaccess_end();
 			return ret;
 		case 4:
+			__uaccess_begin();
 			__get_user_size(*(u32 *)to, from, 4, ret, 4);
+			__uaccess_end();
 			return ret;
 		}
 	}
@@ -148,13 +162,19 @@
 
 		switch (n) {
 		case 1:
+			__uaccess_begin();
 			__get_user_size(*(u8 *)to, from, 1, ret, 1);
+			__uaccess_end();
 			return ret;
 		case 2:
+			__uaccess_begin();
 			__get_user_size(*(u16 *)to, from, 2, ret, 2);
+			__uaccess_end();
 			return ret;
 		case 4:
+			__uaccess_begin();
 			__get_user_size(*(u32 *)to, from, 4, ret, 4);
+			__uaccess_end();
 			return ret;
 		}
 	}
@@ -170,13 +190,19 @@
 
 		switch (n) {
 		case 1:
+			__uaccess_begin();
 			__get_user_size(*(u8 *)to, from, 1, ret, 1);
+			__uaccess_end();
 			return ret;
 		case 2:
+			__uaccess_begin();
 			__get_user_size(*(u16 *)to, from, 2, ret, 2);
+			__uaccess_end();
 			return ret;
 		case 4:
+			__uaccess_begin();
 			__get_user_size(*(u32 *)to, from, 4, ret, 4);
+			__uaccess_end();
 			return ret;
 		}
 	}

diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index b89c34c..3076986 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h

@@ -8,7 +8,7 @@
 #include <linux/errno.h>
 #include <linux/lockdep.h>
 #include <asm/alternative.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/page.h>
 
 /*

diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index deabaf9..43dc55b 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h

@@ -13,9 +13,6 @@
 	void *data;
 	unsigned long size;   /* Always a multiple of PAGE_SIZE */
 
-	/* text_mapping.pages is big enough for data/size page pointers */
-	struct vm_special_mapping text_mapping;
-
 	unsigned long alt, alt_len;
 
 	long sym_vvar_start;  /* Negative offset to the vvar area */

diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index f556c48..e728699 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h

@@ -37,6 +37,12 @@
 };
 extern struct vsyscall_gtod_data vsyscall_gtod_data;
 
+extern int vclocks_used;
+static inline bool vclock_was_used(int vclock)
+{
+	return READ_ONCE(vclocks_used) & (1 << vclock);
+}
+
 static inline unsigned gtod_read_begin(const struct vsyscall_gtod_data *s)
 {
 	unsigned ret;

diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h
index 968d57d..f320ee3 100644
--- a/arch/x86/include/asm/xen/pci.h
+++ b/arch/x86/include/asm/xen/pci.h

@@ -57,7 +57,7 @@
 {
 	if (xen_pci_frontend && xen_pci_frontend->enable_msi)
 		return xen_pci_frontend->enable_msi(dev, vectors);
-	return -ENODEV;
+	return -ENOSYS;
 }
 static inline void xen_pci_frontend_disable_msi(struct pci_dev *dev)
 {
@@ -69,7 +69,7 @@
 {
 	if (xen_pci_frontend && xen_pci_frontend->enable_msix)
 		return xen_pci_frontend->enable_msix(dev, vectors, nvec);
-	return -ENODEV;
+	return -ENOSYS;
 }
 static inline void xen_pci_frontend_disable_msix(struct pci_dev *dev)
 {

diff --git a/arch/x86/include/uapi/asm/sigcontext.h b/arch/x86/include/uapi/asm/sigcontext.h
index d485232..62d4111 100644
--- a/arch/x86/include/uapi/asm/sigcontext.h
+++ b/arch/x86/include/uapi/asm/sigcontext.h

@@ -256,7 +256,7 @@
 	__u16				cs;
 	__u16				gs;
 	__u16				fs;
-	__u16				__pad0;
+	__u16				ss;
 	__u64				err;
 	__u64				trapno;
 	__u64				oldmask;
@@ -341,9 +341,37 @@
 	__u64				rip;
 	__u64				eflags;		/* RFLAGS */
 	__u16				cs;
+
+	/*
+	 * Prior to 2.5.64 ("[PATCH] x86-64 updates for 2.5.64-bk3"),
+	 * Linux saved and restored fs and gs in these slots.  This
+	 * was counterproductive, as fsbase and gsbase were never
+	 * saved, so arch_prctl was presumably unreliable.
+	 *
+	 * These slots should never be reused without extreme caution:
+	 *
+	 *  - Some DOSEMU versions stash fs and gs in these slots manually,
+	 *    thus overwriting anything the kernel expects to be preserved
+	 *    in these slots.
+	 *
+	 *  - If these slots are ever needed for any other purpose,
+	 *    there is some risk that very old 64-bit binaries could get
+	 *    confused.  I doubt that many such binaries still work,
+	 *    though, since the same patch in 2.5.64 also removed the
+	 *    64-bit set_thread_area syscall, so it appears that there
+	 *    is no TLS API beyond modify_ldt that works in both pre-
+	 *    and post-2.5.64 kernels.
+	 *
+	 * If the kernel ever adds explicit fs, gs, fsbase, and gsbase
+	 * save/restore, it will most likely need to be opt-in and use
+	 * different context slots.
+	 */
 	__u16				gs;
 	__u16				fs;
-	__u16				__pad0;
+	union {
+		__u16			ss;	/* If UC_SIGCONTEXT_SS */
+		__u16			__pad0;	/* Alias name for old (!UC_SIGCONTEXT_SS) user-space */
+	};
 	__u64				err;
 	__u64				trapno;
 	__u64				oldmask;

diff --git a/arch/x86/include/uapi/asm/ucontext.h b/arch/x86/include/uapi/asm/ucontext.h
index b7c29c8..e3d1ec9 100644
--- a/arch/x86/include/uapi/asm/ucontext.h
+++ b/arch/x86/include/uapi/asm/ucontext.h

@@ -1,11 +1,54 @@
 #ifndef _ASM_X86_UCONTEXT_H
 #define _ASM_X86_UCONTEXT_H
 
-#define UC_FP_XSTATE	0x1	/* indicates the presence of extended state
-				 * information in the memory layout pointed
-				 * by the fpstate pointer in the ucontext's
-				 * sigcontext struct (uc_mcontext).
-				 */
+/*
+ * Indicates the presence of extended state information in the memory
+ * layout pointed by the fpstate pointer in the ucontext's sigcontext
+ * struct (uc_mcontext).
+ */
+#define UC_FP_XSTATE	0x1
+
+#ifdef __x86_64__
+/*
+ * UC_SIGCONTEXT_SS will be set when delivering 64-bit or x32 signals on
+ * kernels that save SS in the sigcontext.  All kernels that set
+ * UC_SIGCONTEXT_SS will correctly restore at least the low 32 bits of esp
+ * regardless of SS (i.e. they implement espfix).
+ *
+ * Kernels that set UC_SIGCONTEXT_SS will also set UC_STRICT_RESTORE_SS
+ * when delivering a signal that came from 64-bit code.
+ *
+ * Sigreturn restores SS as follows:
+ *
+ * if (saved SS is valid || UC_STRICT_RESTORE_SS is set ||
+ *     saved CS is not 64-bit)
+ *         new SS = saved SS  (will fail IRET and signal if invalid)
+ * else
+ *         new SS = a flat 32-bit data segment
+ *
+ * This behavior serves three purposes:
+ *
+ * - Legacy programs that construct a 64-bit sigcontext from scratch
+ *   with zero or garbage in the SS slot (e.g. old CRIU) and call
+ *   sigreturn will still work.
+ *
+ * - Old DOSEMU versions sometimes catch a signal from a segmented
+ *   context, delete the old SS segment (with modify_ldt), and change
+ *   the saved CS to a 64-bit segment.  These DOSEMU versions expect
+ *   sigreturn to send them back to 64-bit mode without killing them,
+ *   despite the fact that the SS selector when the signal was raised is
+ *   no longer valid.  UC_STRICT_RESTORE_SS will be clear, so the kernel
+ *   will fix up SS for these DOSEMU versions.
+ *
+ * - Old and new programs that catch a signal and return without
+ *   modifying the saved context will end up in exactly the state they
+ *   started in, even if they were running in a segmented context when
+ *   the signal was raised..  Old kernels would lose track of the
+ *   previous SS value.
+ */
+#define UC_SIGCONTEXT_SS	0x2
+#define UC_STRICT_RESTORE_SS	0x4
+#endif
 
 #include <asm-generic/ucontext.h>
 

diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index d1daead..adb3eaf 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c

@@ -16,6 +16,7 @@
 #include <asm/cacheflush.h>
 #include <asm/realmode.h>
 
+#include <linux/ftrace.h>
 #include "../../realmode/rm/wakeup.h"
 #include "sleep.h"
 
@@ -107,7 +108,13 @@
        saved_magic = 0x123456789abcdef0L;
 #endif /* CONFIG_64BIT */
 
+	/*
+	 * Pause/unpause graph tracing around do_suspend_lowlevel as it has
+	 * inconsistent call/return info after it jumps to the wakeup vector.
+	 */
+	pause_graph_tracing();
 	do_suspend_lowlevel();
+	unpause_graph_tracing();
 	return 0;
 }
 

diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 8a5cdda..531b961 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c

@@ -2078,6 +2078,20 @@
 		cpu = cpumask_next_zero(-1, cpu_present_mask);
 
 	/*
+	 * This can happen on physical hotplug. The sanity check at boot time
+	 * is done from native_smp_prepare_cpus() after num_possible_cpus() is
+	 * established.
+	 */
+	if (topology_update_package_map(apicid, cpu) < 0) {
+		int thiscpu = max + disabled_cpus;
+
+		pr_warning("ACPI: Package limit reached. Processor %d/0x%x ignored.\n",
+			   thiscpu, apicid);
+		disabled_cpus++;
+		return -ENOSPC;
+	}
+
+	/*
 	 * Validate version
 	 */
 	if (version == 0x0) {

diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 9968f30..76f89e2 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c

@@ -53,7 +53,7 @@
 	apic_write(APIC_LDR, val);
 }
 
-static inline void _flat_send_IPI_mask(unsigned long mask, int vector)
+static void _flat_send_IPI_mask(unsigned long mask, int vector)
 {
 	unsigned long flags;
 

diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index c80c02c..ab5c2c6 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c

@@ -30,7 +30,7 @@
 	unsigned long value;
 	unsigned int id = (x >> 24) & 0xff;
 
-	if (static_cpu_has_safe(X86_FEATURE_NODEID_MSR)) {
+	if (static_cpu_has(X86_FEATURE_NODEID_MSR)) {
 		rdmsrl(MSR_FAM10H_NODE_ID, value);
 		id |= (value << 2) & 0xff00;
 	}
@@ -178,7 +178,7 @@
 	this_cpu_write(cpu_llc_id, node);
 
 	/* Account for nodes per socket in multi-core-module processors */
-	if (static_cpu_has_safe(X86_FEATURE_NODEID_MSR)) {
+	if (static_cpu_has(X86_FEATURE_NODEID_MSR)) {
 		rdmsrl(MSR_FAM10H_NODE_ID, val);
 		nodes = ((val >> 3) & 7) + 1;
 	}

diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index eb45fc9..28bde88 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c

@@ -18,6 +18,66 @@
 #include <asm/proto.h>
 #include <asm/ipi.h>
 
+void __default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest)
+{
+	/*
+	 * Subtle. In the case of the 'never do double writes' workaround
+	 * we have to lock out interrupts to be safe.  As we don't care
+	 * of the value read we use an atomic rmw access to avoid costly
+	 * cli/sti.  Otherwise we use an even cheaper single atomic write
+	 * to the APIC.
+	 */
+	unsigned int cfg;
+
+	/*
+	 * Wait for idle.
+	 */
+	__xapic_wait_icr_idle();
+
+	/*
+	 * No need to touch the target chip field
+	 */
+	cfg = __prepare_ICR(shortcut, vector, dest);
+
+	/*
+	 * Send the IPI. The write to APIC_ICR fires this off.
+	 */
+	native_apic_mem_write(APIC_ICR, cfg);
+}
+
+/*
+ * This is used to send an IPI with no shorthand notation (the destination is
+ * specified in bits 56 to 63 of the ICR).
+ */
+void __default_send_IPI_dest_field(unsigned int mask, int vector, unsigned int dest)
+{
+	unsigned long cfg;
+
+	/*
+	 * Wait for idle.
+	 */
+	if (unlikely(vector == NMI_VECTOR))
+		safe_apic_wait_icr_idle();
+	else
+		__xapic_wait_icr_idle();
+
+	/*
+	 * prepare target chip field
+	 */
+	cfg = __prepare_ICR2(mask);
+	native_apic_mem_write(APIC_ICR2, cfg);
+
+	/*
+	 * program the ICR
+	 */
+	cfg = __prepare_ICR(0, vector, dest);
+
+	/*
+	 * Send the IPI. The write to APIC_ICR fires this off.
+	 */
+	native_apic_mem_write(APIC_ICR, cfg);
+}
+
 void default_send_IPI_single_phys(int cpu, int vector)
 {
 	unsigned long flags;

diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 84a7524..5c04246 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c

@@ -59,7 +59,6 @@
 
 #ifdef CONFIG_PARAVIRT
 	BLANK();
-	OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
 	OFFSET(PARAVIRT_PATCH_pv_cpu_ops, paravirt_patch_template, pv_cpu_ops);
 	OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops);
 	OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);

diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 6ce3902..ecdc1d2 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c

@@ -7,7 +7,7 @@
 #include <linux/lguest.h>
 #include "../../../drivers/lguest/lg.h"
 
-#define __SYSCALL_I386(nr, sym, compat) [nr] = 1,
+#define __SYSCALL_I386(nr, sym, qual) [nr] = 1,
 static char syscalls[] = {
 #include <asm/syscalls_32.h>
 };
@@ -52,6 +52,11 @@
 	DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) -
 	       offsetofend(struct tss_struct, SYSENTER_stack));
 
+	/* Offset from cpu_tss to SYSENTER_stack */
+	OFFSET(CPU_TSS_SYSENTER_stack, tss_struct, SYSENTER_stack);
+	/* Size of SYSENTER_stack */
+	DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack));
+
 #if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE)
 	BLANK();
 	OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled);

diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index f2edafb..d875f97 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c

@@ -4,17 +4,11 @@
 
 #include <asm/ia32.h>
 
-#define __SYSCALL_64(nr, sym, compat) [nr] = 1,
-#define __SYSCALL_COMMON(nr, sym, compat) [nr] = 1,
-#ifdef CONFIG_X86_X32_ABI
-# define __SYSCALL_X32(nr, sym, compat) [nr] = 1,
-#else
-# define __SYSCALL_X32(nr, sym, compat) /* nothing */
-#endif
+#define __SYSCALL_64(nr, sym, qual) [nr] = 1,
 static char syscalls_64[] = {
 #include <asm/syscalls_64.h>
 };
-#define __SYSCALL_I386(nr, sym, compat) [nr] = 1,
+#define __SYSCALL_I386(nr, sym, qual) [nr] = 1,
 static char syscalls_ia32[] = {
 #include <asm/syscalls_32.h>
 };

diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 5803130..0d373d7 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile

@@ -30,33 +30,11 @@
 obj-$(CONFIG_CPU_SUP_TRANSMETA_32)	+= transmeta.o
 obj-$(CONFIG_CPU_SUP_UMC_32)		+= umc.o
 
-obj-$(CONFIG_PERF_EVENTS)		+= perf_event.o
-
-ifdef CONFIG_PERF_EVENTS
-obj-$(CONFIG_CPU_SUP_AMD)		+= perf_event_amd.o perf_event_amd_uncore.o
-ifdef CONFIG_AMD_IOMMU
-obj-$(CONFIG_CPU_SUP_AMD)		+= perf_event_amd_iommu.o
-endif
-obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_p6.o perf_event_knc.o perf_event_p4.o
-obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
-obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_intel_rapl.o perf_event_intel_cqm.o
-obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_intel_pt.o perf_event_intel_bts.o
-obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_intel_cstate.o
-
-obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE)	+= perf_event_intel_uncore.o \
-					   perf_event_intel_uncore_snb.o \
-					   perf_event_intel_uncore_snbep.o \
-					   perf_event_intel_uncore_nhmex.o
-obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_msr.o
-obj-$(CONFIG_CPU_SUP_AMD)		+= perf_event_msr.o
-endif
-
-
 obj-$(CONFIG_X86_MCE)			+= mcheck/
 obj-$(CONFIG_MTRR)			+= mtrr/
 obj-$(CONFIG_MICROCODE)			+= microcode/
 
-obj-$(CONFIG_X86_LOCAL_APIC)		+= perfctr-watchdog.o perf_event_amd_ibs.o
+obj-$(CONFIG_X86_LOCAL_APIC)		+= perfctr-watchdog.o
 
 obj-$(CONFIG_HYPERVISOR_GUEST)		+= vmware.o hypervisor.o mshyperv.o
 
@@ -64,7 +42,7 @@
 quiet_cmd_mkcapflags = MKCAP   $@
       cmd_mkcapflags = $(CONFIG_SHELL) $(srctree)/$(src)/mkcapflags.sh $< $@
 
-cpufeature = $(src)/../../include/asm/cpufeature.h
+cpufeature = $(src)/../../include/asm/cpufeatures.h
 
 targets += capflags.c
 $(obj)/capflags.c: $(cpufeature) $(src)/mkcapflags.sh FORCE

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index a07956a..97c59fd 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c

@@ -117,7 +117,7 @@
 		void (*f_vide)(void);
 		u64 d, d2;
 
-		printk(KERN_INFO "AMD K6 stepping B detected - ");
+		pr_info("AMD K6 stepping B detected - ");
 
 		/*
 		 * It looks like AMD fixed the 2.6.2 bug and improved indirect
@@ -133,10 +133,9 @@
 		d = d2-d;
 
 		if (d > 20*K6_BUG_LOOP)
-			printk(KERN_CONT
-				"system stability may be impaired when more than 32 MB are used.\n");
+			pr_cont("system stability may be impaired when more than 32 MB are used.\n");
 		else
-			printk(KERN_CONT "probably OK (after B9730xxxx).\n");
+			pr_cont("probably OK (after B9730xxxx).\n");
 	}
 
 	/* K6 with old style WHCR */
@@ -154,7 +153,7 @@
 			wbinvd();
 			wrmsr(MSR_K6_WHCR, l, h);
 			local_irq_restore(flags);
-			printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n",
+			pr_info("Enabling old style K6 write allocation for %d Mb\n",
 				mbytes);
 		}
 		return;
@@ -175,7 +174,7 @@
 			wbinvd();
 			wrmsr(MSR_K6_WHCR, l, h);
 			local_irq_restore(flags);
-			printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n",
+			pr_info("Enabling new style K6 write allocation for %d Mb\n",
 				mbytes);
 		}
 
@@ -202,7 +201,7 @@
 	 */
 	if (c->x86_model >= 6 && c->x86_model <= 10) {
 		if (!cpu_has(c, X86_FEATURE_XMM)) {
-			printk(KERN_INFO "Enabling disabled K7/SSE Support.\n");
+			pr_info("Enabling disabled K7/SSE Support.\n");
 			msr_clear_bit(MSR_K7_HWCR, 15);
 			set_cpu_cap(c, X86_FEATURE_XMM);
 		}
@@ -216,9 +215,8 @@
 	if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) {
 		rdmsr(MSR_K7_CLK_CTL, l, h);
 		if ((l & 0xfff00000) != 0x20000000) {
-			printk(KERN_INFO
-			    "CPU: CLK_CTL MSR was %x. Reprogramming to %x\n",
-					l, ((l & 0x000fffff)|0x20000000));
+			pr_info("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n",
+				l, ((l & 0x000fffff)|0x20000000));
 			wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h);
 		}
 	}
@@ -485,7 +483,7 @@
 		if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) {
 			unsigned long pfn = tseg >> PAGE_SHIFT;
 
-			printk(KERN_DEBUG "tseg: %010llx\n", tseg);
+			pr_debug("tseg: %010llx\n", tseg);
 			if (pfn_range_is_mapped(pfn, pfn + 1))
 				set_memory_4k((unsigned long)__va(tseg), 1);
 		}
@@ -500,8 +498,7 @@
 
 			rdmsrl(MSR_K7_HWCR, val);
 			if (!(val & BIT(24)))
-				printk(KERN_WARNING FW_BUG "TSC doesn't count "
-					"with P0 frequency!\n");
+				pr_warn(FW_BUG "TSC doesn't count with P0 frequency!\n");
 		}
 	}
 

diff --git a/arch/x86/kernel/cpu/bugs_64.c b/arch/x86/kernel/cpu/bugs_64.c
index 04f0fe5..a972ac4 100644
--- a/arch/x86/kernel/cpu/bugs_64.c
+++ b/arch/x86/kernel/cpu/bugs_64.c

@@ -15,7 +15,7 @@
 {
 	identify_boot_cpu();
 #if !defined(CONFIG_SMP)
-	printk(KERN_INFO "CPU: ");
+	pr_info("CPU: ");
 	print_cpu_info(&boot_cpu_data);
 #endif
 	alternative_instructions();

diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index ae20be6..1661d8e 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c

@@ -1,7 +1,7 @@
 #include <linux/bitops.h>
 #include <linux/kernel.h>
 
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
 #include <asm/e820.h>
 #include <asm/mtrr.h>
 #include <asm/msr.h>
@@ -29,7 +29,7 @@
 			rdmsr(MSR_VIA_FCR, lo, hi);
 			lo |= ACE_FCR;		/* enable ACE unit */
 			wrmsr(MSR_VIA_FCR, lo, hi);
-			printk(KERN_INFO "CPU: Enabled ACE h/w crypto\n");
+			pr_info("CPU: Enabled ACE h/w crypto\n");
 		}
 
 		/* enable RNG unit, if present and disabled */
@@ -37,7 +37,7 @@
 			rdmsr(MSR_VIA_RNG, lo, hi);
 			lo |= RNG_ENABLE;	/* enable RNG unit */
 			wrmsr(MSR_VIA_RNG, lo, hi);
-			printk(KERN_INFO "CPU: Enabled h/w RNG\n");
+			pr_info("CPU: Enabled h/w RNG\n");
 		}
 
 		/* store Centaur Extended Feature Flags as
@@ -130,7 +130,7 @@
 			name = "C6";
 			fcr_set = ECX8|DSMC|EDCTLB|EMMX|ERETSTK;
 			fcr_clr = DPDC;
-			printk(KERN_NOTICE "Disabling bugged TSC.\n");
+			pr_notice("Disabling bugged TSC.\n");
 			clear_cpu_cap(c, X86_FEATURE_TSC);
 			break;
 		case 8:
@@ -163,11 +163,11 @@
 		newlo = (lo|fcr_set) & (~fcr_clr);
 
 		if (newlo != lo) {
-			printk(KERN_INFO "Centaur FCR was 0x%X now 0x%X\n",
+			pr_info("Centaur FCR was 0x%X now 0x%X\n",
 				lo, newlo);
 			wrmsr(MSR_IDT_FCR1, newlo, hi);
 		} else {
-			printk(KERN_INFO "Centaur FCR is 0x%X\n", lo);
+			pr_info("Centaur FCR is 0x%X\n", lo);
 		}
 		/* Emulate MTRRs using Centaur's MCR. */
 		set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR);

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 37830de..249461f 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c

@@ -162,6 +162,22 @@
 }
 __setup("nompx", x86_mpx_setup);
 
+static int __init x86_noinvpcid_setup(char *s)
+{
+	/* noinvpcid doesn't accept parameters */
+	if (s)
+		return -EINVAL;
+
+	/* do not emit a message if the feature is not present */
+	if (!boot_cpu_has(X86_FEATURE_INVPCID))
+		return 0;
+
+	setup_clear_cpu_cap(X86_FEATURE_INVPCID);
+	pr_info("noinvpcid: INVPCID feature disabled\n");
+	return 0;
+}
+early_param("noinvpcid", x86_noinvpcid_setup);
+
 #ifdef CONFIG_X86_32
 static int cachesize_override = -1;
 static int disable_x86_serial_nr = 1;
@@ -228,7 +244,7 @@
 	lo |= 0x200000;
 	wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
 
-	printk(KERN_NOTICE "CPU serial number disabled.\n");
+	pr_notice("CPU serial number disabled.\n");
 	clear_cpu_cap(c, X86_FEATURE_PN);
 
 	/* Disabling the serial number may affect the cpuid level */
@@ -329,9 +345,8 @@
 		if (!warn)
 			continue;
 
-		printk(KERN_WARNING
-		       "CPU: CPU feature " X86_CAP_FMT " disabled, no CPUID level 0x%x\n",
-				x86_cap_flag(df->feature), df->level);
+		pr_warn("CPU: CPU feature " X86_CAP_FMT " disabled, no CPUID level 0x%x\n",
+			x86_cap_flag(df->feature), df->level);
 	}
 }
 
@@ -510,7 +525,7 @@
 	smp_num_siblings = (ebx & 0xff0000) >> 16;
 
 	if (smp_num_siblings == 1) {
-		printk_once(KERN_INFO "CPU0: Hyper-Threading is disabled\n");
+		pr_info_once("CPU0: Hyper-Threading is disabled\n");
 		goto out;
 	}
 
@@ -531,10 +546,10 @@
 
 out:
 	if (!printed && (c->x86_max_cores * smp_num_siblings) > 1) {
-		printk(KERN_INFO  "CPU: Physical Processor ID: %d\n",
-		       c->phys_proc_id);
-		printk(KERN_INFO  "CPU: Processor Core ID: %d\n",
-		       c->cpu_core_id);
+		pr_info("CPU: Physical Processor ID: %d\n",
+			c->phys_proc_id);
+		pr_info("CPU: Processor Core ID: %d\n",
+			c->cpu_core_id);
 		printed = 1;
 	}
 #endif
@@ -559,9 +574,8 @@
 		}
 	}
 
-	printk_once(KERN_ERR
-			"CPU: vendor_id '%s' unknown, using generic init.\n" \
-			"CPU: Your system may be unstable.\n", v);
+	pr_err_once("CPU: vendor_id '%s' unknown, using generic init.\n" \
+		    "CPU: Your system may be unstable.\n", v);
 
 	c->x86_vendor = X86_VENDOR_UNKNOWN;
 	this_cpu = &default_cpu;
@@ -760,7 +774,7 @@
 	int count = 0;
 
 #ifdef CONFIG_PROCESSOR_SELECT
-	printk(KERN_INFO "KERNEL supported cpus:\n");
+	pr_info("KERNEL supported cpus:\n");
 #endif
 
 	for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) {
@@ -778,7 +792,7 @@
 			for (j = 0; j < 2; j++) {
 				if (!cpudev->c_ident[j])
 					continue;
-				printk(KERN_INFO "  %s %s\n", cpudev->c_vendor,
+				pr_info("  %s %s\n", cpudev->c_vendor,
 					cpudev->c_ident[j]);
 			}
 		}
@@ -803,6 +817,31 @@
 #else
 	set_cpu_cap(c, X86_FEATURE_NOPL);
 #endif
+
+	/*
+	 * ESPFIX is a strange bug.  All real CPUs have it.  Paravirt
+	 * systems that run Linux at CPL > 0 may or may not have the
+	 * issue, but, even if they have the issue, there's absolutely
+	 * nothing we can do about it because we can't use the real IRET
+	 * instruction.
+	 *
+	 * NB: For the time being, only 32-bit kernels support
+	 * X86_BUG_ESPFIX as such.  64-bit kernels directly choose
+	 * whether to apply espfix using paravirt hooks.  If any
+	 * non-paravirt system ever shows up that does *not* have the
+	 * ESPFIX issue, we can change this.
+	 */
+#ifdef CONFIG_X86_32
+#ifdef CONFIG_PARAVIRT
+	do {
+		extern void native_iret(void);
+		if (pv_cpu_ops.iret == native_iret)
+			set_cpu_bug(c, X86_BUG_ESPFIX);
+	} while (0);
+#else
+	set_cpu_bug(c, X86_BUG_ESPFIX);
+#endif
+#endif
 }
 
 static void generic_identify(struct cpuinfo_x86 *c)
@@ -977,6 +1016,8 @@
 #ifdef CONFIG_NUMA
 	numa_add_cpu(smp_processor_id());
 #endif
+	/* The boot/hotplug time assigment got cleared, restore it */
+	c->logical_proc_id = topology_phys_to_logical_pkg(c->phys_proc_id);
 }
 
 /*
@@ -1061,7 +1102,7 @@
 		for (index = index_min; index < index_max; index++) {
 			if (rdmsrl_safe(index, &val))
 				continue;
-			printk(KERN_INFO " MSR%08x: %016llx\n", index, val);
+			pr_info(" MSR%08x: %016llx\n", index, val);
 		}
 	}
 }
@@ -1100,19 +1141,19 @@
 	}
 
 	if (vendor && !strstr(c->x86_model_id, vendor))
-		printk(KERN_CONT "%s ", vendor);
+		pr_cont("%s ", vendor);
 
 	if (c->x86_model_id[0])
-		printk(KERN_CONT "%s", c->x86_model_id);
+		pr_cont("%s", c->x86_model_id);
 	else
-		printk(KERN_CONT "%d86", c->x86);
+		pr_cont("%d86", c->x86);
 
-	printk(KERN_CONT " (family: 0x%x, model: 0x%x", c->x86, c->x86_model);
+	pr_cont(" (family: 0x%x, model: 0x%x", c->x86, c->x86_model);
 
 	if (c->x86_mask || c->cpuid_level >= 0)
-		printk(KERN_CONT ", stepping: 0x%x)\n", c->x86_mask);
+		pr_cont(", stepping: 0x%x)\n", c->x86_mask);
 	else
-		printk(KERN_CONT ")\n");
+		pr_cont(")\n");
 
 	print_cpu_msr(c);
 }
@@ -1438,7 +1479,7 @@
 
 	show_ucode_info_early();
 
-	printk(KERN_INFO "Initializing CPU#%d\n", cpu);
+	pr_info("Initializing CPU#%d\n", cpu);
 
 	if (cpu_feature_enabled(X86_FEATURE_VME) ||
 	    cpu_has_tsc ||
@@ -1475,20 +1516,6 @@
 }
 #endif
 
-#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS
-void warn_pre_alternatives(void)
-{
-	WARN(1, "You're using static_cpu_has before alternatives have run!\n");
-}
-EXPORT_SYMBOL_GPL(warn_pre_alternatives);
-#endif
-
-inline bool __static_cpu_has_safe(u16 bit)
-{
-	return boot_cpu_has(bit);
-}
-EXPORT_SYMBOL_GPL(__static_cpu_has_safe);
-
 static void bsp_resume(void)
 {
 	if (this_cpu->c_bsp_resume)

diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index aaf152e..6adef9c 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c

@@ -8,6 +8,7 @@
 #include <linux/timer.h>
 #include <asm/pci-direct.h>
 #include <asm/tsc.h>
+#include <asm/cpufeature.h>
 
 #include "cpu.h"
 
@@ -103,7 +104,7 @@
 		local_irq_restore(flags);
 
 		if (ccr5 & 2) { /* possible wrong calibration done */
-			printk(KERN_INFO "Recalibrating delay loop with SLOP bit reset\n");
+			pr_info("Recalibrating delay loop with SLOP bit reset\n");
 			calibrate_delay();
 			c->loops_per_jiffy = loops_per_jiffy;
 		}
@@ -115,7 +116,7 @@
 {
 	u8 ccr3;
 
-	printk(KERN_INFO "Enable Memory access reorder on Cyrix/NSC processor.\n");
+	pr_info("Enable Memory access reorder on Cyrix/NSC processor.\n");
 	ccr3 = getCx86(CX86_CCR3);
 	setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
 
@@ -128,7 +129,7 @@
 
 static void set_cx86_memwb(void)
 {
-	printk(KERN_INFO "Enable Memory-Write-back mode on Cyrix/NSC processor.\n");
+	pr_info("Enable Memory-Write-back mode on Cyrix/NSC processor.\n");
 
 	/* CCR2 bit 2: unlock NW bit */
 	setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) & ~0x04);
@@ -268,7 +269,7 @@
 		 *  VSA1 we work around however.
 		 */
 
-		printk(KERN_INFO "Working around Cyrix MediaGX virtual DMA bugs.\n");
+		pr_info("Working around Cyrix MediaGX virtual DMA bugs.\n");
 		isa_dma_bridge_buggy = 2;
 
 		/* We do this before the PCI layer is running. However we
@@ -426,7 +427,7 @@
 		if (dir0 == 5 || dir0 == 3) {
 			unsigned char ccr3;
 			unsigned long flags;
-			printk(KERN_INFO "Enabling CPUID on Cyrix processor.\n");
+			pr_info("Enabling CPUID on Cyrix processor.\n");
 			local_irq_save(flags);
 			ccr3 = getCx86(CX86_CCR3);
 			/* enable MAPEN  */

diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
index d820d8e..73d391a 100644
--- a/arch/x86/kernel/cpu/hypervisor.c
+++ b/arch/x86/kernel/cpu/hypervisor.c

@@ -56,7 +56,7 @@
 	}
 
 	if (max_pri)
-		printk(KERN_INFO "Hypervisor detected: %s\n", x86_hyper->name);
+		pr_info("Hypervisor detected: %s\n", x86_hyper->name);
 }
 
 void init_hypervisor(struct cpuinfo_x86 *c)

diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 565648bc..1f7fdb9 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c

@@ -8,7 +8,7 @@
 #include <linux/module.h>
 #include <linux/uaccess.h>
 
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
 #include <asm/pgtable.h>
 #include <asm/msr.h>
 #include <asm/bugs.h>
@@ -61,7 +61,7 @@
 	 */
 	if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_mask <= 2 &&
 	    c->microcode < 0x20e) {
-		printk(KERN_WARNING "Atom PSE erratum detected, BIOS microcode update recommended\n");
+		pr_warn("Atom PSE erratum detected, BIOS microcode update recommended\n");
 		clear_cpu_cap(c, X86_FEATURE_PSE);
 	}
 
@@ -140,7 +140,7 @@
 	if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) {
 		rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
 		if (!(misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING)) {
-			printk(KERN_INFO "Disabled fast string operations\n");
+			pr_info("Disabled fast string operations\n");
 			setup_clear_cpu_cap(X86_FEATURE_REP_GOOD);
 			setup_clear_cpu_cap(X86_FEATURE_ERMS);
 		}
@@ -160,6 +160,19 @@
 		pr_info("Disabling PGE capability bit\n");
 		setup_clear_cpu_cap(X86_FEATURE_PGE);
 	}
+
+	if (c->cpuid_level >= 0x00000001) {
+		u32 eax, ebx, ecx, edx;
+
+		cpuid(0x00000001, &eax, &ebx, &ecx, &edx);
+		/*
+		 * If HTT (EDX[28]) is set EBX[16:23] contain the number of
+		 * apicids which are reserved per package. Store the resulting
+		 * shift value for the package management code.
+		 */
+		if (edx & (1U << 28))
+			c->x86_coreid_bits = get_count_order((ebx >> 16) & 0xff);
+	}
 }
 
 #ifdef CONFIG_X86_32
@@ -176,7 +189,7 @@
 	    boot_cpu_data.x86 == 6 &&
 	    boot_cpu_data.x86_model == 1 &&
 	    boot_cpu_data.x86_mask < 8) {
-		printk(KERN_INFO "Pentium Pro with Errata#50 detected. Taking evasive action.\n");
+		pr_info("Pentium Pro with Errata#50 detected. Taking evasive action.\n");
 		return 1;
 	}
 	return 0;
@@ -225,7 +238,7 @@
 
 		set_cpu_bug(c, X86_BUG_F00F);
 		if (!f00f_workaround_enabled) {
-			printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n");
+			pr_notice("Intel Pentium with F0 0F bug - workaround enabled.\n");
 			f00f_workaround_enabled = 1;
 		}
 	}
@@ -244,7 +257,7 @@
 	 * Forcefully enable PAE if kernel parameter "forcepae" is present.
 	 */
 	if (forcepae) {
-		printk(KERN_WARNING "PAE forced!\n");
+		pr_warn("PAE forced!\n");
 		set_cpu_cap(c, X86_FEATURE_PAE);
 		add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_NOW_UNRELIABLE);
 	}

diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 0b6c523..de6626c 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c

@@ -14,7 +14,7 @@
 #include <linux/sysfs.h>
 #include <linux/pci.h>
 
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
 #include <asm/amd_nb.h>
 #include <asm/smp.h>
 
@@ -444,7 +444,7 @@
 	err = amd_set_l3_disable_slot(nb, cpu, slot, val);
 	if (err) {
 		if (err == -EEXIST)
-			pr_warning("L3 slot %d in use/index already disabled!\n",
+			pr_warn("L3 slot %d in use/index already disabled!\n",
 				   slot);
 		return err;
 	}

diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c
index afa9f0d..fbb5e90 100644
--- a/arch/x86/kernel/cpu/match.c
+++ b/arch/x86/kernel/cpu/match.c

@@ -1,5 +1,5 @@
 #include <asm/cpu_device_id.h>
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
 #include <linux/cpu.h>
 #include <linux/module.h>
 #include <linux/slab.h>

diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index 4cfba43..517619e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c

@@ -115,7 +115,7 @@
 	int cpu = m->extcpu;
 
 	if (m->inject_flags & MCJ_EXCEPTION) {
-		printk(KERN_INFO "Triggering MCE exception on CPU %d\n", cpu);
+		pr_info("Triggering MCE exception on CPU %d\n", cpu);
 		switch (context) {
 		case MCJ_CTX_IRQ:
 			/*
@@ -128,15 +128,15 @@
 			raise_exception(m, NULL);
 			break;
 		default:
-			printk(KERN_INFO "Invalid MCE context\n");
+			pr_info("Invalid MCE context\n");
 			ret = -EINVAL;
 		}
-		printk(KERN_INFO "MCE exception done on CPU %d\n", cpu);
+		pr_info("MCE exception done on CPU %d\n", cpu);
 	} else if (m->status) {
-		printk(KERN_INFO "Starting machine check poll CPU %d\n", cpu);
+		pr_info("Starting machine check poll CPU %d\n", cpu);
 		raise_poll(m);
 		mce_notify_irq();
-		printk(KERN_INFO "Machine check poll done on CPU %d\n", cpu);
+		pr_info("Machine check poll done on CPU %d\n", cpu);
 	} else
 		m->finished = 0;
 
@@ -183,8 +183,7 @@
 		start = jiffies;
 		while (!cpumask_empty(mce_inject_cpumask)) {
 			if (!time_before(jiffies, start + 2*HZ)) {
-				printk(KERN_ERR
-				"Timeout waiting for mce inject %lx\n",
+				pr_err("Timeout waiting for mce inject %lx\n",
 					*cpumask_bits(mce_inject_cpumask));
 				break;
 			}
@@ -241,7 +240,7 @@
 {
 	if (!alloc_cpumask_var(&mce_inject_cpumask, GFP_KERNEL))
 		return -ENOMEM;
-	printk(KERN_INFO "Machine check injector initialized\n");
+	pr_info("Machine check injector initialized\n");
 	register_mce_write_callback(mce_write);
 	register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0,
 				"mce_notify");

diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 9c682c2..5119766 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c

@@ -14,6 +14,7 @@
 #include <linux/init.h>
 #include <linux/debugfs.h>
 #include <asm/mce.h>
+#include <asm/uaccess.h>
 
 #include "mce-internal.h"
 
@@ -29,7 +30,7 @@
  * panic situations)
  */
 
-enum context { IN_KERNEL = 1, IN_USER = 2 };
+enum context { IN_KERNEL = 1, IN_USER = 2, IN_KERNEL_RECOV = 3 };
 enum ser { SER_REQUIRED = 1, NO_SER = 2 };
 enum exception { EXCP_CONTEXT = 1, NO_EXCP = 2 };
 
@@ -48,6 +49,7 @@
 #define MCESEV(s, m, c...) { .sev = MCE_ ## s ## _SEVERITY, .msg = m, ## c }
 #define  KERNEL		.context = IN_KERNEL
 #define  USER		.context = IN_USER
+#define  KERNEL_RECOV	.context = IN_KERNEL_RECOV
 #define  SER		.ser = SER_REQUIRED
 #define  NOSER		.ser = NO_SER
 #define  EXCP		.excp = EXCP_CONTEXT
@@ -87,6 +89,10 @@
 		EXCP, KERNEL, MCGMASK(MCG_STATUS_RIPV, 0)
 		),
 	MCESEV(
+		PANIC, "In kernel and no restart IP",
+		EXCP, KERNEL_RECOV, MCGMASK(MCG_STATUS_RIPV, 0)
+		),
+	MCESEV(
 		DEFERRED, "Deferred error",
 		NOSER, MASK(MCI_STATUS_UC|MCI_STATUS_DEFERRED|MCI_STATUS_POISON, MCI_STATUS_DEFERRED)
 		),
@@ -123,6 +129,11 @@
 		MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, MCG_STATUS_RIPV)
 		),
 	MCESEV(
+		AR, "Action required: data load in error recoverable area of kernel",
+		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
+		KERNEL_RECOV
+		),
+	MCESEV(
 		AR, "Action required: data load error in a user process",
 		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
 		USER
@@ -170,6 +181,9 @@
 		)	/* always matches. keep at end */
 };
 
+#define mc_recoverable(mcg) (((mcg) & (MCG_STATUS_RIPV|MCG_STATUS_EIPV)) == \
+				(MCG_STATUS_RIPV|MCG_STATUS_EIPV))
+
 /*
  * If mcgstatus indicated that ip/cs on the stack were
  * no good, then "m->cs" will be zero and we will have
@@ -183,7 +197,11 @@
  */
 static int error_context(struct mce *m)
 {
-	return ((m->cs & 3) == 3) ? IN_USER : IN_KERNEL;
+	if ((m->cs & 3) == 3)
+		return IN_USER;
+	if (mc_recoverable(m->mcgstatus) && ex_has_fault_handler(m->ip))
+		return IN_KERNEL_RECOV;
+	return IN_KERNEL;
 }
 
 /*

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index a006f4c..f0c921b 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c

@@ -961,6 +961,20 @@
 	}
 }
 
+static int do_memory_failure(struct mce *m)
+{
+	int flags = MF_ACTION_REQUIRED;
+	int ret;
+
+	pr_err("Uncorrected hardware memory error in user-access at %llx", m->addr);
+	if (!(m->mcgstatus & MCG_STATUS_RIPV))
+		flags |= MF_MUST_KILL;
+	ret = memory_failure(m->addr >> PAGE_SHIFT, MCE_VECTOR, flags);
+	if (ret)
+		pr_err("Memory error not recovered");
+	return ret;
+}
+
 /*
  * The actual machine check handler. This only handles real
  * exceptions when something got corrupted coming in through int 18.
@@ -998,8 +1012,6 @@
 	DECLARE_BITMAP(toclear, MAX_NR_BANKS);
 	DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
 	char *msg = "Unknown";
-	u64 recover_paddr = ~0ull;
-	int flags = MF_ACTION_REQUIRED;
 	int lmce = 0;
 
 	/* If this CPU is offline, just bail out. */
@@ -1136,22 +1148,13 @@
 	}
 
 	/*
-	 * At insane "tolerant" levels we take no action. Otherwise
-	 * we only die if we have no other choice. For less serious
-	 * issues we try to recover, or limit damage to the current
-	 * process.
+	 * If tolerant is at an insane level we drop requests to kill
+	 * processes and continue even when there is no way out.
 	 */
-	if (cfg->tolerant < 3) {
-		if (no_way_out)
-			mce_panic("Fatal machine check on current CPU", &m, msg);
-		if (worst == MCE_AR_SEVERITY) {
-			recover_paddr = m.addr;
-			if (!(m.mcgstatus & MCG_STATUS_RIPV))
-				flags |= MF_MUST_KILL;
-		} else if (kill_it) {
-			force_sig(SIGBUS, current);
-		}
-	}
+	if (cfg->tolerant == 3)
+		kill_it = 0;
+	else if (no_way_out)
+		mce_panic("Fatal machine check on current CPU", &m, msg);
 
 	if (worst > 0)
 		mce_report_event(regs);
@@ -1159,25 +1162,24 @@
 out:
 	sync_core();
 
-	if (recover_paddr == ~0ull)
-		goto done;
+	if (worst != MCE_AR_SEVERITY && !kill_it)
+		goto out_ist;
 
-	pr_err("Uncorrected hardware memory error in user-access at %llx",
-		 recover_paddr);
-	/*
-	 * We must call memory_failure() here even if the current process is
-	 * doomed. We still need to mark the page as poisoned and alert any
-	 * other users of the page.
-	 */
-	ist_begin_non_atomic(regs);
-	local_irq_enable();
-	if (memory_failure(recover_paddr >> PAGE_SHIFT, MCE_VECTOR, flags) < 0) {
-		pr_err("Memory error not recovered");
-		force_sig(SIGBUS, current);
+	/* Fault was in user mode and we need to take some action */
+	if ((m.cs & 3) == 3) {
+		ist_begin_non_atomic(regs);
+		local_irq_enable();
+
+		if (kill_it || do_memory_failure(&m))
+			force_sig(SIGBUS, current);
+		local_irq_disable();
+		ist_end_non_atomic();
+	} else {
+		if (!fixup_exception(regs, X86_TRAP_MC))
+			mce_panic("Failed kernel mode recovery", &m, NULL);
 	}
-	local_irq_disable();
-	ist_end_non_atomic();
-done:
+
+out_ist:
 	ist_exit(regs);
 }
 EXPORT_SYMBOL_GPL(do_machine_check);
@@ -1576,6 +1578,17 @@
 
 		if (c->x86 == 6 && c->x86_model == 45)
 			quirk_no_way_out = quirk_sandybridge_ifu;
+		/*
+		 * MCG_CAP.MCG_SER_P is necessary but not sufficient to know
+		 * whether this processor will actually generate recoverable
+		 * machine checks. Check to see if this is an E7 model Xeon.
+		 * We can't do a model number check because E5 and E7 use the
+		 * same model number. E5 doesn't support recovery, E7 does.
+		 */
+		if (mca_cfg.recovery || (mca_cfg.ser &&
+			!strncmp(c->x86_model_id,
+				 "Intel(R) Xeon(R) CPU E7-", 24)))
+			set_cpu_cap(c, X86_FEATURE_MCE_RECOVERY);
 	}
 	if (cfg->monarch_timeout < 0)
 		cfg->monarch_timeout = 0;
@@ -1617,10 +1630,10 @@
 	case X86_VENDOR_AMD: {
 		u32 ebx = cpuid_ebx(0x80000007);
 
-		mce_amd_feature_init(c);
 		mce_flags.overflow_recov = !!(ebx & BIT(0));
 		mce_flags.succor	 = !!(ebx & BIT(1));
 		mce_flags.smca		 = !!(ebx & BIT(3));
+		mce_amd_feature_init(c);
 
 		break;
 		}
@@ -2028,6 +2041,8 @@
 		cfg->bootlog = (str[0] == 'b');
 	else if (!strcmp(str, "bios_cmci_threshold"))
 		cfg->bios_cmci_threshold = true;
+	else if (!strcmp(str, "recovery"))
+		cfg->recovery = true;
 	else if (isdigit(str[0])) {
 		if (get_option(&str, &cfg->tolerant) == 2)
 			get_option(&str, &(cfg->monarch_timeout));

diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index e99b150..9d656fd 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c

@@ -1,5 +1,5 @@
 /*
- *  (c) 2005-2015 Advanced Micro Devices, Inc.
+ *  (c) 2005-2016 Advanced Micro Devices, Inc.
  *  Your use of this code is subject to the terms and conditions of the
  *  GNU general public license version 2. See "COPYING" or
  *  http://www.gnu.org/licenses/gpl.html
@@ -28,7 +28,7 @@
 #include <asm/msr.h>
 #include <asm/trace/irq_vectors.h>
 
-#define NR_BLOCKS         9
+#define NR_BLOCKS         5
 #define THRESHOLD_MAX     0xFFF
 #define INT_TYPE_APIC     0x00020000
 #define MASK_VALID_HI     0x80000000
@@ -49,6 +49,19 @@
 #define DEF_LVT_OFF		0x2
 #define DEF_INT_TYPE_APIC	0x2
 
+/* Scalable MCA: */
+
+/* Threshold LVT offset is at MSR0xC0000410[15:12] */
+#define SMCA_THR_LVT_OFF	0xF000
+
+/*
+ * OS is required to set the MCAX bit to acknowledge that it is now using the
+ * new MSR ranges and new registers under each bank. It also means that the OS
+ * will configure deferred errors in the new MCx_CONFIG register. If the bit is
+ * not set, uncorrectable errors will cause a system panic.
+ */
+#define SMCA_MCAX_EN_OFF	0x1
+
 static const char * const th_names[] = {
 	"load_store",
 	"insn_fetch",
@@ -58,6 +71,35 @@
 	"execution_unit",
 };
 
+/* Define HWID to IP type mappings for Scalable MCA */
+struct amd_hwid amd_hwids[] = {
+	[SMCA_F17H_CORE]	= { "f17h_core",	0xB0 },
+	[SMCA_DF]		= { "data_fabric",	0x2E },
+	[SMCA_UMC]		= { "umc",		0x96 },
+	[SMCA_PB]		= { "param_block",	0x5 },
+	[SMCA_PSP]		= { "psp",		0xFF },
+	[SMCA_SMU]		= { "smu",		0x1 },
+};
+EXPORT_SYMBOL_GPL(amd_hwids);
+
+const char * const amd_core_mcablock_names[] = {
+	[SMCA_LS]		= "load_store",
+	[SMCA_IF]		= "insn_fetch",
+	[SMCA_L2_CACHE]		= "l2_cache",
+	[SMCA_DE]		= "decode_unit",
+	[RES]			= "",
+	[SMCA_EX]		= "execution_unit",
+	[SMCA_FP]		= "floating_point",
+	[SMCA_L3_CACHE]		= "l3_cache",
+};
+EXPORT_SYMBOL_GPL(amd_core_mcablock_names);
+
+const char * const amd_df_mcablock_names[] = {
+	[SMCA_CS]		= "coherent_slave",
+	[SMCA_PIE]		= "pie",
+};
+EXPORT_SYMBOL_GPL(amd_df_mcablock_names);
+
 static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
 static DEFINE_PER_CPU(unsigned char, bank_map);	/* see which banks are on */
 
@@ -84,6 +126,13 @@
 
 static inline bool is_shared_bank(int bank)
 {
+	/*
+	 * Scalable MCA provides for only one core to have access to the MSRs of
+	 * a shared bank.
+	 */
+	if (mce_flags.smca)
+		return false;
+
 	/* Bank 4 is for northbridge reporting and is thus shared */
 	return (bank == 4);
 }
@@ -135,6 +184,14 @@
 	}
 
 	if (apic != msr) {
+		/*
+		 * On SMCA CPUs, LVT offset is programmed at a different MSR, and
+		 * the BIOS provides the value. The original field where LVT offset
+		 * was set is reserved. Return early here:
+		 */
+		if (mce_flags.smca)
+			return 0;
+
 		pr_err(FW_BUG "cpu %d, invalid threshold interrupt offset %d "
 		       "for bank %d, block %d (MSR%08X=0x%x%08x)\n",
 		       b->cpu, apic, b->bank, b->block, b->address, hi, lo);
@@ -144,10 +201,7 @@
 	return 1;
 };
 
-/*
- * Called via smp_call_function_single(), must be called with correct
- * cpu affinity.
- */
+/* Reprogram MCx_MISC MSR behind this threshold bank. */
 static void threshold_restart_bank(void *_tr)
 {
 	struct thresh_restart *tr = _tr;
@@ -247,27 +301,116 @@
 	wrmsr(MSR_CU_DEF_ERR, low, high);
 }
 
+static u32 get_block_address(u32 current_addr, u32 low, u32 high,
+			     unsigned int bank, unsigned int block)
+{
+	u32 addr = 0, offset = 0;
+
+	if (mce_flags.smca) {
+		if (!block) {
+			addr = MSR_AMD64_SMCA_MCx_MISC(bank);
+		} else {
+			/*
+			 * For SMCA enabled processors, BLKPTR field of the
+			 * first MISC register (MCx_MISC0) indicates presence of
+			 * additional MISC register set (MISC1-4).
+			 */
+			u32 low, high;
+
+			if (rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
+				return addr;
+
+			if (!(low & MCI_CONFIG_MCAX))
+				return addr;
+
+			if (!rdmsr_safe(MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) &&
+			    (low & MASK_BLKPTR_LO))
+				addr = MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
+		}
+		return addr;
+	}
+
+	/* Fall back to method we used for older processors: */
+	switch (block) {
+	case 0:
+		addr = MSR_IA32_MCx_MISC(bank);
+		break;
+	case 1:
+		offset = ((low & MASK_BLKPTR_LO) >> 21);
+		if (offset)
+			addr = MCG_XBLK_ADDR + offset;
+		break;
+	default:
+		addr = ++current_addr;
+	}
+	return addr;
+}
+
+static int
+prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
+			int offset, u32 misc_high)
+{
+	unsigned int cpu = smp_processor_id();
+	struct threshold_block b;
+	int new;
+
+	if (!block)
+		per_cpu(bank_map, cpu) |= (1 << bank);
+
+	memset(&b, 0, sizeof(b));
+	b.cpu			= cpu;
+	b.bank			= bank;
+	b.block			= block;
+	b.address		= addr;
+	b.interrupt_capable	= lvt_interrupt_supported(bank, misc_high);
+
+	if (!b.interrupt_capable)
+		goto done;
+
+	b.interrupt_enable = 1;
+
+	if (mce_flags.smca) {
+		u32 smca_low, smca_high;
+		u32 smca_addr = MSR_AMD64_SMCA_MCx_CONFIG(bank);
+
+		if (!rdmsr_safe(smca_addr, &smca_low, &smca_high)) {
+			smca_high |= SMCA_MCAX_EN_OFF;
+			wrmsr(smca_addr, smca_low, smca_high);
+		}
+
+		/* Gather LVT offset for thresholding: */
+		if (rdmsr_safe(MSR_CU_DEF_ERR, &smca_low, &smca_high))
+			goto out;
+
+		new = (smca_low & SMCA_THR_LVT_OFF) >> 12;
+	} else {
+		new = (misc_high & MASK_LVTOFF_HI) >> 20;
+	}
+
+	offset = setup_APIC_mce_threshold(offset, new);
+
+	if ((offset == new) && (mce_threshold_vector != amd_threshold_interrupt))
+		mce_threshold_vector = amd_threshold_interrupt;
+
+done:
+	mce_threshold_block_init(&b, offset);
+
+out:
+	return offset;
+}
+
 /* cpu init entry point, called from mce.c with preempt off */
 void mce_amd_feature_init(struct cpuinfo_x86 *c)
 {
-	struct threshold_block b;
-	unsigned int cpu = smp_processor_id();
 	u32 low = 0, high = 0, address = 0;
 	unsigned int bank, block;
-	int offset = -1, new;
+	int offset = -1;
 
 	for (bank = 0; bank < mca_cfg.banks; ++bank) {
 		for (block = 0; block < NR_BLOCKS; ++block) {
-			if (block == 0)
-				address = MSR_IA32_MCx_MISC(bank);
-			else if (block == 1) {
-				address = (low & MASK_BLKPTR_LO) >> 21;
-				if (!address)
-					break;
-
-				address += MCG_XBLK_ADDR;
-			} else
-				++address;
+			address = get_block_address(address, low, high, bank, block);
+			if (!address)
+				break;
 
 			if (rdmsr_safe(address, &low, &high))
 				break;
@@ -279,29 +422,7 @@
 			     (high & MASK_LOCKED_HI))
 				continue;
 
-			if (!block)
-				per_cpu(bank_map, cpu) |= (1 << bank);
-
-			memset(&b, 0, sizeof(b));
-			b.cpu			= cpu;
-			b.bank			= bank;
-			b.block			= block;
-			b.address		= address;
-			b.interrupt_capable	= lvt_interrupt_supported(bank, high);
-
-			if (!b.interrupt_capable)
-				goto init;
-
-			b.interrupt_enable = 1;
-			new	= (high & MASK_LVTOFF_HI) >> 20;
-			offset  = setup_APIC_mce_threshold(offset, new);
-
-			if ((offset == new) &&
-			    (mce_threshold_vector != amd_threshold_interrupt))
-				mce_threshold_vector = amd_threshold_interrupt;
-
-init:
-			mce_threshold_block_init(&b, offset);
+			offset = prepare_threshold_block(bank, block, address, offset, high);
 		}
 	}
 
@@ -394,16 +515,9 @@
 		if (!(per_cpu(bank_map, cpu) & (1 << bank)))
 			continue;
 		for (block = 0; block < NR_BLOCKS; ++block) {
-			if (block == 0) {
-				address = MSR_IA32_MCx_MISC(bank);
-			} else if (block == 1) {
-				address = (low & MASK_BLKPTR_LO) >> 21;
-				if (!address)
-					break;
-				address += MCG_XBLK_ADDR;
-			} else {
-				++address;
-			}
+			address = get_block_address(address, low, high, bank, block);
+			if (!address)
+				break;
 
 			if (rdmsr_safe(address, &low, &high))
 				break;
@@ -623,16 +737,11 @@
 	if (err)
 		goto out_free;
 recurse:
-	if (!block) {
-		address = (low & MASK_BLKPTR_LO) >> 21;
-		if (!address)
-			return 0;
-		address += MCG_XBLK_ADDR;
-	} else {
-		++address;
-	}
+	address = get_block_address(address, low, high, bank, ++block);
+	if (!address)
+		return 0;
 
-	err = allocate_threshold_blocks(cpu, bank, ++block, address);
+	err = allocate_threshold_blocks(cpu, bank, block, address);
 	if (err)
 		goto out_free;
 

diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c
index 12402e1..2a0717b 100644
--- a/arch/x86/kernel/cpu/mcheck/p5.c
+++ b/arch/x86/kernel/cpu/mcheck/p5.c

@@ -26,14 +26,12 @@
 	rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi);
 	rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi);
 
-	printk(KERN_EMERG
-		"CPU#%d: Machine Check Exception:  0x%8X (type 0x%8X).\n",
-		smp_processor_id(), loaddr, lotype);
+	pr_emerg("CPU#%d: Machine Check Exception:  0x%8X (type 0x%8X).\n",
+		 smp_processor_id(), loaddr, lotype);
 
 	if (lotype & (1<<5)) {
-		printk(KERN_EMERG
-			"CPU#%d: Possible thermal failure (CPU on fire ?).\n",
-			smp_processor_id());
+		pr_emerg("CPU#%d: Possible thermal failure (CPU on fire ?).\n",
+			 smp_processor_id());
 	}
 
 	add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
@@ -61,12 +59,10 @@
 	/* Read registers before enabling: */
 	rdmsr(MSR_IA32_P5_MC_ADDR, l, h);
 	rdmsr(MSR_IA32_P5_MC_TYPE, l, h);
-	printk(KERN_INFO
-	       "Intel old style machine check architecture supported.\n");
+	pr_info("Intel old style machine check architecture supported.\n");
 
 	/* Enable MCE: */
 	cr4_set_bits(X86_CR4_MCE);
-	printk(KERN_INFO
-	       "Intel old style machine check reporting enabled on CPU#%d.\n",
-	       smp_processor_id());
+	pr_info("Intel old style machine check reporting enabled on CPU#%d.\n",
+		smp_processor_id());
 }

diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 2c5aaf8..0b445c2 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c

@@ -190,7 +190,7 @@
 	/* if we just entered the thermal event */
 	if (new_event) {
 		if (event == THERMAL_THROTTLING_EVENT)
-			printk(KERN_CRIT "CPU%d: %s temperature above threshold, cpu clock throttled (total events = %lu)\n",
+			pr_crit("CPU%d: %s temperature above threshold, cpu clock throttled (total events = %lu)\n",
 				this_cpu,
 				level == CORE_LEVEL ? "Core" : "Package",
 				state->count);
@@ -198,8 +198,7 @@
 	}
 	if (old_event) {
 		if (event == THERMAL_THROTTLING_EVENT)
-			printk(KERN_INFO "CPU%d: %s temperature/speed normal\n",
-				this_cpu,
+			pr_info("CPU%d: %s temperature/speed normal\n", this_cpu,
 				level == CORE_LEVEL ? "Core" : "Package");
 		return 1;
 	}
@@ -417,8 +416,8 @@
 
 static void unexpected_thermal_interrupt(void)
 {
-	printk(KERN_ERR "CPU%d: Unexpected LVT thermal interrupt!\n",
-			smp_processor_id());
+	pr_err("CPU%d: Unexpected LVT thermal interrupt!\n",
+		smp_processor_id());
 }
 
 static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt;
@@ -499,7 +498,7 @@
 
 	if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
 		if (system_state == SYSTEM_BOOTING)
-			printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", cpu);
+			pr_debug("CPU%d: Thermal monitoring handled by SMI\n", cpu);
 		return;
 	}
 
@@ -557,8 +556,8 @@
 	l = apic_read(APIC_LVTTHMR);
 	apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
 
-	printk_once(KERN_INFO "CPU0: Thermal monitoring enabled (%s)\n",
-		       tm2 ? "TM2" : "TM1");
+	pr_info_once("CPU0: Thermal monitoring enabled (%s)\n",
+		      tm2 ? "TM2" : "TM1");
 
 	/* enable thermal throttle processing */
 	atomic_set(&therm_throt_en, 1);

diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c
index 7245980..fcf9ae9 100644
--- a/arch/x86/kernel/cpu/mcheck/threshold.c
+++ b/arch/x86/kernel/cpu/mcheck/threshold.c

@@ -12,8 +12,8 @@
 
 static void default_threshold_interrupt(void)
 {
-	printk(KERN_ERR "Unexpected threshold interrupt at vector %x\n",
-			 THRESHOLD_APIC_VECTOR);
+	pr_err("Unexpected threshold interrupt at vector %x\n",
+		THRESHOLD_APIC_VECTOR);
 }
 
 void (*mce_threshold_vector)(void) = default_threshold_interrupt;

diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c
index 01dd870..c6a722e 100644
--- a/arch/x86/kernel/cpu/mcheck/winchip.c
+++ b/arch/x86/kernel/cpu/mcheck/winchip.c

@@ -17,7 +17,7 @@
 {
 	ist_enter(regs);
 
-	printk(KERN_EMERG "CPU0: Machine Check Exception.\n");
+	pr_emerg("CPU0: Machine Check Exception.\n");
 	add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
 
 	ist_exit(regs);
@@ -39,6 +39,5 @@
 
 	cr4_set_bits(X86_CR4_MCE);
 
-	printk(KERN_INFO
-	       "Winchip machine check reporting enabled on CPU#0.\n");
+	pr_info("Winchip machine check reporting enabled on CPU#0.\n");
 }

diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 2233f8a..8581963 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c

@@ -431,10 +431,6 @@
 	else
 		container = cont_va;
 
-	if (ucode_new_rev)
-		pr_info("microcode: updated early to new patch_level=0x%08x\n",
-			ucode_new_rev);
-
 	eax   = cpuid_eax(0x00000001);
 	eax   = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff);
 
@@ -469,8 +465,7 @@
 	if (mc && rev < mc->hdr.patch_id) {
 		if (!__apply_microcode_amd(mc)) {
 			ucode_new_rev = mc->hdr.patch_id;
-			pr_info("microcode: reload patch_level=0x%08x\n",
-				ucode_new_rev);
+			pr_info("reload patch_level=0x%08x\n", ucode_new_rev);
 		}
 	}
 }
@@ -793,15 +788,13 @@
 		return -EINVAL;
 	}
 
-	patch->data = kzalloc(patch_size, GFP_KERNEL);
+	patch->data = kmemdup(fw + SECTION_HDR_SIZE, patch_size, GFP_KERNEL);
 	if (!patch->data) {
 		pr_err("Patch data allocation failure.\n");
 		kfree(patch);
 		return -EINVAL;
 	}
 
-	/* All looks ok, copy patch... */
-	memcpy(patch->data, fw + SECTION_HDR_SIZE, patch_size);
 	INIT_LIST_HEAD(&patch->plist);
 	patch->patch_id  = mc_hdr->patch_id;
 	patch->equiv_cpu = proc_id;
@@ -953,10 +946,14 @@
 	struct cpuinfo_x86 *c = &boot_cpu_data;
 
 	if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) {
-		pr_warning("AMD CPU family 0x%x not supported\n", c->x86);
+		pr_warn("AMD CPU family 0x%x not supported\n", c->x86);
 		return NULL;
 	}
 
+	if (ucode_new_rev)
+		pr_info_once("microcode updated early to new patch_level=0x%08x\n",
+			     ucode_new_rev);
+
 	return &microcode_amd_ops;
 }
 

diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index faec712..ac360bf 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c

@@ -43,16 +43,8 @@
 #define MICROCODE_VERSION	"2.01"
 
 static struct microcode_ops	*microcode_ops;
-
 static bool dis_ucode_ldr;
 
-static int __init disable_loader(char *str)
-{
-	dis_ucode_ldr = true;
-	return 1;
-}
-__setup("dis_ucode_ldr", disable_loader);
-
 /*
  * Synchronization.
  *
@@ -81,15 +73,16 @@
 
 static bool __init check_loader_disabled_bsp(void)
 {
+	static const char *__dis_opt_str = "dis_ucode_ldr";
+
 #ifdef CONFIG_X86_32
 	const char *cmdline = (const char *)__pa_nodebug(boot_command_line);
-	const char *opt	    = "dis_ucode_ldr";
-	const char *option  = (const char *)__pa_nodebug(opt);
+	const char *option  = (const char *)__pa_nodebug(__dis_opt_str);
 	bool *res = (bool *)__pa_nodebug(&dis_ucode_ldr);
 
 #else /* CONFIG_X86_64 */
 	const char *cmdline = boot_command_line;
-	const char *option  = "dis_ucode_ldr";
+	const char *option  = __dis_opt_str;
 	bool *res = &dis_ucode_ldr;
 #endif
 
@@ -479,7 +472,7 @@
 	enum ucode_state ustate;
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 
-	if (uci && uci->valid)
+	if (uci->valid)
 		return UCODE_OK;
 
 	if (collect_cpu_info(cpu))
@@ -630,7 +623,7 @@
 	struct cpuinfo_x86 *c = &boot_cpu_data;
 	int error;
 
-	if (paravirt_enabled() || dis_ucode_ldr)
+	if (dis_ucode_ldr)
 		return -EINVAL;
 
 	if (c->x86_vendor == X86_VENDOR_INTEL)

diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index ee81c54..cbb3cf0 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c

@@ -39,9 +39,15 @@
 #include <asm/setup.h>
 #include <asm/msr.h>
 
-static unsigned long mc_saved_in_initrd[MAX_UCODE_COUNT];
+/*
+ * Temporary microcode blobs pointers storage. We note here the pointers to
+ * microcode blobs we've got from whatever storage (detached initrd, builtin).
+ * Later on, we put those into final storage mc_saved_data.mc_saved.
+ */
+static unsigned long mc_tmp_ptrs[MAX_UCODE_COUNT];
+
 static struct mc_saved_data {
-	unsigned int mc_saved_count;
+	unsigned int num_saved;
 	struct microcode_intel **mc_saved;
 } mc_saved_data;
 
@@ -78,53 +84,50 @@
 }
 
 static inline void
-copy_initrd_ptrs(struct microcode_intel **mc_saved, unsigned long *initrd,
-		  unsigned long off, int num_saved)
+copy_ptrs(struct microcode_intel **mc_saved, unsigned long *mc_ptrs,
+	  unsigned long off, int num_saved)
 {
 	int i;
 
 	for (i = 0; i < num_saved; i++)
-		mc_saved[i] = (struct microcode_intel *)(initrd[i] + off);
+		mc_saved[i] = (struct microcode_intel *)(mc_ptrs[i] + off);
 }
 
 #ifdef CONFIG_X86_32
 static void
-microcode_phys(struct microcode_intel **mc_saved_tmp,
-	       struct mc_saved_data *mc_saved_data)
+microcode_phys(struct microcode_intel **mc_saved_tmp, struct mc_saved_data *mcs)
 {
 	int i;
 	struct microcode_intel ***mc_saved;
 
-	mc_saved = (struct microcode_intel ***)
-		   __pa_nodebug(&mc_saved_data->mc_saved);
-	for (i = 0; i < mc_saved_data->mc_saved_count; i++) {
+	mc_saved = (struct microcode_intel ***)__pa_nodebug(&mcs->mc_saved);
+
+	for (i = 0; i < mcs->num_saved; i++) {
 		struct microcode_intel *p;
 
-		p = *(struct microcode_intel **)
-			__pa_nodebug(mc_saved_data->mc_saved + i);
+		p = *(struct microcode_intel **)__pa_nodebug(mcs->mc_saved + i);
 		mc_saved_tmp[i] = (struct microcode_intel *)__pa_nodebug(p);
 	}
 }
 #endif
 
 static enum ucode_state
-load_microcode(struct mc_saved_data *mc_saved_data, unsigned long *initrd,
-	       unsigned long initrd_start, struct ucode_cpu_info *uci)
+load_microcode(struct mc_saved_data *mcs, unsigned long *mc_ptrs,
+	       unsigned long offset, struct ucode_cpu_info *uci)
 {
 	struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT];
-	unsigned int count = mc_saved_data->mc_saved_count;
+	unsigned int count = mcs->num_saved;
 
-	if (!mc_saved_data->mc_saved) {
-		copy_initrd_ptrs(mc_saved_tmp, initrd, initrd_start, count);
+	if (!mcs->mc_saved) {
+		copy_ptrs(mc_saved_tmp, mc_ptrs, offset, count);
 
 		return load_microcode_early(mc_saved_tmp, count, uci);
 	} else {
 #ifdef CONFIG_X86_32
-		microcode_phys(mc_saved_tmp, mc_saved_data);
+		microcode_phys(mc_saved_tmp, mcs);
 		return load_microcode_early(mc_saved_tmp, count, uci);
 #else
-		return load_microcode_early(mc_saved_data->mc_saved,
-						    count, uci);
+		return load_microcode_early(mcs->mc_saved, count, uci);
 #endif
 	}
 }
@@ -175,25 +178,25 @@
 }
 
 static int
-save_microcode(struct mc_saved_data *mc_saved_data,
+save_microcode(struct mc_saved_data *mcs,
 	       struct microcode_intel **mc_saved_src,
-	       unsigned int mc_saved_count)
+	       unsigned int num_saved)
 {
 	int i, j;
 	struct microcode_intel **saved_ptr;
 	int ret;
 
-	if (!mc_saved_count)
+	if (!num_saved)
 		return -EINVAL;
 
 	/*
 	 * Copy new microcode data.
 	 */
-	saved_ptr = kcalloc(mc_saved_count, sizeof(struct microcode_intel *), GFP_KERNEL);
+	saved_ptr = kcalloc(num_saved, sizeof(struct microcode_intel *), GFP_KERNEL);
 	if (!saved_ptr)
 		return -ENOMEM;
 
-	for (i = 0; i < mc_saved_count; i++) {
+	for (i = 0; i < num_saved; i++) {
 		struct microcode_header_intel *mc_hdr;
 		struct microcode_intel *mc;
 		unsigned long size;
@@ -207,20 +210,18 @@
 		mc_hdr = &mc->hdr;
 		size   = get_totalsize(mc_hdr);
 
-		saved_ptr[i] = kmalloc(size, GFP_KERNEL);
+		saved_ptr[i] = kmemdup(mc, size, GFP_KERNEL);
 		if (!saved_ptr[i]) {
 			ret = -ENOMEM;
 			goto err;
 		}
-
-		memcpy(saved_ptr[i], mc, size);
 	}
 
 	/*
 	 * Point to newly saved microcode.
 	 */
-	mc_saved_data->mc_saved = saved_ptr;
-	mc_saved_data->mc_saved_count = mc_saved_count;
+	mcs->mc_saved  = saved_ptr;
+	mcs->num_saved = num_saved;
 
 	return 0;
 
@@ -284,22 +285,20 @@
  * BSP can stay in the platform.
  */
 static enum ucode_state __init
-get_matching_model_microcode(int cpu, unsigned long start,
-			     void *data, size_t size,
-			     struct mc_saved_data *mc_saved_data,
-			     unsigned long *mc_saved_in_initrd,
+get_matching_model_microcode(unsigned long start, void *data, size_t size,
+			     struct mc_saved_data *mcs, unsigned long *mc_ptrs,
 			     struct ucode_cpu_info *uci)
 {
-	u8 *ucode_ptr = data;
-	unsigned int leftover = size;
-	enum ucode_state state = UCODE_OK;
-	unsigned int mc_size;
-	struct microcode_header_intel *mc_header;
 	struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT];
-	unsigned int mc_saved_count = mc_saved_data->mc_saved_count;
+	struct microcode_header_intel *mc_header;
+	unsigned int num_saved = mcs->num_saved;
+	enum ucode_state state = UCODE_OK;
+	unsigned int leftover = size;
+	u8 *ucode_ptr = data;
+	unsigned int mc_size;
 	int i;
 
-	while (leftover && mc_saved_count < ARRAY_SIZE(mc_saved_tmp)) {
+	while (leftover && num_saved < ARRAY_SIZE(mc_saved_tmp)) {
 
 		if (leftover < sizeof(mc_header))
 			break;
@@ -318,32 +317,31 @@
 		 * the platform, we need to find and save microcode patches
 		 * with the same family and model as the BSP.
 		 */
-		if (matching_model_microcode(mc_header, uci->cpu_sig.sig) !=
-			 UCODE_OK) {
+		if (matching_model_microcode(mc_header, uci->cpu_sig.sig) != UCODE_OK) {
 			ucode_ptr += mc_size;
 			continue;
 		}
 
-		mc_saved_count = _save_mc(mc_saved_tmp, ucode_ptr, mc_saved_count);
+		num_saved = _save_mc(mc_saved_tmp, ucode_ptr, num_saved);
 
 		ucode_ptr += mc_size;
 	}
 
 	if (leftover) {
 		state = UCODE_ERROR;
-		goto out;
+		return state;
 	}
 
-	if (mc_saved_count == 0) {
+	if (!num_saved) {
 		state = UCODE_NFOUND;
-		goto out;
+		return state;
 	}
 
-	for (i = 0; i < mc_saved_count; i++)
-		mc_saved_in_initrd[i] = (unsigned long)mc_saved_tmp[i] - start;
+	for (i = 0; i < num_saved; i++)
+		mc_ptrs[i] = (unsigned long)mc_saved_tmp[i] - start;
 
-	mc_saved_data->mc_saved_count = mc_saved_count;
-out:
+	mcs->num_saved = num_saved;
+
 	return state;
 }
 
@@ -373,7 +371,7 @@
 		native_rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
 		csig.pf = 1 << ((val[1] >> 18) & 7);
 	}
-	native_wrmsr(MSR_IA32_UCODE_REV, 0, 0);
+	native_wrmsrl(MSR_IA32_UCODE_REV, 0);
 
 	/* As documented in the SDM: Do a CPUID 1 here */
 	sync_core();
@@ -396,11 +394,11 @@
 	unsigned int sig, pf, rev, total_size, data_size, date;
 	struct ucode_cpu_info uci;
 
-	if (mc_saved_data.mc_saved_count == 0) {
+	if (!mc_saved_data.num_saved) {
 		pr_debug("no microcode data saved.\n");
 		return;
 	}
-	pr_debug("Total microcode saved: %d\n", mc_saved_data.mc_saved_count);
+	pr_debug("Total microcode saved: %d\n", mc_saved_data.num_saved);
 
 	collect_cpu_info_early(&uci);
 
@@ -409,7 +407,7 @@
 	rev = uci.cpu_sig.rev;
 	pr_debug("CPU: sig=0x%x, pf=0x%x, rev=0x%x\n", sig, pf, rev);
 
-	for (i = 0; i < mc_saved_data.mc_saved_count; i++) {
+	for (i = 0; i < mc_saved_data.num_saved; i++) {
 		struct microcode_header_intel *mc_saved_header;
 		struct extended_sigtable *ext_header;
 		int ext_sigcount;
@@ -465,7 +463,7 @@
 {
 	struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT];
 	unsigned int mc_saved_count_init;
-	unsigned int mc_saved_count;
+	unsigned int num_saved;
 	struct microcode_intel **mc_saved;
 	int ret = 0;
 	int i;
@@ -476,23 +474,23 @@
 	 */
 	mutex_lock(&x86_cpu_microcode_mutex);
 
-	mc_saved_count_init = mc_saved_data.mc_saved_count;
-	mc_saved_count = mc_saved_data.mc_saved_count;
+	mc_saved_count_init = mc_saved_data.num_saved;
+	num_saved = mc_saved_data.num_saved;
 	mc_saved = mc_saved_data.mc_saved;
 
-	if (mc_saved && mc_saved_count)
+	if (mc_saved && num_saved)
 		memcpy(mc_saved_tmp, mc_saved,
-		       mc_saved_count * sizeof(struct microcode_intel *));
+		       num_saved * sizeof(struct microcode_intel *));
 	/*
 	 * Save the microcode patch mc in mc_save_tmp structure if it's a newer
 	 * version.
 	 */
-	mc_saved_count = _save_mc(mc_saved_tmp, mc, mc_saved_count);
+	num_saved = _save_mc(mc_saved_tmp, mc, num_saved);
 
 	/*
 	 * Save the mc_save_tmp in global mc_saved_data.
 	 */
-	ret = save_microcode(&mc_saved_data, mc_saved_tmp, mc_saved_count);
+	ret = save_microcode(&mc_saved_data, mc_saved_tmp, num_saved);
 	if (ret) {
 		pr_err("Cannot save microcode patch.\n");
 		goto out;
@@ -536,7 +534,7 @@
 
 static __initdata char ucode_name[] = "kernel/x86/microcode/GenuineIntel.bin";
 static __init enum ucode_state
-scan_microcode(struct mc_saved_data *mc_saved_data, unsigned long *initrd,
+scan_microcode(struct mc_saved_data *mcs, unsigned long *mc_ptrs,
 	       unsigned long start, unsigned long size,
 	       struct ucode_cpu_info *uci)
 {
@@ -551,14 +549,18 @@
 	cd.data = NULL;
 	cd.size = 0;
 
-	cd = find_cpio_data(p, (void *)start, size, &offset);
-	if (!cd.data) {
+	/* try built-in microcode if no initrd */
+	if (!size) {
 		if (!load_builtin_intel_microcode(&cd))
 			return UCODE_ERROR;
+	} else {
+		cd = find_cpio_data(p, (void *)start, size, &offset);
+		if (!cd.data)
+			return UCODE_ERROR;
 	}
 
-	return get_matching_model_microcode(0, start, cd.data, cd.size,
-					    mc_saved_data, initrd, uci);
+	return get_matching_model_microcode(start, cd.data, cd.size,
+					    mcs, mc_ptrs, uci);
 }
 
 /*
@@ -567,14 +569,11 @@
 static void
 print_ucode_info(struct ucode_cpu_info *uci, unsigned int date)
 {
-	int cpu = smp_processor_id();
-
-	pr_info("CPU%d microcode updated early to revision 0x%x, date = %04x-%02x-%02x\n",
-		cpu,
-		uci->cpu_sig.rev,
-		date & 0xffff,
-		date >> 24,
-		(date >> 16) & 0xff);
+	pr_info_once("microcode updated early to revision 0x%x, date = %04x-%02x-%02x\n",
+		     uci->cpu_sig.rev,
+		     date & 0xffff,
+		     date >> 24,
+		     (date >> 16) & 0xff);
 }
 
 #ifdef CONFIG_X86_32
@@ -603,19 +602,19 @@
  */
 static void print_ucode(struct ucode_cpu_info *uci)
 {
-	struct microcode_intel *mc_intel;
+	struct microcode_intel *mc;
 	int *delay_ucode_info_p;
 	int *current_mc_date_p;
 
-	mc_intel = uci->mc;
-	if (mc_intel == NULL)
+	mc = uci->mc;
+	if (!mc)
 		return;
 
 	delay_ucode_info_p = (int *)__pa_nodebug(&delay_ucode_info);
 	current_mc_date_p = (int *)__pa_nodebug(&current_mc_date);
 
 	*delay_ucode_info_p = 1;
-	*current_mc_date_p = mc_intel->hdr.date;
+	*current_mc_date_p = mc->hdr.date;
 }
 #else
 
@@ -630,37 +629,35 @@
 
 static inline void print_ucode(struct ucode_cpu_info *uci)
 {
-	struct microcode_intel *mc_intel;
+	struct microcode_intel *mc;
 
-	mc_intel = uci->mc;
-	if (mc_intel == NULL)
+	mc = uci->mc;
+	if (!mc)
 		return;
 
-	print_ucode_info(uci, mc_intel->hdr.date);
+	print_ucode_info(uci, mc->hdr.date);
 }
 #endif
 
 static int apply_microcode_early(struct ucode_cpu_info *uci, bool early)
 {
-	struct microcode_intel *mc_intel;
+	struct microcode_intel *mc;
 	unsigned int val[2];
 
-	mc_intel = uci->mc;
-	if (mc_intel == NULL)
+	mc = uci->mc;
+	if (!mc)
 		return 0;
 
 	/* write microcode via MSR 0x79 */
-	native_wrmsr(MSR_IA32_UCODE_WRITE,
-	      (unsigned long) mc_intel->bits,
-	      (unsigned long) mc_intel->bits >> 16 >> 16);
-	native_wrmsr(MSR_IA32_UCODE_REV, 0, 0);
+	native_wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits);
+	native_wrmsrl(MSR_IA32_UCODE_REV, 0);
 
 	/* As documented in the SDM: Do a CPUID 1 here */
 	sync_core();
 
 	/* get the current revision from MSR 0x8B */
 	native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
-	if (val[1] != mc_intel->hdr.rev)
+	if (val[1] != mc->hdr.rev)
 		return -1;
 
 #ifdef CONFIG_X86_64
@@ -672,25 +669,26 @@
 	if (early)
 		print_ucode(uci);
 	else
-		print_ucode_info(uci, mc_intel->hdr.date);
+		print_ucode_info(uci, mc->hdr.date);
 
 	return 0;
 }
 
 /*
  * This function converts microcode patch offsets previously stored in
- * mc_saved_in_initrd to pointers and stores the pointers in mc_saved_data.
+ * mc_tmp_ptrs to pointers and stores the pointers in mc_saved_data.
  */
 int __init save_microcode_in_initrd_intel(void)
 {
-	unsigned int count = mc_saved_data.mc_saved_count;
+	unsigned int count = mc_saved_data.num_saved;
 	struct microcode_intel *mc_saved[MAX_UCODE_COUNT];
 	int ret = 0;
 
-	if (count == 0)
+	if (!count)
 		return ret;
 
-	copy_initrd_ptrs(mc_saved, mc_saved_in_initrd, initrd_start, count);
+	copy_ptrs(mc_saved, mc_tmp_ptrs, get_initrd_start(), count);
+
 	ret = save_microcode(&mc_saved_data, mc_saved, count);
 	if (ret)
 		pr_err("Cannot save microcode patches from initrd.\n");
@@ -701,8 +699,7 @@
 }
 
 static void __init
-_load_ucode_intel_bsp(struct mc_saved_data *mc_saved_data,
-		      unsigned long *initrd,
+_load_ucode_intel_bsp(struct mc_saved_data *mcs, unsigned long *mc_ptrs,
 		      unsigned long start, unsigned long size)
 {
 	struct ucode_cpu_info uci;
@@ -710,11 +707,11 @@
 
 	collect_cpu_info_early(&uci);
 
-	ret = scan_microcode(mc_saved_data, initrd, start, size, &uci);
+	ret = scan_microcode(mcs, mc_ptrs, start, size, &uci);
 	if (ret != UCODE_OK)
 		return;
 
-	ret = load_microcode(mc_saved_data, initrd, start, &uci);
+	ret = load_microcode(mcs, mc_ptrs, start, &uci);
 	if (ret != UCODE_OK)
 		return;
 
@@ -728,53 +725,49 @@
 	struct boot_params *p;
 
 	p	= (struct boot_params *)__pa_nodebug(&boot_params);
-	start	= p->hdr.ramdisk_image;
 	size	= p->hdr.ramdisk_size;
 
-	_load_ucode_intel_bsp(
-			(struct mc_saved_data *)__pa_nodebug(&mc_saved_data),
-			(unsigned long *)__pa_nodebug(&mc_saved_in_initrd),
-			start, size);
-#else
-	start	= boot_params.hdr.ramdisk_image + PAGE_OFFSET;
-	size	= boot_params.hdr.ramdisk_size;
+	/*
+	 * Set start only if we have an initrd image. We cannot use initrd_start
+	 * because it is not set that early yet.
+	 */
+	start	= (size ? p->hdr.ramdisk_image : 0);
 
-	_load_ucode_intel_bsp(&mc_saved_data, mc_saved_in_initrd, start, size);
+	_load_ucode_intel_bsp((struct mc_saved_data *)__pa_nodebug(&mc_saved_data),
+			      (unsigned long *)__pa_nodebug(&mc_tmp_ptrs),
+			      start, size);
+#else
+	size	= boot_params.hdr.ramdisk_size;
+	start	= (size ? boot_params.hdr.ramdisk_image + PAGE_OFFSET : 0);
+
+	_load_ucode_intel_bsp(&mc_saved_data, mc_tmp_ptrs, start, size);
 #endif
 }
 
 void load_ucode_intel_ap(void)
 {
-	struct mc_saved_data *mc_saved_data_p;
+	unsigned long *mcs_tmp_p;
+	struct mc_saved_data *mcs_p;
 	struct ucode_cpu_info uci;
-	unsigned long *mc_saved_in_initrd_p;
-	unsigned long initrd_start_addr;
 	enum ucode_state ret;
 #ifdef CONFIG_X86_32
-	unsigned long *initrd_start_p;
 
-	mc_saved_in_initrd_p =
-		(unsigned long *)__pa_nodebug(mc_saved_in_initrd);
-	mc_saved_data_p = (struct mc_saved_data *)__pa_nodebug(&mc_saved_data);
-	initrd_start_p = (unsigned long *)__pa_nodebug(&initrd_start);
-	initrd_start_addr = (unsigned long)__pa_nodebug(*initrd_start_p);
+	mcs_tmp_p = (unsigned long *)__pa_nodebug(mc_tmp_ptrs);
+	mcs_p = (struct mc_saved_data *)__pa_nodebug(&mc_saved_data);
 #else
-	mc_saved_data_p = &mc_saved_data;
-	mc_saved_in_initrd_p = mc_saved_in_initrd;
-	initrd_start_addr = initrd_start;
+	mcs_tmp_p = mc_tmp_ptrs;
+	mcs_p = &mc_saved_data;
 #endif
 
 	/*
 	 * If there is no valid ucode previously saved in memory, no need to
 	 * update ucode on this AP.
 	 */
-	if (mc_saved_data_p->mc_saved_count == 0)
+	if (!mcs_p->num_saved)
 		return;
 
 	collect_cpu_info_early(&uci);
-	ret = load_microcode(mc_saved_data_p, mc_saved_in_initrd_p,
-			     initrd_start_addr, &uci);
-
+	ret = load_microcode(mcs_p, mcs_tmp_p, get_initrd_start_addr(), &uci);
 	if (ret != UCODE_OK)
 		return;
 
@@ -786,13 +779,13 @@
 	struct ucode_cpu_info uci;
 	enum ucode_state ret;
 
-	if (!mc_saved_data.mc_saved_count)
+	if (!mc_saved_data.num_saved)
 		return;
 
 	collect_cpu_info_early(&uci);
 
 	ret = load_microcode_early(mc_saved_data.mc_saved,
-				   mc_saved_data.mc_saved_count, &uci);
+				   mc_saved_data.num_saved, &uci);
 	if (ret != UCODE_OK)
 		return;
 
@@ -825,7 +818,7 @@
  * return 0 - no update found
  * return 1 - found update
  */
-static int get_matching_mc(struct microcode_intel *mc_intel, int cpu)
+static int get_matching_mc(struct microcode_intel *mc, int cpu)
 {
 	struct cpu_signature cpu_sig;
 	unsigned int csig, cpf, crev;
@@ -836,39 +829,36 @@
 	cpf = cpu_sig.pf;
 	crev = cpu_sig.rev;
 
-	return has_newer_microcode(mc_intel, csig, cpf, crev);
+	return has_newer_microcode(mc, csig, cpf, crev);
 }
 
 static int apply_microcode_intel(int cpu)
 {
-	struct microcode_intel *mc_intel;
+	struct microcode_intel *mc;
 	struct ucode_cpu_info *uci;
+	struct cpuinfo_x86 *c;
 	unsigned int val[2];
-	int cpu_num = raw_smp_processor_id();
-	struct cpuinfo_x86 *c = &cpu_data(cpu_num);
-
-	uci = ucode_cpu_info + cpu;
-	mc_intel = uci->mc;
 
 	/* We should bind the task to the CPU */
-	BUG_ON(cpu_num != cpu);
+	if (WARN_ON(raw_smp_processor_id() != cpu))
+		return -1;
 
-	if (mc_intel == NULL)
+	uci = ucode_cpu_info + cpu;
+	mc = uci->mc;
+	if (!mc)
 		return 0;
 
 	/*
 	 * Microcode on this CPU could be updated earlier. Only apply the
-	 * microcode patch in mc_intel when it is newer than the one on this
+	 * microcode patch in mc when it is newer than the one on this
 	 * CPU.
 	 */
-	if (get_matching_mc(mc_intel, cpu) == 0)
+	if (!get_matching_mc(mc, cpu))
 		return 0;
 
 	/* write microcode via MSR 0x79 */
-	wrmsr(MSR_IA32_UCODE_WRITE,
-	      (unsigned long) mc_intel->bits,
-	      (unsigned long) mc_intel->bits >> 16 >> 16);
-	wrmsr(MSR_IA32_UCODE_REV, 0, 0);
+	wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits);
+	wrmsrl(MSR_IA32_UCODE_REV, 0);
 
 	/* As documented in the SDM: Do a CPUID 1 here */
 	sync_core();
@@ -876,16 +866,19 @@
 	/* get the current revision from MSR 0x8B */
 	rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
 
-	if (val[1] != mc_intel->hdr.rev) {
+	if (val[1] != mc->hdr.rev) {
 		pr_err("CPU%d update to revision 0x%x failed\n",
-		       cpu_num, mc_intel->hdr.rev);
+		       cpu, mc->hdr.rev);
 		return -1;
 	}
+
 	pr_info("CPU%d updated to revision 0x%x, date = %04x-%02x-%02x\n",
-		cpu_num, val[1],
-		mc_intel->hdr.date & 0xffff,
-		mc_intel->hdr.date >> 24,
-		(mc_intel->hdr.date >> 16) & 0xff);
+		cpu, val[1],
+		mc->hdr.date & 0xffff,
+		mc->hdr.date >> 24,
+		(mc->hdr.date >> 16) & 0xff);
+
+	c = &cpu_data(cpu);
 
 	uci->cpu_sig.rev = val[1];
 	c->microcode = val[1];

diff --git a/arch/x86/kernel/cpu/microcode/intel_lib.c b/arch/x86/kernel/cpu/microcode/intel_lib.c
index b96896b..2ce1a7d 100644
--- a/arch/x86/kernel/cpu/microcode/intel_lib.c
+++ b/arch/x86/kernel/cpu/microcode/intel_lib.c

@@ -49,7 +49,7 @@
 	unsigned long total_size, data_size, ext_table_size;
 	struct microcode_header_intel *mc_header = mc;
 	struct extended_sigtable *ext_header = NULL;
-	int sum, orig_sum, ext_sigcount = 0, i;
+	u32 sum, orig_sum, ext_sigcount = 0, i;
 	struct extended_signature *ext_sig;
 
 	total_size = get_totalsize(mc_header);
@@ -57,69 +57,85 @@
 
 	if (data_size + MC_HEADER_SIZE > total_size) {
 		if (print_err)
-			pr_err("error! Bad data size in microcode data file\n");
+			pr_err("Error: bad microcode data file size.\n");
 		return -EINVAL;
 	}
 
 	if (mc_header->ldrver != 1 || mc_header->hdrver != 1) {
 		if (print_err)
-			pr_err("error! Unknown microcode update format\n");
+			pr_err("Error: invalid/unknown microcode update format.\n");
 		return -EINVAL;
 	}
+
 	ext_table_size = total_size - (MC_HEADER_SIZE + data_size);
 	if (ext_table_size) {
+		u32 ext_table_sum = 0;
+		u32 *ext_tablep;
+
 		if ((ext_table_size < EXT_HEADER_SIZE)
 		 || ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) {
 			if (print_err)
-				pr_err("error! Small exttable size in microcode data file\n");
+				pr_err("Error: truncated extended signature table.\n");
 			return -EINVAL;
 		}
+
 		ext_header = mc + MC_HEADER_SIZE + data_size;
 		if (ext_table_size != exttable_size(ext_header)) {
 			if (print_err)
-				pr_err("error! Bad exttable size in microcode data file\n");
+				pr_err("Error: extended signature table size mismatch.\n");
 			return -EFAULT;
 		}
+
 		ext_sigcount = ext_header->count;
-	}
 
-	/* check extended table checksum */
-	if (ext_table_size) {
-		int ext_table_sum = 0;
-		int *ext_tablep = (int *)ext_header;
+		/*
+		 * Check extended table checksum: the sum of all dwords that
+		 * comprise a valid table must be 0.
+		 */
+		ext_tablep = (u32 *)ext_header;
 
-		i = ext_table_size / DWSIZE;
+		i = ext_table_size / sizeof(u32);
 		while (i--)
 			ext_table_sum += ext_tablep[i];
+
 		if (ext_table_sum) {
 			if (print_err)
-				pr_warn("aborting, bad extended signature table checksum\n");
+				pr_warn("Bad extended signature table checksum, aborting.\n");
 			return -EINVAL;
 		}
 	}
 
-	/* calculate the checksum */
+	/*
+	 * Calculate the checksum of update data and header. The checksum of
+	 * valid update data and header including the extended signature table
+	 * must be 0.
+	 */
 	orig_sum = 0;
-	i = (MC_HEADER_SIZE + data_size) / DWSIZE;
+	i = (MC_HEADER_SIZE + data_size) / sizeof(u32);
 	while (i--)
-		orig_sum += ((int *)mc)[i];
+		orig_sum += ((u32 *)mc)[i];
+
 	if (orig_sum) {
 		if (print_err)
-			pr_err("aborting, bad checksum\n");
+			pr_err("Bad microcode data checksum, aborting.\n");
 		return -EINVAL;
 	}
+
 	if (!ext_table_size)
 		return 0;
-	/* check extended signature checksum */
+
+	/*
+	 * Check extended signature checksum: 0 => valid.
+	 */
 	for (i = 0; i < ext_sigcount; i++) {
 		ext_sig = (void *)ext_header + EXT_HEADER_SIZE +
 			  EXT_SIGNATURE_SIZE * i;
-		sum = orig_sum
-			- (mc_header->sig + mc_header->pf + mc_header->cksum)
-			+ (ext_sig->sig + ext_sig->pf + ext_sig->cksum);
+
+		sum = (mc_header->sig + mc_header->pf + mc_header->cksum) -
+		      (ext_sig->sig + ext_sig->pf + ext_sig->cksum);
 		if (sum) {
 			if (print_err)
-				pr_err("aborting, bad checksum\n");
+				pr_err("Bad extended signature checksum, aborting.\n");
 			return -EINVAL;
 		}
 	}

diff --git a/arch/x86/kernel/cpu/mkcapflags.sh b/arch/x86/kernel/cpu/mkcapflags.sh
index 3f20710..6988c74 100644
--- a/arch/x86/kernel/cpu/mkcapflags.sh
+++ b/arch/x86/kernel/cpu/mkcapflags.sh

@@ -1,6 +1,6 @@
 #!/bin/sh
 #
-# Generate the x86_cap/bug_flags[] arrays from include/asm/cpufeature.h
+# Generate the x86_cap/bug_flags[] arrays from include/asm/cpufeatures.h
 #
 
 IN=$1
@@ -49,8 +49,8 @@
 trap 'rm "$OUT"' EXIT
 
 (
-	echo "#ifndef _ASM_X86_CPUFEATURE_H"
-	echo "#include <asm/cpufeature.h>"
+	echo "#ifndef _ASM_X86_CPUFEATURES_H"
+	echo "#include <asm/cpufeatures.h>"
 	echo "#endif"
 	echo ""
 

diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 20e242e..4e7c693 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c

@@ -161,8 +161,8 @@
 	ms_hyperv.misc_features = cpuid_edx(HYPERV_CPUID_FEATURES);
 	ms_hyperv.hints    = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO);
 
-	printk(KERN_INFO "HyperV: features 0x%x, hints 0x%x\n",
-	       ms_hyperv.features, ms_hyperv.hints);
+	pr_info("HyperV: features 0x%x, hints 0x%x\n",
+		ms_hyperv.features, ms_hyperv.hints);
 
 #ifdef CONFIG_X86_LOCAL_APIC
 	if (ms_hyperv.features & HV_X64_MSR_APIC_FREQUENCY_AVAILABLE) {
@@ -174,8 +174,8 @@
 		rdmsrl(HV_X64_MSR_APIC_FREQUENCY, hv_lapic_frequency);
 		hv_lapic_frequency = div_u64(hv_lapic_frequency, HZ);
 		lapic_timer_frequency = hv_lapic_frequency;
-		printk(KERN_INFO "HyperV: LAPIC Timer Frequency: %#x\n",
-				lapic_timer_frequency);
+		pr_info("HyperV: LAPIC Timer Frequency: %#x\n",
+			lapic_timer_frequency);
 	}
 #endif
 

diff --git a/arch/x86/kernel/cpu/mtrr/centaur.c b/arch/x86/kernel/cpu/mtrr/centaur.c
index 316fe3e..3d68993 100644
--- a/arch/x86/kernel/cpu/mtrr/centaur.c
+++ b/arch/x86/kernel/cpu/mtrr/centaur.c

@@ -103,7 +103,7 @@
 	 */
 	if (type != MTRR_TYPE_WRCOMB &&
 	    (centaur_mcr_type == 0 || type != MTRR_TYPE_UNCACHABLE)) {
-		pr_warning("mtrr: only write-combining%s supported\n",
+		pr_warn("mtrr: only write-combining%s supported\n",
 			   centaur_mcr_type ? " and uncacheable are" : " is");
 		return -EINVAL;
 	}

diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index 0d98503..31e951c 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c

@@ -57,9 +57,9 @@
 static struct var_mtrr_range_state __initdata	range_state[RANGE_NUM];
 
 static int __initdata debug_print;
-#define Dprintk(x...) do { if (debug_print) printk(KERN_DEBUG x); } while (0)
+#define Dprintk(x...) do { if (debug_print) pr_debug(x); } while (0)
 
-#define BIOS_BUG_MSG KERN_WARNING \
+#define BIOS_BUG_MSG \
 	"WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n"
 
 static int __init
@@ -81,9 +81,9 @@
 						base, base + size);
 	}
 	if (debug_print) {
-		printk(KERN_DEBUG "After WB checking\n");
+		pr_debug("After WB checking\n");
 		for (i = 0; i < nr_range; i++)
-			printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n",
+			pr_debug("MTRR MAP PFN: %016llx - %016llx\n",
 				 range[i].start, range[i].end);
 	}
 
@@ -101,7 +101,7 @@
 		    (mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED) &&
 		    (mtrr_state.enabled & MTRR_STATE_MTRR_FIXED_ENABLED)) {
 			/* Var MTRR contains UC entry below 1M? Skip it: */
-			printk(BIOS_BUG_MSG, i);
+			pr_warn(BIOS_BUG_MSG, i);
 			if (base + size <= (1<<(20-PAGE_SHIFT)))
 				continue;
 			size -= (1<<(20-PAGE_SHIFT)) - base;
@@ -114,11 +114,11 @@
 				 extra_remove_base + extra_remove_size);
 
 	if  (debug_print) {
-		printk(KERN_DEBUG "After UC checking\n");
+		pr_debug("After UC checking\n");
 		for (i = 0; i < RANGE_NUM; i++) {
 			if (!range[i].end)
 				continue;
-			printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n",
+			pr_debug("MTRR MAP PFN: %016llx - %016llx\n",
 				 range[i].start, range[i].end);
 		}
 	}
@@ -126,9 +126,9 @@
 	/* sort the ranges */
 	nr_range = clean_sort_range(range, RANGE_NUM);
 	if  (debug_print) {
-		printk(KERN_DEBUG "After sorting\n");
+		pr_debug("After sorting\n");
 		for (i = 0; i < nr_range; i++)
-			printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n",
+			pr_debug("MTRR MAP PFN: %016llx - %016llx\n",
 				 range[i].start, range[i].end);
 	}
 
@@ -544,7 +544,7 @@
 		start_base = to_size_factor(start_base, &start_factor),
 		type = range_state[i].type;
 
-		printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n",
+		pr_debug("reg %d, base: %ld%cB, range: %ld%cB, type %s\n",
 			i, start_base, start_factor,
 			size_base, size_factor,
 			(type == MTRR_TYPE_UNCACHABLE) ? "UC" :
@@ -713,7 +713,7 @@
 		return 0;
 
 	/* Print original var MTRRs at first, for debugging: */
-	printk(KERN_DEBUG "original variable MTRRs\n");
+	pr_debug("original variable MTRRs\n");
 	print_out_mtrr_range_state();
 
 	memset(range, 0, sizeof(range));
@@ -733,7 +733,7 @@
 					  x_remove_base, x_remove_size);
 
 	range_sums = sum_ranges(range, nr_range);
-	printk(KERN_INFO "total RAM covered: %ldM\n",
+	pr_info("total RAM covered: %ldM\n",
 	       range_sums >> (20 - PAGE_SHIFT));
 
 	if (mtrr_chunk_size && mtrr_gran_size) {
@@ -745,12 +745,11 @@
 
 		if (!result[i].bad) {
 			set_var_mtrr_all(address_bits);
-			printk(KERN_DEBUG "New variable MTRRs\n");
+			pr_debug("New variable MTRRs\n");
 			print_out_mtrr_range_state();
 			return 1;
 		}
-		printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, "
-		       "will find optimal one\n");
+		pr_info("invalid mtrr_gran_size or mtrr_chunk_size, will find optimal one\n");
 	}
 
 	i = 0;
@@ -768,7 +767,7 @@
 				      x_remove_base, x_remove_size, i);
 			if (debug_print) {
 				mtrr_print_out_one_result(i);
-				printk(KERN_INFO "\n");
+				pr_info("\n");
 			}
 
 			i++;
@@ -779,7 +778,7 @@
 	index_good = mtrr_search_optimal_index();
 
 	if (index_good != -1) {
-		printk(KERN_INFO "Found optimal setting for mtrr clean up\n");
+		pr_info("Found optimal setting for mtrr clean up\n");
 		i = index_good;
 		mtrr_print_out_one_result(i);
 
@@ -790,7 +789,7 @@
 		gran_size <<= 10;
 		x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size);
 		set_var_mtrr_all(address_bits);
-		printk(KERN_DEBUG "New variable MTRRs\n");
+		pr_debug("New variable MTRRs\n");
 		print_out_mtrr_range_state();
 		return 1;
 	} else {
@@ -799,8 +798,8 @@
 			mtrr_print_out_one_result(i);
 	}
 
-	printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n");
-	printk(KERN_INFO "please specify mtrr_gran_size/mtrr_chunk_size\n");
+	pr_info("mtrr_cleanup: can not find optimal value\n");
+	pr_info("please specify mtrr_gran_size/mtrr_chunk_size\n");
 
 	return 0;
 }
@@ -918,7 +917,7 @@
 
 	/* kvm/qemu doesn't have mtrr set right, don't trim them all: */
 	if (!highest_pfn) {
-		printk(KERN_INFO "CPU MTRRs all blank - virtualized system.\n");
+		pr_info("CPU MTRRs all blank - virtualized system.\n");
 		return 0;
 	}
 
@@ -973,7 +972,8 @@
 							 end_pfn);
 
 	if (total_trim_size) {
-		pr_warning("WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing %lluMB of RAM.\n", total_trim_size >> 20);
+		pr_warn("WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing %lluMB of RAM.\n",
+			total_trim_size >> 20);
 
 		if (!changed_by_mtrr_cleanup)
 			WARN_ON(1);

diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index c870af1..fcbcb2f 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c

@@ -55,7 +55,7 @@
 
 	rdmsr(MSR_K8_SYSCFG, lo, hi);
 	if (lo & K8_MTRRFIXRANGE_DRAM_MODIFY) {
-		printk(KERN_ERR FW_WARN "MTRR: CPU %u: SYSCFG[MtrrFixDramModEn]"
+		pr_err(FW_WARN "MTRR: CPU %u: SYSCFG[MtrrFixDramModEn]"
 		       " not cleared by BIOS, clearing this bit\n",
 		       smp_processor_id());
 		lo &= ~K8_MTRRFIXRANGE_DRAM_MODIFY;
@@ -501,14 +501,14 @@
 	if (!mask)
 		return;
 	if (mask & MTRR_CHANGE_MASK_FIXED)
-		pr_warning("mtrr: your CPUs had inconsistent fixed MTRR settings\n");
+		pr_warn("mtrr: your CPUs had inconsistent fixed MTRR settings\n");
 	if (mask & MTRR_CHANGE_MASK_VARIABLE)
-		pr_warning("mtrr: your CPUs had inconsistent variable MTRR settings\n");
+		pr_warn("mtrr: your CPUs had inconsistent variable MTRR settings\n");
 	if (mask & MTRR_CHANGE_MASK_DEFTYPE)
-		pr_warning("mtrr: your CPUs had inconsistent MTRRdefType settings\n");
+		pr_warn("mtrr: your CPUs had inconsistent MTRRdefType settings\n");
 
-	printk(KERN_INFO "mtrr: probably your BIOS does not setup all CPUs.\n");
-	printk(KERN_INFO "mtrr: corrected configuration.\n");
+	pr_info("mtrr: probably your BIOS does not setup all CPUs.\n");
+	pr_info("mtrr: corrected configuration.\n");
 }
 
 /*
@@ -519,8 +519,7 @@
 void mtrr_wrmsr(unsigned msr, unsigned a, unsigned b)
 {
 	if (wrmsr_safe(msr, a, b) < 0) {
-		printk(KERN_ERR
-			"MTRR: CPU %u: Writing MSR %x to %x:%x failed\n",
+		pr_err("MTRR: CPU %u: Writing MSR %x to %x:%x failed\n",
 			smp_processor_id(), msr, a, b);
 	}
 }
@@ -607,7 +606,7 @@
 		tmp |= ~((1ULL<<(hi - 1)) - 1);
 
 		if (tmp != mask) {
-			printk(KERN_WARNING "mtrr: your BIOS has configured an incorrect mask, fixing it.\n");
+			pr_warn("mtrr: your BIOS has configured an incorrect mask, fixing it.\n");
 			add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
 			mask = tmp;
 		}
@@ -858,13 +857,13 @@
 	    boot_cpu_data.x86_model == 1 &&
 	    boot_cpu_data.x86_mask <= 7) {
 		if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) {
-			pr_warning("mtrr: base(0x%lx000) is not 4 MiB aligned\n", base);
+			pr_warn("mtrr: base(0x%lx000) is not 4 MiB aligned\n", base);
 			return -EINVAL;
 		}
 		if (!(base + size < 0x70000 || base > 0x7003F) &&
 		    (type == MTRR_TYPE_WRCOMB
 		     || type == MTRR_TYPE_WRBACK)) {
-			pr_warning("mtrr: writable mtrr between 0x70000000 and 0x7003FFFF may hang the CPU.\n");
+			pr_warn("mtrr: writable mtrr between 0x70000000 and 0x7003FFFF may hang the CPU.\n");
 			return -EINVAL;
 		}
 	}
@@ -878,7 +877,7 @@
 	     lbase = lbase >> 1, last = last >> 1)
 		;
 	if (lbase != last) {
-		pr_warning("mtrr: base(0x%lx000) is not aligned on a size(0x%lx000) boundary\n", base, size);
+		pr_warn("mtrr: base(0x%lx000) is not aligned on a size(0x%lx000) boundary\n", base, size);
 		return -EINVAL;
 	}
 	return 0;

diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 5c3d149..10f8d47 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c

@@ -47,7 +47,7 @@
 #include <linux/smp.h>
 #include <linux/syscore_ops.h>
 
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
 #include <asm/e820.h>
 #include <asm/mtrr.h>
 #include <asm/msr.h>
@@ -300,24 +300,24 @@
 		return error;
 
 	if (type >= MTRR_NUM_TYPES) {
-		pr_warning("mtrr: type: %u invalid\n", type);
+		pr_warn("mtrr: type: %u invalid\n", type);
 		return -EINVAL;
 	}
 
 	/* If the type is WC, check that this processor supports it */
 	if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) {
-		pr_warning("mtrr: your processor doesn't support write-combining\n");
+		pr_warn("mtrr: your processor doesn't support write-combining\n");
 		return -ENOSYS;
 	}
 
 	if (!size) {
-		pr_warning("mtrr: zero sized request\n");
+		pr_warn("mtrr: zero sized request\n");
 		return -EINVAL;
 	}
 
 	if ((base | (base + size - 1)) >>
 	    (boot_cpu_data.x86_phys_bits - PAGE_SHIFT)) {
-		pr_warning("mtrr: base or size exceeds the MTRR width\n");
+		pr_warn("mtrr: base or size exceeds the MTRR width\n");
 		return -EINVAL;
 	}
 
@@ -348,7 +348,7 @@
 				} else if (types_compatible(type, ltype))
 					continue;
 			}
-			pr_warning("mtrr: 0x%lx000,0x%lx000 overlaps existing"
+			pr_warn("mtrr: 0x%lx000,0x%lx000 overlaps existing"
 				" 0x%lx000,0x%lx000\n", base, size, lbase,
 				lsize);
 			goto out;
@@ -357,7 +357,7 @@
 		if (ltype != type) {
 			if (types_compatible(type, ltype))
 				continue;
-			pr_warning("mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n",
+			pr_warn("mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n",
 				base, size, mtrr_attrib_to_str(ltype),
 				mtrr_attrib_to_str(type));
 			goto out;
@@ -395,7 +395,7 @@
 static int mtrr_check(unsigned long base, unsigned long size)
 {
 	if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
-		pr_warning("mtrr: size and base must be multiples of 4 kiB\n");
+		pr_warn("mtrr: size and base must be multiples of 4 kiB\n");
 		pr_debug("mtrr: size: 0x%lx  base: 0x%lx\n", size, base);
 		dump_stack();
 		return -1;
@@ -493,16 +493,16 @@
 		}
 	}
 	if (reg >= max) {
-		pr_warning("mtrr: register: %d too big\n", reg);
+		pr_warn("mtrr: register: %d too big\n", reg);
 		goto out;
 	}
 	mtrr_if->get(reg, &lbase, &lsize, &ltype);
 	if (lsize < 1) {
-		pr_warning("mtrr: MTRR %d not used\n", reg);
+		pr_warn("mtrr: MTRR %d not used\n", reg);
 		goto out;
 	}
 	if (mtrr_usage_table[reg] < 1) {
-		pr_warning("mtrr: reg: %d has count=0\n", reg);
+		pr_warn("mtrr: reg: %d has count=0\n", reg);
 		goto out;
 	}
 	if (--mtrr_usage_table[reg] < 1)

diff --git a/arch/x86/kernel/cpu/rdrand.c b/arch/x86/kernel/cpu/rdrand.c
index 819d949..f6f50c4 100644
--- a/arch/x86/kernel/cpu/rdrand.c
+++ b/arch/x86/kernel/cpu/rdrand.c

@@ -51,7 +51,7 @@
 	for (i = 0; i < SANITY_CHECK_LOOPS; i++) {
 		if (!rdrand_long(&tmp)) {
 			clear_cpu_cap(c, X86_FEATURE_RDRAND);
-			printk_once(KERN_WARNING "rdrand: disabled\n");
+			pr_warn_once("rdrand: disabled\n");
 			return;
 		}
 	}

diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
index 4c60eaf..cd53135 100644
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c

@@ -87,10 +87,10 @@
 	c->x86_max_cores = (core_level_siblings / smp_num_siblings);
 
 	if (!printed) {
-		printk(KERN_INFO  "CPU: Physical Processor ID: %d\n",
+		pr_info("CPU: Physical Processor ID: %d\n",
 		       c->phys_proc_id);
 		if (c->x86_max_cores > 1)
-			printk(KERN_INFO  "CPU: Processor Core ID: %d\n",
+			pr_info("CPU: Processor Core ID: %d\n",
 			       c->cpu_core_id);
 		printed = 1;
 	}

diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c
index 252da7a..3417856 100644
--- a/arch/x86/kernel/cpu/transmeta.c
+++ b/arch/x86/kernel/cpu/transmeta.c

@@ -1,6 +1,6 @@
 #include <linux/kernel.h>
 #include <linux/mm.h>
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
 #include <asm/msr.h>
 #include "cpu.h"
 
@@ -33,7 +33,7 @@
 	if (max >= 0x80860001) {
 		cpuid(0x80860001, &dummy, &cpu_rev, &cpu_freq, &cpu_flags);
 		if (cpu_rev != 0x02000000) {
-			printk(KERN_INFO "CPU: Processor revision %u.%u.%u.%u, %u MHz\n",
+			pr_info("CPU: Processor revision %u.%u.%u.%u, %u MHz\n",
 				(cpu_rev >> 24) & 0xff,
 				(cpu_rev >> 16) & 0xff,
 				(cpu_rev >> 8) & 0xff,
@@ -44,10 +44,10 @@
 	if (max >= 0x80860002) {
 		cpuid(0x80860002, &new_cpu_rev, &cms_rev1, &cms_rev2, &dummy);
 		if (cpu_rev == 0x02000000) {
-			printk(KERN_INFO "CPU: Processor revision %08X, %u MHz\n",
+			pr_info("CPU: Processor revision %08X, %u MHz\n",
 				new_cpu_rev, cpu_freq);
 		}
-		printk(KERN_INFO "CPU: Code Morphing Software revision %u.%u.%u-%u-%u\n",
+		pr_info("CPU: Code Morphing Software revision %u.%u.%u-%u-%u\n",
 		       (cms_rev1 >> 24) & 0xff,
 		       (cms_rev1 >> 16) & 0xff,
 		       (cms_rev1 >> 8) & 0xff,
@@ -76,7 +76,7 @@
 		      (void *)&cpu_info[56],
 		      (void *)&cpu_info[60]);
 		cpu_info[64] = '\0';
-		printk(KERN_INFO "CPU: %s\n", cpu_info);
+		pr_info("CPU: %s\n", cpu_info);
 	}
 
 	/* Unhide possibly hidden capability flags */

diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 628a059..364e583 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c

@@ -62,7 +62,7 @@
 	tsc_hz = eax | (((uint64_t)ebx) << 32);
 	do_div(tsc_hz, 1000);
 	BUG_ON(tsc_hz >> 32);
-	printk(KERN_INFO "TSC freq read from hypervisor : %lu.%03lu MHz\n",
+	pr_info("TSC freq read from hypervisor : %lu.%03lu MHz\n",
 			 (unsigned long) tsc_hz / 1000,
 			 (unsigned long) tsc_hz % 1000);
 
@@ -84,8 +84,7 @@
 	if (ebx != UINT_MAX)
 		x86_platform.calibrate_tsc = vmware_get_tsc_khz;
 	else
-		printk(KERN_WARNING
-		       "Failed to get TSC freq from the hypervisor\n");
+		pr_warn("Failed to get TSC freq from the hypervisor\n");
 }
 
 /*

diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 58f3431..9ef978d 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c

@@ -57,10 +57,9 @@
 	struct kimage *image;
 	/*
 	 * Total number of ram ranges we have after various adjustments for
-	 * GART, crash reserved region etc.
+	 * crash reserved region, etc.
 	 */
 	unsigned int max_nr_ranges;
-	unsigned long gart_start, gart_end;
 
 	/* Pointer to elf header */
 	void *ehdr;
@@ -201,17 +200,6 @@
 	return 0;
 }
 
-static int get_gart_ranges_callback(u64 start, u64 end, void *arg)
-{
-	struct crash_elf_data *ced = arg;
-
-	ced->gart_start = start;
-	ced->gart_end = end;
-
-	/* Not expecting more than 1 gart aperture */
-	return 1;
-}
-
 
 /* Gather all the required information to prepare elf headers for ram regions */
 static void fill_up_crash_elf_data(struct crash_elf_data *ced,
@@ -226,22 +214,6 @@
 
 	ced->max_nr_ranges = nr_ranges;
 
-	/*
-	 * We don't create ELF headers for GART aperture as an attempt
-	 * to dump this memory in second kernel leads to hang/crash.
-	 * If gart aperture is present, one needs to exclude that region
-	 * and that could lead to need of extra phdr.
-	 */
-	walk_iomem_res("GART", IORESOURCE_MEM, 0, -1,
-				ced, get_gart_ranges_callback);
-
-	/*
-	 * If we have gart region, excluding that could potentially split
-	 * a memory range, resulting in extra header. Account for  that.
-	 */
-	if (ced->gart_end)
-		ced->max_nr_ranges++;
-
 	/* Exclusion of crash region could split memory ranges */
 	ced->max_nr_ranges++;
 
@@ -350,13 +322,6 @@
 			return ret;
 	}
 
-	/* Exclude GART region */
-	if (ced->gart_end) {
-		ret = exclude_mem_range(cmem, ced->gart_start, ced->gart_end);
-		if (ret)
-			return ret;
-	}
-
 	return ret;
 }
 
@@ -599,12 +564,12 @@
 	/* Add ACPI tables */
 	cmd.type = E820_ACPI;
 	flags = IORESOURCE_MEM | IORESOURCE_BUSY;
-	walk_iomem_res("ACPI Tables", flags, 0, -1, &cmd,
+	walk_iomem_res_desc(IORES_DESC_ACPI_TABLES, flags, 0, -1, &cmd,
 		       memmap_entry_callback);
 
 	/* Add ACPI Non-volatile Storage */
 	cmd.type = E820_NVS;
-	walk_iomem_res("ACPI Non-volatile Storage", flags, 0, -1, &cmd,
+	walk_iomem_res_desc(IORES_DESC_ACPI_NV_STORAGE, flags, 0, -1, &cmd,
 			memmap_entry_callback);
 
 	/* Add crashk_low_res region */

diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 569c1e4..621b501 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c

@@ -24,6 +24,7 @@
 #include <asm/e820.h>
 #include <asm/proto.h>
 #include <asm/setup.h>
+#include <asm/cpufeature.h>
 
 /*
  * The e820 map is the map that gets modified e.g. with command line parameters
@@ -925,6 +926,41 @@
 	}
 }
 
+static unsigned long e820_type_to_iomem_type(int e820_type)
+{
+	switch (e820_type) {
+	case E820_RESERVED_KERN:
+	case E820_RAM:
+		return IORESOURCE_SYSTEM_RAM;
+	case E820_ACPI:
+	case E820_NVS:
+	case E820_UNUSABLE:
+	case E820_PRAM:
+	case E820_PMEM:
+	default:
+		return IORESOURCE_MEM;
+	}
+}
+
+static unsigned long e820_type_to_iores_desc(int e820_type)
+{
+	switch (e820_type) {
+	case E820_ACPI:
+		return IORES_DESC_ACPI_TABLES;
+	case E820_NVS:
+		return IORES_DESC_ACPI_NV_STORAGE;
+	case E820_PMEM:
+		return IORES_DESC_PERSISTENT_MEMORY;
+	case E820_PRAM:
+		return IORES_DESC_PERSISTENT_MEMORY_LEGACY;
+	case E820_RESERVED_KERN:
+	case E820_RAM:
+	case E820_UNUSABLE:
+	default:
+		return IORES_DESC_NONE;
+	}
+}
+
 static bool do_mark_busy(u32 type, struct resource *res)
 {
 	/* this is the legacy bios/dos rom-shadow + mmio region */
@@ -967,7 +1003,8 @@
 		res->start = e820.map[i].addr;
 		res->end = end;
 
-		res->flags = IORESOURCE_MEM;
+		res->flags = e820_type_to_iomem_type(e820.map[i].type);
+		res->desc = e820_type_to_iores_desc(e820.map[i].type);
 
 		/*
 		 * don't register the region that could be conflicted with

diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index d25097c..0b1b9ab 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c

@@ -114,6 +114,10 @@
 	kernel_fpu_disable();
 
 	if (fpu->fpregs_active) {
+		/*
+		 * Ignore return value -- we don't care if reg state
+		 * is clobbered.
+		 */
 		copy_fpregs_to_fpstate(fpu);
 	} else {
 		this_cpu_write(fpu_fpregs_owner_ctx, NULL);
@@ -189,8 +193,12 @@
 
 	preempt_disable();
 	if (fpu->fpregs_active) {
-		if (!copy_fpregs_to_fpstate(fpu))
-			fpregs_deactivate(fpu);
+		if (!copy_fpregs_to_fpstate(fpu)) {
+			if (use_eager_fpu())
+				copy_kernel_to_fpregs(&fpu->state);
+			else
+				fpregs_deactivate(fpu);
+		}
 	}
 	preempt_enable();
 }
@@ -223,14 +231,15 @@
 }
 EXPORT_SYMBOL_GPL(fpstate_init);
 
-/*
- * Copy the current task's FPU state to a new task's FPU context.
- *
- * In both the 'eager' and the 'lazy' case we save hardware registers
- * directly to the destination buffer.
- */
-static void fpu_copy(struct fpu *dst_fpu, struct fpu *src_fpu)
+int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
 {
+	dst_fpu->counter = 0;
+	dst_fpu->fpregs_active = 0;
+	dst_fpu->last_cpu = -1;
+
+	if (!src_fpu->fpstate_active || !cpu_has_fpu)
+		return 0;
+
 	WARN_ON_FPU(src_fpu != &current->thread.fpu);
 
 	/*
@@ -243,10 +252,9 @@
 	/*
 	 * Save current FPU registers directly into the child
 	 * FPU context, without any memory-to-memory copying.
-	 *
-	 * If the FPU context got destroyed in the process (FNSAVE
-	 * done on old CPUs) then copy it back into the source
-	 * context and mark the current task for lazy restore.
+	 * In lazy mode, if the FPU context isn't loaded into
+	 * fpregs, CR0.TS will be set and do_device_not_available
+	 * will load the FPU context.
 	 *
 	 * We have to do all this with preemption disabled,
 	 * mostly because of the FNSAVE case, because in that
@@ -259,19 +267,13 @@
 	preempt_disable();
 	if (!copy_fpregs_to_fpstate(dst_fpu)) {
 		memcpy(&src_fpu->state, &dst_fpu->state, xstate_size);
-		fpregs_deactivate(src_fpu);
+
+		if (use_eager_fpu())
+			copy_kernel_to_fpregs(&src_fpu->state);
+		else
+			fpregs_deactivate(src_fpu);
 	}
 	preempt_enable();
-}
-
-int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
-{
-	dst_fpu->counter = 0;
-	dst_fpu->fpregs_active = 0;
-	dst_fpu->last_cpu = -1;
-
-	if (src_fpu->fpstate_active && cpu_has_fpu)
-		fpu_copy(dst_fpu, src_fpu);
 
 	return 0;
 }
@@ -409,8 +411,10 @@
 {
 	if (use_xsave())
 		copy_kernel_to_xregs(&init_fpstate.xsave, -1);
-	else
+	else if (static_cpu_has(X86_FEATURE_FXSR))
 		copy_kernel_to_fxregs(&init_fpstate.fxsave);
+	else
+		copy_kernel_to_fregs(&init_fpstate.fsave);
 }
 
 /*
@@ -423,7 +427,7 @@
 {
 	WARN_ON_FPU(fpu != &current->thread.fpu); /* Almost certainly an anomaly */
 
-	if (!use_eager_fpu()) {
+	if (!use_eager_fpu() || !static_cpu_has(X86_FEATURE_FPU)) {
 		/* FPU state will be reallocated lazily at the first use. */
 		fpu__drop(fpu);
 	} else {

diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 6d9f0a7..54c86ff 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c

@@ -78,13 +78,15 @@
 	cr0 &= ~(X86_CR0_TS | X86_CR0_EM);
 	write_cr0(cr0);
 
-	asm volatile("fninit ; fnstsw %0 ; fnstcw %1"
-		     : "+m" (fsw), "+m" (fcw));
+	if (!test_bit(X86_FEATURE_FPU, (unsigned long *)cpu_caps_cleared)) {
+		asm volatile("fninit ; fnstsw %0 ; fnstcw %1"
+			     : "+m" (fsw), "+m" (fcw));
 
-	if (fsw == 0 && (fcw & 0x103f) == 0x003f)
-		set_cpu_cap(c, X86_FEATURE_FPU);
-	else
-		clear_cpu_cap(c, X86_FEATURE_FPU);
+		if (fsw == 0 && (fcw & 0x103f) == 0x003f)
+			set_cpu_cap(c, X86_FEATURE_FPU);
+		else
+			clear_cpu_cap(c, X86_FEATURE_FPU);
+	}
 
 #ifndef CONFIG_MATH_EMULATION
 	if (!cpu_has_fpu) {
@@ -132,7 +134,7 @@
 	 * Set up the legacy init FPU context. (xstate init might overwrite this
 	 * with a more modern format, if the CPU supports it.)
 	 */
-	fpstate_init_fxstate(&init_fpstate.fxsave);
+	fpstate_init(&init_fpstate);
 
 	fpu__init_system_mxcsr();
 }
@@ -260,7 +262,10 @@
  * not only saved the restores along the way, but we also have the
  * FPU ready to be used for the original task.
  *
- * 'eager' switching is used on modern CPUs, there we switch the FPU
+ * 'lazy' is deprecated because it's almost never a performance win
+ * and it's much more complicated than 'eager'.
+ *
+ * 'eager' switching is by default on all CPUs, there we switch the FPU
  * state during every context switch, regardless of whether the task
  * has used FPU instructions in that time slice or not. This is done
  * because modern FPU context saving instructions are able to optimize
@@ -271,7 +276,7 @@
  *   to use 'eager' restores, if we detect that a task is using the FPU
  *   frequently. See the fpu->counter logic in fpu/internal.h for that. ]
  */
-static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO;
+static enum { ENABLE, DISABLE } eagerfpu = ENABLE;
 
 /*
  * Find supported xfeatures based on cpu features and command-line input.
@@ -300,12 +305,6 @@
 static void __init fpu__clear_eager_fpu_features(void)
 {
 	setup_clear_cpu_cap(X86_FEATURE_MPX);
-	setup_clear_cpu_cap(X86_FEATURE_AVX);
-	setup_clear_cpu_cap(X86_FEATURE_AVX2);
-	setup_clear_cpu_cap(X86_FEATURE_AVX512F);
-	setup_clear_cpu_cap(X86_FEATURE_AVX512PF);
-	setup_clear_cpu_cap(X86_FEATURE_AVX512ER);
-	setup_clear_cpu_cap(X86_FEATURE_AVX512CD);
 }
 
 /*
@@ -348,15 +347,9 @@
  */
 static void __init fpu__init_parse_early_param(void)
 {
-	/*
-	 * No need to check "eagerfpu=auto" again, since it is the
-	 * initial default.
-	 */
 	if (cmdline_find_option_bool(boot_command_line, "eagerfpu=off")) {
 		eagerfpu = DISABLE;
 		fpu__clear_eager_fpu_features();
-	} else if (cmdline_find_option_bool(boot_command_line, "eagerfpu=on")) {
-		eagerfpu = ENABLE;
 	}
 
 	if (cmdline_find_option_bool(boot_command_line, "no387"))

diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index d425cda5..6e8354f 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c

@@ -51,6 +51,9 @@
 	setup_clear_cpu_cap(X86_FEATURE_AVX512PF);
 	setup_clear_cpu_cap(X86_FEATURE_AVX512ER);
 	setup_clear_cpu_cap(X86_FEATURE_AVX512CD);
+	setup_clear_cpu_cap(X86_FEATURE_AVX512DQ);
+	setup_clear_cpu_cap(X86_FEATURE_AVX512BW);
+	setup_clear_cpu_cap(X86_FEATURE_AVX512VL);
 	setup_clear_cpu_cap(X86_FEATURE_MPX);
 	setup_clear_cpu_cap(X86_FEATURE_XGETBV1);
 }

diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 29408d6..702547c 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c

@@ -81,9 +81,9 @@
 static unsigned long text_ip_addr(unsigned long ip)
 {
 	/*
-	 * On x86_64, kernel text mappings are mapped read-only with
-	 * CONFIG_DEBUG_RODATA. So we use the kernel identity mapping instead
-	 * of the kernel text mapping to modify the kernel text.
+	 * On x86_64, kernel text mappings are mapped read-only, so we use
+	 * the kernel identity mapping instead of the kernel text mapping
+	 * to modify the kernel text.
 	 *
 	 * For 32bit kernels, these mappings are same and we can use
 	 * kernel identity mapping to modify code.
@@ -697,9 +697,8 @@
 #endif
 
 /* Defined as markers to the end of the ftrace default trampolines */
-extern void ftrace_caller_end(void);
 extern void ftrace_regs_caller_end(void);
-extern void ftrace_return(void);
+extern void ftrace_epilogue(void);
 extern void ftrace_caller_op_ptr(void);
 extern void ftrace_regs_caller_op_ptr(void);
 
@@ -746,7 +745,7 @@
 		op_offset = (unsigned long)ftrace_regs_caller_op_ptr;
 	} else {
 		start_offset = (unsigned long)ftrace_caller;
-		end_offset = (unsigned long)ftrace_caller_end;
+		end_offset = (unsigned long)ftrace_epilogue;
 		op_offset = (unsigned long)ftrace_caller_op_ptr;
 	}
 
@@ -754,7 +753,7 @@
 
 	/*
 	 * Allocate enough size to store the ftrace_caller code,
-	 * the jmp to ftrace_return, as well as the address of
+	 * the jmp to ftrace_epilogue, as well as the address of
 	 * the ftrace_ops this trampoline is used for.
 	 */
 	trampoline = alloc_tramp(size + MCOUNT_INSN_SIZE + sizeof(void *));
@@ -772,8 +771,8 @@
 
 	ip = (unsigned long)trampoline + size;
 
-	/* The trampoline ends with a jmp to ftrace_return */
-	jmp = ftrace_jmp_replace(ip, (unsigned long)ftrace_return);
+	/* The trampoline ends with a jmp to ftrace_epilogue */
+	jmp = ftrace_jmp_replace(ip, (unsigned long)ftrace_epilogue);
 	memcpy(trampoline + size, jmp, MCOUNT_INSN_SIZE);
 
 	/*

diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 2c0f340..1f4422d 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c

@@ -40,13 +40,8 @@
 /* Wipe all early page tables except for the kernel symbol map */
 static void __init reset_early_page_tables(void)
 {
-	unsigned long i;
-
-	for (i = 0; i < PTRS_PER_PGD-1; i++)
-		early_level4_pgt[i].pgd = 0;
-
+	memset(early_level4_pgt, 0, sizeof(pgd_t)*(PTRS_PER_PGD-1));
 	next_early_pgt = 0;
-
 	write_cr3(__pa_nodebug(early_level4_pgt));
 }
 
@@ -54,7 +49,6 @@
 int __init early_make_pgtable(unsigned long address)
 {
 	unsigned long physaddr = address - __PAGE_OFFSET;
-	unsigned long i;
 	pgdval_t pgd, *pgd_p;
 	pudval_t pud, *pud_p;
 	pmdval_t pmd, *pmd_p;
@@ -81,8 +75,7 @@
 		}
 
 		pud_p = (pudval_t *)early_dynamic_pgts[next_early_pgt++];
-		for (i = 0; i < PTRS_PER_PUD; i++)
-			pud_p[i] = 0;
+		memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
 		*pgd_p = (pgdval_t)pud_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
 	}
 	pud_p += pud_index(address);
@@ -97,8 +90,7 @@
 		}
 
 		pmd_p = (pmdval_t *)early_dynamic_pgts[next_early_pgt++];
-		for (i = 0; i < PTRS_PER_PMD; i++)
-			pmd_p[i] = 0;
+		memset(pmd_p, 0, sizeof(*pmd_p) * PTRS_PER_PMD);
 		*pud_p = (pudval_t)pmd_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
 	}
 	pmd = (physaddr & PMD_MASK) + early_pmd_flags;

diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 6bc9ae2..54cdbd2 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S

@@ -19,7 +19,7 @@
 #include <asm/setup.h>
 #include <asm/processor-flags.h>
 #include <asm/msr-index.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/percpu.h>
 #include <asm/nops.h>
 #include <asm/bootparam.h>
@@ -389,6 +389,12 @@
 	/* Make changes effective */
 	wrmsr
 
+	/*
+	 * And make sure that all the mappings we set up have NX set from
+	 * the beginning.
+	 */
+	orl $(1 << (_PAGE_BIT_NX - 32)), pa(__supported_pte_mask + 4)
+
 enable_paging:
 
 /*

diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index ffdc0e8..22fbf9d 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S

@@ -38,7 +38,6 @@
 #define pud_index(x)	(((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
 
 L4_PAGE_OFFSET = pgd_index(__PAGE_OFFSET)
-L3_PAGE_OFFSET = pud_index(__PAGE_OFFSET)
 L4_START_KERNEL = pgd_index(__START_KERNEL_map)
 L3_START_KERNEL = pud_index(__START_KERNEL_map)
 
@@ -76,9 +75,7 @@
 	subq	$_text - __START_KERNEL_map, %rbp
 
 	/* Is the address not 2M aligned? */
-	movq	%rbp, %rax
-	andl	$~PMD_PAGE_MASK, %eax
-	testl	%eax, %eax
+	testl	$~PMD_PAGE_MASK, %ebp
 	jnz	bad_address
 
 	/*

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index b8e6ff5..be0ebbb 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c

@@ -12,6 +12,7 @@
 #include <linux/pm.h>
 #include <linux/io.h>
 
+#include <asm/cpufeature.h>
 #include <asm/irqdomain.h>
 #include <asm/fixmap.h>
 #include <asm/hpet.h>

diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 44256a6..ed15cd48 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c

@@ -750,9 +750,7 @@
 int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
 {
 	int err;
-#ifdef CONFIG_DEBUG_RODATA
 	char opc[BREAK_INSTR_SIZE];
-#endif /* CONFIG_DEBUG_RODATA */
 
 	bpt->type = BP_BREAKPOINT;
 	err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr,
@@ -761,7 +759,6 @@
 		return err;
 	err = probe_kernel_write((char *)bpt->bpt_addr,
 				 arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE);
-#ifdef CONFIG_DEBUG_RODATA
 	if (!err)
 		return err;
 	/*
@@ -778,13 +775,12 @@
 	if (memcmp(opc, arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE))
 		return -EINVAL;
 	bpt->type = BP_POKE_BREAKPOINT;
-#endif /* CONFIG_DEBUG_RODATA */
+
 	return err;
 }
 
 int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt)
 {
-#ifdef CONFIG_DEBUG_RODATA
 	int err;
 	char opc[BREAK_INSTR_SIZE];
 
@@ -801,8 +797,8 @@
 	if (err || memcmp(opc, bpt->saved_instr, BREAK_INSTR_SIZE))
 		goto knl_write;
 	return err;
+
 knl_write:
-#endif /* CONFIG_DEBUG_RODATA */
 	return probe_kernel_write((char *)bpt->bpt_addr,
 				  (char *)bpt->saved_instr, BREAK_INSTR_SIZE);
 }

diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 1deffe6..0f05dee 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c

@@ -988,7 +988,7 @@
 		 * In case the user-specified fault handler returned
 		 * zero, try to fix up.
 		 */
-		if (fixup_exception(regs))
+		if (fixup_exception(regs, trapnr))
 			return 1;
 
 		/*

diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S
index 87e1762..ed48a9f 100644
--- a/arch/x86/kernel/mcount_64.S
+++ b/arch/x86/kernel/mcount_64.S

@@ -168,12 +168,14 @@
 	restore_mcount_regs
 
 	/*
-	 * The copied trampoline must call ftrace_return as it
+	 * The copied trampoline must call ftrace_epilogue as it
 	 * still may need to call the function graph tracer.
+	 *
+	 * The code up to this label is copied into trampolines so
+	 * think twice before adding any new code or changing the
+	 * layout here.
 	 */
-GLOBAL(ftrace_caller_end)
-
-GLOBAL(ftrace_return)
+GLOBAL(ftrace_epilogue)
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 GLOBAL(ftrace_graph_call)
@@ -244,14 +246,14 @@
 	popfq
 
 	/*
-	 * As this jmp to ftrace_return can be a short jump
+	 * As this jmp to ftrace_epilogue can be a short jump
 	 * it must not be copied into the trampoline.
 	 * The trampoline will add the code to jump
 	 * to the return.
 	 */
 GLOBAL(ftrace_regs_caller_end)
 
-	jmp ftrace_return
+	jmp ftrace_epilogue
 
 END(ftrace_regs_caller)
 

diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 30ca760..97340f2 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c

@@ -408,7 +408,7 @@
 	processor.cpuflag = CPU_ENABLED;
 	processor.cpufeature = (boot_cpu_data.x86 << 8) |
 	    (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
-	processor.featureflag = boot_cpu_data.x86_capability[0];
+	processor.featureflag = boot_cpu_data.x86_capability[CPUID_1_EDX];
 	processor.reserved[0] = 0;
 	processor.reserved[1] = 0;
 	for (i = 0; i < 2; i++) {

diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 64f9616..7f3550a 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c

@@ -40,7 +40,7 @@
 #include <linux/uaccess.h>
 #include <linux/gfp.h>
 
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
 #include <asm/msr.h>
 
 static struct class *msr_class;

diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 8a2cdd7..04b132a 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c

@@ -30,6 +30,7 @@
 #include <asm/nmi.h>
 #include <asm/x86_init.h>
 #include <asm/reboot.h>
+#include <asm/cache.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/nmi.h>
@@ -69,7 +70,7 @@
 
 static DEFINE_PER_CPU(struct nmi_stats, nmi_stats);
 
-static int ignore_nmis;
+static int ignore_nmis __read_mostly;
 
 int unknown_nmi_panic;
 /*

diff --git a/arch/x86/kernel/pmem.c b/arch/x86/kernel/pmem.c
index 14415af..92f7014 100644
--- a/arch/x86/kernel/pmem.c
+++ b/arch/x86/kernel/pmem.c

@@ -13,11 +13,11 @@
 
 static __init int register_e820_pmem(void)
 {
-	char *pmem = "Persistent Memory (legacy)";
 	struct platform_device *pdev;
 	int rc;
 
-	rc = walk_iomem_res(pmem, IORESOURCE_MEM, 0, -1, NULL, found);
+	rc = walk_iomem_res_desc(IORES_DESC_PERSISTENT_MEMORY_LEGACY,
+				 IORESOURCE_MEM, 0, -1, NULL, found);
 	if (rc <= 0)
 		return 0;
 

diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 9f7c21c..2915d54 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c

@@ -57,6 +57,9 @@
 	  */
 	.io_bitmap		= { [0 ... IO_BITMAP_LONGS] = ~0 },
 #endif
+#ifdef CONFIG_X86_32
+	.SYSENTER_stack_canary	= STACK_END_MAGIC,
+#endif
 };
 EXPORT_PER_CPU_SYMBOL(cpu_tss);
 
@@ -418,9 +421,9 @@
 	if (!current_set_polling_and_test()) {
 		trace_cpu_idle_rcuidle(1, smp_processor_id());
 		if (this_cpu_has(X86_BUG_CLFLUSH_MONITOR)) {
-			smp_mb(); /* quirk */
+			mb(); /* quirk */
 			clflush((void *)&current_thread_info()->flags);
-			smp_mb(); /* quirk */
+			mb(); /* quirk */
 		}
 
 		__monitor((void *)&current_thread_info()->flags, 0, 0);

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index d3d80e6..aa52c10 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c

@@ -152,21 +152,21 @@
 	.name	= "Kernel data",
 	.start	= 0,
 	.end	= 0,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
+	.flags	= IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
 };
 
 static struct resource code_resource = {
 	.name	= "Kernel code",
 	.start	= 0,
 	.end	= 0,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
+	.flags	= IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
 };
 
 static struct resource bss_resource = {
 	.name	= "Kernel bss",
 	.start	= 0,
 	.end	= 0,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
+	.flags	= IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
 };
 
 

diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index cb6282c..548ddf7 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c

@@ -61,7 +61,38 @@
 	regs->seg = GET_SEG(seg) | 3;			\
 } while (0)
 
-int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
+#ifdef CONFIG_X86_64
+/*
+ * If regs->ss will cause an IRET fault, change it.  Otherwise leave it
+ * alone.  Using this generally makes no sense unless
+ * user_64bit_mode(regs) would return true.
+ */
+static void force_valid_ss(struct pt_regs *regs)
+{
+	u32 ar;
+	asm volatile ("lar %[old_ss], %[ar]\n\t"
+		      "jz 1f\n\t"		/* If invalid: */
+		      "xorl %[ar], %[ar]\n\t"	/* set ar = 0 */
+		      "1:"
+		      : [ar] "=r" (ar)
+		      : [old_ss] "rm" ((u16)regs->ss));
+
+	/*
+	 * For a valid 64-bit user context, we need DPL 3, type
+	 * read-write data or read-write exp-down data, and S and P
+	 * set.  We can't use VERW because VERW doesn't check the
+	 * P bit.
+	 */
+	ar &= AR_DPL_MASK | AR_S | AR_P | AR_TYPE_MASK;
+	if (ar != (AR_DPL3 | AR_S | AR_P | AR_TYPE_RWDATA) &&
+	    ar != (AR_DPL3 | AR_S | AR_P | AR_TYPE_RWDATA_EXPDOWN))
+		regs->ss = __USER_DS;
+}
+#endif
+
+static int restore_sigcontext(struct pt_regs *regs,
+			      struct sigcontext __user *sc,
+			      unsigned long uc_flags)
 {
 	unsigned long buf_val;
 	void __user *buf;
@@ -94,15 +125,18 @@
 		COPY(r15);
 #endif /* CONFIG_X86_64 */
 
-#ifdef CONFIG_X86_32
 		COPY_SEG_CPL3(cs);
 		COPY_SEG_CPL3(ss);
-#else /* !CONFIG_X86_32 */
-		/* Kernel saves and restores only the CS segment register on signals,
-		 * which is the bare minimum needed to allow mixed 32/64-bit code.
-		 * App's signal handler can save/restore other segments if needed. */
-		COPY_SEG_CPL3(cs);
-#endif /* CONFIG_X86_32 */
+
+#ifdef CONFIG_X86_64
+		/*
+		 * Fix up SS if needed for the benefit of old DOSEMU and
+		 * CRIU.
+		 */
+		if (unlikely(!(uc_flags & UC_STRICT_RESTORE_SS) &&
+			     user_64bit_mode(regs)))
+			force_valid_ss(regs);
+#endif
 
 		get_user_ex(tmpflags, &sc->flags);
 		regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
@@ -165,6 +199,7 @@
 		put_user_ex(regs->cs, &sc->cs);
 		put_user_ex(0, &sc->gs);
 		put_user_ex(0, &sc->fs);
+		put_user_ex(regs->ss, &sc->ss);
 #endif /* CONFIG_X86_32 */
 
 		put_user_ex(fpstate, &sc->fpstate);
@@ -403,6 +438,21 @@
 	return 0;
 }
 #else /* !CONFIG_X86_32 */
+static unsigned long frame_uc_flags(struct pt_regs *regs)
+{
+	unsigned long flags;
+
+	if (cpu_has_xsave)
+		flags = UC_FP_XSTATE | UC_SIGCONTEXT_SS;
+	else
+		flags = UC_SIGCONTEXT_SS;
+
+	if (likely(user_64bit_mode(regs)))
+		flags |= UC_STRICT_RESTORE_SS;
+
+	return flags;
+}
+
 static int __setup_rt_frame(int sig, struct ksignal *ksig,
 			    sigset_t *set, struct pt_regs *regs)
 {
@@ -422,10 +472,7 @@
 
 	put_user_try {
 		/* Create the ucontext.  */
-		if (cpu_has_xsave)
-			put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
-		else
-			put_user_ex(0, &frame->uc.uc_flags);
+		put_user_ex(frame_uc_flags(regs), &frame->uc.uc_flags);
 		put_user_ex(0, &frame->uc.uc_link);
 		save_altstack_ex(&frame->uc.uc_stack, regs->sp);
 
@@ -459,10 +506,28 @@
 
 	regs->sp = (unsigned long)frame;
 
-	/* Set up the CS register to run signal handlers in 64-bit mode,
-	   even if the handler happens to be interrupting 32-bit code. */
+	/*
+	 * Set up the CS and SS registers to run signal handlers in
+	 * 64-bit mode, even if the handler happens to be interrupting
+	 * 32-bit or 16-bit code.
+	 *
+	 * SS is subtle.  In 64-bit mode, we don't need any particular
+	 * SS descriptor, but we do need SS to be valid.  It's possible
+	 * that the old SS is entirely bogus -- this can happen if the
+	 * signal we're trying to deliver is #GP or #SS caused by a bad
+	 * SS value.  We also have a compatbility issue here: DOSEMU
+	 * relies on the contents of the SS register indicating the
+	 * SS value at the time of the signal, even though that code in
+	 * DOSEMU predates sigreturn's ability to restore SS.  (DOSEMU
+	 * avoids relying on sigreturn to restore SS; instead it uses
+	 * a trampoline.)  So we do our best: if the old SS was valid,
+	 * we keep it.  Otherwise we replace it.
+	 */
 	regs->cs = __USER_CS;
 
+	if (unlikely(regs->ss != __USER_DS))
+		force_valid_ss(regs);
+
 	return 0;
 }
 #endif /* CONFIG_X86_32 */
@@ -489,10 +554,7 @@
 
 	put_user_try {
 		/* Create the ucontext.  */
-		if (cpu_has_xsave)
-			put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
-		else
-			put_user_ex(0, &frame->uc.uc_flags);
+		put_user_ex(frame_uc_flags(regs), &frame->uc.uc_flags);
 		put_user_ex(0, &frame->uc.uc_link);
 		compat_save_altstack_ex(&frame->uc.uc_stack, regs->sp);
 		put_user_ex(0, &frame->uc.uc__pad0);
@@ -554,7 +616,11 @@
 
 	set_current_blocked(&set);
 
-	if (restore_sigcontext(regs, &frame->sc))
+	/*
+	 * x86_32 has no uc_flags bits relevant to restore_sigcontext.
+	 * Save a few cycles by skipping the __get_user.
+	 */
+	if (restore_sigcontext(regs, &frame->sc, 0))
 		goto badframe;
 	return regs->ax;
 
@@ -570,16 +636,19 @@
 	struct pt_regs *regs = current_pt_regs();
 	struct rt_sigframe __user *frame;
 	sigset_t set;
+	unsigned long uc_flags;
 
 	frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long));
 	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
 		goto badframe;
 	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
 		goto badframe;
+	if (__get_user(uc_flags, &frame->uc.uc_flags))
+		goto badframe;
 
 	set_current_blocked(&set);
 
-	if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
+	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, uc_flags))
 		goto badframe;
 
 	if (restore_altstack(&frame->uc.uc_stack))
@@ -692,12 +761,15 @@
 
 static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs)
 {
-#if defined(CONFIG_X86_32) || !defined(CONFIG_X86_64)
+#ifdef CONFIG_X86_64
+	if (is_ia32_task())
+		return __NR_ia32_restart_syscall;
+#endif
+#ifdef CONFIG_X86_X32_ABI
+	return __NR_restart_syscall | (regs->orig_ax & __X32_SYSCALL_BIT);
+#else
 	return __NR_restart_syscall;
-#else /* !CONFIG_X86_32 && CONFIG_X86_64 */
-	return test_thread_flag(TIF_IA32) ? __NR_ia32_restart_syscall :
-		__NR_restart_syscall | (regs->orig_ax & __X32_SYSCALL_BIT);
-#endif /* CONFIG_X86_32 || !CONFIG_X86_64 */
+#endif
 }
 
 /*
@@ -763,6 +835,7 @@
 	struct pt_regs *regs = current_pt_regs();
 	struct rt_sigframe_x32 __user *frame;
 	sigset_t set;
+	unsigned long uc_flags;
 
 	frame = (struct rt_sigframe_x32 __user *)(regs->sp - 8);
 
@@ -770,10 +843,12 @@
 		goto badframe;
 	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
 		goto badframe;
+	if (__get_user(uc_flags, &frame->uc.uc_flags))
+		goto badframe;
 
 	set_current_blocked(&set);
 
-	if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
+	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, uc_flags))
 		goto badframe;
 
 	if (compat_restore_altstack(&frame->uc.uc_stack))

diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 24d57f7..643dbdc 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c

@@ -97,6 +97,14 @@
 DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
 EXPORT_PER_CPU_SYMBOL(cpu_info);
 
+/* Logical package management. We might want to allocate that dynamically */
+static int *physical_to_logical_pkg __read_mostly;
+static unsigned long *physical_package_map __read_mostly;;
+static unsigned long *logical_package_map  __read_mostly;
+static unsigned int max_physical_pkg_id __read_mostly;
+unsigned int __max_logical_packages __read_mostly;
+EXPORT_SYMBOL(__max_logical_packages);
+
 static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
 {
 	unsigned long flags;
@@ -248,7 +256,98 @@
 	x86_cpuinit.setup_percpu_clockev();
 
 	wmb();
-	cpu_startup_entry(CPUHP_ONLINE);
+	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
+}
+
+int topology_update_package_map(unsigned int apicid, unsigned int cpu)
+{
+	unsigned int new, pkg = apicid >> boot_cpu_data.x86_coreid_bits;
+
+	/* Called from early boot ? */
+	if (!physical_package_map)
+		return 0;
+
+	if (pkg >= max_physical_pkg_id)
+		return -EINVAL;
+
+	/* Set the logical package id */
+	if (test_and_set_bit(pkg, physical_package_map))
+		goto found;
+
+	if (pkg < __max_logical_packages) {
+		set_bit(pkg, logical_package_map);
+		physical_to_logical_pkg[pkg] = pkg;
+		goto found;
+	}
+	new = find_first_zero_bit(logical_package_map, __max_logical_packages);
+	if (new >= __max_logical_packages) {
+		physical_to_logical_pkg[pkg] = -1;
+		pr_warn("APIC(%x) Package %u exceeds logical package map\n",
+			apicid, pkg);
+		return -ENOSPC;
+	}
+	set_bit(new, logical_package_map);
+	pr_info("APIC(%x) Converting physical %u to logical package %u\n",
+		apicid, pkg, new);
+	physical_to_logical_pkg[pkg] = new;
+
+found:
+	cpu_data(cpu).logical_proc_id = physical_to_logical_pkg[pkg];
+	return 0;
+}
+
+/**
+ * topology_phys_to_logical_pkg - Map a physical package id to a logical
+ *
+ * Returns logical package id or -1 if not found
+ */
+int topology_phys_to_logical_pkg(unsigned int phys_pkg)
+{
+	if (phys_pkg >= max_physical_pkg_id)
+		return -1;
+	return physical_to_logical_pkg[phys_pkg];
+}
+EXPORT_SYMBOL(topology_phys_to_logical_pkg);
+
+static void __init smp_init_package_map(void)
+{
+	unsigned int ncpus, cpu;
+	size_t size;
+
+	/*
+	 * Today neither Intel nor AMD support heterogenous systems. That
+	 * might change in the future....
+	 */
+	ncpus = boot_cpu_data.x86_max_cores * smp_num_siblings;
+	__max_logical_packages = DIV_ROUND_UP(nr_cpu_ids, ncpus);
+
+	/*
+	 * Possibly larger than what we need as the number of apic ids per
+	 * package can be smaller than the actual used apic ids.
+	 */
+	max_physical_pkg_id = DIV_ROUND_UP(MAX_LOCAL_APIC, ncpus);
+	size = max_physical_pkg_id * sizeof(unsigned int);
+	physical_to_logical_pkg = kmalloc(size, GFP_KERNEL);
+	memset(physical_to_logical_pkg, 0xff, size);
+	size = BITS_TO_LONGS(max_physical_pkg_id) * sizeof(unsigned long);
+	physical_package_map = kzalloc(size, GFP_KERNEL);
+	size = BITS_TO_LONGS(__max_logical_packages) * sizeof(unsigned long);
+	logical_package_map = kzalloc(size, GFP_KERNEL);
+
+	pr_info("Max logical packages: %u\n", __max_logical_packages);
+
+	for_each_present_cpu(cpu) {
+		unsigned int apicid = apic->cpu_present_to_apicid(cpu);
+
+		if (apicid == BAD_APICID || !apic->apic_id_valid(apicid))
+			continue;
+		if (!topology_update_package_map(apicid, cpu))
+			continue;
+		pr_warn("CPU %u APICId %x disabled\n", cpu, apicid);
+		per_cpu(x86_bios_cpu_apicid, cpu) = BAD_APICID;
+		set_cpu_possible(cpu, false);
+		set_cpu_present(cpu, false);
+	}
 }
 
 void __init smp_store_boot_cpu_info(void)
@@ -258,6 +357,7 @@
 
 	*c = boot_cpu_data;
 	c->cpu_index = id;
+	smp_init_package_map();
 }
 
 /*

diff --git a/arch/x86/kernel/test_nx.c b/arch/x86/kernel/test_nx.c
index 3f92ce0..27538f1 100644
--- a/arch/x86/kernel/test_nx.c
+++ b/arch/x86/kernel/test_nx.c

@@ -142,7 +142,6 @@
 	 * by the error message
 	 */
 
-#ifdef CONFIG_DEBUG_RODATA
 	/* Test 3: Check if the .rodata section is executable */
 	if (rodata_test_data != 0xC3) {
 		printk(KERN_ERR "test_nx: .rodata marker has invalid value\n");
@@ -151,7 +150,6 @@
 		printk(KERN_ERR "test_nx: .rodata section is executable\n");
 		ret = -ENODEV;
 	}
-#endif
 
 #if 0
 	/* Test 4: Check if the .data section of a module is executable */

diff --git a/arch/x86/kernel/test_rodata.c b/arch/x86/kernel/test_rodata.c
index 5ecbfe5..cb4a01b 100644
--- a/arch/x86/kernel/test_rodata.c
+++ b/arch/x86/kernel/test_rodata.c

@@ -76,5 +76,5 @@
 }
 
 MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("Testcase for the DEBUG_RODATA infrastructure");
+MODULE_DESCRIPTION("Testcase for marking rodata as read-only");
 MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>");

diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index ade185a..06cbe25 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c

@@ -83,32 +83,18 @@
 DECLARE_BITMAP(used_vectors, NR_VECTORS);
 EXPORT_SYMBOL_GPL(used_vectors);
 
-static inline void conditional_sti(struct pt_regs *regs)
+static inline void cond_local_irq_enable(struct pt_regs *regs)
 {
 	if (regs->flags & X86_EFLAGS_IF)
 		local_irq_enable();
 }
 
-static inline void preempt_conditional_sti(struct pt_regs *regs)
-{
-	preempt_count_inc();
-	if (regs->flags & X86_EFLAGS_IF)
-		local_irq_enable();
-}
-
-static inline void conditional_cli(struct pt_regs *regs)
+static inline void cond_local_irq_disable(struct pt_regs *regs)
 {
 	if (regs->flags & X86_EFLAGS_IF)
 		local_irq_disable();
 }
 
-static inline void preempt_conditional_cli(struct pt_regs *regs)
-{
-	if (regs->flags & X86_EFLAGS_IF)
-		local_irq_disable();
-	preempt_count_dec();
-}
-
 void ist_enter(struct pt_regs *regs)
 {
 	if (user_mode(regs)) {
@@ -199,7 +185,7 @@
 	}
 
 	if (!user_mode(regs)) {
-		if (!fixup_exception(regs)) {
+		if (!fixup_exception(regs, trapnr)) {
 			tsk->thread.error_code = error_code;
 			tsk->thread.trap_nr = trapnr;
 			die(str, regs, error_code);
@@ -262,7 +248,6 @@
 	tsk->thread.error_code = error_code;
 	tsk->thread.trap_nr = trapnr;
 
-#ifdef CONFIG_X86_64
 	if (show_unhandled_signals && unhandled_signal(tsk, signr) &&
 	    printk_ratelimit()) {
 		pr_info("%s[%d] trap %s ip:%lx sp:%lx error:%lx",
@@ -271,7 +256,6 @@
 		print_vma_addr(" in ", regs->ip);
 		pr_cont("\n");
 	}
-#endif
 
 	force_sig_info(signr, info ?: SEND_SIG_PRIV, tsk);
 }
@@ -286,7 +270,7 @@
 
 	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) !=
 			NOTIFY_STOP) {
-		conditional_sti(regs);
+		cond_local_irq_enable(regs);
 		do_trap(trapnr, signr, str, regs, error_code,
 			fill_trap_info(regs, signr, trapnr, &info));
 	}
@@ -368,7 +352,7 @@
 	if (notify_die(DIE_TRAP, "bounds", regs, error_code,
 			X86_TRAP_BR, SIGSEGV) == NOTIFY_STOP)
 		return;
-	conditional_sti(regs);
+	cond_local_irq_enable(regs);
 
 	if (!user_mode(regs))
 		die("bounds", regs, error_code);
@@ -443,7 +427,7 @@
 	struct task_struct *tsk;
 
 	RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
-	conditional_sti(regs);
+	cond_local_irq_enable(regs);
 
 	if (v8086_mode(regs)) {
 		local_irq_enable();
@@ -453,7 +437,7 @@
 
 	tsk = current;
 	if (!user_mode(regs)) {
-		if (fixup_exception(regs))
+		if (fixup_exception(regs, X86_TRAP_GP))
 			return;
 
 		tsk->thread.error_code = error_code;
@@ -517,9 +501,11 @@
 	 * as we may switch to the interrupt stack.
 	 */
 	debug_stack_usage_inc();
-	preempt_conditional_sti(regs);
+	preempt_disable();
+	cond_local_irq_enable(regs);
 	do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL);
-	preempt_conditional_cli(regs);
+	cond_local_irq_disable(regs);
+	preempt_enable_no_resched();
 	debug_stack_usage_dec();
 exit:
 	ist_exit(regs);
@@ -571,6 +557,29 @@
 NOKPROBE_SYMBOL(fixup_bad_iret);
 #endif
 
+static bool is_sysenter_singlestep(struct pt_regs *regs)
+{
+	/*
+	 * We don't try for precision here.  If we're anywhere in the region of
+	 * code that can be single-stepped in the SYSENTER entry path, then
+	 * assume that this is a useless single-step trap due to SYSENTER
+	 * being invoked with TF set.  (We don't know in advance exactly
+	 * which instructions will be hit because BTF could plausibly
+	 * be set.)
+	 */
+#ifdef CONFIG_X86_32
+	return (regs->ip - (unsigned long)__begin_SYSENTER_singlestep_region) <
+		(unsigned long)__end_SYSENTER_singlestep_region -
+		(unsigned long)__begin_SYSENTER_singlestep_region;
+#elif defined(CONFIG_IA32_EMULATION)
+	return (regs->ip - (unsigned long)entry_SYSENTER_compat) <
+		(unsigned long)__end_entry_SYSENTER_compat -
+		(unsigned long)entry_SYSENTER_compat;
+#else
+	return false;
+#endif
+}
+
 /*
  * Our handling of the processor debug registers is non-trivial.
  * We do not clear them on entry and exit from the kernel. Therefore
@@ -605,11 +614,42 @@
 	ist_enter(regs);
 
 	get_debugreg(dr6, 6);
+	/*
+	 * The Intel SDM says:
+	 *
+	 *   Certain debug exceptions may clear bits 0-3. The remaining
+	 *   contents of the DR6 register are never cleared by the
+	 *   processor. To avoid confusion in identifying debug
+	 *   exceptions, debug handlers should clear the register before
+	 *   returning to the interrupted task.
+	 *
+	 * Keep it simple: clear DR6 immediately.
+	 */
+	set_debugreg(0, 6);
 
 	/* Filter out all the reserved bits which are preset to 1 */
 	dr6 &= ~DR6_RESERVED;
 
 	/*
+	 * The SDM says "The processor clears the BTF flag when it
+	 * generates a debug exception."  Clear TIF_BLOCKSTEP to keep
+	 * TIF_BLOCKSTEP in sync with the hardware BTF flag.
+	 */
+	clear_tsk_thread_flag(tsk, TIF_BLOCKSTEP);
+
+	if (unlikely(!user_mode(regs) && (dr6 & DR_STEP) &&
+		     is_sysenter_singlestep(regs))) {
+		dr6 &= ~DR_STEP;
+		if (!dr6)
+			goto exit;
+		/*
+		 * else we might have gotten a single-step trap and hit a
+		 * watchpoint at the same time, in which case we should fall
+		 * through and handle the watchpoint.
+		 */
+	}
+
+	/*
 	 * If dr6 has no reason to give us about the origin of this trap,
 	 * then it's very likely the result of an icebp/int01 trap.
 	 * User wants a sigtrap for that.
@@ -617,18 +657,10 @@
 	if (!dr6 && user_mode(regs))
 		user_icebp = 1;
 
-	/* Catch kmemcheck conditions first of all! */
+	/* Catch kmemcheck conditions! */
 	if ((dr6 & DR_STEP) && kmemcheck_trap(regs))
 		goto exit;
 
-	/* DR6 may or may not be cleared by the CPU */
-	set_debugreg(0, 6);
-
-	/*
-	 * The processor cleared BTF, so don't mark that we need it set.
-	 */
-	clear_tsk_thread_flag(tsk, TIF_BLOCKSTEP);
-
 	/* Store the virtualized DR6 value */
 	tsk->thread.debugreg6 = dr6;
 
@@ -648,24 +680,25 @@
 	debug_stack_usage_inc();
 
 	/* It's safe to allow irq's after DR6 has been saved */
-	preempt_conditional_sti(regs);
+	preempt_disable();
+	cond_local_irq_enable(regs);
 
 	if (v8086_mode(regs)) {
 		handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code,
 					X86_TRAP_DB);
-		preempt_conditional_cli(regs);
+		cond_local_irq_disable(regs);
+		preempt_enable_no_resched();
 		debug_stack_usage_dec();
 		goto exit;
 	}
 
-	/*
-	 * Single-stepping through system calls: ignore any exceptions in
-	 * kernel space, but re-enable TF when returning to user mode.
-	 *
-	 * We already checked v86 mode above, so we can check for kernel mode
-	 * by just checking the CPL of CS.
-	 */
-	if ((dr6 & DR_STEP) && !user_mode(regs)) {
+	if (WARN_ON_ONCE((dr6 & DR_STEP) && !user_mode(regs))) {
+		/*
+		 * Historical junk that used to handle SYSENTER single-stepping.
+		 * This should be unreachable now.  If we survive for a while
+		 * without anyone hitting this warning, we'll turn this into
+		 * an oops.
+		 */
 		tsk->thread.debugreg6 &= ~DR_STEP;
 		set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
 		regs->flags &= ~X86_EFLAGS_TF;
@@ -673,10 +706,19 @@
 	si_code = get_si_code(tsk->thread.debugreg6);
 	if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
 		send_sigtrap(tsk, regs, error_code, si_code);
-	preempt_conditional_cli(regs);
+	cond_local_irq_disable(regs);
+	preempt_enable_no_resched();
 	debug_stack_usage_dec();
 
 exit:
+#if defined(CONFIG_X86_32)
+	/*
+	 * This is the most likely code path that involves non-trivial use
+	 * of the SYSENTER stack.  Check that we haven't overrun it.
+	 */
+	WARN(this_cpu_read(cpu_tss.SYSENTER_stack_canary) != STACK_END_MAGIC,
+	     "Overran or corrupted SYSENTER stack\n");
+#endif
 	ist_exit(regs);
 }
 NOKPROBE_SYMBOL(do_debug);
@@ -696,10 +738,10 @@
 
 	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, SIGFPE) == NOTIFY_STOP)
 		return;
-	conditional_sti(regs);
+	cond_local_irq_enable(regs);
 
 	if (!user_mode(regs)) {
-		if (!fixup_exception(regs)) {
+		if (!fixup_exception(regs, trapnr)) {
 			task->thread.error_code = error_code;
 			task->thread.trap_nr = trapnr;
 			die(str, regs, error_code);
@@ -743,20 +785,19 @@
 dotraplinkage void
 do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
 {
-	conditional_sti(regs);
+	cond_local_irq_enable(regs);
 }
 
 dotraplinkage void
 do_device_not_available(struct pt_regs *regs, long error_code)
 {
 	RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
-	BUG_ON(use_eager_fpu());
 
 #ifdef CONFIG_MATH_EMULATION
-	if (read_cr0() & X86_CR0_EM) {
+	if (!boot_cpu_has(X86_FEATURE_FPU) && (read_cr0() & X86_CR0_EM)) {
 		struct math_emu_info info = { };
 
-		conditional_sti(regs);
+		cond_local_irq_enable(regs);
 
 		info.regs = regs;
 		math_emulate(&info);
@@ -765,7 +806,7 @@
 #endif
 	fpu__restore(&current->thread.fpu); /* interrupts still off */
 #ifdef CONFIG_X86_32
-	conditional_sti(regs);
+	cond_local_irq_enable(regs);
 #endif
 }
 NOKPROBE_SYMBOL(do_device_not_available);
@@ -868,7 +909,7 @@
 #endif
 
 #ifdef CONFIG_X86_32
-	set_system_trap_gate(IA32_SYSCALL_VECTOR, entry_INT80_32);
+	set_system_intr_gate(IA32_SYSCALL_VECTOR, entry_INT80_32);
 	set_bit(IA32_SYSCALL_VECTOR, used_vectors);
 #endif
 

diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 3d743da..5638044 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c

@@ -43,6 +43,11 @@
 
 int tsc_clocksource_reliable;
 
+static u32 art_to_tsc_numerator;
+static u32 art_to_tsc_denominator;
+static u64 art_to_tsc_offset;
+struct clocksource *art_related_clocksource;
+
 /*
  * Use a ring-buffer like data structure, where a writer advances the head by
  * writing a new data entry and a reader advances the tail when it observes a
@@ -964,6 +969,37 @@
 
 #endif /* CONFIG_CPU_FREQ */
 
+#define ART_CPUID_LEAF (0x15)
+#define ART_MIN_DENOMINATOR (1)
+
+
+/*
+ * If ART is present detect the numerator:denominator to convert to TSC
+ */
+static void detect_art(void)
+{
+	unsigned int unused[2];
+
+	if (boot_cpu_data.cpuid_level < ART_CPUID_LEAF)
+		return;
+
+	cpuid(ART_CPUID_LEAF, &art_to_tsc_denominator,
+	      &art_to_tsc_numerator, unused, unused+1);
+
+	/* Don't enable ART in a VM, non-stop TSC required */
+	if (boot_cpu_has(X86_FEATURE_HYPERVISOR) ||
+	    !boot_cpu_has(X86_FEATURE_NONSTOP_TSC) ||
+	    art_to_tsc_denominator < ART_MIN_DENOMINATOR)
+		return;
+
+	if (rdmsrl_safe(MSR_IA32_TSC_ADJUST, &art_to_tsc_offset))
+		return;
+
+	/* Make this sticky over multiple CPU init calls */
+	setup_force_cpu_cap(X86_FEATURE_ART);
+}
+
+
 /* clocksource code */
 
 static struct clocksource clocksource_tsc;
@@ -1071,6 +1107,25 @@
 	return 0;
 }
 
+/*
+ * Convert ART to TSC given numerator/denominator found in detect_art()
+ */
+struct system_counterval_t convert_art_to_tsc(cycle_t art)
+{
+	u64 tmp, res, rem;
+
+	rem = do_div(art, art_to_tsc_denominator);
+
+	res = art * art_to_tsc_numerator;
+	tmp = rem * art_to_tsc_numerator;
+
+	do_div(tmp, art_to_tsc_denominator);
+	res += tmp + art_to_tsc_offset;
+
+	return (struct system_counterval_t) {.cs = art_related_clocksource,
+			.cycles = res};
+}
+EXPORT_SYMBOL(convert_art_to_tsc);
 
 static void tsc_refine_calibration_work(struct work_struct *work);
 static DECLARE_DELAYED_WORK(tsc_irqwork, tsc_refine_calibration_work);
@@ -1142,6 +1197,8 @@
 		(unsigned long)tsc_khz % 1000);
 
 out:
+	if (boot_cpu_has(X86_FEATURE_ART))
+		art_related_clocksource = &clocksource_tsc;
 	clocksource_register_khz(&clocksource_tsc, tsc_khz);
 }
 
@@ -1235,6 +1292,8 @@
 		mark_tsc_unstable("TSCs unsynchronized");
 
 	check_system_tsc_reliable();
+
+	detect_art();
 }
 
 #ifdef CONFIG_SMP
@@ -1246,14 +1305,14 @@
  */
 unsigned long calibrate_delay_is_known(void)
 {
-	int i, cpu = smp_processor_id();
+	int sibling, cpu = smp_processor_id();
 
 	if (!tsc_disabled && !cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC))
 		return 0;
 
-	for_each_online_cpu(i)
-		if (cpu_data(i).phys_proc_id == cpu_data(cpu).phys_proc_id)
-			return cpu_data(i).loops_per_jiffy;
+	sibling = cpumask_any_but(topology_core_cpumask(cpu), cpu);
+	if (sibling < nr_cpu_ids)
+		return cpu_data(sibling).loops_per_jiffy;
 	return 0;
 }
 #endif

diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S
index 07efb35..014ea59 100644
--- a/arch/x86/kernel/verify_cpu.S
+++ b/arch/x86/kernel/verify_cpu.S

@@ -30,7 +30,7 @@
  * 	appropriately. Either display a message or halt.
  */
 
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/msr-index.h>
 
 verify_cpu:

diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index e574b85..3dce1ca 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c

@@ -362,7 +362,7 @@
 	/* make room for real-mode segments */
 	tsk->thread.sp0 += 16;
 
-	if (static_cpu_has_safe(X86_FEATURE_SEP))
+	if (static_cpu_has(X86_FEATURE_SEP))
 		tsk->thread.sysenter_cs = 0;
 
 	load_sp0(tss, &tsk->thread);

diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 74e4bf1..5af9958 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S

@@ -41,29 +41,28 @@
 jiffies_64 = jiffies;
 #endif
 
-#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA)
+#if defined(CONFIG_X86_64)
 /*
- * On 64-bit, align RODATA to 2MB so that even with CONFIG_DEBUG_RODATA
- * we retain large page mappings for boundaries spanning kernel text, rodata
- * and data sections.
+ * On 64-bit, align RODATA to 2MB so we retain large page mappings for
+ * boundaries spanning kernel text, rodata and data sections.
  *
  * However, kernel identity mappings will have different RWX permissions
  * to the pages mapping to text and to the pages padding (which are freed) the
  * text section. Hence kernel identity mappings will be broken to smaller
  * pages. For 64-bit, kernel text and kernel identity mappings are different,
- * so we can enable protection checks that come with CONFIG_DEBUG_RODATA,
- * as well as retain 2MB large page mappings for kernel text.
+ * so we can enable protection checks as well as retain 2MB large page
+ * mappings for kernel text.
  */
-#define X64_ALIGN_DEBUG_RODATA_BEGIN	. = ALIGN(HPAGE_SIZE);
+#define X64_ALIGN_RODATA_BEGIN	. = ALIGN(HPAGE_SIZE);
 
-#define X64_ALIGN_DEBUG_RODATA_END				\
+#define X64_ALIGN_RODATA_END					\
 		. = ALIGN(HPAGE_SIZE);				\
 		__end_rodata_hpage_align = .;
 
 #else
 
-#define X64_ALIGN_DEBUG_RODATA_BEGIN
-#define X64_ALIGN_DEBUG_RODATA_END
+#define X64_ALIGN_RODATA_BEGIN
+#define X64_ALIGN_RODATA_END
 
 #endif
 
@@ -112,13 +111,11 @@
 
 	EXCEPTION_TABLE(16) :text = 0x9090
 
-#if defined(CONFIG_DEBUG_RODATA)
 	/* .text should occupy whole number of pages */
 	. = ALIGN(PAGE_SIZE);
-#endif
-	X64_ALIGN_DEBUG_RODATA_BEGIN
+	X64_ALIGN_RODATA_BEGIN
 	RO_DATA(PAGE_SIZE)
-	X64_ALIGN_DEBUG_RODATA_END
+	X64_ALIGN_RODATA_END
 
 	/* Data */
 	.data : AT(ADDR(.data) - LOAD_OFFSET) {
@@ -195,6 +192,17 @@
 	:init
 #endif
 
+	/*
+	 * Section for code used exclusively before alternatives are run. All
+	 * references to such code must be patched out by alternatives, normally
+	 * by using X86_FEATURE_ALWAYS CPU feature bit.
+	 *
+	 * See static_cpu_has() for an example.
+	 */
+	.altinstr_aux : AT(ADDR(.altinstr_aux) - LOAD_OFFSET) {
+		*(.altinstr_aux)
+	}
+
 	INIT_DATA_SECTION(16)
 
 	.x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) {

diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index a0695be..cd05942 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c

@@ -37,6 +37,8 @@
 EXPORT_SYMBOL(_copy_from_user);
 EXPORT_SYMBOL(_copy_to_user);
 
+EXPORT_SYMBOL_GPL(memcpy_mcsafe);
+
 EXPORT_SYMBOL(copy_page);
 EXPORT_SYMBOL(clear_page);
 

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 1505587..b9b09fe 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c

@@ -650,10 +650,10 @@
 	u16 sel;
 
 	la = seg_base(ctxt, addr.seg) + addr.ea;
-	*linear = la;
 	*max_size = 0;
 	switch (mode) {
 	case X86EMUL_MODE_PROT64:
+		*linear = la;
 		if (is_noncanonical_address(la))
 			goto bad;
 
@@ -662,6 +662,7 @@
 			goto bad;
 		break;
 	default:
+		*linear = la = (u32)la;
 		usable = ctxt->ops->get_segment(ctxt, &sel, &desc, NULL,
 						addr.seg);
 		if (!usable)
@@ -689,7 +690,6 @@
 			if (size > *max_size)
 				goto bad;
 		}
-		la &= (u32)-1;
 		break;
 	}
 	if (insn_aligned(ctxt, size) && ((la & (size - 1)) != 0))

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 36591fa..3a045f3 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c

@@ -1195,7 +1195,7 @@
 static void apic_timer_expired(struct kvm_lapic *apic)
 {
 	struct kvm_vcpu *vcpu = apic->vcpu;
-	wait_queue_head_t *q = &vcpu->wq;
+	struct swait_queue_head *q = &vcpu->wq;
 	struct kvm_timer *ktimer = &apic->lapic_timer;
 
 	if (atomic_read(&apic->lapic_timer.pending))
@@ -1204,8 +1204,8 @@
 	atomic_inc(&apic->lapic_timer.pending);
 	kvm_set_pending_timer(vcpu);
 
-	if (waitqueue_active(q))
-		wake_up_interruptible(q);
+	if (swait_active(q))
+		swake_up(q);
 
 	if (apic_lvtt_tscdeadline(apic))
 		ktimer->expired_tscdeadline = ktimer->tscdeadline;

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 95a955d..1e7a49b 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c

@@ -3721,13 +3721,15 @@
 void
 reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
 {
+	bool uses_nx = context->nx || context->base_role.smep_andnot_wp;
+
 	/*
 	 * Passing "true" to the last argument is okay; it adds a check
 	 * on bit 8 of the SPTEs which KVM doesn't use anyway.
 	 */
 	__reset_rsvds_bits_mask(vcpu, &context->shadow_zero_check,
 				boot_cpu_data.x86_phys_bits,
-				context->shadow_root_level, context->nx,
+				context->shadow_root_level, uses_nx,
 				guest_cpuid_has_gbpages(vcpu), is_pse(vcpu),
 				true);
 }

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 6c9fed9..2ce4f05 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h

@@ -249,7 +249,7 @@
 			return ret;
 
 		kvm_vcpu_mark_page_dirty(vcpu, table_gfn);
-		walker->ptes[level] = pte;
+		walker->ptes[level - 1] = pte;
 	}
 	return 0;
 }

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index e2951b6..9bd8f44 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c

@@ -596,6 +596,8 @@
 	/* Support for PML */
 #define PML_ENTITY_NUM		512
 	struct page *pml_pg;
+
+	u64 current_tsc_ratio;
 };
 
 enum segment_cache_field {
@@ -1811,6 +1813,13 @@
 			return;
 		}
 		break;
+	case MSR_IA32_PEBS_ENABLE:
+		/* PEBS needs a quiescent period after being disabled (to write
+		 * a record).  Disabling PEBS through VMX MSR swapping doesn't
+		 * provide that period, so a CPU could write host's record into
+		 * guest's memory.
+		 */
+		wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
 	}
 
 	for (i = 0; i < m->nr; ++i)
@@ -1848,26 +1857,31 @@
 
 static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
 {
-	u64 guest_efer;
-	u64 ignore_bits;
+	u64 guest_efer = vmx->vcpu.arch.efer;
+	u64 ignore_bits = 0;
 
-	guest_efer = vmx->vcpu.arch.efer;
+	if (!enable_ept) {
+		/*
+		 * NX is needed to handle CR0.WP=1, CR4.SMEP=1.  Testing
+		 * host CPUID is more efficient than testing guest CPUID
+		 * or CR4.  Host SMEP is anyway a requirement for guest SMEP.
+		 */
+		if (boot_cpu_has(X86_FEATURE_SMEP))
+			guest_efer |= EFER_NX;
+		else if (!(guest_efer & EFER_NX))
+			ignore_bits |= EFER_NX;
+	}
 
 	/*
-	 * NX is emulated; LMA and LME handled by hardware; SCE meaningless
-	 * outside long mode
+	 * LMA and LME handled by hardware; SCE meaningless outside long mode.
 	 */
-	ignore_bits = EFER_NX | EFER_SCE;
+	ignore_bits |= EFER_SCE;
 #ifdef CONFIG_X86_64
 	ignore_bits |= EFER_LMA | EFER_LME;
 	/* SCE is meaningful only in long mode on Intel */
 	if (guest_efer & EFER_LMA)
 		ignore_bits &= ~(u64)EFER_SCE;
 #endif
-	guest_efer &= ~ignore_bits;
-	guest_efer |= host_efer & ignore_bits;
-	vmx->guest_msrs[efer_offset].data = guest_efer;
-	vmx->guest_msrs[efer_offset].mask = ~ignore_bits;
 
 	clear_atomic_switch_msr(vmx, MSR_EFER);
 
@@ -1878,16 +1892,21 @@
 	 */
 	if (cpu_has_load_ia32_efer ||
 	    (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX))) {
-		guest_efer = vmx->vcpu.arch.efer;
 		if (!(guest_efer & EFER_LMA))
 			guest_efer &= ~EFER_LME;
 		if (guest_efer != host_efer)
 			add_atomic_switch_msr(vmx, MSR_EFER,
 					      guest_efer, host_efer);
 		return false;
-	}
+	} else {
+		guest_efer &= ~ignore_bits;
+		guest_efer |= host_efer & ignore_bits;
 
-	return true;
+		vmx->guest_msrs[efer_offset].data = guest_efer;
+		vmx->guest_msrs[efer_offset].mask = ~ignore_bits;
+
+		return true;
+	}
 }
 
 static unsigned long segment_base(u16 selector)
@@ -2127,14 +2146,16 @@
 		rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
 		vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
 
-		/* Setup TSC multiplier */
-		if (cpu_has_vmx_tsc_scaling())
-			vmcs_write64(TSC_MULTIPLIER,
-				     vcpu->arch.tsc_scaling_ratio);
-
 		vmx->loaded_vmcs->cpu = cpu;
 	}
 
+	/* Setup TSC multiplier */
+	if (kvm_has_tsc_control &&
+	    vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio) {
+		vmx->current_tsc_ratio = vcpu->arch.tsc_scaling_ratio;
+		vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio);
+	}
+
 	vmx_vcpu_pi_load(vcpu, cpu);
 }
 

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4244c2b..eaf6ee8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c

@@ -6618,12 +6618,12 @@
 	 * KVM_DEBUGREG_WONT_EXIT again.
 	 */
 	if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) {
-		int i;
-
 		WARN_ON(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP);
 		kvm_x86_ops->sync_dirty_debug_regs(vcpu);
-		for (i = 0; i < KVM_NR_DB_REGS; i++)
-			vcpu->arch.eff_db[i] = vcpu->arch.db[i];
+		kvm_update_dr0123(vcpu);
+		kvm_update_dr6(vcpu);
+		kvm_update_dr7(vcpu);
+		vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
 	}
 
 	/*

diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 4ba229a..fd57d3a 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c

@@ -1520,12 +1520,6 @@
 	 */
 	reserve_top_address(lguest_data.reserve_mem);
 
-	/*
-	 * If we don't initialize the lock dependency checker now, it crashes
-	 * atomic_notifier_chain_register, then paravirt_disable_iospace.
-	 */
-	lockdep_init();
-
 	/* Hook in our special panic hypercall code. */
 	atomic_notifier_chain_register(&panic_notifier_list, &paniced);
 
@@ -1535,7 +1529,7 @@
 	 */
 	cpu_detect(&new_cpu_data);
 	/* head.S usually sets up the first capability word, so do it here. */
-	new_cpu_data.x86_capability[0] = cpuid_edx(1);
+	new_cpu_data.x86_capability[CPUID_1_EDX] = cpuid_edx(1);
 
 	/* Math is always hard! */
 	set_cpu_cap(&new_cpu_data, X86_FEATURE_FPU);

diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S
index a2fe51b..65be7cf 100644
--- a/arch/x86/lib/clear_page_64.S
+++ b/arch/x86/lib/clear_page_64.S

@@ -1,5 +1,5 @@
 #include <linux/linkage.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/alternative-asm.h>
 
 /*

diff --git a/arch/x86/lib/cmdline.c b/arch/x86/lib/cmdline.c
index 422db00..5cc78bf 100644
--- a/arch/x86/lib/cmdline.c
+++ b/arch/x86/lib/cmdline.c

@@ -21,12 +21,16 @@
  * @option: option string to look for
  *
  * Returns the position of that @option (starts counting with 1)
- * or 0 on not found.
+ * or 0 on not found.  @option will only be found if it is found
+ * as an entire word in @cmdline.  For instance, if @option="car"
+ * then a cmdline which contains "cart" will not match.
  */
-int cmdline_find_option_bool(const char *cmdline, const char *option)
+static int
+__cmdline_find_option_bool(const char *cmdline, int max_cmdline_size,
+			   const char *option)
 {
 	char c;
-	int len, pos = 0, wstart = 0;
+	int pos = 0, wstart = 0;
 	const char *opptr = NULL;
 	enum {
 		st_wordstart = 0,	/* Start of word/after whitespace */
@@ -37,11 +41,11 @@
 	if (!cmdline)
 		return -1;      /* No command line */
 
-	len = min_t(int, strlen(cmdline), COMMAND_LINE_SIZE);
-	if (!len)
-		return 0;
-
-	while (len--) {
+	/*
+	 * This 'pos' check ensures we do not overrun
+	 * a non-NULL-terminated 'cmdline'
+	 */
+	while (pos < max_cmdline_size) {
 		c = *(char *)cmdline++;
 		pos++;
 
@@ -58,18 +62,35 @@
 			/* fall through */
 
 		case st_wordcmp:
-			if (!*opptr)
+			if (!*opptr) {
+				/*
+				 * We matched all the way to the end of the
+				 * option we were looking for.  If the
+				 * command-line has a space _or_ ends, then
+				 * we matched!
+				 */
 				if (!c || myisspace(c))
 					return wstart;
-				else
-					state = st_wordskip;
-			else if (!c)
+				/*
+				 * We hit the end of the option, but _not_
+				 * the end of a word on the cmdline.  Not
+				 * a match.
+				 */
+			} else if (!c) {
+				/*
+				 * Hit the NULL terminator on the end of
+				 * cmdline.
+				 */
 				return 0;
-			else if (c != *opptr++)
-				state = st_wordskip;
-			else if (!len)		/* last word and is matching */
-				return wstart;
-			break;
+			} else if (c == *opptr++) {
+				/*
+				 * We are currently matching, so continue
+				 * to the next character on the cmdline.
+				 */
+				break;
+			}
+			state = st_wordskip;
+			/* fall through */
 
 		case st_wordskip:
 			if (!c)
@@ -82,3 +103,8 @@
 
 	return 0;	/* Buffer overrun */
 }
+
+int cmdline_find_option_bool(const char *cmdline, const char *option)
+{
+	return __cmdline_find_option_bool(cmdline, COMMAND_LINE_SIZE, option);
+}

diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S
index 009f982..24ef1c2 100644
--- a/arch/x86/lib/copy_page_64.S
+++ b/arch/x86/lib/copy_page_64.S

@@ -1,7 +1,7 @@
 /* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */
 
 #include <linux/linkage.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/alternative-asm.h>
 
 /*

diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index 27f89c7..2b0ef26 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S

@@ -10,7 +10,7 @@
 #include <asm/current.h>
 #include <asm/asm-offsets.h>
 #include <asm/thread_info.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/alternative-asm.h>
 #include <asm/asm.h>
 #include <asm/smap.h>

diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index e912b2f..2f07c29 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c

@@ -102,7 +102,7 @@
 		 * Use cpu_tss as a cacheline-aligned, seldomly
 		 * accessed per-cpu variable as the monitor target.
 		 */
-		__monitorx(this_cpu_ptr(&cpu_tss), 0, 0);
+		__monitorx(raw_cpu_ptr(&cpu_tss), 0, 0);
 
 		/*
 		 * AMD, like Intel, supports the EAX hint and EAX=0xf

diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index 16698bb..cbb8ee5 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S

@@ -1,7 +1,7 @@
 /* Copyright 2002 Andi Kleen */
 
 #include <linux/linkage.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/alternative-asm.h>
 
 /*
@@ -177,3 +177,120 @@
 .Lend:
 	retq
 ENDPROC(memcpy_orig)
+
+#ifndef CONFIG_UML
+/*
+ * memcpy_mcsafe - memory copy with machine check exception handling
+ * Note that we only catch machine checks when reading the source addresses.
+ * Writes to target are posted and don't generate machine checks.
+ */
+ENTRY(memcpy_mcsafe)
+	cmpl $8, %edx
+	/* Less than 8 bytes? Go to byte copy loop */
+	jb .L_no_whole_words
+
+	/* Check for bad alignment of source */
+	testl $7, %esi
+	/* Already aligned */
+	jz .L_8byte_aligned
+
+	/* Copy one byte at a time until source is 8-byte aligned */
+	movl %esi, %ecx
+	andl $7, %ecx
+	subl $8, %ecx
+	negl %ecx
+	subl %ecx, %edx
+.L_copy_leading_bytes:
+	movb (%rsi), %al
+	movb %al, (%rdi)
+	incq %rsi
+	incq %rdi
+	decl %ecx
+	jnz .L_copy_leading_bytes
+
+.L_8byte_aligned:
+	/* Figure out how many whole cache lines (64-bytes) to copy */
+	movl %edx, %ecx
+	andl $63, %edx
+	shrl $6, %ecx
+	jz .L_no_whole_cache_lines
+
+	/* Loop copying whole cache lines */
+.L_cache_w0: movq (%rsi), %r8
+.L_cache_w1: movq 1*8(%rsi), %r9
+.L_cache_w2: movq 2*8(%rsi), %r10
+.L_cache_w3: movq 3*8(%rsi), %r11
+	movq %r8, (%rdi)
+	movq %r9, 1*8(%rdi)
+	movq %r10, 2*8(%rdi)
+	movq %r11, 3*8(%rdi)
+.L_cache_w4: movq 4*8(%rsi), %r8
+.L_cache_w5: movq 5*8(%rsi), %r9
+.L_cache_w6: movq 6*8(%rsi), %r10
+.L_cache_w7: movq 7*8(%rsi), %r11
+	movq %r8, 4*8(%rdi)
+	movq %r9, 5*8(%rdi)
+	movq %r10, 6*8(%rdi)
+	movq %r11, 7*8(%rdi)
+	leaq 64(%rsi), %rsi
+	leaq 64(%rdi), %rdi
+	decl %ecx
+	jnz .L_cache_w0
+
+	/* Are there any trailing 8-byte words? */
+.L_no_whole_cache_lines:
+	movl %edx, %ecx
+	andl $7, %edx
+	shrl $3, %ecx
+	jz .L_no_whole_words
+
+	/* Copy trailing words */
+.L_copy_trailing_words:
+	movq (%rsi), %r8
+	mov %r8, (%rdi)
+	leaq 8(%rsi), %rsi
+	leaq 8(%rdi), %rdi
+	decl %ecx
+	jnz .L_copy_trailing_words
+
+	/* Any trailing bytes? */
+.L_no_whole_words:
+	andl %edx, %edx
+	jz .L_done_memcpy_trap
+
+	/* Copy trailing bytes */
+	movl %edx, %ecx
+.L_copy_trailing_bytes:
+	movb (%rsi), %al
+	movb %al, (%rdi)
+	incq %rsi
+	incq %rdi
+	decl %ecx
+	jnz .L_copy_trailing_bytes
+
+	/* Copy successful. Return true */
+.L_done_memcpy_trap:
+	xorq %rax, %rax
+	ret
+ENDPROC(memcpy_mcsafe)
+
+	.section .fixup, "ax"
+	/* Return false for any failure */
+.L_memcpy_mcsafe_fail:
+	mov	$1, %rax
+	ret
+
+	.previous
+
+	_ASM_EXTABLE_FAULT(.L_copy_leading_bytes, .L_memcpy_mcsafe_fail)
+	_ASM_EXTABLE_FAULT(.L_cache_w0, .L_memcpy_mcsafe_fail)
+	_ASM_EXTABLE_FAULT(.L_cache_w1, .L_memcpy_mcsafe_fail)
+	_ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail)
+	_ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail)
+	_ASM_EXTABLE_FAULT(.L_cache_w4, .L_memcpy_mcsafe_fail)
+	_ASM_EXTABLE_FAULT(.L_cache_w5, .L_memcpy_mcsafe_fail)
+	_ASM_EXTABLE_FAULT(.L_cache_w6, .L_memcpy_mcsafe_fail)
+	_ASM_EXTABLE_FAULT(.L_cache_w7, .L_memcpy_mcsafe_fail)
+	_ASM_EXTABLE_FAULT(.L_copy_trailing_words, .L_memcpy_mcsafe_fail)
+	_ASM_EXTABLE_FAULT(.L_copy_trailing_bytes, .L_memcpy_mcsafe_fail)
+#endif

diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S
index ca2afdd..90ce01b 100644
--- a/arch/x86/lib/memmove_64.S
+++ b/arch/x86/lib/memmove_64.S

@@ -6,7 +6,7 @@
  *	- Copyright 2011 Fenghua Yu <fenghua.yu@intel.com>
  */
 #include <linux/linkage.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/alternative-asm.h>
 
 #undef memmove

diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
index 2661fad..c9c8122 100644
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S

@@ -1,7 +1,7 @@
 /* Copyright 2002 Andi Kleen, SuSE Labs */
 
 #include <linux/linkage.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/alternative-asm.h>
 
 .weak memset

diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 4a6f1d9..99bfb19 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c

@@ -358,20 +358,19 @@
 #define pgd_none(a)  pud_none(__pud(pgd_val(a)))
 #endif
 
-#ifdef CONFIG_X86_64
 static inline bool is_hypervisor_range(int idx)
 {
+#ifdef CONFIG_X86_64
 	/*
 	 * ffff800000000000 - ffff87ffffffffff is reserved for
 	 * the hypervisor.
 	 */
-	return paravirt_enabled() &&
-		(idx >= pgd_index(__PAGE_OFFSET) - 16) &&
-		(idx < pgd_index(__PAGE_OFFSET));
-}
+	return	(idx >= pgd_index(__PAGE_OFFSET) - 16) &&
+		(idx <  pgd_index(__PAGE_OFFSET));
 #else
-static inline bool is_hypervisor_range(int idx) { return false; }
+	return false;
 #endif
+}
 
 static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
 				       bool checkwx)

diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 903ec1e..9dd7e4b 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c

@@ -3,6 +3,9 @@
 #include <linux/sort.h>
 #include <asm/uaccess.h>
 
+typedef bool (*ex_handler_t)(const struct exception_table_entry *,
+			    struct pt_regs *, int);
+
 static inline unsigned long
 ex_insn_addr(const struct exception_table_entry *x)
 {
@@ -13,11 +16,56 @@
 {
 	return (unsigned long)&x->fixup + x->fixup;
 }
-
-int fixup_exception(struct pt_regs *regs)
+static inline ex_handler_t
+ex_fixup_handler(const struct exception_table_entry *x)
 {
-	const struct exception_table_entry *fixup;
-	unsigned long new_ip;
+	return (ex_handler_t)((unsigned long)&x->handler + x->handler);
+}
+
+bool ex_handler_default(const struct exception_table_entry *fixup,
+		       struct pt_regs *regs, int trapnr)
+{
+	regs->ip = ex_fixup_addr(fixup);
+	return true;
+}
+EXPORT_SYMBOL(ex_handler_default);
+
+bool ex_handler_fault(const struct exception_table_entry *fixup,
+		     struct pt_regs *regs, int trapnr)
+{
+	regs->ip = ex_fixup_addr(fixup);
+	regs->ax = trapnr;
+	return true;
+}
+EXPORT_SYMBOL_GPL(ex_handler_fault);
+
+bool ex_handler_ext(const struct exception_table_entry *fixup,
+		   struct pt_regs *regs, int trapnr)
+{
+	/* Special hack for uaccess_err */
+	current_thread_info()->uaccess_err = 1;
+	regs->ip = ex_fixup_addr(fixup);
+	return true;
+}
+EXPORT_SYMBOL(ex_handler_ext);
+
+bool ex_has_fault_handler(unsigned long ip)
+{
+	const struct exception_table_entry *e;
+	ex_handler_t handler;
+
+	e = search_exception_tables(ip);
+	if (!e)
+		return false;
+	handler = ex_fixup_handler(e);
+
+	return handler == ex_handler_fault;
+}
+
+int fixup_exception(struct pt_regs *regs, int trapnr)
+{
+	const struct exception_table_entry *e;
+	ex_handler_t handler;
 
 #ifdef CONFIG_PNPBIOS
 	if (unlikely(SEGMENT_IS_PNP_CODE(regs->cs))) {
@@ -33,42 +81,34 @@
 	}
 #endif
 
-	fixup = search_exception_tables(regs->ip);
-	if (fixup) {
-		new_ip = ex_fixup_addr(fixup);
+	e = search_exception_tables(regs->ip);
+	if (!e)
+		return 0;
 
-		if (fixup->fixup - fixup->insn >= 0x7ffffff0 - 4) {
-			/* Special hack for uaccess_err */
-			current_thread_info()->uaccess_err = 1;
-			new_ip -= 0x7ffffff0;
-		}
-		regs->ip = new_ip;
-		return 1;
-	}
-
-	return 0;
+	handler = ex_fixup_handler(e);
+	return handler(e, regs, trapnr);
 }
 
 /* Restricted version used during very early boot */
 int __init early_fixup_exception(unsigned long *ip)
 {
-	const struct exception_table_entry *fixup;
+	const struct exception_table_entry *e;
 	unsigned long new_ip;
+	ex_handler_t handler;
 
-	fixup = search_exception_tables(*ip);
-	if (fixup) {
-		new_ip = ex_fixup_addr(fixup);
+	e = search_exception_tables(*ip);
+	if (!e)
+		return 0;
 
-		if (fixup->fixup - fixup->insn >= 0x7ffffff0 - 4) {
-			/* uaccess handling not supported during early boot */
-			return 0;
-		}
+	new_ip  = ex_fixup_addr(e);
+	handler = ex_fixup_handler(e);
 
-		*ip = new_ip;
-		return 1;
-	}
+	/* special handling not supported during early boot */
+	if (handler != ex_handler_default)
+		return 0;
 
-	return 0;
+	*ip = new_ip;
+	return 1;
 }
 
 /*
@@ -133,6 +173,8 @@
 		i += 4;
 		p->fixup += i;
 		i += 4;
+		p->handler += i;
+		i += 4;
 	}
 
 	sort(start, finish - start, sizeof(struct exception_table_entry),
@@ -145,6 +187,8 @@
 		i += 4;
 		p->fixup -= i;
 		i += 4;
+		p->handler -= i;
+		i += 4;
 	}
 }
 

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index e830c71..03898ae 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c

@@ -663,7 +663,7 @@
 	int sig;
 
 	/* Are we prepared to handle this kernel fault? */
-	if (fixup_exception(regs)) {
+	if (fixup_exception(regs, X86_TRAP_PF)) {
 		/*
 		 * Any interrupt that takes a fault gets the fixup. This makes
 		 * the below recursive fault logic only apply to a faults from

diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index cb4ef3d..bd7a9b9 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c

@@ -388,7 +388,6 @@
 }
 
 pte_t *kmap_pte;
-pgprot_t kmap_prot;
 
 static inline pte_t *kmap_get_fixmap_pte(unsigned long vaddr)
 {
@@ -405,8 +404,6 @@
 	 */
 	kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN);
 	kmap_pte = kmap_get_fixmap_pte(kmap_vstart);
-
-	kmap_prot = PAGE_KERNEL;
 }
 
 #ifdef CONFIG_HIGHMEM
@@ -871,7 +868,6 @@
 	return flag;
 }
 
-#ifdef CONFIG_DEBUG_RODATA
 const int rodata_test_data = 0xC3;
 EXPORT_SYMBOL_GPL(rodata_test_data);
 
@@ -960,5 +956,3 @@
 	if (__supported_pte_mask & _PAGE_NX)
 		debug_checkwx();
 }
-#endif
-

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 5488d21..214afda 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c

@@ -53,6 +53,7 @@
 #include <asm/numa.h>
 #include <asm/cacheflush.h>
 #include <asm/init.h>
+#include <asm/uv/uv.h>
 #include <asm/setup.h>
 
 #include "mm_internal.h"
@@ -1074,7 +1075,6 @@
 	mem_init_print_info(NULL);
 }
 
-#ifdef CONFIG_DEBUG_RODATA
 const int rodata_test_data = 0xC3;
 EXPORT_SYMBOL_GPL(rodata_test_data);
 
@@ -1166,8 +1166,6 @@
 	debug_checkwx();
 }
 
-#endif
-
 int kern_addr_valid(unsigned long addr)
 {
 	unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT;
@@ -1206,26 +1204,13 @@
 
 static unsigned long probe_memory_block_size(void)
 {
-	/* start from 2g */
-	unsigned long bz = 1UL<<31;
+	unsigned long bz = MIN_MEMORY_BLOCK_SIZE;
 
-	if (totalram_pages >= (64ULL << (30 - PAGE_SHIFT))) {
-		pr_info("Using 2GB memory block size for large-memory system\n");
-		return 2UL * 1024 * 1024 * 1024;
-	}
+	/* if system is UV or has 64GB of RAM or more, use large blocks */
+	if (is_uv_system() || ((max_pfn << PAGE_SHIFT) >= (64UL << 30)))
+		bz = 2UL << 30; /* 2GB */
 
-	/* less than 64g installed */
-	if ((max_pfn << PAGE_SHIFT) < (16UL << 32))
-		return MIN_MEMORY_BLOCK_SIZE;
-
-	/* get the tail size */
-	while (bz > MIN_MEMORY_BLOCK_SIZE) {
-		if (!((max_pfn << PAGE_SHIFT) & (bz - 1)))
-			break;
-		bz >>= 1;
-	}
-
-	printk(KERN_DEBUG "memory block size : %ldMB\n", bz >> 20);
+	pr_info("x86/mm: Memory block size: %ldMB\n", bz >> 20);
 
 	return bz;
 }

diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index d470cf2..1b1110f 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c

@@ -120,11 +120,22 @@
 	kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END),
 			(void *)KASAN_SHADOW_END);
 
-	memset(kasan_zero_page, 0, PAGE_SIZE);
-
 	load_cr3(init_level4_pgt);
 	__flush_tlb_all();
-	init_task.kasan_depth = 0;
 
+	/*
+	 * kasan_zero_page has been used as early shadow memory, thus it may
+	 * contain some garbage. Now we can clear and write protect it, since
+	 * after the TLB flush no one should write to it.
+	 */
+	memset(kasan_zero_page, 0, PAGE_SIZE);
+	for (i = 0; i < PTRS_PER_PTE; i++) {
+		pte_t pte = __pte(__pa(kasan_zero_page) | __PAGE_KERNEL_RO);
+		set_pte(&kasan_zero_pte[i], pte);
+	}
+	/* Flush TLBs again to be sure that write protection applied. */
+	__flush_tlb_all();
+
+	init_task.kasan_depth = 0;
 	pr_info("KernelAddressSanitizer initialized\n");
 }

diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index 637ab34..ddb2244 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c

@@ -33,7 +33,7 @@
 struct kmmio_fault_page {
 	struct list_head list;
 	struct kmmio_fault_page *release_next;
-	unsigned long page; /* location of the fault page */
+	unsigned long addr; /* the requested address */
 	pteval_t old_presence; /* page presence prior to arming */
 	bool armed;
 
@@ -70,9 +70,16 @@
 static struct list_head kmmio_page_table[KMMIO_PAGE_TABLE_SIZE];
 static LIST_HEAD(kmmio_probes);
 
-static struct list_head *kmmio_page_list(unsigned long page)
+static struct list_head *kmmio_page_list(unsigned long addr)
 {
-	return &kmmio_page_table[hash_long(page, KMMIO_PAGE_HASH_BITS)];
+	unsigned int l;
+	pte_t *pte = lookup_address(addr, &l);
+
+	if (!pte)
+		return NULL;
+	addr &= page_level_mask(l);
+
+	return &kmmio_page_table[hash_long(addr, KMMIO_PAGE_HASH_BITS)];
 }
 
 /* Accessed per-cpu */
@@ -98,15 +105,19 @@
 }
 
 /* You must be holding RCU read lock. */
-static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page)
+static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long addr)
 {
 	struct list_head *head;
 	struct kmmio_fault_page *f;
+	unsigned int l;
+	pte_t *pte = lookup_address(addr, &l);
 
-	page &= PAGE_MASK;
-	head = kmmio_page_list(page);
+	if (!pte)
+		return NULL;
+	addr &= page_level_mask(l);
+	head = kmmio_page_list(addr);
 	list_for_each_entry_rcu(f, head, list) {
-		if (f->page == page)
+		if (f->addr == addr)
 			return f;
 	}
 	return NULL;
@@ -137,10 +148,10 @@
 static int clear_page_presence(struct kmmio_fault_page *f, bool clear)
 {
 	unsigned int level;
-	pte_t *pte = lookup_address(f->page, &level);
+	pte_t *pte = lookup_address(f->addr, &level);
 
 	if (!pte) {
-		pr_err("no pte for page 0x%08lx\n", f->page);
+		pr_err("no pte for addr 0x%08lx\n", f->addr);
 		return -1;
 	}
 
@@ -156,7 +167,7 @@
 		return -1;
 	}
 
-	__flush_tlb_one(f->page);
+	__flush_tlb_one(f->addr);
 	return 0;
 }
 
@@ -176,12 +187,12 @@
 	int ret;
 	WARN_ONCE(f->armed, KERN_ERR pr_fmt("kmmio page already armed.\n"));
 	if (f->armed) {
-		pr_warning("double-arm: page 0x%08lx, ref %d, old %d\n",
-			   f->page, f->count, !!f->old_presence);
+		pr_warning("double-arm: addr 0x%08lx, ref %d, old %d\n",
+			   f->addr, f->count, !!f->old_presence);
 	}
 	ret = clear_page_presence(f, true);
-	WARN_ONCE(ret < 0, KERN_ERR pr_fmt("arming 0x%08lx failed.\n"),
-		  f->page);
+	WARN_ONCE(ret < 0, KERN_ERR pr_fmt("arming at 0x%08lx failed.\n"),
+		  f->addr);
 	f->armed = true;
 	return ret;
 }
@@ -191,7 +202,7 @@
 {
 	int ret = clear_page_presence(f, false);
 	WARN_ONCE(ret < 0,
-			KERN_ERR "kmmio disarming 0x%08lx failed.\n", f->page);
+			KERN_ERR "kmmio disarming at 0x%08lx failed.\n", f->addr);
 	f->armed = false;
 }
 
@@ -215,6 +226,12 @@
 	struct kmmio_context *ctx;
 	struct kmmio_fault_page *faultpage;
 	int ret = 0; /* default to fault not handled */
+	unsigned long page_base = addr;
+	unsigned int l;
+	pte_t *pte = lookup_address(addr, &l);
+	if (!pte)
+		return -EINVAL;
+	page_base &= page_level_mask(l);
 
 	/*
 	 * Preemption is now disabled to prevent process switch during
@@ -227,7 +244,7 @@
 	preempt_disable();
 	rcu_read_lock();
 
-	faultpage = get_kmmio_fault_page(addr);
+	faultpage = get_kmmio_fault_page(page_base);
 	if (!faultpage) {
 		/*
 		 * Either this page fault is not caused by kmmio, or
@@ -239,7 +256,7 @@
 
 	ctx = &get_cpu_var(kmmio_ctx);
 	if (ctx->active) {
-		if (addr == ctx->addr) {
+		if (page_base == ctx->addr) {
 			/*
 			 * A second fault on the same page means some other
 			 * condition needs handling by do_page_fault(), the
@@ -267,9 +284,9 @@
 	ctx->active++;
 
 	ctx->fpage = faultpage;
-	ctx->probe = get_kmmio_probe(addr);
+	ctx->probe = get_kmmio_probe(page_base);
 	ctx->saved_flags = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF));
-	ctx->addr = addr;
+	ctx->addr = page_base;
 
 	if (ctx->probe && ctx->probe->pre_handler)
 		ctx->probe->pre_handler(ctx->probe, regs, addr);
@@ -354,12 +371,11 @@
 }
 
 /* You must be holding kmmio_lock. */
-static int add_kmmio_fault_page(unsigned long page)
+static int add_kmmio_fault_page(unsigned long addr)
 {
 	struct kmmio_fault_page *f;
 
-	page &= PAGE_MASK;
-	f = get_kmmio_fault_page(page);
+	f = get_kmmio_fault_page(addr);
 	if (f) {
 		if (!f->count)
 			arm_kmmio_fault_page(f);
@@ -372,26 +388,25 @@
 		return -1;
 
 	f->count = 1;
-	f->page = page;
+	f->addr = addr;
 
 	if (arm_kmmio_fault_page(f)) {
 		kfree(f);
 		return -1;
 	}
 
-	list_add_rcu(&f->list, kmmio_page_list(f->page));
+	list_add_rcu(&f->list, kmmio_page_list(f->addr));
 
 	return 0;
 }
 
 /* You must be holding kmmio_lock. */
-static void release_kmmio_fault_page(unsigned long page,
+static void release_kmmio_fault_page(unsigned long addr,
 				struct kmmio_fault_page **release_list)
 {
 	struct kmmio_fault_page *f;
 
-	page &= PAGE_MASK;
-	f = get_kmmio_fault_page(page);
+	f = get_kmmio_fault_page(addr);
 	if (!f)
 		return;
 
@@ -420,18 +435,27 @@
 	int ret = 0;
 	unsigned long size = 0;
 	const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
+	unsigned int l;
+	pte_t *pte;
 
 	spin_lock_irqsave(&kmmio_lock, flags);
 	if (get_kmmio_probe(p->addr)) {
 		ret = -EEXIST;
 		goto out;
 	}
+
+	pte = lookup_address(p->addr, &l);
+	if (!pte) {
+		ret = -EINVAL;
+		goto out;
+	}
+
 	kmmio_count++;
 	list_add_rcu(&p->list, &kmmio_probes);
 	while (size < size_lim) {
 		if (add_kmmio_fault_page(p->addr + size))
 			pr_err("Unable to set page fault.\n");
-		size += PAGE_SIZE;
+		size += page_level_size(l);
 	}
 out:
 	spin_unlock_irqrestore(&kmmio_lock, flags);
@@ -506,11 +530,17 @@
 	const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
 	struct kmmio_fault_page *release_list = NULL;
 	struct kmmio_delayed_release *drelease;
+	unsigned int l;
+	pte_t *pte;
+
+	pte = lookup_address(p->addr, &l);
+	if (!pte)
+		return;
 
 	spin_lock_irqsave(&kmmio_lock, flags);
 	while (size < size_lim) {
 		release_kmmio_fault_page(p->addr + size, &release_list);
-		size += PAGE_SIZE;
+		size += page_level_size(l);
 	}
 	list_del_rcu(&p->list);
 	kmmio_count--;

diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 96bd1e2..d2dc043 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c

@@ -71,12 +71,12 @@
 
 	if (mmap_is_ia32())
 #ifdef CONFIG_COMPAT
-		rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_compat_bits) - 1);
+		rnd = get_random_long() & ((1UL << mmap_rnd_compat_bits) - 1);
 #else
-		rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1);
+		rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
 #endif
 	else
-		rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1);
+		rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
 
 	return rnd << PAGE_SHIFT;
 }
@@ -94,18 +94,6 @@
 }
 
 /*
- * Bottom-up (legacy) layout on X86_32 did not support randomization, X86_64
- * does, but not when emulating X86_32
- */
-static unsigned long mmap_legacy_base(unsigned long rnd)
-{
-	if (mmap_is_ia32())
-		return TASK_UNMAPPED_BASE;
-	else
-		return TASK_UNMAPPED_BASE + rnd;
-}
-
-/*
  * This function, called very early during the creation of a new
  * process VM image, sets up which VM layout function to use:
  */
@@ -116,7 +104,7 @@
 	if (current->flags & PF_RANDOMIZE)
 		random_factor = arch_mmap_rnd();
 
-	mm->mmap_legacy_base = mmap_legacy_base(random_factor);
+	mm->mmap_legacy_base = TASK_UNMAPPED_BASE + random_factor;
 
 	if (mmap_is_legacy()) {
 		mm->mmap_base = mm->mmap_legacy_base;

diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c
index b2fd67d..ef05755 100644
--- a/arch/x86/mm/mpx.c
+++ b/arch/x86/mm/mpx.c

@@ -123,7 +123,7 @@
 		break;
 	}
 
-	if (regno > nr_registers) {
+	if (regno >= nr_registers) {
 		WARN_ONCE(1, "decoded an instruction with an invalid register");
 		return -EINVAL;
 	}

diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index d04f809..f70c1ff 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c

@@ -465,46 +465,67 @@
 	return true;
 }
 
+/*
+ * Mark all currently memblock-reserved physical memory (which covers the
+ * kernel's own memory ranges) as hot-unswappable.
+ */
 static void __init numa_clear_kernel_node_hotplug(void)
 {
-	int i, nid;
-	nodemask_t numa_kernel_nodes = NODE_MASK_NONE;
-	phys_addr_t start, end;
-	struct memblock_region *r;
+	nodemask_t reserved_nodemask = NODE_MASK_NONE;
+	struct memblock_region *mb_region;
+	int i;
 
 	/*
+	 * We have to do some preprocessing of memblock regions, to
+	 * make them suitable for reservation.
+	 *
 	 * At this time, all memory regions reserved by memblock are
-	 * used by the kernel. Set the nid in memblock.reserved will
-	 * mark out all the nodes the kernel resides in.
+	 * used by the kernel, but those regions are not split up
+	 * along node boundaries yet, and don't necessarily have their
+	 * node ID set yet either.
+	 *
+	 * So iterate over all memory known to the x86 architecture,
+	 * and use those ranges to set the nid in memblock.reserved.
+	 * This will split up the memblock regions along node
+	 * boundaries and will set the node IDs as well.
 	 */
 	for (i = 0; i < numa_meminfo.nr_blks; i++) {
-		struct numa_memblk *mb = &numa_meminfo.blk[i];
+		struct numa_memblk *mb = numa_meminfo.blk + i;
+		int ret;
 
-		memblock_set_node(mb->start, mb->end - mb->start,
-				  &memblock.reserved, mb->nid);
+		ret = memblock_set_node(mb->start, mb->end - mb->start, &memblock.reserved, mb->nid);
+		WARN_ON_ONCE(ret);
 	}
 
 	/*
-	 * Mark all kernel nodes.
+	 * Now go over all reserved memblock regions, to construct a
+	 * node mask of all kernel reserved memory areas.
 	 *
-	 * When booting with mem=nn[kMG] or in a kdump kernel, numa_meminfo
-	 * may not include all the memblock.reserved memory ranges because
-	 * trim_snb_memory() reserves specific pages for Sandy Bridge graphics.
+	 * [ Note, when booting with mem=nn[kMG] or in a kdump kernel,
+	 *   numa_meminfo might not include all memblock.reserved
+	 *   memory ranges, because quirks such as trim_snb_memory()
+	 *   reserve specific pages for Sandy Bridge graphics. ]
 	 */
-	for_each_memblock(reserved, r)
-		if (r->nid != MAX_NUMNODES)
-			node_set(r->nid, numa_kernel_nodes);
+	for_each_memblock(reserved, mb_region) {
+		if (mb_region->nid != MAX_NUMNODES)
+			node_set(mb_region->nid, reserved_nodemask);
+	}
 
-	/* Clear MEMBLOCK_HOTPLUG flag for memory in kernel nodes. */
+	/*
+	 * Finally, clear the MEMBLOCK_HOTPLUG flag for all memory
+	 * belonging to the reserved node mask.
+	 *
+	 * Note that this will include memory regions that reside
+	 * on nodes that contain kernel memory - entire nodes
+	 * become hot-unpluggable:
+	 */
 	for (i = 0; i < numa_meminfo.nr_blks; i++) {
-		nid = numa_meminfo.blk[i].nid;
-		if (!node_isset(nid, numa_kernel_nodes))
+		struct numa_memblk *mb = numa_meminfo.blk + i;
+
+		if (!node_isset(mb->nid, reserved_nodemask))
 			continue;
 
-		start = numa_meminfo.blk[i].start;
-		end = numa_meminfo.blk[i].end;
-
-		memblock_clear_hotplug(start, end - start);
+		memblock_clear_hotplug(mb->start, mb->end - mb->start);
 	}
 }
 

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 2440814..007ebe2 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c

@@ -283,7 +283,7 @@
 		   __pa_symbol(__end_rodata) >> PAGE_SHIFT))
 		pgprot_val(forbidden) |= _PAGE_RW;
 
-#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA)
+#if defined(CONFIG_X86_64)
 	/*
 	 * Once the kernel maps the text as RO (kernel_set_to_readonly is set),
 	 * kernel text mappings for the large page aligned text, rodata sections
@@ -419,24 +419,30 @@
 phys_addr_t slow_virt_to_phys(void *__virt_addr)
 {
 	unsigned long virt_addr = (unsigned long)__virt_addr;
-	unsigned long phys_addr, offset;
+	phys_addr_t phys_addr;
+	unsigned long offset;
 	enum pg_level level;
 	pte_t *pte;
 
 	pte = lookup_address(virt_addr, &level);
 	BUG_ON(!pte);
 
+	/*
+	 * pXX_pfn() returns unsigned long, which must be cast to phys_addr_t
+	 * before being left-shifted PAGE_SHIFT bits -- this trick is to
+	 * make 32-PAE kernel work correctly.
+	 */
 	switch (level) {
 	case PG_LEVEL_1G:
-		phys_addr = pud_pfn(*(pud_t *)pte) << PAGE_SHIFT;
+		phys_addr = (phys_addr_t)pud_pfn(*(pud_t *)pte) << PAGE_SHIFT;
 		offset = virt_addr & ~PUD_PAGE_MASK;
 		break;
 	case PG_LEVEL_2M:
-		phys_addr = pmd_pfn(*(pmd_t *)pte) << PAGE_SHIFT;
+		phys_addr = (phys_addr_t)pmd_pfn(*(pmd_t *)pte) << PAGE_SHIFT;
 		offset = virt_addr & ~PMD_PAGE_MASK;
 		break;
 	default:
-		phys_addr = pte_pfn(*pte) << PAGE_SHIFT;
+		phys_addr = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT;
 		offset = virt_addr & ~PAGE_MASK;
 	}
 
@@ -1122,8 +1128,10 @@
 	/*
 	 * Ignore all non primary paths.
 	 */
-	if (!primary)
+	if (!primary) {
+		cpa->numpages = 1;
 		return 0;
+	}
 
 	/*
 	 * Ignore the NULL PTE for kernel identity mapping, as it is expected

diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index f4ae536..04e2e71 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c

@@ -943,7 +943,7 @@
 			return -EINVAL;
 	}
 
-	*prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) |
+	*prot = __pgprot((pgprot_val(*prot) & (~_PAGE_CACHE_MASK)) |
 			 cachemode2protval(pcm));
 
 	return 0;
@@ -959,7 +959,7 @@
 
 	/* Set prot based on lookup */
 	pcm = lookup_memtype(pfn_t_to_phys(pfn));
-	*prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) |
+	*prot = __pgprot((pgprot_val(*prot) & (~_PAGE_CACHE_MASK)) |
 			 cachemode2protval(pcm));
 
 	return 0;

diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c
index 92e2eac..8bea847 100644
--- a/arch/x86/mm/setup_nx.c
+++ b/arch/x86/mm/setup_nx.c

@@ -4,6 +4,7 @@
 
 #include <asm/pgtable.h>
 #include <asm/proto.h>
+#include <asm/cpufeature.h>
 
 static int disable_nx;
 
@@ -31,9 +32,8 @@
 
 void x86_configure_nx(void)
 {
-	if (boot_cpu_has(X86_FEATURE_NX) && !disable_nx)
-		__supported_pte_mask |= _PAGE_NX;
-	else
+	/* If disable_nx is set, clear NX on all new mappings going forward. */
+	if (disable_nx)
 		__supported_pte_mask &= ~_PAGE_NX;
 }
 

diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index 50d86c0..660a83c 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c

@@ -24,7 +24,6 @@
 #include <asm/nmi.h>
 #include <asm/apic.h>
 #include <asm/processor.h>
-#include <asm/cpufeature.h>
 
 #include "op_x86_model.h"
 #include "op_counter.h"

diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 2879efc..d34b511 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c

@@ -711,28 +711,22 @@
 	return 0;
 }
 
-int pcibios_alloc_irq(struct pci_dev *dev)
-{
-	/*
-	 * If the PCI device was already claimed by core code and has
-	 * MSI enabled, probing of the pcibios IRQ will overwrite
-	 * dev->irq.  So bail out if MSI is already enabled.
-	 */
-	if (pci_dev_msi_enabled(dev))
-		return -EBUSY;
-
-	return pcibios_enable_irq(dev);
-}
-
-void pcibios_free_irq(struct pci_dev *dev)
-{
-	if (pcibios_disable_irq)
-		pcibios_disable_irq(dev);
-}
-
 int pcibios_enable_device(struct pci_dev *dev, int mask)
 {
-	return pci_enable_resources(dev, mask);
+	int err;
+
+	if ((err = pci_enable_resources(dev, mask)) < 0)
+		return err;
+
+	if (!pci_dev_msi_enabled(dev))
+		return pcibios_enable_irq(dev);
+	return 0;
+}
+
+void pcibios_disable_device (struct pci_dev *dev)
+{
+	if (!pci_dev_msi_enabled(dev) && pcibios_disable_irq)
+		pcibios_disable_irq(dev);
 }
 
 int pci_ext_cfg_avail(void)

diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c
index 0d24e7c..8b93e63 100644
--- a/arch/x86/pci/intel_mid_pci.c
+++ b/arch/x86/pci/intel_mid_pci.c

@@ -215,7 +215,7 @@
 	int polarity;
 	int ret;
 
-	if (pci_has_managed_irq(dev))
+	if (dev->irq_managed && dev->irq > 0)
 		return 0;
 
 	switch (intel_mid_identify_cpu()) {
@@ -256,13 +256,10 @@
 
 static void intel_mid_pci_irq_disable(struct pci_dev *dev)
 {
-	if (pci_has_managed_irq(dev)) {
+	if (!mp_should_keep_irq(&dev->dev) && dev->irq_managed &&
+	    dev->irq > 0) {
 		mp_unmap_irq(dev->irq);
 		dev->irq_managed = 0;
-		/*
-		 * Don't reset dev->irq here, otherwise
-		 * intel_mid_pci_irq_enable() will fail on next call.
-		 */
 	}
 }
 

diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index 32e7034..9bd1154 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c

@@ -1202,7 +1202,7 @@
 			struct pci_dev *temp_dev;
 			int irq;
 
-			if (pci_has_managed_irq(dev))
+			if (dev->irq_managed && dev->irq > 0)
 				return 0;
 
 			irq = IO_APIC_get_PCI_irq_vector(dev->bus->number,
@@ -1230,7 +1230,8 @@
 			}
 			dev = temp_dev;
 			if (irq >= 0) {
-				pci_set_managed_irq(dev, irq);
+				dev->irq_managed = 1;
+				dev->irq = irq;
 				dev_info(&dev->dev, "PCI->APIC IRQ transform: "
 					 "INT %c -> IRQ %d\n", 'A' + pin - 1, irq);
 				return 0;
@@ -1256,10 +1257,24 @@
 	return 0;
 }
 
+bool mp_should_keep_irq(struct device *dev)
+{
+	if (dev->power.is_prepared)
+		return true;
+#ifdef CONFIG_PM
+	if (dev->power.runtime_status == RPM_SUSPENDING)
+		return true;
+#endif
+
+	return false;
+}
+
 static void pirq_disable_irq(struct pci_dev *dev)
 {
-	if (io_apic_assign_pci_irqs && pci_has_managed_irq(dev)) {
+	if (io_apic_assign_pci_irqs && !mp_should_keep_irq(&dev->dev) &&
+	    dev->irq_managed && dev->irq) {
 		mp_unmap_irq(dev->irq);
-		pci_reset_managed_irq(dev);
+		dev->irq = 0;
+		dev->irq_managed = 0;
 	}
 }

diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index ff31ab4..beac4df 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c

@@ -196,7 +196,10 @@
 	return 0;
 
 error:
-	dev_err(&dev->dev, "Xen PCI frontend has not registered MSI/MSI-X support!\n");
+	if (ret == -ENOSYS)
+		dev_err(&dev->dev, "Xen PCI frontend has not registered MSI/MSI-X support!\n");
+	else if (ret)
+		dev_err(&dev->dev, "Xen PCI frontend error: %d!\n", ret);
 free:
 	kfree(v);
 	return ret;

diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index 2d66db8..ed30e79 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c

@@ -131,6 +131,27 @@
 EXPORT_SYMBOL_GPL(efi_query_variable_store);
 
 /*
+ * Helper function for efi_reserve_boot_services() to figure out if we
+ * can free regions in efi_free_boot_services().
+ *
+ * Use this function to ensure we do not free regions owned by somebody
+ * else. We must only reserve (and then free) regions:
+ *
+ * - Not within any part of the kernel
+ * - Not the BIOS reserved area (E820_RESERVED, E820_NVS, etc)
+ */
+static bool can_free_region(u64 start, u64 size)
+{
+	if (start + size > __pa_symbol(_text) && start <= __pa_symbol(_end))
+		return false;
+
+	if (!e820_all_mapped(start, start+size, E820_RAM))
+		return false;
+
+	return true;
+}
+
+/*
  * The UEFI specification makes it clear that the operating system is free to do
  * whatever it wants with boot services code after ExitBootServices() has been
  * called. Ignoring this recommendation a significant bunch of EFI implementations 
@@ -147,26 +168,50 @@
 		efi_memory_desc_t *md = p;
 		u64 start = md->phys_addr;
 		u64 size = md->num_pages << EFI_PAGE_SHIFT;
+		bool already_reserved;
 
 		if (md->type != EFI_BOOT_SERVICES_CODE &&
 		    md->type != EFI_BOOT_SERVICES_DATA)
 			continue;
-		/* Only reserve where possible:
-		 * - Not within any already allocated areas
-		 * - Not over any memory area (really needed, if above?)
-		 * - Not within any part of the kernel
-		 * - Not the bios reserved area
-		*/
-		if ((start + size > __pa_symbol(_text)
-				&& start <= __pa_symbol(_end)) ||
-			!e820_all_mapped(start, start+size, E820_RAM) ||
-			memblock_is_region_reserved(start, size)) {
-			/* Could not reserve, skip it */
-			md->num_pages = 0;
-			memblock_dbg("Could not reserve boot range [0x%010llx-0x%010llx]\n",
-				     start, start+size-1);
-		} else
+
+		already_reserved = memblock_is_region_reserved(start, size);
+
+		/*
+		 * Because the following memblock_reserve() is paired
+		 * with free_bootmem_late() for this region in
+		 * efi_free_boot_services(), we must be extremely
+		 * careful not to reserve, and subsequently free,
+		 * critical regions of memory (like the kernel image) or
+		 * those regions that somebody else has already
+		 * reserved.
+		 *
+		 * A good example of a critical region that must not be
+		 * freed is page zero (first 4Kb of memory), which may
+		 * contain boot services code/data but is marked
+		 * E820_RESERVED by trim_bios_range().
+		 */
+		if (!already_reserved) {
 			memblock_reserve(start, size);
+
+			/*
+			 * If we are the first to reserve the region, no
+			 * one else cares about it. We own it and can
+			 * free it later.
+			 */
+			if (can_free_region(start, size))
+				continue;
+		}
+
+		/*
+		 * We don't own the region. We must not free it.
+		 *
+		 * Setting this bit for a boot services region really
+		 * doesn't make sense as far as the firmware is
+		 * concerned, but it does provide us with a way to tag
+		 * those regions that must not be paired with
+		 * free_bootmem_late().
+		 */
+		md->attribute |= EFI_MEMORY_RUNTIME;
 	}
 }
 
@@ -183,8 +228,8 @@
 		    md->type != EFI_BOOT_SERVICES_DATA)
 			continue;
 
-		/* Could not reserve boot area */
-		if (!size)
+		/* Do not free, someone else owns it: */
+		if (md->attribute & EFI_MEMORY_RUNTIME)
 			continue;
 
 		free_bootmem_late(start, size);

diff --git a/arch/x86/platform/geode/alix.c b/arch/x86/platform/geode/alix.c
index 76b6632..1865c19 100644
--- a/arch/x86/platform/geode/alix.c
+++ b/arch/x86/platform/geode/alix.c

@@ -21,7 +21,7 @@
 #include <linux/init.h>
 #include <linux/io.h>
 #include <linux/string.h>
-#include <linux/module.h>
+#include <linux/moduleparam.h>
 #include <linux/leds.h>
 #include <linux/platform_device.h>
 #include <linux/gpio.h>
@@ -35,6 +35,11 @@
 #define BIOS_SIGNATURE_COREBOOT		0x500
 #define BIOS_REGION_SIZE		0x10000
 
+/*
+ * This driver is not modular, but to keep back compatibility
+ * with existing use cases, continuing with module_param is
+ * the easiest way forward.
+ */
 static bool force = 0;
 module_param(force, bool, 0444);
 /* FIXME: Award bios is not automatically detected as Alix platform */
@@ -192,9 +197,4 @@
 
 	return 0;
 }
-
-module_init(alix_init);
-
-MODULE_AUTHOR("Ed Wildgoose <kernel@wildgooses.com>");
-MODULE_DESCRIPTION("PCEngines ALIX System Setup");
-MODULE_LICENSE("GPL");
+device_initcall(alix_init);

diff --git a/arch/x86/platform/geode/geos.c b/arch/x86/platform/geode/geos.c
index aa733fb..4fcdb91 100644
--- a/arch/x86/platform/geode/geos.c
+++ b/arch/x86/platform/geode/geos.c

@@ -19,7 +19,6 @@
 #include <linux/init.h>
 #include <linux/io.h>
 #include <linux/string.h>
-#include <linux/module.h>
 #include <linux/leds.h>
 #include <linux/platform_device.h>
 #include <linux/gpio.h>
@@ -120,9 +119,4 @@
 
 	return 0;
 }
-
-module_init(geos_init);
-
-MODULE_AUTHOR("Philip Prindeville <philipp@redfish-solutions.com>");
-MODULE_DESCRIPTION("Traverse Technologies Geos System Setup");
-MODULE_LICENSE("GPL");
+device_initcall(geos_init);

diff --git a/arch/x86/platform/geode/net5501.c b/arch/x86/platform/geode/net5501.c
index 927e38c..a2f6b98 100644
--- a/arch/x86/platform/geode/net5501.c
+++ b/arch/x86/platform/geode/net5501.c

@@ -20,7 +20,6 @@
 #include <linux/init.h>
 #include <linux/io.h>
 #include <linux/string.h>
-#include <linux/module.h>
 #include <linux/leds.h>
 #include <linux/platform_device.h>
 #include <linux/gpio.h>
@@ -146,9 +145,4 @@
 
 	return 0;
 }
-
-module_init(net5501_init);
-
-MODULE_AUTHOR("Philip Prindeville <philipp@redfish-solutions.com>");
-MODULE_DESCRIPTION("Soekris net5501 System Setup");
-MODULE_LICENSE("GPL");
+device_initcall(net5501_init);

diff --git a/arch/x86/platform/intel-mid/mfld.c b/arch/x86/platform/intel-mid/mfld.c
index 23381d2..1eb47b6 100644
--- a/arch/x86/platform/intel-mid/mfld.c
+++ b/arch/x86/platform/intel-mid/mfld.c

@@ -52,10 +52,7 @@
 	/* mark tsc clocksource as reliable */
 	set_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC_RELIABLE);
 
-	if (fast_calibrate)
-		return fast_calibrate;
-
-	return 0;
+	return fast_calibrate;
 }
 
 static void __init penwell_arch_setup(void)

diff --git a/arch/x86/platform/intel-mid/mrfl.c b/arch/x86/platform/intel-mid/mrfl.c
index aaca917..bd1adc6 100644
--- a/arch/x86/platform/intel-mid/mrfl.c
+++ b/arch/x86/platform/intel-mid/mrfl.c

@@ -81,10 +81,7 @@
 	/* mark tsc clocksource as reliable */
 	set_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC_RELIABLE);
 
-	if (fast_calibrate)
-		return fast_calibrate;
-
-	return 0;
+	return fast_calibrate;
 }
 
 static void __init tangier_arch_setup(void)

diff --git a/arch/x86/platform/intel-quark/imr.c b/arch/x86/platform/intel-quark/imr.c
index c61b6c3..17d6d22 100644
--- a/arch/x86/platform/intel-quark/imr.c
+++ b/arch/x86/platform/intel-quark/imr.c

@@ -1,5 +1,5 @@
 /**
- * imr.c
+ * imr.c -- Intel Isolated Memory Region driver
  *
  * Copyright(c) 2013 Intel Corporation.
  * Copyright(c) 2015 Bryan O'Donoghue <pure.logic@nexus-software.ie>
@@ -31,7 +31,6 @@
 #include <linux/debugfs.h>
 #include <linux/init.h>
 #include <linux/mm.h>
-#include <linux/module.h>
 #include <linux/types.h>
 
 struct imr_device {
@@ -135,11 +134,9 @@
  * @idev:	pointer to imr_device structure.
  * @imr_id:	IMR entry to write.
  * @imr:	IMR structure representing address and access masks.
- * @lock:	indicates if the IMR lock bit should be applied.
  * @return:	0 on success or error code passed from mbi_iosf on failure.
  */
-static int imr_write(struct imr_device *idev, u32 imr_id,
-		     struct imr_regs *imr, bool lock)
+static int imr_write(struct imr_device *idev, u32 imr_id, struct imr_regs *imr)
 {
 	unsigned long flags;
 	u32 reg = imr_id * IMR_NUM_REGS + idev->reg_base;
@@ -163,15 +160,6 @@
 	if (ret)
 		goto failed;
 
-	/* Lock bit must be set separately to addr_lo address bits. */
-	if (lock) {
-		imr->addr_lo |= IMR_LOCK;
-		ret = iosf_mbi_write(QRK_MBI_UNIT_MM, MBI_REG_WRITE,
-				     reg - IMR_NUM_REGS, imr->addr_lo);
-		if (ret)
-			goto failed;
-	}
-
 	local_irq_restore(flags);
 	return 0;
 failed:
@@ -270,17 +258,6 @@
 }
 
 /**
- * imr_debugfs_unregister - unregister debugfs hooks.
- *
- * @idev:	pointer to imr_device structure.
- * @return:
- */
-static void imr_debugfs_unregister(struct imr_device *idev)
-{
-	debugfs_remove(idev->file);
-}
-
-/**
  * imr_check_params - check passed address range IMR alignment and non-zero size
  *
  * @base:	base address of intended IMR.
@@ -334,11 +311,10 @@
  * @size:	physical size of region in bytes must be aligned to 1KiB.
  * @read_mask:	read access mask.
  * @write_mask:	write access mask.
- * @lock:	indicates whether or not to permanently lock this region.
  * @return:	zero on success or negative value indicating error.
  */
 int imr_add_range(phys_addr_t base, size_t size,
-		  unsigned int rmask, unsigned int wmask, bool lock)
+		  unsigned int rmask, unsigned int wmask)
 {
 	phys_addr_t end;
 	unsigned int i;
@@ -411,7 +387,7 @@
 	imr.rmask = rmask;
 	imr.wmask = wmask;
 
-	ret = imr_write(idev, reg, &imr, lock);
+	ret = imr_write(idev, reg, &imr);
 	if (ret < 0) {
 		/*
 		 * In the highly unlikely event iosf_mbi_write failed
@@ -422,7 +398,7 @@
 		imr.addr_hi = 0;
 		imr.rmask = IMR_READ_ACCESS_ALL;
 		imr.wmask = IMR_WRITE_ACCESS_ALL;
-		imr_write(idev, reg, &imr, false);
+		imr_write(idev, reg, &imr);
 	}
 failed:
 	mutex_unlock(&idev->lock);
@@ -518,7 +494,7 @@
 	imr.rmask = IMR_READ_ACCESS_ALL;
 	imr.wmask = IMR_WRITE_ACCESS_ALL;
 
-	ret = imr_write(idev, reg, &imr, false);
+	ret = imr_write(idev, reg, &imr);
 
 failed:
 	mutex_unlock(&idev->lock);
@@ -592,14 +568,14 @@
 	end = (unsigned long)__end_rodata - 1;
 
 	/*
-	 * Setup a locked IMR around the physical extent of the kernel
+	 * Setup an unlocked IMR around the physical extent of the kernel
 	 * from the beginning of the .text secton to the end of the
 	 * .rodata section as one physically contiguous block.
 	 *
 	 * We don't round up @size since it is already PAGE_SIZE aligned.
 	 * See vmlinux.lds.S for details.
 	 */
-	ret = imr_add_range(base, size, IMR_CPU, IMR_CPU, true);
+	ret = imr_add_range(base, size, IMR_CPU, IMR_CPU);
 	if (ret < 0) {
 		pr_err("unable to setup IMR for kernel: %zu KiB (%lx - %lx)\n",
 			size / 1024, start, end);
@@ -614,7 +590,6 @@
 	{ X86_VENDOR_INTEL, 5, 9 },	/* Intel Quark SoC X1000. */
 	{}
 };
-MODULE_DEVICE_TABLE(x86cpu, imr_ids);
 
 /**
  * imr_init - entry point for IMR driver.
@@ -640,22 +615,4 @@
 	imr_fixup_memmap(idev);
 	return 0;
 }
-
-/**
- * imr_exit - exit point for IMR code.
- *
- * Deregisters debugfs, leave IMR state as-is.
- *
- * return:
- */
-static void __exit imr_exit(void)
-{
-	imr_debugfs_unregister(&imr_dev);
-}
-
-module_init(imr_init);
-module_exit(imr_exit);
-
-MODULE_AUTHOR("Bryan O'Donoghue <pure.logic@nexus-software.ie>");
-MODULE_DESCRIPTION("Intel Isolated Memory Region driver");
-MODULE_LICENSE("Dual BSD/GPL");
+device_initcall(imr_init);

diff --git a/arch/x86/platform/intel-quark/imr_selftest.c b/arch/x86/platform/intel-quark/imr_selftest.c
index 278e4da..f5bad40 100644
--- a/arch/x86/platform/intel-quark/imr_selftest.c
+++ b/arch/x86/platform/intel-quark/imr_selftest.c

@@ -1,5 +1,5 @@
 /**
- * imr_selftest.c
+ * imr_selftest.c -- Intel Isolated Memory Region self-test driver
  *
  * Copyright(c) 2013 Intel Corporation.
  * Copyright(c) 2015 Bryan O'Donoghue <pure.logic@nexus-software.ie>
@@ -15,7 +15,6 @@
 #include <asm/imr.h>
 #include <linux/init.h>
 #include <linux/mm.h>
-#include <linux/module.h>
 #include <linux/types.h>
 
 #define SELFTEST KBUILD_MODNAME ": "
@@ -61,30 +60,30 @@
 	int ret;
 
 	/* Test zero zero. */
-	ret = imr_add_range(0, 0, 0, 0, false);
+	ret = imr_add_range(0, 0, 0, 0);
 	imr_self_test_result(ret < 0, "zero sized IMR\n");
 
 	/* Test exact overlap. */
-	ret = imr_add_range(base, size, IMR_CPU, IMR_CPU, false);
+	ret = imr_add_range(base, size, IMR_CPU, IMR_CPU);
 	imr_self_test_result(ret < 0, fmt_over, __va(base), __va(base + size));
 
 	/* Test overlap with base inside of existing. */
 	base += size - IMR_ALIGN;
-	ret = imr_add_range(base, size, IMR_CPU, IMR_CPU, false);
+	ret = imr_add_range(base, size, IMR_CPU, IMR_CPU);
 	imr_self_test_result(ret < 0, fmt_over, __va(base), __va(base + size));
 
 	/* Test overlap with end inside of existing. */
 	base -= size + IMR_ALIGN * 2;
-	ret = imr_add_range(base, size, IMR_CPU, IMR_CPU, false);
+	ret = imr_add_range(base, size, IMR_CPU, IMR_CPU);
 	imr_self_test_result(ret < 0, fmt_over, __va(base), __va(base + size));
 
 	/* Test that a 1 KiB IMR @ zero with read/write all will bomb out. */
 	ret = imr_add_range(0, IMR_ALIGN, IMR_READ_ACCESS_ALL,
-			    IMR_WRITE_ACCESS_ALL, false);
+			    IMR_WRITE_ACCESS_ALL);
 	imr_self_test_result(ret < 0, "1KiB IMR @ 0x00000000 - access-all\n");
 
 	/* Test that a 1 KiB IMR @ zero with CPU only will work. */
-	ret = imr_add_range(0, IMR_ALIGN, IMR_CPU, IMR_CPU, false);
+	ret = imr_add_range(0, IMR_ALIGN, IMR_CPU, IMR_CPU);
 	imr_self_test_result(ret >= 0, "1KiB IMR @ 0x00000000 - cpu-access\n");
 	if (ret >= 0) {
 		ret = imr_remove_range(0, IMR_ALIGN);
@@ -93,8 +92,7 @@
 
 	/* Test 2 KiB works. */
 	size = IMR_ALIGN * 2;
-	ret = imr_add_range(0, size, IMR_READ_ACCESS_ALL,
-			    IMR_WRITE_ACCESS_ALL, false);
+	ret = imr_add_range(0, size, IMR_READ_ACCESS_ALL, IMR_WRITE_ACCESS_ALL);
 	imr_self_test_result(ret >= 0, "2KiB IMR @ 0x00000000\n");
 	if (ret >= 0) {
 		ret = imr_remove_range(0, size);
@@ -106,7 +104,6 @@
 	{ X86_VENDOR_INTEL, 5, 9 },	/* Intel Quark SoC X1000. */
 	{}
 };
-MODULE_DEVICE_TABLE(x86cpu, imr_ids);
 
 /**
  * imr_self_test_init - entry point for IMR driver.
@@ -125,13 +122,4 @@
  *
  * return:
  */
-static void __exit imr_self_test_exit(void)
-{
-}
-
-module_init(imr_self_test_init);
-module_exit(imr_self_test_exit);
-
-MODULE_AUTHOR("Bryan O'Donoghue <pure.logic@nexus-software.ie>");
-MODULE_DESCRIPTION("Intel Isolated Memory Region self-test driver");
-MODULE_LICENSE("Dual BSD/GPL");
+device_initcall(imr_self_test_init);

diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h
index 174781a..00c3190 100644
--- a/arch/x86/um/asm/barrier.h
+++ b/arch/x86/um/asm/barrier.h

@@ -3,7 +3,7 @@
 
 #include <asm/asm.h>
 #include <asm/segment.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
 #include <asm/cmpxchg.h>
 #include <asm/nops.h>
 

diff --git a/arch/x86/um/os-Linux/task_size.c b/arch/x86/um/os-Linux/task_size.c
index 8502ad3..5adb6a2 100644
--- a/arch/x86/um/os-Linux/task_size.c
+++ b/arch/x86/um/os-Linux/task_size.c

@@ -109,7 +109,7 @@
 		exit(1);
 	}
 
-	printf("0x%x\n", bottom << UM_KERN_PAGE_SHIFT);
+	printf("0x%lx\n", bottom << UM_KERN_PAGE_SHIFT);
 	printf("Locating the top of the address space ... ");
 	fflush(stdout);
 
@@ -134,7 +134,7 @@
 		exit(1);
 	}
 	top <<= UM_KERN_PAGE_SHIFT;
-	printf("0x%x\n", top);
+	printf("0x%lx\n", top);
 
 	return top;
 }

diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c
index 439c099..bfce503 100644
--- a/arch/x86/um/sys_call_table_32.c
+++ b/arch/x86/um/sys_call_table_32.c

@@ -25,11 +25,11 @@
 
 #define old_mmap sys_old_mmap
 
-#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
+#define __SYSCALL_I386(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
 #include <asm/syscalls_32.h>
 
 #undef __SYSCALL_I386
-#define __SYSCALL_I386(nr, sym, compat) [ nr ] = sym,
+#define __SYSCALL_I386(nr, sym, qual) [ nr ] = sym,
 
 extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
 

diff --git a/arch/x86/um/sys_call_table_64.c b/arch/x86/um/sys_call_table_64.c
index b74ea6c..f306413 100644
--- a/arch/x86/um/sys_call_table_64.c
+++ b/arch/x86/um/sys_call_table_64.c

@@ -35,14 +35,11 @@
 #define stub_execveat sys_execveat
 #define stub_rt_sigreturn sys_rt_sigreturn
 
-#define __SYSCALL_COMMON(nr, sym, compat) __SYSCALL_64(nr, sym, compat)
-#define __SYSCALL_X32(nr, sym, compat) /* Not supported */
-
-#define __SYSCALL_64(nr, sym, compat) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
+#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
 #include <asm/syscalls_64.h>
 
 #undef __SYSCALL_64
-#define __SYSCALL_64(nr, sym, compat) [ nr ] = sym,
+#define __SYSCALL_64(nr, sym, qual) [ nr ] = sym,
 
 extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
 

diff --git a/arch/x86/um/user-offsets.c b/arch/x86/um/user-offsets.c
index ce7e360..470564b 100644
--- a/arch/x86/um/user-offsets.c
+++ b/arch/x86/um/user-offsets.c

@@ -9,14 +9,12 @@
 #include <asm/types.h>
 
 #ifdef __i386__
-#define __SYSCALL_I386(nr, sym, compat) [nr] = 1,
+#define __SYSCALL_I386(nr, sym, qual) [nr] = 1,
 static char syscalls[] = {
 #include <asm/syscalls_32.h>
 };
 #else
-#define __SYSCALL_64(nr, sym, compat) [nr] = 1,
-#define __SYSCALL_COMMON(nr, sym, compat) [nr] = 1,
-#define __SYSCALL_X32(nr, sym, compat) /* Not supported */
+#define __SYSCALL_64(nr, sym, qual) [nr] = 1,
 static char syscalls[] = {
 #include <asm/syscalls_64.h>
 };

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index d09e4c9..2c26108 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c

@@ -1654,7 +1654,7 @@
 	cpu_detect(&new_cpu_data);
 	set_cpu_cap(&new_cpu_data, X86_FEATURE_FPU);
 	new_cpu_data.wp_works_ok = 1;
-	new_cpu_data.x86_capability[0] = cpuid_edx(1);
+	new_cpu_data.x86_capability[CPUID_1_EDX] = cpuid_edx(1);
 #endif
 
 	if (xen_start_info->mod_start) {

diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c
index 724a087..9466354 100644
--- a/arch/x86/xen/pmu.c
+++ b/arch/x86/xen/pmu.c

@@ -11,7 +11,7 @@
 #include "pmu.h"
 
 /* x86_pmu.handle_irq definition */
-#include "../kernel/cpu/perf_event.h"
+#include "../events/perf_event.h"
 
 #define XENPMU_IRQ_PROCESSING    1
 struct xenpmu {

diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 3f4ebf0..3c6d17f 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c

@@ -112,7 +112,7 @@
 		xen_pvh_secondary_vcpu_init(cpu);
 #endif
 	cpu_bringup();
-	cpu_startup_entry(CPUHP_ONLINE);
+	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 }
 
 static void xen_smp_intr_free(unsigned int cpu)

diff --git a/arch/xtensa/kernel/smp.c b/arch/xtensa/kernel/smp.c
index 4d02e38..fc4ad21 100644
--- a/arch/xtensa/kernel/smp.c
+++ b/arch/xtensa/kernel/smp.c

@@ -157,7 +157,7 @@
 
 	complete(&cpu_running);
 
-	cpu_startup_entry(CPUHP_ONLINE);
+	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 }
 
 static void mx_cpu_start(void *p)

diff --git a/block/Kconfig b/block/Kconfig
index 161491d..0363cd7 100644
--- a/block/Kconfig
+++ b/block/Kconfig

@@ -88,6 +88,19 @@
 	T10/SCSI Data Integrity Field or the T13/ATA External Path
 	Protection.  If in doubt, say N.
 
+config BLK_DEV_DAX
+	bool "Block device DAX support"
+	depends on FS_DAX
+	depends on BROKEN
+	help
+	  When DAX support is available (CONFIG_FS_DAX) raw block
+	  devices can also support direct userspace access to the
+	  storage capacity via MMAP(2) similar to a file on a
+	  DAX-enabled filesystem.  However, the DAX I/O-path disables
+	  some standard I/O-statistics, and the MMAP(2) path has some
+	  operational differences due to bypassing the page
+	  cache.  If in doubt, say N.
+
 config BLK_DEV_THROTTLING
 	bool "Block layer bio throttling support"
 	depends on BLK_CGROUP=y

diff --git a/block/blk-map.c b/block/blk-map.c
index f565e11..a54f054 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c

@@ -57,6 +57,49 @@
 	return ret;
 }
 
+static int __blk_rq_map_user_iov(struct request *rq,
+		struct rq_map_data *map_data, struct iov_iter *iter,
+		gfp_t gfp_mask, bool copy)
+{
+	struct request_queue *q = rq->q;
+	struct bio *bio, *orig_bio;
+	int ret;
+
+	if (copy)
+		bio = bio_copy_user_iov(q, map_data, iter, gfp_mask);
+	else
+		bio = bio_map_user_iov(q, iter, gfp_mask);
+
+	if (IS_ERR(bio))
+		return PTR_ERR(bio);
+
+	if (map_data && map_data->null_mapped)
+		bio_set_flag(bio, BIO_NULL_MAPPED);
+
+	iov_iter_advance(iter, bio->bi_iter.bi_size);
+	if (map_data)
+		map_data->offset += bio->bi_iter.bi_size;
+
+	orig_bio = bio;
+	blk_queue_bounce(q, &bio);
+
+	/*
+	 * We link the bounce buffer in and could have to traverse it
+	 * later so we have to get a ref to prevent it from being freed
+	 */
+	bio_get(bio);
+
+	ret = blk_rq_append_bio(q, rq, bio);
+	if (ret) {
+		bio_endio(bio);
+		__blk_rq_unmap_user(orig_bio);
+		bio_put(bio);
+		return ret;
+	}
+
+	return 0;
+}
+
 /**
  * blk_rq_map_user_iov - map user data to a request, for REQ_TYPE_BLOCK_PC usage
  * @q:		request queue where request should be inserted
@@ -82,10 +125,11 @@
 			struct rq_map_data *map_data,
 			const struct iov_iter *iter, gfp_t gfp_mask)
 {
-	struct bio *bio;
-	int unaligned = 0;
-	struct iov_iter i;
 	struct iovec iov, prv = {.iov_base = NULL, .iov_len = 0};
+	bool copy = (q->dma_pad_mask & iter->count) || map_data;
+	struct bio *bio = NULL;
+	struct iov_iter i;
+	int ret;
 
 	if (!iter || !iter->count)
 		return -EINVAL;
@@ -101,42 +145,29 @@
 		 */
 		if ((uaddr & queue_dma_alignment(q)) ||
 		    iovec_gap_to_prv(q, &prv, &iov))
-			unaligned = 1;
+			copy = true;
 
 		prv.iov_base = iov.iov_base;
 		prv.iov_len = iov.iov_len;
 	}
 
-	if (unaligned || (q->dma_pad_mask & iter->count) || map_data)
-		bio = bio_copy_user_iov(q, map_data, iter, gfp_mask);
-	else
-		bio = bio_map_user_iov(q, iter, gfp_mask);
-
-	if (IS_ERR(bio))
-		return PTR_ERR(bio);
-
-	if (map_data && map_data->null_mapped)
-		bio_set_flag(bio, BIO_NULL_MAPPED);
-
-	if (bio->bi_iter.bi_size != iter->count) {
-		/*
-		 * Grab an extra reference to this bio, as bio_unmap_user()
-		 * expects to be able to drop it twice as it happens on the
-		 * normal IO completion path
-		 */
-		bio_get(bio);
-		bio_endio(bio);
-		__blk_rq_unmap_user(bio);
-		return -EINVAL;
-	}
+	i = *iter;
+	do {
+		ret =__blk_rq_map_user_iov(rq, map_data, &i, gfp_mask, copy);
+		if (ret)
+			goto unmap_rq;
+		if (!bio)
+			bio = rq->bio;
+	} while (iov_iter_count(&i));
 
 	if (!bio_flagged(bio, BIO_USER_MAPPED))
 		rq->cmd_flags |= REQ_COPY_USER;
-
-	blk_queue_bounce(q, &bio);
-	bio_get(bio);
-	blk_rq_bio_prep(q, rq, bio);
 	return 0;
+
+unmap_rq:
+	__blk_rq_unmap_user(bio);
+	rq->bio = NULL;
+	return -EINVAL;
 }
 EXPORT_SYMBOL(blk_rq_map_user_iov);
 

diff --git a/block/blk-merge.c b/block/blk-merge.c
index 888a7fe..2613531 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c

@@ -304,7 +304,6 @@
 				   struct bio *nxt)
 {
 	struct bio_vec end_bv = { NULL }, nxt_bv;
-	struct bvec_iter iter;
 
 	if (!blk_queue_cluster(q))
 		return 0;
@@ -316,11 +315,8 @@
 	if (!bio_has_data(bio))
 		return 1;
 
-	bio_for_each_segment(end_bv, bio, iter)
-		if (end_bv.bv_len == iter.bi_size)
-			break;
-
-	nxt_bv = bio_iovec(nxt);
+	bio_get_last_bvec(bio, &end_bv);
+	bio_get_first_bvec(nxt, &nxt_bv);
 
 	if (!BIOVEC_PHYS_MERGEABLE(&end_bv, &nxt_bv))
 		return 0;

diff --git a/drivers/acpi/acpi_platform.c b/drivers/acpi/acpi_platform.c
index 296b7a1..b6f7fa3 100644
--- a/drivers/acpi/acpi_platform.c
+++ b/drivers/acpi/acpi_platform.c

@@ -62,7 +62,7 @@
 	if (count < 0) {
 		return NULL;
 	} else if (count > 0) {
-		resources = kmalloc(count * sizeof(struct resource),
+		resources = kzalloc(count * sizeof(struct resource),
 				    GFP_KERNEL);
 		if (!resources) {
 			dev_err(&adev->dev, "No memory for resources\n");

diff --git a/drivers/acpi/acpica/psargs.c b/drivers/acpi/acpica/psargs.c
index 3052185..d48cbed 100644
--- a/drivers/acpi/acpica/psargs.c
+++ b/drivers/acpi/acpica/psargs.c

@@ -269,8 +269,7 @@
 	 */
 	if (ACPI_SUCCESS(status) &&
 	    possible_method_call && (node->type == ACPI_TYPE_METHOD)) {
-		if (GET_CURRENT_ARG_TYPE(walk_state->arg_types) ==
-		    ARGP_SUPERNAME) {
+		if (walk_state->opcode == AML_UNLOAD_OP) {
 			/*
 			 * acpi_ps_get_next_namestring has increased the AML pointer,
 			 * so we need to restore the saved AML pointer for method call.
@@ -697,7 +696,7 @@
  *
  * PARAMETERS:  walk_state          - Current state
  *              parser_state        - Current parser state object
- *              arg_type            - The parser argument type (ARGP_*)
+ *              arg_type            - The argument type (AML_*_ARG)
  *              return_arg          - Where the next arg is returned
  *
  * RETURN:      Status, and an op object containing the next argument.
@@ -817,9 +816,9 @@
 				return_ACPI_STATUS(AE_NO_MEMORY);
 			}
 
-			/* super_name allows argument to be a method call */
+			/* To support super_name arg of Unload */
 
-			if (arg_type == ARGP_SUPERNAME) {
+			if (walk_state->opcode == AML_UNLOAD_OP) {
 				status =
 				    acpi_ps_get_next_namepath(walk_state,
 							      parser_state, arg,

diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c
index 0431883..559c117 100644
--- a/drivers/acpi/apei/einj.c
+++ b/drivers/acpi/apei/einj.c

@@ -519,7 +519,7 @@
 			     u64 param3, u64 param4)
 {
 	int rc;
-	unsigned long pfn;
+	u64 base_addr, size;
 
 	/* If user manually set "flags", make sure it is legal */
 	if (flags && (flags &
@@ -545,10 +545,17 @@
 	/*
 	 * Disallow crazy address masks that give BIOS leeway to pick
 	 * injection address almost anywhere. Insist on page or
-	 * better granularity and that target address is normal RAM.
+	 * better granularity and that target address is normal RAM or
+	 * NVDIMM.
 	 */
-	pfn = PFN_DOWN(param1 & param2);
-	if (!page_is_ram(pfn) || ((param2 & PAGE_MASK) != PAGE_MASK))
+	base_addr = param1 & param2;
+	size = ~param2 + 1;
+
+	if (((param2 & PAGE_MASK) != PAGE_MASK) ||
+	    ((region_intersects(base_addr, size, IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE)
+				!= REGION_INTERSECTS) &&
+	     (region_intersects(base_addr, size, IORESOURCE_MEM, IORES_DESC_PERSISTENT_MEMORY)
+				!= REGION_INTERSECTS)))
 		return -EINVAL;
 
 inject:

diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c
index ad6d8c6..35947ac 100644
--- a/drivers/acpi/nfit.c
+++ b/drivers/acpi/nfit.c

@@ -469,37 +469,16 @@
 	nfit_mem->bdw = NULL;
 }
 
-static int nfit_mem_add(struct acpi_nfit_desc *acpi_desc,
+static void nfit_mem_init_bdw(struct acpi_nfit_desc *acpi_desc,
 		struct nfit_mem *nfit_mem, struct acpi_nfit_system_address *spa)
 {
 	u16 dcr = __to_nfit_memdev(nfit_mem)->region_index;
 	struct nfit_memdev *nfit_memdev;
 	struct nfit_flush *nfit_flush;
-	struct nfit_dcr *nfit_dcr;
 	struct nfit_bdw *nfit_bdw;
 	struct nfit_idt *nfit_idt;
 	u16 idt_idx, range_index;
 
-	list_for_each_entry(nfit_dcr, &acpi_desc->dcrs, list) {
-		if (nfit_dcr->dcr->region_index != dcr)
-			continue;
-		nfit_mem->dcr = nfit_dcr->dcr;
-		break;
-	}
-
-	if (!nfit_mem->dcr) {
-		dev_dbg(acpi_desc->dev, "SPA %d missing:%s%s\n",
-				spa->range_index, __to_nfit_memdev(nfit_mem)
-				? "" : " MEMDEV", nfit_mem->dcr ? "" : " DCR");
-		return -ENODEV;
-	}
-
-	/*
-	 * We've found enough to create an nvdimm, optionally
-	 * find an associated BDW
-	 */
-	list_add(&nfit_mem->list, &acpi_desc->dimms);
-
 	list_for_each_entry(nfit_bdw, &acpi_desc->bdws, list) {
 		if (nfit_bdw->bdw->region_index != dcr)
 			continue;
@@ -508,12 +487,12 @@
 	}
 
 	if (!nfit_mem->bdw)
-		return 0;
+		return;
 
 	nfit_mem_find_spa_bdw(acpi_desc, nfit_mem);
 
 	if (!nfit_mem->spa_bdw)
-		return 0;
+		return;
 
 	range_index = nfit_mem->spa_bdw->range_index;
 	list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
@@ -538,8 +517,6 @@
 		}
 		break;
 	}
-
-	return 0;
 }
 
 static int nfit_mem_dcr_init(struct acpi_nfit_desc *acpi_desc,
@@ -548,7 +525,6 @@
 	struct nfit_mem *nfit_mem, *found;
 	struct nfit_memdev *nfit_memdev;
 	int type = nfit_spa_type(spa);
-	u16 dcr;
 
 	switch (type) {
 	case NFIT_SPA_DCR:
@@ -559,14 +535,18 @@
 	}
 
 	list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
-		int rc;
+		struct nfit_dcr *nfit_dcr;
+		u32 device_handle;
+		u16 dcr;
 
 		if (nfit_memdev->memdev->range_index != spa->range_index)
 			continue;
 		found = NULL;
 		dcr = nfit_memdev->memdev->region_index;
+		device_handle = nfit_memdev->memdev->device_handle;
 		list_for_each_entry(nfit_mem, &acpi_desc->dimms, list)
-			if (__to_nfit_memdev(nfit_mem)->region_index == dcr) {
+			if (__to_nfit_memdev(nfit_mem)->device_handle
+					== device_handle) {
 				found = nfit_mem;
 				break;
 			}
@@ -579,6 +559,31 @@
 			if (!nfit_mem)
 				return -ENOMEM;
 			INIT_LIST_HEAD(&nfit_mem->list);
+			list_add(&nfit_mem->list, &acpi_desc->dimms);
+		}
+
+		list_for_each_entry(nfit_dcr, &acpi_desc->dcrs, list) {
+			if (nfit_dcr->dcr->region_index != dcr)
+				continue;
+			/*
+			 * Record the control region for the dimm.  For
+			 * the ACPI 6.1 case, where there are separate
+			 * control regions for the pmem vs blk
+			 * interfaces, be sure to record the extended
+			 * blk details.
+			 */
+			if (!nfit_mem->dcr)
+				nfit_mem->dcr = nfit_dcr->dcr;
+			else if (nfit_mem->dcr->windows == 0
+					&& nfit_dcr->dcr->windows)
+				nfit_mem->dcr = nfit_dcr->dcr;
+			break;
+		}
+
+		if (dcr && !nfit_mem->dcr) {
+			dev_err(acpi_desc->dev, "SPA %d missing DCR %d\n",
+					spa->range_index, dcr);
+			return -ENODEV;
 		}
 
 		if (type == NFIT_SPA_DCR) {
@@ -595,6 +600,7 @@
 				nfit_mem->idt_dcr = nfit_idt->idt;
 				break;
 			}
+			nfit_mem_init_bdw(acpi_desc, nfit_mem, spa);
 		} else {
 			/*
 			 * A single dimm may belong to multiple SPA-PM
@@ -603,13 +609,6 @@
 			 */
 			nfit_mem->memdev_pmem = nfit_memdev->memdev;
 		}
-
-		if (found)
-			continue;
-
-		rc = nfit_mem_add(acpi_desc, nfit_mem, spa);
-		if (rc)
-			return rc;
 	}
 
 	return 0;
@@ -1504,9 +1503,7 @@
 		case 1:
 			/* ARS unsupported, but we should never get here */
 			return 0;
-		case 2:
-			return -EINVAL;
-		case 3:
+		case 6:
 			/* ARS is in progress */
 			msleep(1000);
 			break;
@@ -1517,13 +1514,13 @@
 }
 
 static int ars_get_status(struct nvdimm_bus_descriptor *nd_desc,
-		struct nd_cmd_ars_status *cmd)
+		struct nd_cmd_ars_status *cmd, u32 size)
 {
 	int rc;
 
 	while (1) {
 		rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_STATUS, cmd,
-			sizeof(*cmd));
+			size);
 		if (rc || cmd->status & 0xffff)
 			return -ENXIO;
 
@@ -1538,6 +1535,8 @@
 		case 2:
 			/* No ARS performed for the current boot */
 			return 0;
+		case 3:
+			/* TODO: error list overflow support */
 		default:
 			return -ENXIO;
 		}
@@ -1581,6 +1580,7 @@
 	struct nd_cmd_ars_start *ars_start = NULL;
 	struct nd_cmd_ars_cap *ars_cap = NULL;
 	u64 start, len, cur, remaining;
+	u32 ars_status_size;
 	int rc;
 
 	ars_cap = kzalloc(sizeof(*ars_cap), GFP_KERNEL);
@@ -1590,14 +1590,21 @@
 	start = ndr_desc->res->start;
 	len = ndr_desc->res->end - ndr_desc->res->start + 1;
 
+	/*
+	 * If ARS is unimplemented, unsupported, or if the 'Persistent Memory
+	 * Scrub' flag in extended status is not set, skip this but continue
+	 * initialization
+	 */
 	rc = ars_get_cap(nd_desc, ars_cap, start, len);
+	if (rc == -ENOTTY) {
+		dev_dbg(acpi_desc->dev,
+			"Address Range Scrub is not implemented, won't create an error list\n");
+		rc = 0;
+		goto out;
+	}
 	if (rc)
 		goto out;
 
-	/*
-	 * If ARS is unsupported, or if the 'Persistent Memory Scrub' flag in
-	 * extended status is not set, skip this but continue initialization
-	 */
 	if ((ars_cap->status & 0xffff) ||
 		!(ars_cap->status >> 16 & ND_ARS_PERSISTENT)) {
 		dev_warn(acpi_desc->dev,
@@ -1610,14 +1617,14 @@
 	 * Check if a full-range ARS has been run. If so, use those results
 	 * without having to start a new ARS.
 	 */
-	ars_status = kzalloc(ars_cap->max_ars_out + sizeof(*ars_status),
-			GFP_KERNEL);
+	ars_status_size = ars_cap->max_ars_out;
+	ars_status = kzalloc(ars_status_size, GFP_KERNEL);
 	if (!ars_status) {
 		rc = -ENOMEM;
 		goto out;
 	}
 
-	rc = ars_get_status(nd_desc, ars_status);
+	rc = ars_get_status(nd_desc, ars_status, ars_status_size);
 	if (rc)
 		goto out;
 
@@ -1647,7 +1654,7 @@
 		if (rc)
 			goto out;
 
-		rc = ars_get_status(nd_desc, ars_status);
+		rc = ars_get_status(nd_desc, ars_status, ars_status_size);
 		if (rc)
 			goto out;
 

diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c
index d30184c..c8e169e 100644
--- a/drivers/acpi/pci_irq.c
+++ b/drivers/acpi/pci_irq.c

@@ -406,7 +406,7 @@
 		return 0;
 	}
 
-	if (pci_has_managed_irq(dev))
+	if (dev->irq_managed && dev->irq > 0)
 		return 0;
 
 	entry = acpi_pci_irq_lookup(dev, pin);
@@ -451,7 +451,8 @@
 		kfree(entry);
 		return rc;
 	}
-	pci_set_managed_irq(dev, rc);
+	dev->irq = rc;
+	dev->irq_managed = 1;
 
 	if (link)
 		snprintf(link_desc, sizeof(link_desc), " -> Link[%s]", link);
@@ -474,9 +475,17 @@
 	u8 pin;
 
 	pin = dev->pin;
-	if (!pin || !pci_has_managed_irq(dev))
+	if (!pin || !dev->irq_managed || dev->irq <= 0)
 		return;
 
+	/* Keep IOAPIC pin configuration when suspending */
+	if (dev->dev.power.is_prepared)
+		return;
+#ifdef	CONFIG_PM
+	if (dev->dev.power.runtime_status == RPM_SUSPENDING)
+		return;
+#endif
+
 	entry = acpi_pci_irq_lookup(dev, pin);
 	if (!entry)
 		return;
@@ -496,6 +505,6 @@
 	dev_dbg(&dev->dev, "PCI INT %c disabled\n", pin_name(pin));
 	if (gsi >= 0) {
 		acpi_unregister_gsi(gsi);
-		pci_reset_managed_irq(dev);
+		dev->irq_managed = 0;
 	}
 }

diff --git a/drivers/acpi/pci_link.c b/drivers/acpi/pci_link.c
index fa28635..ededa90 100644
--- a/drivers/acpi/pci_link.c
+++ b/drivers/acpi/pci_link.c

@@ -4,7 +4,6 @@
  *  Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
  *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
  *  Copyright (C) 2002       Dominik Brodowski <devel@brodo.de>
- *  Copyright (c) 2015, The Linux Foundation. All rights reserved.
  *
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  *
@@ -438,6 +437,7 @@
  * enabled system.
  */
 
+#define ACPI_MAX_IRQS		256
 #define ACPI_MAX_ISA_IRQ	16
 
 #define PIRQ_PENALTY_PCI_AVAILABLE	(0)
@@ -447,7 +447,7 @@
 #define PIRQ_PENALTY_ISA_USED		(16*16*16*16*16)
 #define PIRQ_PENALTY_ISA_ALWAYS		(16*16*16*16*16*16)
 
-static int acpi_irq_isa_penalty[ACPI_MAX_ISA_IRQ] = {
+static int acpi_irq_penalty[ACPI_MAX_IRQS] = {
 	PIRQ_PENALTY_ISA_ALWAYS,	/* IRQ0 timer */
 	PIRQ_PENALTY_ISA_ALWAYS,	/* IRQ1 keyboard */
 	PIRQ_PENALTY_ISA_ALWAYS,	/* IRQ2 cascade */
@@ -464,68 +464,9 @@
 	PIRQ_PENALTY_ISA_USED,		/* IRQ13 fpe, sometimes */
 	PIRQ_PENALTY_ISA_USED,		/* IRQ14 ide0 */
 	PIRQ_PENALTY_ISA_USED,		/* IRQ15 ide1 */
+	/* >IRQ15 */
 };
 
-struct irq_penalty_info {
-	int irq;
-	int penalty;
-	struct list_head node;
-};
-
-static LIST_HEAD(acpi_irq_penalty_list);
-
-static int acpi_irq_get_penalty(int irq)
-{
-	struct irq_penalty_info *irq_info;
-
-	if (irq < ACPI_MAX_ISA_IRQ)
-		return acpi_irq_isa_penalty[irq];
-
-	list_for_each_entry(irq_info, &acpi_irq_penalty_list, node) {
-		if (irq_info->irq == irq)
-			return irq_info->penalty;
-	}
-
-	return 0;
-}
-
-static int acpi_irq_set_penalty(int irq, int new_penalty)
-{
-	struct irq_penalty_info *irq_info;
-
-	/* see if this is a ISA IRQ */
-	if (irq < ACPI_MAX_ISA_IRQ) {
-		acpi_irq_isa_penalty[irq] = new_penalty;
-		return 0;
-	}
-
-	/* next, try to locate from the dynamic list */
-	list_for_each_entry(irq_info, &acpi_irq_penalty_list, node) {
-		if (irq_info->irq == irq) {
-			irq_info->penalty  = new_penalty;
-			return 0;
-		}
-	}
-
-	/* nope, let's allocate a slot for this IRQ */
-	irq_info = kzalloc(sizeof(*irq_info), GFP_KERNEL);
-	if (!irq_info)
-		return -ENOMEM;
-
-	irq_info->irq = irq;
-	irq_info->penalty = new_penalty;
-	list_add_tail(&irq_info->node, &acpi_irq_penalty_list);
-
-	return 0;
-}
-
-static void acpi_irq_add_penalty(int irq, int penalty)
-{
-	int curpen = acpi_irq_get_penalty(irq);
-
-	acpi_irq_set_penalty(irq, curpen + penalty);
-}
-
 int __init acpi_irq_penalty_init(void)
 {
 	struct acpi_pci_link *link;
@@ -546,16 +487,15 @@
 			    link->irq.possible_count;
 
 			for (i = 0; i < link->irq.possible_count; i++) {
-				if (link->irq.possible[i] < ACPI_MAX_ISA_IRQ) {
-					int irqpos = link->irq.possible[i];
-
-					acpi_irq_add_penalty(irqpos, penalty);
-				}
+				if (link->irq.possible[i] < ACPI_MAX_ISA_IRQ)
+					acpi_irq_penalty[link->irq.
+							 possible[i]] +=
+					    penalty;
 			}
 
 		} else if (link->irq.active) {
-			acpi_irq_add_penalty(link->irq.active,
-					     PIRQ_PENALTY_PCI_POSSIBLE);
+			acpi_irq_penalty[link->irq.active] +=
+			    PIRQ_PENALTY_PCI_POSSIBLE;
 		}
 	}
 
@@ -607,12 +547,12 @@
 		 * the use of IRQs 9, 10, 11, and >15.
 		 */
 		for (i = (link->irq.possible_count - 1); i >= 0; i--) {
-			if (acpi_irq_get_penalty(irq) >
-			    acpi_irq_get_penalty(link->irq.possible[i]))
+			if (acpi_irq_penalty[irq] >
+			    acpi_irq_penalty[link->irq.possible[i]])
 				irq = link->irq.possible[i];
 		}
 	}
-	if (acpi_irq_get_penalty(irq) >= PIRQ_PENALTY_ISA_ALWAYS) {
+	if (acpi_irq_penalty[irq] >= PIRQ_PENALTY_ISA_ALWAYS) {
 		printk(KERN_ERR PREFIX "No IRQ available for %s [%s]. "
 			    "Try pci=noacpi or acpi=off\n",
 			    acpi_device_name(link->device),
@@ -628,8 +568,7 @@
 			    acpi_device_bid(link->device));
 		return -ENODEV;
 	} else {
-		acpi_irq_add_penalty(link->irq.active, PIRQ_PENALTY_PCI_USING);
-
+		acpi_irq_penalty[link->irq.active] += PIRQ_PENALTY_PCI_USING;
 		printk(KERN_WARNING PREFIX "%s [%s] enabled at IRQ %d\n",
 		       acpi_device_name(link->device),
 		       acpi_device_bid(link->device), link->irq.active);
@@ -839,7 +778,7 @@
 }
 
 /*
- * modify penalty from cmdline
+ * modify acpi_irq_penalty[] from cmdline
  */
 static int __init acpi_irq_penalty_update(char *str, int used)
 {
@@ -857,10 +796,13 @@
 		if (irq < 0)
 			continue;
 
+		if (irq >= ARRAY_SIZE(acpi_irq_penalty))
+			continue;
+
 		if (used)
-			acpi_irq_add_penalty(irq, PIRQ_PENALTY_ISA_USED);
+			acpi_irq_penalty[irq] += PIRQ_PENALTY_ISA_USED;
 		else
-			acpi_irq_set_penalty(irq, PIRQ_PENALTY_PCI_AVAILABLE);
+			acpi_irq_penalty[irq] = PIRQ_PENALTY_PCI_AVAILABLE;
 
 		if (retval != 2)	/* no next number */
 			break;
@@ -877,15 +819,18 @@
  */
 void acpi_penalize_isa_irq(int irq, int active)
 {
-	if (irq >= 0)
-		acpi_irq_add_penalty(irq, active ?
-			PIRQ_PENALTY_ISA_USED : PIRQ_PENALTY_PCI_USING);
+	if (irq >= 0 && irq < ARRAY_SIZE(acpi_irq_penalty)) {
+		if (active)
+			acpi_irq_penalty[irq] += PIRQ_PENALTY_ISA_USED;
+		else
+			acpi_irq_penalty[irq] += PIRQ_PENALTY_PCI_USING;
+	}
 }
 
 bool acpi_isa_irq_available(int irq)
 {
-	return irq >= 0 &&
-		(acpi_irq_get_penalty(irq) < PIRQ_PENALTY_ISA_ALWAYS);
+	return irq >= 0 && (irq >= ARRAY_SIZE(acpi_irq_penalty) ||
+			    acpi_irq_penalty[irq] < PIRQ_PENALTY_ISA_ALWAYS);
 }
 
 /*
@@ -895,18 +840,13 @@
  */
 void acpi_penalize_sci_irq(int irq, int trigger, int polarity)
 {
-	int penalty;
-
-	if (irq < 0)
-		return;
-
-	if (trigger != ACPI_MADT_TRIGGER_LEVEL ||
-	    polarity != ACPI_MADT_POLARITY_ACTIVE_LOW)
-		penalty = PIRQ_PENALTY_ISA_ALWAYS;
-	else
-		penalty = PIRQ_PENALTY_PCI_USING;
-
-	acpi_irq_add_penalty(irq, penalty);
+	if (irq >= 0 && irq < ARRAY_SIZE(acpi_irq_penalty)) {
+		if (trigger != ACPI_MADT_TRIGGER_LEVEL ||
+		    polarity != ACPI_MADT_POLARITY_ACTIVE_LOW)
+			acpi_irq_penalty[irq] += PIRQ_PENALTY_ISA_ALWAYS;
+		else
+			acpi_irq_penalty[irq] += PIRQ_PENALTY_PCI_USING;
+	}
 }
 
 /*

diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index a39e85f..7d00b7a 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c

@@ -2074,7 +2074,7 @@
 			if (get_user(cookie, (binder_uintptr_t __user *)ptr))
 				return -EFAULT;
 
-			ptr += sizeof(void *);
+			ptr += sizeof(cookie);
 			list_for_each_entry(w, &proc->delivered_death, entry) {
 				struct binder_ref_death *tmp_death = container_of(w, struct binder_ref_death, work);
 

diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 546a369..146dc0b 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c

@@ -367,15 +367,21 @@
 	{ PCI_VDEVICE(INTEL, 0xa107), board_ahci }, /* Sunrise Point-H RAID */
 	{ PCI_VDEVICE(INTEL, 0xa10f), board_ahci }, /* Sunrise Point-H RAID */
 	{ PCI_VDEVICE(INTEL, 0x2822), board_ahci }, /* Lewisburg RAID*/
+	{ PCI_VDEVICE(INTEL, 0x2823), board_ahci }, /* Lewisburg AHCI*/
 	{ PCI_VDEVICE(INTEL, 0x2826), board_ahci }, /* Lewisburg RAID*/
+	{ PCI_VDEVICE(INTEL, 0x2827), board_ahci }, /* Lewisburg RAID*/
 	{ PCI_VDEVICE(INTEL, 0xa182), board_ahci }, /* Lewisburg AHCI*/
 	{ PCI_VDEVICE(INTEL, 0xa184), board_ahci }, /* Lewisburg RAID*/
 	{ PCI_VDEVICE(INTEL, 0xa186), board_ahci }, /* Lewisburg RAID*/
 	{ PCI_VDEVICE(INTEL, 0xa18e), board_ahci }, /* Lewisburg RAID*/
+	{ PCI_VDEVICE(INTEL, 0xa1d2), board_ahci }, /* Lewisburg RAID*/
+	{ PCI_VDEVICE(INTEL, 0xa1d6), board_ahci }, /* Lewisburg RAID*/
 	{ PCI_VDEVICE(INTEL, 0xa202), board_ahci }, /* Lewisburg AHCI*/
 	{ PCI_VDEVICE(INTEL, 0xa204), board_ahci }, /* Lewisburg RAID*/
 	{ PCI_VDEVICE(INTEL, 0xa206), board_ahci }, /* Lewisburg RAID*/
 	{ PCI_VDEVICE(INTEL, 0xa20e), board_ahci }, /* Lewisburg RAID*/
+	{ PCI_VDEVICE(INTEL, 0xa252), board_ahci }, /* Lewisburg RAID*/
+	{ PCI_VDEVICE(INTEL, 0xa256), board_ahci }, /* Lewisburg RAID*/
 
 	/* JMicron 360/1/3/5/6, match class to avoid IDE function */
 	{ PCI_VENDOR_ID_JMICRON, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID,
@@ -1325,6 +1331,44 @@
 {}
 #endif
 
+#ifdef CONFIG_ARM64
+/*
+ * Due to ERRATA#22536, ThunderX needs to handle HOST_IRQ_STAT differently.
+ * Workaround is to make sure all pending IRQs are served before leaving
+ * handler.
+ */
+static irqreturn_t ahci_thunderx_irq_handler(int irq, void *dev_instance)
+{
+	struct ata_host *host = dev_instance;
+	struct ahci_host_priv *hpriv;
+	unsigned int rc = 0;
+	void __iomem *mmio;
+	u32 irq_stat, irq_masked;
+	unsigned int handled = 1;
+
+	VPRINTK("ENTER\n");
+	hpriv = host->private_data;
+	mmio = hpriv->mmio;
+	irq_stat = readl(mmio + HOST_IRQ_STAT);
+	if (!irq_stat)
+		return IRQ_NONE;
+
+	do {
+		irq_masked = irq_stat & hpriv->port_map;
+		spin_lock(&host->lock);
+		rc = ahci_handle_port_intr(host, irq_masked);
+		if (!rc)
+			handled = 0;
+		writel(irq_stat, mmio + HOST_IRQ_STAT);
+		irq_stat = readl(mmio + HOST_IRQ_STAT);
+		spin_unlock(&host->lock);
+	} while (irq_stat);
+	VPRINTK("EXIT\n");
+
+	return IRQ_RETVAL(handled);
+}
+#endif
+
 /*
  * ahci_init_msix() - optionally enable per-port MSI-X otherwise defer
  * to single msi.
@@ -1560,6 +1604,11 @@
 	if (ahci_broken_devslp(pdev))
 		hpriv->flags |= AHCI_HFLAG_NO_DEVSLP;
 
+#ifdef CONFIG_ARM64
+	if (pdev->vendor == 0x177d && pdev->device == 0xa01c)
+		hpriv->irq_handler = ahci_thunderx_irq_handler;
+#endif
+
 	/* save initial config */
 	ahci_pci_save_initial_config(pdev, hpriv);
 

diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h
index a44c75d..167ba7e 100644
--- a/drivers/ata/ahci.h
+++ b/drivers/ata/ahci.h

@@ -240,8 +240,7 @@
 						        error-handling stage) */
 	AHCI_HFLAG_NO_DEVSLP		= (1 << 17), /* no device sleep */
 	AHCI_HFLAG_NO_FBS		= (1 << 18), /* no FBS */
-	AHCI_HFLAG_EDGE_IRQ		= (1 << 19), /* HOST_IRQ_STAT behaves as
-							Edge Triggered */
+
 #ifdef CONFIG_PCI_MSI
 	AHCI_HFLAG_MULTI_MSI		= (1 << 20), /* multiple PCI MSIs */
 	AHCI_HFLAG_MULTI_MSIX		= (1 << 21), /* per-port MSI-X */
@@ -361,6 +360,7 @@
 	 * be overridden anytime before the host is activated.
 	 */
 	void			(*start_engine)(struct ata_port *ap);
+	irqreturn_t 		(*irq_handler)(int irq, void *dev_instance);
 };
 
 #ifdef CONFIG_PCI_MSI
@@ -424,6 +424,7 @@
 void ahci_print_info(struct ata_host *host, const char *scc_s);
 int ahci_host_activate(struct ata_host *host, struct scsi_host_template *sht);
 void ahci_error_handler(struct ata_port *ap);
+u32 ahci_handle_port_intr(struct ata_host *host, u32 irq_masked);
 
 static inline void __iomem *__ahci_port_base(struct ata_host *host,
 					     unsigned int port_no)

diff --git a/drivers/ata/ahci_xgene.c b/drivers/ata/ahci_xgene.c
index e2c6d9e..8e3f7fa 100644
--- a/drivers/ata/ahci_xgene.c
+++ b/drivers/ata/ahci_xgene.c

@@ -548,6 +548,88 @@
 	return rc;
 }
 
+/**
+ * xgene_ahci_handle_broken_edge_irq - Handle the broken irq.
+ * @ata_host: Host that recieved the irq
+ * @irq_masked: HOST_IRQ_STAT value
+ *
+ * For hardware with broken edge trigger latch
+ * the HOST_IRQ_STAT register misses the edge interrupt
+ * when clearing of HOST_IRQ_STAT register and hardware
+ * reporting the PORT_IRQ_STAT register at the
+ * same clock cycle.
+ * As such, the algorithm below outlines the workaround.
+ *
+ * 1. Read HOST_IRQ_STAT register and save the state.
+ * 2. Clear the HOST_IRQ_STAT register.
+ * 3. Read back the HOST_IRQ_STAT register.
+ * 4. If HOST_IRQ_STAT register equals to zero, then
+ *    traverse the rest of port's PORT_IRQ_STAT register
+ *    to check if an interrupt is triggered at that point else
+ *    go to step 6.
+ * 5. If PORT_IRQ_STAT register of rest ports is not equal to zero
+ *    then update the state of HOST_IRQ_STAT saved in step 1.
+ * 6. Handle port interrupts.
+ * 7. Exit
+ */
+static int xgene_ahci_handle_broken_edge_irq(struct ata_host *host,
+					     u32 irq_masked)
+{
+	struct ahci_host_priv *hpriv = host->private_data;
+	void __iomem *port_mmio;
+	int i;
+
+	if (!readl(hpriv->mmio + HOST_IRQ_STAT)) {
+		for (i = 0; i < host->n_ports; i++) {
+			if (irq_masked & (1 << i))
+				continue;
+
+			port_mmio = ahci_port_base(host->ports[i]);
+			if (readl(port_mmio + PORT_IRQ_STAT))
+				irq_masked |= (1 << i);
+		}
+	}
+
+	return ahci_handle_port_intr(host, irq_masked);
+}
+
+static irqreturn_t xgene_ahci_irq_intr(int irq, void *dev_instance)
+{
+	struct ata_host *host = dev_instance;
+	struct ahci_host_priv *hpriv;
+	unsigned int rc = 0;
+	void __iomem *mmio;
+	u32 irq_stat, irq_masked;
+
+	VPRINTK("ENTER\n");
+
+	hpriv = host->private_data;
+	mmio = hpriv->mmio;
+
+	/* sigh.  0xffffffff is a valid return from h/w */
+	irq_stat = readl(mmio + HOST_IRQ_STAT);
+	if (!irq_stat)
+		return IRQ_NONE;
+
+	irq_masked = irq_stat & hpriv->port_map;
+
+	spin_lock(&host->lock);
+
+	/*
+	 * HOST_IRQ_STAT behaves as edge triggered latch meaning that
+	 * it should be cleared before all the port events are cleared.
+	 */
+	writel(irq_stat, mmio + HOST_IRQ_STAT);
+
+	rc = xgene_ahci_handle_broken_edge_irq(host, irq_masked);
+
+	spin_unlock(&host->lock);
+
+	VPRINTK("EXIT\n");
+
+	return IRQ_RETVAL(rc);
+}
+
 static struct ata_port_operations xgene_ahci_v1_ops = {
 	.inherits = &ahci_ops,
 	.host_stop = xgene_ahci_host_stop,
@@ -779,7 +861,8 @@
 		hpriv->flags = AHCI_HFLAG_NO_NCQ;
 		break;
 	case XGENE_AHCI_V2:
-		hpriv->flags |= AHCI_HFLAG_YES_FBS | AHCI_HFLAG_EDGE_IRQ;
+		hpriv->flags |= AHCI_HFLAG_YES_FBS;
+		hpriv->irq_handler = xgene_ahci_irq_intr;
 		break;
 	default:
 		break;

diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c
index 4029679..85ea514 100644
--- a/drivers/ata/libahci.c
+++ b/drivers/ata/libahci.c

@@ -113,6 +113,7 @@
 				    const char *buf, size_t size);
 static ssize_t ahci_show_em_supported(struct device *dev,
 				      struct device_attribute *attr, char *buf);
+static irqreturn_t ahci_single_level_irq_intr(int irq, void *dev_instance);
 
 static DEVICE_ATTR(ahci_host_caps, S_IRUGO, ahci_show_host_caps, NULL);
 static DEVICE_ATTR(ahci_host_cap2, S_IRUGO, ahci_show_host_cap2, NULL);
@@ -512,6 +513,9 @@
 
 	if (!hpriv->start_engine)
 		hpriv->start_engine = ahci_start_engine;
+
+	if (!hpriv->irq_handler)
+		hpriv->irq_handler = ahci_single_level_irq_intr;
 }
 EXPORT_SYMBOL_GPL(ahci_save_initial_config);
 
@@ -1164,8 +1168,7 @@
 
 	/* mark esata ports */
 	tmp = readl(port_mmio + PORT_CMD);
-	if ((tmp & PORT_CMD_HPCP) ||
-	    ((tmp & PORT_CMD_ESP) && (hpriv->cap & HOST_CAP_SXS)))
+	if ((tmp & PORT_CMD_ESP) && (hpriv->cap & HOST_CAP_SXS))
 		ap->pflags |= ATA_PFLAG_EXTERNAL;
 }
 
@@ -1846,7 +1849,7 @@
 	return IRQ_HANDLED;
 }
 
-static u32 ahci_handle_port_intr(struct ata_host *host, u32 irq_masked)
+u32 ahci_handle_port_intr(struct ata_host *host, u32 irq_masked)
 {
 	unsigned int i, handled = 0;
 
@@ -1872,43 +1875,7 @@
 
 	return handled;
 }
-
-static irqreturn_t ahci_single_edge_irq_intr(int irq, void *dev_instance)
-{
-	struct ata_host *host = dev_instance;
-	struct ahci_host_priv *hpriv;
-	unsigned int rc = 0;
-	void __iomem *mmio;
-	u32 irq_stat, irq_masked;
-
-	VPRINTK("ENTER\n");
-
-	hpriv = host->private_data;
-	mmio = hpriv->mmio;
-
-	/* sigh.  0xffffffff is a valid return from h/w */
-	irq_stat = readl(mmio + HOST_IRQ_STAT);
-	if (!irq_stat)
-		return IRQ_NONE;
-
-	irq_masked = irq_stat & hpriv->port_map;
-
-	spin_lock(&host->lock);
-
-	/*
-	 * HOST_IRQ_STAT behaves as edge triggered latch meaning that
-	 * it should be cleared before all the port events are cleared.
-	 */
-	writel(irq_stat, mmio + HOST_IRQ_STAT);
-
-	rc = ahci_handle_port_intr(host, irq_masked);
-
-	spin_unlock(&host->lock);
-
-	VPRINTK("EXIT\n");
-
-	return IRQ_RETVAL(rc);
-}
+EXPORT_SYMBOL_GPL(ahci_handle_port_intr);
 
 static irqreturn_t ahci_single_level_irq_intr(int irq, void *dev_instance)
 {
@@ -2535,14 +2502,18 @@
 	int irq = hpriv->irq;
 	int rc;
 
-	if (hpriv->flags & (AHCI_HFLAG_MULTI_MSI | AHCI_HFLAG_MULTI_MSIX))
+	if (hpriv->flags & (AHCI_HFLAG_MULTI_MSI | AHCI_HFLAG_MULTI_MSIX)) {
+		if (hpriv->irq_handler)
+			dev_warn(host->dev, "both AHCI_HFLAG_MULTI_MSI flag set \
+				 and custom irq handler implemented\n");
+
 		rc = ahci_host_activate_multi_irqs(host, sht);
-	else if (hpriv->flags & AHCI_HFLAG_EDGE_IRQ)
-		rc = ata_host_activate(host, irq, ahci_single_edge_irq_intr,
+	} else {
+		rc = ata_host_activate(host, irq, hpriv->irq_handler,
 				       IRQF_SHARED, sht);
-	else
-		rc = ata_host_activate(host, irq, ahci_single_level_irq_intr,
-				       IRQF_SHARED, sht);
+	}
+
+
 	return rc;
 }
 EXPORT_SYMBOL_GPL(ahci_host_activate);

diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 7e959f9..e417e1a 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c

@@ -675,19 +675,18 @@
 int ata_sas_scsi_ioctl(struct ata_port *ap, struct scsi_device *scsidev,
 		     int cmd, void __user *arg)
 {
-	int val = -EINVAL, rc = -EINVAL;
+	unsigned long val;
+	int rc = -EINVAL;
 	unsigned long flags;
 
 	switch (cmd) {
-	case ATA_IOC_GET_IO32:
+	case HDIO_GET_32BIT:
 		spin_lock_irqsave(ap->lock, flags);
 		val = ata_ioc32(ap);
 		spin_unlock_irqrestore(ap->lock, flags);
-		if (copy_to_user(arg, &val, 1))
-			return -EFAULT;
-		return 0;
+		return put_user(val, (unsigned long __user *)arg);
 
-	case ATA_IOC_SET_IO32:
+	case HDIO_SET_32BIT:
 		val = (unsigned long) arg;
 		rc = 0;
 		spin_lock_irqsave(ap->lock, flags);

diff --git a/drivers/ata/pata_rb532_cf.c b/drivers/ata/pata_rb532_cf.c
index 12fe0f3..c8b6a78 100644
--- a/drivers/ata/pata_rb532_cf.c
+++ b/drivers/ata/pata_rb532_cf.c

@@ -32,6 +32,8 @@
 #include <linux/libata.h>
 #include <scsi/scsi_host.h>
 
+#include <asm/mach-rc32434/rb.h>
+
 #define DRV_NAME	"pata-rb532-cf"
 #define DRV_VERSION	"0.1.0"
 #define DRV_DESC	"PATA driver for RouterBOARD 532 Compact Flash"
@@ -107,6 +109,7 @@
 	int gpio;
 	struct resource *res;
 	struct ata_host *ah;
+	struct cf_device *pdata;
 	struct rb532_cf_info *info;
 	int ret;
 
@@ -122,7 +125,13 @@
 		return -ENOENT;
 	}
 
-	gpio = irq_to_gpio(irq);
+	pdata = dev_get_platdata(&pdev->dev);
+	if (!pdata) {
+		dev_err(&pdev->dev, "no platform data specified\n");
+		return -EINVAL;
+	}
+
+	gpio = pdata->gpio_pin;
 	if (gpio < 0) {
 		dev_err(&pdev->dev, "no GPIO found for irq%d\n", irq);
 		return -ENOENT;

diff --git a/drivers/base/property.c b/drivers/base/property.c
index c359351..a163f2c5 100644
--- a/drivers/base/property.c
+++ b/drivers/base/property.c

@@ -218,7 +218,7 @@
 	bool ret;
 
 	ret = __fwnode_property_present(fwnode, propname);
-	if (ret == false && fwnode && fwnode->secondary)
+	if (ret == false && fwnode && !IS_ERR_OR_NULL(fwnode->secondary))
 		ret = __fwnode_property_present(fwnode->secondary, propname);
 	return ret;
 }
@@ -423,7 +423,7 @@
 	int _ret_;									\
 	_ret_ = FWNODE_PROP_READ(_fwnode_, _propname_, _type_, _proptype_,		\
 				 _val_, _nval_);					\
-	if (_ret_ == -EINVAL && _fwnode_ && _fwnode_->secondary)			\
+	if (_ret_ == -EINVAL && _fwnode_ && !IS_ERR_OR_NULL(_fwnode_->secondary))	\
 		_ret_ = FWNODE_PROP_READ(_fwnode_->secondary, _propname_, _type_,	\
 				_proptype_, _val_, _nval_);				\
 	_ret_;										\
@@ -593,7 +593,7 @@
 	int ret;
 
 	ret = __fwnode_property_read_string_array(fwnode, propname, val, nval);
-	if (ret == -EINVAL && fwnode && fwnode->secondary)
+	if (ret == -EINVAL && fwnode && !IS_ERR_OR_NULL(fwnode->secondary))
 		ret = __fwnode_property_read_string_array(fwnode->secondary,
 							  propname, val, nval);
 	return ret;
@@ -621,7 +621,7 @@
 	int ret;
 
 	ret = __fwnode_property_read_string(fwnode, propname, val);
-	if (ret == -EINVAL && fwnode && fwnode->secondary)
+	if (ret == -EINVAL && fwnode && !IS_ERR_OR_NULL(fwnode->secondary))
 		ret = __fwnode_property_read_string(fwnode->secondary,
 						    propname, val);
 	return ret;

diff --git a/drivers/char/random.c b/drivers/char/random.c
index d0da5d8..b583e53 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c

@@ -1819,6 +1819,28 @@
 EXPORT_SYMBOL(get_random_int);
 
 /*
+ * Same as get_random_int(), but returns unsigned long.
+ */
+unsigned long get_random_long(void)
+{
+	__u32 *hash;
+	unsigned long ret;
+
+	if (arch_get_random_long(&ret))
+		return ret;
+
+	hash = get_cpu_var(get_random_int_hash);
+
+	hash[0] += current->pid + jiffies + random_get_entropy();
+	md5_transform(hash, random_int_secret);
+	ret = *(unsigned long *)hash;
+	put_cpu_var(get_random_int_hash);
+
+	return ret;
+}
+EXPORT_SYMBOL(get_random_long);
+
+/*
  * randomize_range() returns a start address such that
  *
  *    [...... <range> .....]

diff --git a/drivers/clk/ti/dpll3xxx.c b/drivers/clk/ti/dpll3xxx.c
index 1c30038..cc73929 100644
--- a/drivers/clk/ti/dpll3xxx.c
+++ b/drivers/clk/ti/dpll3xxx.c

@@ -460,7 +460,8 @@
 
 	parent = clk_hw_get_parent(hw);
 
-	if (clk_hw_get_rate(hw) == clk_get_rate(dd->clk_bypass)) {
+	if (clk_hw_get_rate(hw) ==
+	    clk_hw_get_rate(__clk_get_hw(dd->clk_bypass))) {
 		WARN_ON(parent != __clk_get_hw(dd->clk_bypass));
 		r = _omap3_noncore_dpll_bypass(clk);
 	} else {

diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index 33db740..c346be6 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig

@@ -160,6 +160,7 @@
 config CLKSRC_LPC32XX
 	bool "Clocksource for LPC32XX" if COMPILE_TEST
 	depends on GENERIC_CLOCKEVENTS && HAS_IOMEM
+	depends on ARM
 	select CLKSRC_MMIO
 	select CLKSRC_OF
 	help

diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index c64d543..f0dd9d4 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c

@@ -32,6 +32,14 @@
 #define CNTTIDR		0x08
 #define CNTTIDR_VIRT(n)	(BIT(1) << ((n) * 4))
 
+#define CNTACR(n)	(0x40 + ((n) * 4))
+#define CNTACR_RPCT	BIT(0)
+#define CNTACR_RVCT	BIT(1)
+#define CNTACR_RFRQ	BIT(2)
+#define CNTACR_RVOFF	BIT(3)
+#define CNTACR_RWVT	BIT(4)
+#define CNTACR_RWPT	BIT(5)
+
 #define CNTVCT_LO	0x08
 #define CNTVCT_HI	0x0c
 #define CNTFRQ		0x10
@@ -266,10 +274,12 @@
 		if (arch_timer_use_virtual) {
 			clk->irq = arch_timer_ppi[VIRT_PPI];
 			clk->set_state_shutdown = arch_timer_shutdown_virt;
+			clk->set_state_oneshot_stopped = arch_timer_shutdown_virt;
 			clk->set_next_event = arch_timer_set_next_event_virt;
 		} else {
 			clk->irq = arch_timer_ppi[PHYS_SECURE_PPI];
 			clk->set_state_shutdown = arch_timer_shutdown_phys;
+			clk->set_state_oneshot_stopped = arch_timer_shutdown_phys;
 			clk->set_next_event = arch_timer_set_next_event_phys;
 		}
 	} else {
@@ -279,10 +289,12 @@
 		clk->cpumask = cpu_all_mask;
 		if (arch_timer_mem_use_virtual) {
 			clk->set_state_shutdown = arch_timer_shutdown_virt_mem;
+			clk->set_state_oneshot_stopped = arch_timer_shutdown_virt_mem;
 			clk->set_next_event =
 				arch_timer_set_next_event_virt_mem;
 		} else {
 			clk->set_state_shutdown = arch_timer_shutdown_phys_mem;
+			clk->set_state_oneshot_stopped = arch_timer_shutdown_phys_mem;
 			clk->set_next_event =
 				arch_timer_set_next_event_phys_mem;
 		}
@@ -757,7 +769,6 @@
 	}
 
 	cnttidr = readl_relaxed(cntctlbase + CNTTIDR);
-	iounmap(cntctlbase);
 
 	/*
 	 * Try to find a virtual capable frame. Otherwise fall back to a
@@ -765,20 +776,31 @@
 	 */
 	for_each_available_child_of_node(np, frame) {
 		int n;
+		u32 cntacr;
 
 		if (of_property_read_u32(frame, "frame-number", &n)) {
 			pr_err("arch_timer: Missing frame-number\n");
-			of_node_put(best_frame);
 			of_node_put(frame);
-			return;
+			goto out;
 		}
 
-		if (cnttidr & CNTTIDR_VIRT(n)) {
+		/* Try enabling everything, and see what sticks */
+		cntacr = CNTACR_RFRQ | CNTACR_RWPT | CNTACR_RPCT |
+			 CNTACR_RWVT | CNTACR_RVOFF | CNTACR_RVCT;
+		writel_relaxed(cntacr, cntctlbase + CNTACR(n));
+		cntacr = readl_relaxed(cntctlbase + CNTACR(n));
+
+		if ((cnttidr & CNTTIDR_VIRT(n)) &&
+		    !(~cntacr & (CNTACR_RWVT | CNTACR_RVCT))) {
 			of_node_put(best_frame);
 			best_frame = frame;
 			arch_timer_mem_use_virtual = true;
 			break;
 		}
+
+		if (~cntacr & (CNTACR_RWPT | CNTACR_RPCT))
+			continue;
+
 		of_node_put(best_frame);
 		best_frame = of_node_get(frame);
 	}
@@ -786,24 +808,26 @@
 	base = arch_counter_base = of_iomap(best_frame, 0);
 	if (!base) {
 		pr_err("arch_timer: Can't map frame's registers\n");
-		of_node_put(best_frame);
-		return;
+		goto out;
 	}
 
 	if (arch_timer_mem_use_virtual)
 		irq = irq_of_parse_and_map(best_frame, 1);
 	else
 		irq = irq_of_parse_and_map(best_frame, 0);
-	of_node_put(best_frame);
+
 	if (!irq) {
 		pr_err("arch_timer: Frame missing %s irq",
 		       arch_timer_mem_use_virtual ? "virt" : "phys");
-		return;
+		goto out;
 	}
 
 	arch_timer_detect_rate(base, np);
 	arch_timer_mem_register(base, irq);
 	arch_timer_common_init();
+out:
+	iounmap(cntctlbase);
+	of_node_put(best_frame);
 }
 CLOCKSOURCE_OF_DECLARE(armv7_arch_timer_mem, "arm,armv7-timer-mem",
 		       arch_timer_mem_init);

diff --git a/drivers/clocksource/arm_global_timer.c b/drivers/clocksource/arm_global_timer.c
index d189d8c..9df0d16 100644
--- a/drivers/clocksource/arm_global_timer.c
+++ b/drivers/clocksource/arm_global_timer.c

@@ -16,6 +16,7 @@
 #include <linux/clockchips.h>
 #include <linux/cpu.h>
 #include <linux/clk.h>
+#include <linux/delay.h>
 #include <linux/err.h>
 #include <linux/io.h>
 #include <linux/of.h>
@@ -174,6 +175,7 @@
 	clk->set_state_shutdown = gt_clockevent_shutdown;
 	clk->set_state_periodic = gt_clockevent_set_periodic;
 	clk->set_state_oneshot = gt_clockevent_shutdown;
+	clk->set_state_oneshot_stopped = gt_clockevent_shutdown;
 	clk->set_next_event = gt_clockevent_set_next_event;
 	clk->cpumask = cpumask_of(cpu);
 	clk->rating = 300;
@@ -221,6 +223,21 @@
 }
 #endif
 
+static unsigned long gt_read_long(void)
+{
+	return readl_relaxed(gt_base + GT_COUNTER0);
+}
+
+static struct delay_timer gt_delay_timer = {
+	.read_current_timer = gt_read_long,
+};
+
+static void __init gt_delay_timer_init(void)
+{
+	gt_delay_timer.freq = gt_clk_rate;
+	register_current_timer_delay(&gt_delay_timer);
+}
+
 static void __init gt_clocksource_init(void)
 {
 	writel(0, gt_base + GT_CONTROL);
@@ -317,6 +334,7 @@
 	/* Immediately configure the timer on the boot CPU */
 	gt_clocksource_init();
 	gt_clockevents_init(this_cpu_ptr(gt_evt));
+	gt_delay_timer_init();
 
 	return;
 

diff --git a/drivers/clocksource/exynos_mct.c b/drivers/clocksource/exynos_mct.c
index ff44082..be09bc0 100644
--- a/drivers/clocksource/exynos_mct.c
+++ b/drivers/clocksource/exynos_mct.c

@@ -313,6 +313,7 @@
 	.set_state_periodic	= mct_set_state_periodic,
 	.set_state_shutdown	= mct_set_state_shutdown,
 	.set_state_oneshot	= mct_set_state_shutdown,
+	.set_state_oneshot_stopped = mct_set_state_shutdown,
 	.tick_resume		= mct_set_state_shutdown,
 };
 
@@ -452,6 +453,7 @@
 	evt->set_state_periodic = set_state_periodic;
 	evt->set_state_shutdown = set_state_shutdown;
 	evt->set_state_oneshot = set_state_shutdown;
+	evt->set_state_oneshot_stopped = set_state_shutdown;
 	evt->tick_resume = set_state_shutdown;
 	evt->features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT;
 	evt->rating = 450;

diff --git a/drivers/clocksource/rockchip_timer.c b/drivers/clocksource/rockchip_timer.c
index 8c77a52..b991b28 100644
--- a/drivers/clocksource/rockchip_timer.c
+++ b/drivers/clocksource/rockchip_timer.c

@@ -122,23 +122,23 @@
 	pclk = of_clk_get_by_name(np, "pclk");
 	if (IS_ERR(pclk)) {
 		pr_err("Failed to get pclk for '%s'\n", TIMER_NAME);
-		return;
+		goto out_unmap;
 	}
 
 	if (clk_prepare_enable(pclk)) {
 		pr_err("Failed to enable pclk for '%s'\n", TIMER_NAME);
-		return;
+		goto out_unmap;
 	}
 
 	timer_clk = of_clk_get_by_name(np, "timer");
 	if (IS_ERR(timer_clk)) {
 		pr_err("Failed to get timer clock for '%s'\n", TIMER_NAME);
-		return;
+		goto out_timer_clk;
 	}
 
 	if (clk_prepare_enable(timer_clk)) {
 		pr_err("Failed to enable timer clock\n");
-		return;
+		goto out_timer_clk;
 	}
 
 	bc_timer.freq = clk_get_rate(timer_clk);
@@ -146,7 +146,7 @@
 	irq = irq_of_parse_and_map(np, 0);
 	if (!irq) {
 		pr_err("Failed to map interrupts for '%s'\n", TIMER_NAME);
-		return;
+		goto out_irq;
 	}
 
 	ce->name = TIMER_NAME;
@@ -164,10 +164,19 @@
 	ret = request_irq(irq, rk_timer_interrupt, IRQF_TIMER, TIMER_NAME, ce);
 	if (ret) {
 		pr_err("Failed to initialize '%s': %d\n", TIMER_NAME, ret);
-		return;
+		goto out_irq;
 	}
 
 	clockevents_config_and_register(ce, bc_timer.freq, 1, UINT_MAX);
+
+	return;
+
+out_irq:
+	clk_disable_unprepare(timer_clk);
+out_timer_clk:
+	clk_disable_unprepare(pclk);
+out_unmap:
+	iounmap(bc_timer.base);
 }
 
 CLOCKSOURCE_OF_DECLARE(rk_timer, "rockchip,rk3288-timer", rk_timer_init);

diff --git a/drivers/clocksource/time-lpc32xx.c b/drivers/clocksource/time-lpc32xx.c
index 1316876..daae61e 100644
--- a/drivers/clocksource/time-lpc32xx.c
+++ b/drivers/clocksource/time-lpc32xx.c

@@ -18,6 +18,7 @@
 #include <linux/clk.h>
 #include <linux/clockchips.h>
 #include <linux/clocksource.h>
+#include <linux/delay.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
 #include <linux/kernel.h>
@@ -43,6 +44,7 @@
 struct lpc32xx_clock_event_ddata {
 	struct clock_event_device evtdev;
 	void __iomem *base;
+	u32 ticks_per_jiffy;
 };
 
 /* Needed for the sched clock */
@@ -53,6 +55,15 @@
 	return readl(clocksource_timer_counter);
 }
 
+static unsigned long lpc32xx_delay_timer_read(void)
+{
+	return readl(clocksource_timer_counter);
+}
+
+static struct delay_timer lpc32xx_delay_timer = {
+	.read_current_timer = lpc32xx_delay_timer_read,
+};
+
 static int lpc32xx_clkevt_next_event(unsigned long delta,
 				     struct clock_event_device *evtdev)
 {
@@ -60,14 +71,13 @@
 		container_of(evtdev, struct lpc32xx_clock_event_ddata, evtdev);
 
 	/*
-	 * Place timer in reset and program the delta in the prescale
-	 * register (PR). When the prescale counter matches the value
-	 * in PR the counter register is incremented and the compare
-	 * match will trigger. After setup the timer is released from
-	 * reset and enabled.
+	 * Place timer in reset and program the delta in the match
+	 * channel 0 (MR0). When the timer counter matches the value
+	 * in MR0 register the match will trigger an interrupt.
+	 * After setup the timer is released from reset and enabled.
 	 */
 	writel_relaxed(LPC32XX_TIMER_TCR_CRST, ddata->base + LPC32XX_TIMER_TCR);
-	writel_relaxed(delta, ddata->base + LPC32XX_TIMER_PR);
+	writel_relaxed(delta, ddata->base + LPC32XX_TIMER_MR0);
 	writel_relaxed(LPC32XX_TIMER_TCR_CEN, ddata->base + LPC32XX_TIMER_TCR);
 
 	return 0;
@@ -86,11 +96,39 @@
 
 static int lpc32xx_clkevt_oneshot(struct clock_event_device *evtdev)
 {
+	struct lpc32xx_clock_event_ddata *ddata =
+		container_of(evtdev, struct lpc32xx_clock_event_ddata, evtdev);
+
 	/*
 	 * When using oneshot, we must also disable the timer
 	 * to wait for the first call to set_next_event().
 	 */
-	return lpc32xx_clkevt_shutdown(evtdev);
+	writel_relaxed(0, ddata->base + LPC32XX_TIMER_TCR);
+
+	/* Enable interrupt, reset on match and stop on match (MCR). */
+	writel_relaxed(LPC32XX_TIMER_MCR_MR0I | LPC32XX_TIMER_MCR_MR0R |
+		       LPC32XX_TIMER_MCR_MR0S, ddata->base + LPC32XX_TIMER_MCR);
+	return 0;
+}
+
+static int lpc32xx_clkevt_periodic(struct clock_event_device *evtdev)
+{
+	struct lpc32xx_clock_event_ddata *ddata =
+		container_of(evtdev, struct lpc32xx_clock_event_ddata, evtdev);
+
+	/* Enable interrupt and reset on match. */
+	writel_relaxed(LPC32XX_TIMER_MCR_MR0I | LPC32XX_TIMER_MCR_MR0R,
+		       ddata->base + LPC32XX_TIMER_MCR);
+
+	/*
+	 * Place timer in reset and program the delta in the match
+	 * channel 0 (MR0).
+	 */
+	writel_relaxed(LPC32XX_TIMER_TCR_CRST, ddata->base + LPC32XX_TIMER_TCR);
+	writel_relaxed(ddata->ticks_per_jiffy, ddata->base + LPC32XX_TIMER_MR0);
+	writel_relaxed(LPC32XX_TIMER_TCR_CEN, ddata->base + LPC32XX_TIMER_TCR);
+
+	return 0;
 }
 
 static irqreturn_t lpc32xx_clock_event_handler(int irq, void *dev_id)
@@ -108,11 +146,13 @@
 static struct lpc32xx_clock_event_ddata lpc32xx_clk_event_ddata = {
 	.evtdev = {
 		.name			= "lpc3220 clockevent",
-		.features		= CLOCK_EVT_FEAT_ONESHOT,
+		.features		= CLOCK_EVT_FEAT_ONESHOT |
+					  CLOCK_EVT_FEAT_PERIODIC,
 		.rating			= 300,
 		.set_next_event		= lpc32xx_clkevt_next_event,
 		.set_state_shutdown	= lpc32xx_clkevt_shutdown,
 		.set_state_oneshot	= lpc32xx_clkevt_oneshot,
+		.set_state_periodic	= lpc32xx_clkevt_periodic,
 	},
 };
 
@@ -162,6 +202,8 @@
 	}
 
 	clocksource_timer_counter = base + LPC32XX_TIMER_TC;
+	lpc32xx_delay_timer.freq = rate;
+	register_current_timer_delay(&lpc32xx_delay_timer);
 	sched_clock_register(lpc32xx_read_sched_clock, 32, rate);
 
 	return 0;
@@ -210,18 +252,16 @@
 
 	/*
 	 * Disable timer and clear any pending interrupt (IR) on match
-	 * channel 0 (MR0). Configure a compare match value of 1 on MR0
-	 * and enable interrupt, reset on match and stop on match (MCR).
+	 * channel 0 (MR0). Clear the prescaler as it's not used.
 	 */
 	writel_relaxed(0, base + LPC32XX_TIMER_TCR);
+	writel_relaxed(0, base + LPC32XX_TIMER_PR);
 	writel_relaxed(0, base + LPC32XX_TIMER_CTCR);
 	writel_relaxed(LPC32XX_TIMER_IR_MR0INT, base + LPC32XX_TIMER_IR);
-	writel_relaxed(1, base + LPC32XX_TIMER_MR0);
-	writel_relaxed(LPC32XX_TIMER_MCR_MR0I | LPC32XX_TIMER_MCR_MR0R |
-		       LPC32XX_TIMER_MCR_MR0S, base + LPC32XX_TIMER_MCR);
 
 	rate = clk_get_rate(clk);
 	lpc32xx_clk_event_ddata.base = base;
+	lpc32xx_clk_event_ddata.ticks_per_jiffy = DIV_ROUND_CLOSEST(rate, HZ);
 	clockevents_config_and_register(&lpc32xx_clk_event_ddata.evtdev,
 					rate, 1, -1);
 

diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index 659879a..f935110 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig

@@ -296,6 +296,7 @@
 config QORIQ_CPUFREQ
 	tristate "CPU frequency scaling driver for Freescale QorIQ SoCs"
 	depends on OF && COMMON_CLK && (PPC_E500MC || ARM)
+	depends on !CPU_THERMAL || THERMAL
 	select CLK_QORIQ
 	help
 	  This adds the CPUFreq driver support for Freescale QorIQ SoCs

diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm
index 0031069..14b1f93 100644
--- a/drivers/cpufreq/Kconfig.arm
+++ b/drivers/cpufreq/Kconfig.arm

@@ -84,10 +84,10 @@
 	  SoCs.
 
 config ARM_MT8173_CPUFREQ
-	bool "Mediatek MT8173 CPUFreq support"
+	tristate "Mediatek MT8173 CPUFreq support"
 	depends on ARCH_MEDIATEK && REGULATOR
 	depends on ARM64 || (ARM_CPU_TOPOLOGY && COMPILE_TEST)
-	depends on !CPU_THERMAL || THERMAL=y
+	depends on !CPU_THERMAL || THERMAL
 	select PM_OPP
 	help
 	  This adds the CPUFreq driver support for Mediatek MT8173 SoC.

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index cd83d47..3a4b39a 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c

@@ -1431,7 +1431,7 @@
 	if (!all_cpu_data)
 		return -ENOMEM;
 
-	if (static_cpu_has_safe(X86_FEATURE_HWP) && !no_hwp) {
+	if (static_cpu_has(X86_FEATURE_HWP) && !no_hwp) {
 		pr_info("intel_pstate: HWP enabled\n");
 		hwp_active++;
 	}

diff --git a/drivers/cpufreq/mt8173-cpufreq.c b/drivers/cpufreq/mt8173-cpufreq.c
index 1efba34..2058e6d 100644
--- a/drivers/cpufreq/mt8173-cpufreq.c
+++ b/drivers/cpufreq/mt8173-cpufreq.c

@@ -17,6 +17,7 @@
 #include <linux/cpu_cooling.h>
 #include <linux/cpufreq.h>
 #include <linux/cpumask.h>
+#include <linux/module.h>
 #include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/pm_opp.h>

diff --git a/drivers/devfreq/tegra-devfreq.c b/drivers/devfreq/tegra-devfreq.c
index 848b93ee..fe9dce0 100644
--- a/drivers/devfreq/tegra-devfreq.c
+++ b/drivers/devfreq/tegra-devfreq.c

@@ -500,6 +500,8 @@
 	clk_set_min_rate(tegra->emc_clock, rate);
 	clk_set_rate(tegra->emc_clock, 0);
 
+	*freq = rate;
+
 	return 0;
 }
 

diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c
index 64f5d1b..8e304b1 100644
--- a/drivers/dma/at_xdmac.c
+++ b/drivers/dma/at_xdmac.c

@@ -176,6 +176,7 @@
 #define AT_XDMAC_MAX_CHAN	0x20
 #define AT_XDMAC_MAX_CSIZE	16	/* 16 data */
 #define AT_XDMAC_MAX_DWIDTH	8	/* 64 bits */
+#define AT_XDMAC_RESIDUE_MAX_RETRIES	5
 
 #define AT_XDMAC_DMA_BUSWIDTHS\
 	(BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED) |\
@@ -1395,8 +1396,8 @@
 	struct at_xdmac_desc	*desc, *_desc;
 	struct list_head	*descs_list;
 	enum dma_status		ret;
-	int			residue;
-	u32			cur_nda, mask, value;
+	int			residue, retry;
+	u32			cur_nda, check_nda, cur_ubc, mask, value;
 	u8			dwidth = 0;
 	unsigned long		flags;
 
@@ -1433,7 +1434,42 @@
 			cpu_relax();
 	}
 
+	/*
+	 * When processing the residue, we need to read two registers but we
+	 * can't do it in an atomic way. AT_XDMAC_CNDA is used to find where
+	 * we stand in the descriptor list and AT_XDMAC_CUBC is used
+	 * to know how many data are remaining for the current descriptor.
+	 * Since the dma channel is not paused to not loose data, between the
+	 * AT_XDMAC_CNDA and AT_XDMAC_CUBC read, we may have change of
+	 * descriptor.
+	 * For that reason, after reading AT_XDMAC_CUBC, we check if we are
+	 * still using the same descriptor by reading a second time
+	 * AT_XDMAC_CNDA. If AT_XDMAC_CNDA has changed, it means we have to
+	 * read again AT_XDMAC_CUBC.
+	 * Memory barriers are used to ensure the read order of the registers.
+	 * A max number of retries is set because unlikely it can never ends if
+	 * we are transferring a lot of data with small buffers.
+	 */
 	cur_nda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA) & 0xfffffffc;
+	rmb();
+	cur_ubc = at_xdmac_chan_read(atchan, AT_XDMAC_CUBC);
+	for (retry = 0; retry < AT_XDMAC_RESIDUE_MAX_RETRIES; retry++) {
+		rmb();
+		check_nda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA) & 0xfffffffc;
+
+		if (likely(cur_nda == check_nda))
+			break;
+
+		cur_nda = check_nda;
+		rmb();
+		cur_ubc = at_xdmac_chan_read(atchan, AT_XDMAC_CUBC);
+	}
+
+	if (unlikely(retry >= AT_XDMAC_RESIDUE_MAX_RETRIES)) {
+		ret = DMA_ERROR;
+		goto spin_unlock;
+	}
+
 	/*
 	 * Remove size of all microblocks already transferred and the current
 	 * one. Then add the remaining size to transfer of the current
@@ -1446,7 +1482,7 @@
 		if ((desc->lld.mbr_nda & 0xfffffffc) == cur_nda)
 			break;
 	}
-	residue += at_xdmac_chan_read(atchan, AT_XDMAC_CUBC) << dwidth;
+	residue += cur_ubc << dwidth;
 
 	dma_set_residue(txstate, residue);
 

diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index 2209f75..aac85c3 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c

@@ -522,6 +522,8 @@
 			chan_dbg(chan, "LD %p callback\n", desc);
 			txd->callback(txd->callback_param);
 		}
+
+		dma_descriptor_unmap(txd);
 	}
 
 	/* Run any dependencies */

diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c
index e4f4312..f039cfa 100644
--- a/drivers/dma/iop-adma.c
+++ b/drivers/dma/iop-adma.c

@@ -1300,10 +1300,10 @@
 	 * note: writecombine gives slightly better performance, but
 	 * requires that we explicitly flush the writes
 	 */
-	adev->dma_desc_pool_virt = dma_alloc_writecombine(&pdev->dev,
-							  plat_data->pool_size,
-							  &adev->dma_desc_pool,
-							  GFP_KERNEL);
+	adev->dma_desc_pool_virt = dma_alloc_wc(&pdev->dev,
+						plat_data->pool_size,
+						&adev->dma_desc_pool,
+						GFP_KERNEL);
 	if (!adev->dma_desc_pool_virt) {
 		ret = -ENOMEM;
 		goto err_free_adev;

diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c
index 14091f8..3922a5d 100644
--- a/drivers/dma/mv_xor.c
+++ b/drivers/dma/mv_xor.c

@@ -964,8 +964,8 @@
 	 * requires that we explicitly flush the writes
 	 */
 	mv_chan->dma_desc_pool_virt =
-	  dma_alloc_writecombine(&pdev->dev, MV_XOR_POOL_SIZE,
-				 &mv_chan->dma_desc_pool, GFP_KERNEL);
+	  dma_alloc_wc(&pdev->dev, MV_XOR_POOL_SIZE, &mv_chan->dma_desc_pool,
+		       GFP_KERNEL);
 	if (!mv_chan->dma_desc_pool_virt)
 		return ERR_PTR(-ENOMEM);
 

diff --git a/drivers/dma/pxa_dma.c b/drivers/dma/pxa_dma.c
index f2a0310..debca82 100644
--- a/drivers/dma/pxa_dma.c
+++ b/drivers/dma/pxa_dma.c

@@ -583,6 +583,8 @@
 		(PXA_DCMD_LENGTH & sizeof(u32));
 	if (flags & DMA_PREP_INTERRUPT)
 		updater->dcmd |= PXA_DCMD_ENDIRQEN;
+	if (sw_desc->cyclic)
+		sw_desc->hw_desc[sw_desc->nb_desc - 2]->ddadr = sw_desc->first;
 }
 
 static bool is_desc_completed(struct virt_dma_desc *vd)
@@ -673,6 +675,10 @@
 		dev_dbg(&chan->vc.chan.dev->device,
 			"%s(): checking txd %p[%x]: completed=%d\n",
 			__func__, vd, vd->tx.cookie, is_desc_completed(vd));
+		if (to_pxad_sw_desc(vd)->cyclic) {
+			vchan_cyclic_callback(vd);
+			break;
+		}
 		if (is_desc_completed(vd)) {
 			list_del(&vd->node);
 			vchan_cookie_complete(vd);
@@ -1080,7 +1086,7 @@
 		return NULL;
 
 	pxad_get_config(chan, dir, &dcmd, &dsadr, &dtadr);
-	dcmd |= PXA_DCMD_ENDIRQEN | (PXA_DCMD_LENGTH | period_len);
+	dcmd |= PXA_DCMD_ENDIRQEN | (PXA_DCMD_LENGTH & period_len);
 	dev_dbg(&chan->vc.chan.dev->device,
 		"%s(): buf_addr=0x%lx len=%zu period=%zu dir=%d flags=%lx\n",
 		__func__, (unsigned long)buf_addr, len, period_len, dir, flags);

diff --git a/drivers/dma/qcom_bam_dma.c b/drivers/dma/qcom_bam_dma.c
index 5a250cd..d34aef7 100644
--- a/drivers/dma/qcom_bam_dma.c
+++ b/drivers/dma/qcom_bam_dma.c

@@ -502,8 +502,8 @@
 		return 0;
 
 	/* allocate FIFO descriptor space, but only if necessary */
-	bchan->fifo_virt = dma_alloc_writecombine(bdev->dev, BAM_DESC_FIFO_SIZE,
-				&bchan->fifo_phys, GFP_KERNEL);
+	bchan->fifo_virt = dma_alloc_wc(bdev->dev, BAM_DESC_FIFO_SIZE,
+					&bchan->fifo_phys, GFP_KERNEL);
 
 	if (!bchan->fifo_virt) {
 		dev_err(bdev->dev, "Failed to allocate desc fifo\n");
@@ -538,8 +538,8 @@
 	bam_reset_channel(bchan);
 	spin_unlock_irqrestore(&bchan->vc.lock, flags);
 
-	dma_free_writecombine(bdev->dev, BAM_DESC_FIFO_SIZE, bchan->fifo_virt,
-				bchan->fifo_phys);
+	dma_free_wc(bdev->dev, BAM_DESC_FIFO_SIZE, bchan->fifo_virt,
+		    bchan->fifo_phys);
 	bchan->fifo_virt = NULL;
 
 	/* mask irq for pipe/channel */
@@ -1231,9 +1231,9 @@
 		bam_dma_terminate_all(&bdev->channels[i].vc.chan);
 		tasklet_kill(&bdev->channels[i].vc.task);
 
-		dma_free_writecombine(bdev->dev, BAM_DESC_FIFO_SIZE,
-			bdev->channels[i].fifo_virt,
-			bdev->channels[i].fifo_phys);
+		dma_free_wc(bdev->dev, BAM_DESC_FIFO_SIZE,
+			    bdev->channels[i].fifo_virt,
+			    bdev->channels[i].fifo_phys);
 	}
 
 	tasklet_kill(&bdev->task);

diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index e3a945c..49768c0 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c

@@ -147,6 +147,135 @@
 	"Status Register File",
 };
 
+/* Scalable MCA error strings */
+static const char * const f17h_ls_mce_desc[] = {
+	"Load queue parity",
+	"Store queue parity",
+	"Miss address buffer payload parity",
+	"L1 TLB parity",
+	"",						/* reserved */
+	"DC tag error type 6",
+	"DC tag error type 1",
+	"Internal error type 1",
+	"Internal error type 2",
+	"Sys Read data error thread 0",
+	"Sys read data error thread 1",
+	"DC tag error type 2",
+	"DC data error type 1 (poison comsumption)",
+	"DC data error type 2",
+	"DC data error type 3",
+	"DC tag error type 4",
+	"L2 TLB parity",
+	"PDC parity error",
+	"DC tag error type 3",
+	"DC tag error type 5",
+	"L2 fill data error",
+};
+
+static const char * const f17h_if_mce_desc[] = {
+	"microtag probe port parity error",
+	"IC microtag or full tag multi-hit error",
+	"IC full tag parity",
+	"IC data array parity",
+	"Decoupling queue phys addr parity error",
+	"L0 ITLB parity error",
+	"L1 ITLB parity error",
+	"L2 ITLB parity error",
+	"BPQ snoop parity on Thread 0",
+	"BPQ snoop parity on Thread 1",
+	"L1 BTB multi-match error",
+	"L2 BTB multi-match error",
+};
+
+static const char * const f17h_l2_mce_desc[] = {
+	"L2M tag multi-way-hit error",
+	"L2M tag ECC error",
+	"L2M data ECC error",
+	"HW assert",
+};
+
+static const char * const f17h_de_mce_desc[] = {
+	"uop cache tag parity error",
+	"uop cache data parity error",
+	"Insn buffer parity error",
+	"Insn dispatch queue parity error",
+	"Fetch address FIFO parity",
+	"Patch RAM data parity",
+	"Patch RAM sequencer parity",
+	"uop buffer parity"
+};
+
+static const char * const f17h_ex_mce_desc[] = {
+	"Watchdog timeout error",
+	"Phy register file parity",
+	"Flag register file parity",
+	"Immediate displacement register file parity",
+	"Address generator payload parity",
+	"EX payload parity",
+	"Checkpoint queue parity",
+	"Retire dispatch queue parity",
+};
+
+static const char * const f17h_fp_mce_desc[] = {
+	"Physical register file parity",
+	"Freelist parity error",
+	"Schedule queue parity",
+	"NSQ parity error",
+	"Retire queue parity",
+	"Status register file parity",
+};
+
+static const char * const f17h_l3_mce_desc[] = {
+	"Shadow tag macro ECC error",
+	"Shadow tag macro multi-way-hit error",
+	"L3M tag ECC error",
+	"L3M tag multi-way-hit error",
+	"L3M data ECC error",
+	"XI parity, L3 fill done channel error",
+	"L3 victim queue parity",
+	"L3 HW assert",
+};
+
+static const char * const f17h_cs_mce_desc[] = {
+	"Illegal request from transport layer",
+	"Address violation",
+	"Security violation",
+	"Illegal response from transport layer",
+	"Unexpected response",
+	"Parity error on incoming request or probe response data",
+	"Parity error on incoming read response data",
+	"Atomic request parity",
+	"ECC error on probe filter access",
+};
+
+static const char * const f17h_pie_mce_desc[] = {
+	"HW assert",
+	"Internal PIE register security violation",
+	"Error on GMI link",
+	"Poison data written to internal PIE register",
+};
+
+static const char * const f17h_umc_mce_desc[] = {
+	"DRAM ECC error",
+	"Data poison error on DRAM",
+	"SDP parity error",
+	"Advanced peripheral bus error",
+	"Command/address parity error",
+	"Write data CRC error",
+};
+
+static const char * const f17h_pb_mce_desc[] = {
+	"Parameter Block RAM ECC error",
+};
+
+static const char * const f17h_psp_mce_desc[] = {
+	"PSP RAM ECC or parity error",
+};
+
+static const char * const f17h_smu_mce_desc[] = {
+	"SMU RAM ECC or parity error",
+};
+
 static bool f12h_mc0_mce(u16 ec, u8 xec)
 {
 	bool ret = false;
@@ -691,6 +820,177 @@
 	pr_emerg(HW_ERR "Corrupted MC6 MCE info?\n");
 }
 
+static void decode_f17h_core_errors(const char *ip_name, u8 xec,
+				   unsigned int mca_type)
+{
+	const char * const *error_desc_array;
+	size_t len;
+
+	pr_emerg(HW_ERR "%s Error: ", ip_name);
+
+	switch (mca_type) {
+	case SMCA_LS:
+		error_desc_array = f17h_ls_mce_desc;
+		len = ARRAY_SIZE(f17h_ls_mce_desc) - 1;
+
+		if (xec == 0x4) {
+			pr_cont("Unrecognized LS MCA error code.\n");
+			return;
+		}
+		break;
+
+	case SMCA_IF:
+		error_desc_array = f17h_if_mce_desc;
+		len = ARRAY_SIZE(f17h_if_mce_desc) - 1;
+		break;
+
+	case SMCA_L2_CACHE:
+		error_desc_array = f17h_l2_mce_desc;
+		len = ARRAY_SIZE(f17h_l2_mce_desc) - 1;
+		break;
+
+	case SMCA_DE:
+		error_desc_array = f17h_de_mce_desc;
+		len = ARRAY_SIZE(f17h_de_mce_desc) - 1;
+		break;
+
+	case SMCA_EX:
+		error_desc_array = f17h_ex_mce_desc;
+		len = ARRAY_SIZE(f17h_ex_mce_desc) - 1;
+		break;
+
+	case SMCA_FP:
+		error_desc_array = f17h_fp_mce_desc;
+		len = ARRAY_SIZE(f17h_fp_mce_desc) - 1;
+		break;
+
+	case SMCA_L3_CACHE:
+		error_desc_array = f17h_l3_mce_desc;
+		len = ARRAY_SIZE(f17h_l3_mce_desc) - 1;
+		break;
+
+	default:
+		pr_cont("Corrupted MCA core error info.\n");
+		return;
+	}
+
+	if (xec > len) {
+		pr_cont("Unrecognized %s MCA bank error code.\n",
+			 amd_core_mcablock_names[mca_type]);
+		return;
+	}
+
+	pr_cont("%s.\n", error_desc_array[xec]);
+}
+
+static void decode_df_errors(u8 xec, unsigned int mca_type)
+{
+	const char * const *error_desc_array;
+	size_t len;
+
+	pr_emerg(HW_ERR "Data Fabric Error: ");
+
+	switch (mca_type) {
+	case  SMCA_CS:
+		error_desc_array = f17h_cs_mce_desc;
+		len = ARRAY_SIZE(f17h_cs_mce_desc) - 1;
+		break;
+
+	case SMCA_PIE:
+		error_desc_array = f17h_pie_mce_desc;
+		len = ARRAY_SIZE(f17h_pie_mce_desc) - 1;
+		break;
+
+	default:
+		pr_cont("Corrupted MCA Data Fabric info.\n");
+		return;
+	}
+
+	if (xec > len) {
+		pr_cont("Unrecognized %s MCA bank error code.\n",
+			 amd_df_mcablock_names[mca_type]);
+		return;
+	}
+
+	pr_cont("%s.\n", error_desc_array[xec]);
+}
+
+/* Decode errors according to Scalable MCA specification */
+static void decode_smca_errors(struct mce *m)
+{
+	u32 addr = MSR_AMD64_SMCA_MCx_IPID(m->bank);
+	unsigned int hwid, mca_type, i;
+	u8 xec = XEC(m->status, xec_mask);
+	const char * const *error_desc_array;
+	const char *ip_name;
+	u32 low, high;
+	size_t len;
+
+	if (rdmsr_safe(addr, &low, &high)) {
+		pr_emerg("Invalid IP block specified, error information is unreliable.\n");
+		return;
+	}
+
+	hwid = high & MCI_IPID_HWID;
+	mca_type = (high & MCI_IPID_MCATYPE) >> 16;
+
+	pr_emerg(HW_ERR "MC%d IPID value: 0x%08x%08x\n", m->bank, high, low);
+
+	/*
+	 * Based on hwid and mca_type values, decode errors from respective IPs.
+	 * Note: mca_type values make sense only in the context of an hwid.
+	 */
+	for (i = 0; i < ARRAY_SIZE(amd_hwids); i++)
+		if (amd_hwids[i].hwid == hwid)
+			break;
+
+	switch (i) {
+	case SMCA_F17H_CORE:
+		ip_name = (mca_type == SMCA_L3_CACHE) ?
+			  "L3 Cache" : "F17h Core";
+		return decode_f17h_core_errors(ip_name, xec, mca_type);
+		break;
+
+	case SMCA_DF:
+		return decode_df_errors(xec, mca_type);
+		break;
+
+	case SMCA_UMC:
+		error_desc_array = f17h_umc_mce_desc;
+		len = ARRAY_SIZE(f17h_umc_mce_desc) - 1;
+		break;
+
+	case SMCA_PB:
+		error_desc_array = f17h_pb_mce_desc;
+		len = ARRAY_SIZE(f17h_pb_mce_desc) - 1;
+		break;
+
+	case SMCA_PSP:
+		error_desc_array = f17h_psp_mce_desc;
+		len = ARRAY_SIZE(f17h_psp_mce_desc) - 1;
+		break;
+
+	case SMCA_SMU:
+		error_desc_array = f17h_smu_mce_desc;
+		len = ARRAY_SIZE(f17h_smu_mce_desc) - 1;
+		break;
+
+	default:
+		pr_emerg(HW_ERR "HWID:%d does not match any existing IPs.\n", hwid);
+		return;
+	}
+
+	ip_name = amd_hwids[i].name;
+	pr_emerg(HW_ERR "%s Error: ", ip_name);
+
+	if (xec > len) {
+		pr_cont("Unrecognized %s MCA bank error code.\n", ip_name);
+		return;
+	}
+
+	pr_cont("%s.\n", error_desc_array[xec]);
+}
+
 static inline void amd_decode_err_code(u16 ec)
 {
 	if (INT_ERROR(ec)) {
@@ -752,6 +1052,7 @@
 	struct mce *m = (struct mce *)data;
 	struct cpuinfo_x86 *c = &cpu_data(m->extcpu);
 	int ecc;
+	u32 ebx = cpuid_ebx(0x80000007);
 
 	if (amd_filter_mce(m))
 		return NOTIFY_STOP;
@@ -769,11 +1070,20 @@
 		((m->status & MCI_STATUS_PCC)	? "PCC"	  : "-"),
 		((m->status & MCI_STATUS_ADDRV)	? "AddrV" : "-"));
 
-	if (c->x86 == 0x15 || c->x86 == 0x16)
+	if (c->x86 >= 0x15)
 		pr_cont("|%s|%s",
 			((m->status & MCI_STATUS_DEFERRED) ? "Deferred" : "-"),
 			((m->status & MCI_STATUS_POISON)   ? "Poison"   : "-"));
 
+	if (!!(ebx & BIT(3))) {
+		u32 low, high;
+		u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank);
+
+		if (!rdmsr_safe(addr, &low, &high) &&
+		    (low & MCI_CONFIG_MCAX))
+			pr_cont("|%s", ((m->status & MCI_STATUS_TCC) ? "TCC" : "-"));
+	}
+
 	/* do the two bits[14:13] together */
 	ecc = (m->status >> 45) & 0x3;
 	if (ecc)
@@ -784,6 +1094,11 @@
 	if (m->status & MCI_STATUS_ADDRV)
 		pr_emerg(HW_ERR "MC%d Error Address: 0x%016llx\n", m->bank, m->addr);
 
+	if (!!(ebx & BIT(3))) {
+		decode_smca_errors(m);
+		goto err_code;
+	}
+
 	if (!fam_ops)
 		goto err_code;
 
@@ -834,6 +1149,7 @@
 static int __init mce_amd_init(void)
 {
 	struct cpuinfo_x86 *c = &boot_cpu_data;
+	u32 ebx;
 
 	if (c->x86_vendor != X86_VENDOR_AMD)
 		return -ENODEV;
@@ -888,10 +1204,18 @@
 		fam_ops->mc2_mce = f16h_mc2_mce;
 		break;
 
+	case 0x17:
+		ebx = cpuid_ebx(0x80000007);
+		xec_mask = 0x3f;
+		if (!(ebx & BIT(3))) {
+			printk(KERN_WARNING "Decoding supported only on Scalable MCA processors.\n");
+			goto err_out;
+		}
+		break;
+
 	default:
 		printk(KERN_WARNING "Huh? What family is it: 0x%x?!\n", c->x86);
-		kfree(fam_ops);
-		fam_ops = NULL;
+		goto err_out;
 	}
 
 	pr_info("MCE: In-kernel MCE decoding enabled.\n");
@@ -899,6 +1223,11 @@
 	mce_register_decode_chain(&amd_mce_dec_nb);
 
 	return 0;
+
+err_out:
+	kfree(fam_ops);
+	fam_ops = NULL;
+	return -EINVAL;
 }
 early_initcall(mce_amd_init);
 

diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index e438ee5..93f0d41 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c

@@ -1574,7 +1574,7 @@
 				for (cha = 0; cha < KNL_MAX_CHAS; cha++) {
 					if (knl_get_mc_route(target,
 						mc_route_reg[cha]) == channel
-						&& participants[channel]) {
+						&& !participants[channel]) {
 						participant_count++;
 						participants[channel] = 1;
 						break;
@@ -1839,8 +1839,8 @@
 		edac_dbg(0, "TAD#%d: up to %u.%03u GB (0x%016Lx), socket interleave %d, memory interleave %d, TGT: %d, %d, %d, %d, reg=0x%08x\n",
 			 n_tads, gb, (mb*1000)/1024,
 			 ((u64)tmp_mb) << 20L,
-			 (u32)TAD_SOCK(reg),
-			 (u32)TAD_CH(reg),
+			 (u32)(1 << TAD_SOCK(reg)),
+			 (u32)TAD_CH(reg) + 1,
 			 (u32)TAD_TGT0(reg),
 			 (u32)TAD_TGT1(reg),
 			 (u32)TAD_TGT2(reg),
@@ -2118,7 +2118,7 @@
 	}
 
 	ch_way = TAD_CH(reg) + 1;
-	sck_way = TAD_SOCK(reg) + 1;
+	sck_way = 1 << TAD_SOCK(reg);
 
 	if (ch_way == 3)
 		idx = addr >> 6;
@@ -2175,7 +2175,7 @@
 		 n_tads,
 		 addr,
 		 limit,
-		 (u32)TAD_SOCK(reg),
+		 sck_way,
 		 ch_way,
 		 offset,
 		 idx,
@@ -2190,18 +2190,12 @@
 			offset, addr);
 		return -EINVAL;
 	}
-	addr -= offset;
-	/* Store the low bits [0:6] of the addr */
-	ch_addr = addr & 0x7f;
-	/* Remove socket wayness and remove 6 bits */
-	addr >>= 6;
-	addr = div_u64(addr, sck_xch);
-#if 0
-	/* Divide by channel way */
-	addr = addr / ch_way;
-#endif
-	/* Recover the last 6 bits */
-	ch_addr |= addr << 6;
+
+	ch_addr = addr - offset;
+	ch_addr >>= (6 + shiftup);
+	ch_addr /= ch_way * sck_way;
+	ch_addr <<= (6 + shiftup);
+	ch_addr |= addr & ((1 << (6 + shiftup)) - 1);
 
 	/*
 	 * Step 3) Decode rank

diff --git a/drivers/gpio/gpio-rcar.c b/drivers/gpio/gpio-rcar.c
index cf41440..d9ab0cd 100644
--- a/drivers/gpio/gpio-rcar.c
+++ b/drivers/gpio/gpio-rcar.c

@@ -196,6 +196,44 @@
 	return 0;
 }
 
+static void gpio_rcar_irq_bus_lock(struct irq_data *d)
+{
+	struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+	struct gpio_rcar_priv *p = gpiochip_get_data(gc);
+
+	pm_runtime_get_sync(&p->pdev->dev);
+}
+
+static void gpio_rcar_irq_bus_sync_unlock(struct irq_data *d)
+{
+	struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+	struct gpio_rcar_priv *p = gpiochip_get_data(gc);
+
+	pm_runtime_put(&p->pdev->dev);
+}
+
+
+static int gpio_rcar_irq_request_resources(struct irq_data *d)
+{
+	struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+	struct gpio_rcar_priv *p = gpiochip_get_data(gc);
+	int error;
+
+	error = pm_runtime_get_sync(&p->pdev->dev);
+	if (error < 0)
+		return error;
+
+	return 0;
+}
+
+static void gpio_rcar_irq_release_resources(struct irq_data *d)
+{
+	struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+	struct gpio_rcar_priv *p = gpiochip_get_data(gc);
+
+	pm_runtime_put(&p->pdev->dev);
+}
+
 static irqreturn_t gpio_rcar_irq_handler(int irq, void *dev_id)
 {
 	struct gpio_rcar_priv *p = dev_id;
@@ -450,6 +488,10 @@
 	irq_chip->irq_unmask = gpio_rcar_irq_enable;
 	irq_chip->irq_set_type = gpio_rcar_irq_set_type;
 	irq_chip->irq_set_wake = gpio_rcar_irq_set_wake;
+	irq_chip->irq_bus_lock = gpio_rcar_irq_bus_lock;
+	irq_chip->irq_bus_sync_unlock = gpio_rcar_irq_bus_sync_unlock;
+	irq_chip->irq_request_resources = gpio_rcar_irq_request_resources;
+	irq_chip->irq_release_resources = gpio_rcar_irq_release_resources;
 	irq_chip->flags	= IRQCHIP_SET_TYPE_MASKED | IRQCHIP_MASK_ON_SUSPEND;
 
 	ret = gpiochip_add_data(gpio_chip, p);

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
index 89c3dd6..119cdc2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c

@@ -77,7 +77,7 @@
 			} else if (amdgpu_atombios_dp_needs_link_train(amdgpu_connector)) {
 				/* Don't try to start link training before we
 				 * have the dpcd */
-				if (!amdgpu_atombios_dp_get_dpcd(amdgpu_connector))
+				if (amdgpu_atombios_dp_get_dpcd(amdgpu_connector))
 					return;
 
 				/* set it to OFF so that drm_helper_connector_dpms()

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index acd066d0..1846d65 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c

@@ -72,8 +72,8 @@
 
 	struct drm_crtc *crtc = &amdgpuCrtc->base;
 	unsigned long flags;
-	unsigned i;
-	int vpos, hpos, stat, min_udelay;
+	unsigned i, repcnt = 4;
+	int vpos, hpos, stat, min_udelay = 0;
 	struct drm_vblank_crtc *vblank = &crtc->dev->vblank[work->crtc_id];
 
 	amdgpu_flip_wait_fence(adev, &work->excl);
@@ -96,7 +96,7 @@
 	 * In practice this won't execute very often unless on very fast
 	 * machines because the time window for this to happen is very small.
 	 */
-	for (;;) {
+	while (amdgpuCrtc->enabled && --repcnt) {
 		/* GET_DISTANCE_TO_VBLANKSTART returns distance to real vblank
 		 * start in hpos, and to the "fudged earlier" vblank start in
 		 * vpos.
@@ -112,12 +112,24 @@
 			break;
 
 		/* Sleep at least until estimated real start of hw vblank */
-		spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
 		min_udelay = (-hpos + 1) * max(vblank->linedur_ns / 1000, 5);
+		if (min_udelay > vblank->framedur_ns / 2000) {
+			/* Don't wait ridiculously long - something is wrong */
+			repcnt = 0;
+			break;
+		}
+		spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
 		usleep_range(min_udelay, 2 * min_udelay);
 		spin_lock_irqsave(&crtc->dev->event_lock, flags);
 	};
 
+	if (!repcnt)
+		DRM_DEBUG_DRIVER("Delay problem on crtc %d: min_udelay %d, "
+				 "framedur %d, linedur %d, stat %d, vpos %d, "
+				 "hpos %d\n", work->crtc_id, min_udelay,
+				 vblank->framedur_ns / 1000,
+				 vblank->linedur_ns / 1000, stat, vpos, hpos);
+
 	/* do the flip (mmio) */
 	adev->mode_info.funcs->page_flip(adev, work->crtc_id, work->base);
 	/* set the flip status */

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 7380f78..d20c2a8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c

@@ -596,7 +596,8 @@
 		break;
 	}
 	ttm_eu_backoff_reservation(&ticket, &list);
-	if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE))
+	if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) &&
+	    !amdgpu_vm_debug)
 		amdgpu_gem_va_update_vm(adev, bo_va, args->operation);
 
 	drm_gem_object_unreference_unlocked(gobj);

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index 7d8d84e..95a4a25 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c

@@ -113,6 +113,10 @@
 	struct drm_device *ddev = dev_get_drvdata(dev);
 	struct amdgpu_device *adev = ddev->dev_private;
 
+	if  ((adev->flags & AMD_IS_PX) &&
+	     (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
+		return snprintf(buf, PAGE_SIZE, "off\n");
+
 	if (adev->pp_enabled) {
 		enum amd_dpm_forced_level level;
 
@@ -140,6 +144,11 @@
 	enum amdgpu_dpm_forced_level level;
 	int ret = 0;
 
+	/* Can't force performance level when the card is off */
+	if  ((adev->flags & AMD_IS_PX) &&
+	     (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
+		return -EINVAL;
+
 	if (strncmp("low", buf, strlen("low")) == 0) {
 		level = AMDGPU_DPM_FORCED_LEVEL_LOW;
 	} else if (strncmp("high", buf, strlen("high")) == 0) {
@@ -157,6 +166,7 @@
 		mutex_lock(&adev->pm.mutex);
 		if (adev->pm.dpm.thermal_active) {
 			count = -EINVAL;
+			mutex_unlock(&adev->pm.mutex);
 			goto fail;
 		}
 		ret = amdgpu_dpm_force_performance_level(adev, level);
@@ -167,8 +177,6 @@
 		mutex_unlock(&adev->pm.mutex);
 	}
 fail:
-	mutex_unlock(&adev->pm.mutex);
-
 	return count;
 }
 
@@ -182,8 +190,14 @@
 				      char *buf)
 {
 	struct amdgpu_device *adev = dev_get_drvdata(dev);
+	struct drm_device *ddev = adev->ddev;
 	int temp;
 
+	/* Can't get temperature when the card is off */
+	if  ((adev->flags & AMD_IS_PX) &&
+	     (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
+		return -EINVAL;
+
 	if (!adev->pp_enabled && !adev->pm.funcs->get_temperature)
 		temp = 0;
 	else
@@ -634,11 +648,6 @@
 
 	/* update display watermarks based on new power state */
 	amdgpu_display_bandwidth_update(adev);
-	/* update displays */
-	amdgpu_dpm_display_configuration_changed(adev);
-
-	adev->pm.dpm.current_active_crtcs = adev->pm.dpm.new_active_crtcs;
-	adev->pm.dpm.current_active_crtc_count = adev->pm.dpm.new_active_crtc_count;
 
 	/* wait for the rings to drain */
 	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
@@ -655,6 +664,12 @@
 
 	amdgpu_dpm_post_set_power_state(adev);
 
+	/* update displays */
+	amdgpu_dpm_display_configuration_changed(adev);
+
+	adev->pm.dpm.current_active_crtcs = adev->pm.dpm.new_active_crtcs;
+	adev->pm.dpm.current_active_crtc_count = adev->pm.dpm.new_active_crtc_count;
+
 	if (adev->pm.funcs->force_performance_level) {
 		if (adev->pm.dpm.thermal_active) {
 			enum amdgpu_dpm_forced_level level = adev->pm.dpm.forced_level;
@@ -847,12 +862,16 @@
 	struct drm_info_node *node = (struct drm_info_node *) m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct amdgpu_device *adev = dev->dev_private;
+	struct drm_device *ddev = adev->ddev;
 
 	if (!adev->pm.dpm_enabled) {
 		seq_printf(m, "dpm not enabled\n");
 		return 0;
 	}
-	if (adev->pp_enabled) {
+	if  ((adev->flags & AMD_IS_PX) &&
+	     (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) {
+		seq_printf(m, "PX asic powered off\n");
+	} else if (adev->pp_enabled) {
 		amdgpu_dpm_debugfs_print_current_performance_level(adev, m);
 	} else {
 		mutex_lock(&adev->pm.mutex);

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
index b9d0d55..3cb6d6c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c

@@ -143,8 +143,10 @@
 					adev->powerplay.pp_handle);
 
 #ifdef CONFIG_DRM_AMD_POWERPLAY
-	if (adev->pp_enabled)
+	if (adev->pp_enabled) {
 		amdgpu_pm_sysfs_init(adev);
+		amdgpu_dpm_dispatch_task(adev, AMD_PP_EVENT_COMPLETE_INIT, NULL, NULL);
+	}
 #endif
 	return ret;
 }

diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c
index 21aacc1..bf731e9 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c

@@ -265,15 +265,27 @@
 	unsigned max_lane_num = drm_dp_max_lane_count(dpcd);
 	unsigned lane_num, i, max_pix_clock;
 
-	for (lane_num = 1; lane_num <= max_lane_num; lane_num <<= 1) {
-		for (i = 0; i < ARRAY_SIZE(link_rates) && link_rates[i] <= max_link_rate; i++) {
-			max_pix_clock = (lane_num * link_rates[i] * 8) / bpp;
+	if (amdgpu_connector_encoder_get_dp_bridge_encoder_id(connector) ==
+	    ENCODER_OBJECT_ID_NUTMEG) {
+		for (lane_num = 1; lane_num <= max_lane_num; lane_num <<= 1) {
+			max_pix_clock = (lane_num * 270000 * 8) / bpp;
 			if (max_pix_clock >= pix_clock) {
 				*dp_lanes = lane_num;
-				*dp_rate = link_rates[i];
+				*dp_rate = 270000;
 				return 0;
 			}
 		}
+	} else {
+		for (lane_num = 1; lane_num <= max_lane_num; lane_num <<= 1) {
+			for (i = 0; i < ARRAY_SIZE(link_rates) && link_rates[i] <= max_link_rate; i++) {
+				max_pix_clock = (lane_num * link_rates[i] * 8) / bpp;
+				if (max_pix_clock >= pix_clock) {
+					*dp_lanes = lane_num;
+					*dp_rate = link_rates[i];
+					return 0;
+				}
+			}
+		}
 	}
 
 	return -EINVAL;

diff --git a/drivers/gpu/drm/amd/amdgpu/cz_dpm.c b/drivers/gpu/drm/amd/amdgpu/cz_dpm.c
index 9056355..e7ef226 100644
--- a/drivers/gpu/drm/amd/amdgpu/cz_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/cz_dpm.c

@@ -2202,8 +2202,7 @@
 							    AMD_PG_STATE_GATE);
 
 				cz_enable_vce_dpm(adev, false);
-				/* TODO: to figure out why vce can't be poweroff. */
-				/* cz_send_msg_to_smc(adev, PPSMC_MSG_VCEPowerOFF); */
+				cz_send_msg_to_smc(adev, PPSMC_MSG_VCEPowerOFF);
 				pi->vce_power_gated = true;
 			} else {
 				cz_send_msg_to_smc(adev, PPSMC_MSG_VCEPowerON);
@@ -2226,10 +2225,8 @@
 		}
 	} else { /*pi->caps_vce_pg*/
 		cz_update_vce_dpm(adev);
-		cz_enable_vce_dpm(adev, true);
+		cz_enable_vce_dpm(adev, !gate);
 	}
-
-	return;
 }
 
 const struct amd_ip_funcs cz_dpm_ip_funcs = {

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 7732059..06602df 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c

@@ -3628,6 +3628,19 @@
 					unsigned vm_id, uint64_t pd_addr)
 {
 	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
+	uint32_t seq = ring->fence_drv.sync_seq;
+	uint64_t addr = ring->fence_drv.gpu_addr;
+
+	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
+	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
+				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
+				 WAIT_REG_MEM_ENGINE(usepfp)));   /* pfp or me */
+	amdgpu_ring_write(ring, addr & 0xfffffffc);
+	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
+	amdgpu_ring_write(ring, seq);
+	amdgpu_ring_write(ring, 0xffffffff);
+	amdgpu_ring_write(ring, 4); /* poll interval */
+
 	if (usepfp) {
 		/* synce CE with ME to prevent CE fetch CEIB before context switch done */
 		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 8f8ec37..7086ac1 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c

@@ -4809,7 +4809,8 @@
 
 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
 	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
-		 WAIT_REG_MEM_FUNCTION(3))); /* equal */
+				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
+				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
 	amdgpu_ring_write(ring, addr & 0xfffffffc);
 	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
 	amdgpu_ring_write(ring, seq);
@@ -4995,7 +4996,7 @@
 	case AMDGPU_IRQ_STATE_ENABLE:
 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
-					    PRIV_REG_INT_ENABLE, 0);
+					    PRIV_REG_INT_ENABLE, 1);
 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
 		break;
 	default:

diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
index aa67244..589599f 100644
--- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
+++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c

@@ -402,8 +402,11 @@
 
 		data.requested_ui_label = power_state_convert(ps);
 		ret = pem_handle_event(pp_handle->eventmgr, event_id, &data);
+		break;
 	}
-	break;
+	case AMD_PP_EVENT_COMPLETE_INIT:
+		ret = pem_handle_event(pp_handle->eventmgr, event_id, &data);
+		break;
 	default:
 		break;
 	}

diff --git a/drivers/gpu/drm/amd/powerplay/eventmgr/eventactionchains.c b/drivers/gpu/drm/amd/powerplay/eventmgr/eventactionchains.c
index 83be3cf..6b52c78 100644
--- a/drivers/gpu/drm/amd/powerplay/eventmgr/eventactionchains.c
+++ b/drivers/gpu/drm/amd/powerplay/eventmgr/eventactionchains.c

@@ -165,6 +165,7 @@
 };
 
 static const pem_event_action *complete_init_event[] = {
+	unblock_adjust_power_state_tasks,
 	adjust_power_state_tasks,
 	enable_gfx_clock_gating_tasks,
 	enable_gfx_voltage_island_power_gating_tasks,

diff --git a/drivers/gpu/drm/amd/powerplay/eventmgr/eventmgr.c b/drivers/gpu/drm/amd/powerplay/eventmgr/eventmgr.c
index 52a3efc..46410e3 100644
--- a/drivers/gpu/drm/amd/powerplay/eventmgr/eventmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/eventmgr/eventmgr.c

@@ -31,7 +31,7 @@
 static int pem_init(struct pp_eventmgr *eventmgr)
 {
 	int result = 0;
-	struct pem_event_data event_data;
+	struct pem_event_data event_data = { {0} };
 
 	/* Initialize PowerPlay feature info */
 	pem_init_feature_info(eventmgr);
@@ -52,7 +52,7 @@
 
 static void pem_fini(struct pp_eventmgr *eventmgr)
 {
-	struct pem_event_data event_data;
+	struct pem_event_data event_data = { {0} };
 
 	pem_uninit_featureInfo(eventmgr);
 	pem_unregister_interrupts(eventmgr);

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c
index ad77008..ff08ce4 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c

@@ -226,7 +226,7 @@
 		}
 	} else {
 		cz_dpm_update_vce_dpm(hwmgr);
-		cz_enable_disable_vce_dpm(hwmgr, true);
+		cz_enable_disable_vce_dpm(hwmgr, !bgate);
 		return 0;
 	}
 

diff --git a/drivers/gpu/drm/ast/ast_main.c b/drivers/gpu/drm/ast/ast_main.c
index 9759009..b1480ac 100644
--- a/drivers/gpu/drm/ast/ast_main.c
+++ b/drivers/gpu/drm/ast/ast_main.c

@@ -227,7 +227,7 @@
 	} while (ast_read32(ast, 0x10000) != 0x01);
 	data = ast_read32(ast, 0x10004);
 
-	if (data & 0x400)
+	if (data & 0x40)
 		ast->dram_bus_width = 16;
 	else
 		ast->dram_bus_width = 32;

diff --git a/drivers/gpu/drm/drm_gem_cma_helper.c b/drivers/gpu/drm/drm_gem_cma_helper.c
index e5df53b..1f500a1 100644
--- a/drivers/gpu/drm/drm_gem_cma_helper.c
+++ b/drivers/gpu/drm/drm_gem_cma_helper.c

@@ -109,8 +109,8 @@
 	if (IS_ERR(cma_obj))
 		return cma_obj;
 
-	cma_obj->vaddr = dma_alloc_writecombine(drm->dev, size,
-			&cma_obj->paddr, GFP_KERNEL | __GFP_NOWARN);
+	cma_obj->vaddr = dma_alloc_wc(drm->dev, size, &cma_obj->paddr,
+				      GFP_KERNEL | __GFP_NOWARN);
 	if (!cma_obj->vaddr) {
 		dev_err(drm->dev, "failed to allocate buffer with size %zu\n",
 			size);
@@ -192,8 +192,8 @@
 	cma_obj = to_drm_gem_cma_obj(gem_obj);
 
 	if (cma_obj->vaddr) {
-		dma_free_writecombine(gem_obj->dev->dev, cma_obj->base.size,
-				      cma_obj->vaddr, cma_obj->paddr);
+		dma_free_wc(gem_obj->dev->dev, cma_obj->base.size,
+			    cma_obj->vaddr, cma_obj->paddr);
 	} else if (gem_obj->import_attach) {
 		drm_prime_gem_destroy(gem_obj, cma_obj->sgt);
 	}
@@ -324,9 +324,8 @@
 	vma->vm_flags &= ~VM_PFNMAP;
 	vma->vm_pgoff = 0;
 
-	ret = dma_mmap_writecombine(cma_obj->base.dev->dev, vma,
-				    cma_obj->vaddr, cma_obj->paddr,
-				    vma->vm_end - vma->vm_start);
+	ret = dma_mmap_wc(cma_obj->base.dev->dev, vma, cma_obj->vaddr,
+			  cma_obj->paddr, vma->vm_end - vma->vm_start);
 	if (ret)
 		drm_gem_vm_close(vma);
 

diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
index a33162c..3c1ce44 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c

@@ -1113,8 +1113,8 @@
 	if (!cmdbuf)
 		return NULL;
 
-	cmdbuf->vaddr = dma_alloc_writecombine(gpu->dev, size, &cmdbuf->paddr,
-					       GFP_KERNEL);
+	cmdbuf->vaddr = dma_alloc_wc(gpu->dev, size, &cmdbuf->paddr,
+				     GFP_KERNEL);
 	if (!cmdbuf->vaddr) {
 		kfree(cmdbuf);
 		return NULL;
@@ -1128,8 +1128,8 @@
 
 void etnaviv_gpu_cmdbuf_free(struct etnaviv_cmdbuf *cmdbuf)
 {
-	dma_free_writecombine(cmdbuf->gpu->dev, cmdbuf->size,
-			      cmdbuf->vaddr, cmdbuf->paddr);
+	dma_free_wc(cmdbuf->gpu->dev, cmdbuf->size, cmdbuf->vaddr,
+		    cmdbuf->paddr);
 	kfree(cmdbuf);
 }
 

diff --git a/drivers/gpu/drm/i2c/tda998x_drv.c b/drivers/gpu/drm/i2c/tda998x_drv.c
index 34e3874..f8ee740 100644
--- a/drivers/gpu/drm/i2c/tda998x_drv.c
+++ b/drivers/gpu/drm/i2c/tda998x_drv.c

@@ -1382,8 +1382,16 @@
 	drm_connector_cleanup(connector);
 }
 
+static int tda998x_connector_dpms(struct drm_connector *connector, int mode)
+{
+	if (drm_core_check_feature(connector->dev, DRIVER_ATOMIC))
+		return drm_atomic_helper_connector_dpms(connector, mode);
+	else
+		return drm_helper_connector_dpms(connector, mode);
+}
+
 static const struct drm_connector_funcs tda998x_connector_funcs = {
-	.dpms = drm_atomic_helper_connector_dpms,
+	.dpms = tda998x_connector_dpms,
 	.reset = drm_atomic_helper_connector_reset,
 	.fill_modes = drm_helper_probe_single_connector_modes,
 	.detect = tda998x_connector_detect,

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 0fc38bb..cf39ed3 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c

@@ -825,8 +825,11 @@
 		}
 
 		for_each_pipe(dev_priv, pipe) {
-			if (!intel_display_power_is_enabled(dev_priv,
-						POWER_DOMAIN_PIPE(pipe))) {
+			enum intel_display_power_domain power_domain;
+
+			power_domain = POWER_DOMAIN_PIPE(pipe);
+			if (!intel_display_power_get_if_enabled(dev_priv,
+								power_domain)) {
 				seq_printf(m, "Pipe %c power disabled\n",
 					   pipe_name(pipe));
 				continue;
@@ -840,6 +843,8 @@
 			seq_printf(m, "Pipe %c IER:\t%08x\n",
 				   pipe_name(pipe),
 				   I915_READ(GEN8_DE_PIPE_IER(pipe)));
+
+			intel_display_power_put(dev_priv, power_domain);
 		}
 
 		seq_printf(m, "Display Engine port interrupt mask:\t%08x\n",
@@ -3985,6 +3990,7 @@
 	struct intel_pipe_crc *pipe_crc = &dev_priv->pipe_crc[pipe];
 	struct intel_crtc *crtc = to_intel_crtc(intel_get_crtc_for_pipe(dev,
 									pipe));
+	enum intel_display_power_domain power_domain;
 	u32 val = 0; /* shut up gcc */
 	int ret;
 
@@ -3995,7 +4001,8 @@
 	if (pipe_crc->source && source)
 		return -EINVAL;
 
-	if (!intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_PIPE(pipe))) {
+	power_domain = POWER_DOMAIN_PIPE(pipe);
+	if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) {
 		DRM_DEBUG_KMS("Trying to capture CRC while pipe is off\n");
 		return -EIO;
 	}
@@ -4012,7 +4019,7 @@
 		ret = ivb_pipe_crc_ctl_reg(dev, pipe, &source, &val);
 
 	if (ret != 0)
-		return ret;
+		goto out;
 
 	/* none -> real source transition */
 	if (source) {
@@ -4024,8 +4031,10 @@
 		entries = kcalloc(INTEL_PIPE_CRC_ENTRIES_NR,
 				  sizeof(pipe_crc->entries[0]),
 				  GFP_KERNEL);
-		if (!entries)
-			return -ENOMEM;
+		if (!entries) {
+			ret = -ENOMEM;
+			goto out;
+		}
 
 		/*
 		 * When IPS gets enabled, the pipe CRC changes. Since IPS gets
@@ -4081,7 +4090,12 @@
 		hsw_enable_ips(crtc);
 	}
 
-	return 0;
+	ret = 0;
+
+out:
+	intel_display_power_put(dev_priv, power_domain);
+
+	return ret;
 }
 
 /*

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index e7cd311..b0847b9 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h

@@ -751,6 +751,7 @@
 	uint32_t mmio_count;
 	i915_reg_t mmioaddr[8];
 	uint32_t mmiodata[8];
+	uint32_t dc_state;
 };
 
 #define DEV_INFO_FOR_EACH_FLAG(func, sep) \

diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c
index 31f6d21..30f9214 100644
--- a/drivers/gpu/drm/i915/intel_audio.c
+++ b/drivers/gpu/drm/i915/intel_audio.c

@@ -527,6 +527,8 @@
 
 	mutex_lock(&dev_priv->av_mutex);
 	intel_dig_port->audio_connector = connector;
+	/* referred in audio callbacks */
+	dev_priv->dig_port_map[port] = intel_encoder;
 	mutex_unlock(&dev_priv->av_mutex);
 
 	if (acomp && acomp->audio_ops && acomp->audio_ops->pin_eld_notify)
@@ -554,6 +556,7 @@
 
 	mutex_lock(&dev_priv->av_mutex);
 	intel_dig_port->audio_connector = NULL;
+	dev_priv->dig_port_map[port] = NULL;
 	mutex_unlock(&dev_priv->av_mutex);
 
 	if (acomp && acomp->audio_ops && acomp->audio_ops->pin_eld_notify)

diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c
index 9c89df1..a7b4a524 100644
--- a/drivers/gpu/drm/i915/intel_crt.c
+++ b/drivers/gpu/drm/i915/intel_crt.c

@@ -71,22 +71,29 @@
 	struct intel_crt *crt = intel_encoder_to_crt(encoder);
 	enum intel_display_power_domain power_domain;
 	u32 tmp;
+	bool ret;
 
 	power_domain = intel_display_port_power_domain(encoder);
-	if (!intel_display_power_is_enabled(dev_priv, power_domain))
+	if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
 		return false;
 
+	ret = false;
+
 	tmp = I915_READ(crt->adpa_reg);
 
 	if (!(tmp & ADPA_DAC_ENABLE))
-		return false;
+		goto out;
 
 	if (HAS_PCH_CPT(dev))
 		*pipe = PORT_TO_PIPE_CPT(tmp);
 	else
 		*pipe = PORT_TO_PIPE(tmp);
 
-	return true;
+	ret = true;
+out:
+	intel_display_power_put(dev_priv, power_domain);
+
+	return ret;
 }
 
 static unsigned int intel_crt_get_flags(struct intel_encoder *encoder)

diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c
index 9bb63a8..647d85e 100644
--- a/drivers/gpu/drm/i915/intel_csr.c
+++ b/drivers/gpu/drm/i915/intel_csr.c

@@ -240,6 +240,8 @@
 		I915_WRITE(dev_priv->csr.mmioaddr[i],
 			   dev_priv->csr.mmiodata[i]);
 	}
+
+	dev_priv->csr.dc_state = 0;
 }
 
 static uint32_t *parse_csr_fw(struct drm_i915_private *dev_priv,

diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c
index 54a165b..084d558 100644
--- a/drivers/gpu/drm/i915/intel_ddi.c
+++ b/drivers/gpu/drm/i915/intel_ddi.c

@@ -1969,13 +1969,16 @@
 	enum transcoder cpu_transcoder;
 	enum intel_display_power_domain power_domain;
 	uint32_t tmp;
+	bool ret;
 
 	power_domain = intel_display_port_power_domain(intel_encoder);
-	if (!intel_display_power_is_enabled(dev_priv, power_domain))
+	if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
 		return false;
 
-	if (!intel_encoder->get_hw_state(intel_encoder, &pipe))
-		return false;
+	if (!intel_encoder->get_hw_state(intel_encoder, &pipe)) {
+		ret = false;
+		goto out;
+	}
 
 	if (port == PORT_A)
 		cpu_transcoder = TRANSCODER_EDP;
@@ -1987,23 +1990,33 @@
 	switch (tmp & TRANS_DDI_MODE_SELECT_MASK) {
 	case TRANS_DDI_MODE_SELECT_HDMI:
 	case TRANS_DDI_MODE_SELECT_DVI:
-		return (type == DRM_MODE_CONNECTOR_HDMIA);
+		ret = type == DRM_MODE_CONNECTOR_HDMIA;
+		break;
 
 	case TRANS_DDI_MODE_SELECT_DP_SST:
-		if (type == DRM_MODE_CONNECTOR_eDP)
-			return true;
-		return (type == DRM_MODE_CONNECTOR_DisplayPort);
+		ret = type == DRM_MODE_CONNECTOR_eDP ||
+		      type == DRM_MODE_CONNECTOR_DisplayPort;
+		break;
+
 	case TRANS_DDI_MODE_SELECT_DP_MST:
 		/* if the transcoder is in MST state then
 		 * connector isn't connected */
-		return false;
+		ret = false;
+		break;
 
 	case TRANS_DDI_MODE_SELECT_FDI:
-		return (type == DRM_MODE_CONNECTOR_VGA);
+		ret = type == DRM_MODE_CONNECTOR_VGA;
+		break;
 
 	default:
-		return false;
+		ret = false;
+		break;
 	}
+
+out:
+	intel_display_power_put(dev_priv, power_domain);
+
+	return ret;
 }
 
 bool intel_ddi_get_hw_state(struct intel_encoder *encoder,
@@ -2015,15 +2028,18 @@
 	enum intel_display_power_domain power_domain;
 	u32 tmp;
 	int i;
+	bool ret;
 
 	power_domain = intel_display_port_power_domain(encoder);
-	if (!intel_display_power_is_enabled(dev_priv, power_domain))
+	if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
 		return false;
 
+	ret = false;
+
 	tmp = I915_READ(DDI_BUF_CTL(port));
 
 	if (!(tmp & DDI_BUF_CTL_ENABLE))
-		return false;
+		goto out;
 
 	if (port == PORT_A) {
 		tmp = I915_READ(TRANS_DDI_FUNC_CTL(TRANSCODER_EDP));
@@ -2041,25 +2057,32 @@
 			break;
 		}
 
-		return true;
-	} else {
-		for (i = TRANSCODER_A; i <= TRANSCODER_C; i++) {
-			tmp = I915_READ(TRANS_DDI_FUNC_CTL(i));
+		ret = true;
 
-			if ((tmp & TRANS_DDI_PORT_MASK)
-			    == TRANS_DDI_SELECT_PORT(port)) {
-				if ((tmp & TRANS_DDI_MODE_SELECT_MASK) == TRANS_DDI_MODE_SELECT_DP_MST)
-					return false;
+		goto out;
+	}
 
-				*pipe = i;
-				return true;
-			}
+	for (i = TRANSCODER_A; i <= TRANSCODER_C; i++) {
+		tmp = I915_READ(TRANS_DDI_FUNC_CTL(i));
+
+		if ((tmp & TRANS_DDI_PORT_MASK) == TRANS_DDI_SELECT_PORT(port)) {
+			if ((tmp & TRANS_DDI_MODE_SELECT_MASK) ==
+			    TRANS_DDI_MODE_SELECT_DP_MST)
+				goto out;
+
+			*pipe = i;
+			ret = true;
+
+			goto out;
 		}
 	}
 
 	DRM_DEBUG_KMS("No pipe for ddi port %c found\n", port_name(port));
 
-	return false;
+out:
+	intel_display_power_put(dev_priv, power_domain);
+
+	return ret;
 }
 
 void intel_ddi_enable_pipe_clock(struct intel_crtc *intel_crtc)
@@ -2508,12 +2531,14 @@
 {
 	uint32_t val;
 
-	if (!intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_PLLS))
+	if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS))
 		return false;
 
 	val = I915_READ(WRPLL_CTL(pll->id));
 	hw_state->wrpll = val;
 
+	intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS);
+
 	return val & WRPLL_PLL_ENABLE;
 }
 
@@ -2523,12 +2548,14 @@
 {
 	uint32_t val;
 
-	if (!intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_PLLS))
+	if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS))
 		return false;
 
 	val = I915_READ(SPLL_CTL);
 	hw_state->spll = val;
 
+	intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS);
+
 	return val & SPLL_PLL_ENABLE;
 }
 
@@ -2645,16 +2672,19 @@
 	uint32_t val;
 	unsigned int dpll;
 	const struct skl_dpll_regs *regs = skl_dpll_regs;
+	bool ret;
 
-	if (!intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_PLLS))
+	if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS))
 		return false;
 
+	ret = false;
+
 	/* DPLL0 is not part of the shared DPLLs, so pll->id is 0 for DPLL1 */
 	dpll = pll->id + 1;
 
 	val = I915_READ(regs[pll->id].ctl);
 	if (!(val & LCPLL_PLL_ENABLE))
-		return false;
+		goto out;
 
 	val = I915_READ(DPLL_CTRL1);
 	hw_state->ctrl1 = (val >> (dpll * 6)) & 0x3f;
@@ -2664,8 +2694,12 @@
 		hw_state->cfgcr1 = I915_READ(regs[pll->id].cfgcr1);
 		hw_state->cfgcr2 = I915_READ(regs[pll->id].cfgcr2);
 	}
+	ret = true;
 
-	return true;
+out:
+	intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS);
+
+	return ret;
 }
 
 static void skl_shared_dplls_init(struct drm_i915_private *dev_priv)
@@ -2932,13 +2966,16 @@
 {
 	enum port port = (enum port)pll->id;	/* 1:1 port->PLL mapping */
 	uint32_t val;
+	bool ret;
 
-	if (!intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_PLLS))
+	if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS))
 		return false;
 
+	ret = false;
+
 	val = I915_READ(BXT_PORT_PLL_ENABLE(port));
 	if (!(val & PORT_PLL_ENABLE))
-		return false;
+		goto out;
 
 	hw_state->ebb0 = I915_READ(BXT_PORT_PLL_EBB_0(port));
 	hw_state->ebb0 &= PORT_PLL_P1_MASK | PORT_PLL_P2_MASK;
@@ -2985,7 +3022,12 @@
 				 I915_READ(BXT_PORT_PCS_DW12_LN23(port)));
 	hw_state->pcsdw12 &= LANE_STAGGER_MASK | LANESTAGGER_STRAP_OVRD;
 
-	return true;
+	ret = true;
+
+out:
+	intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS);
+
+	return ret;
 }
 
 static void bxt_shared_dplls_init(struct drm_i915_private *dev_priv)
@@ -3120,11 +3162,15 @@
 {
 	u32 temp;
 
-	if (intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_AUDIO)) {
+	if (intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_AUDIO)) {
 		temp = I915_READ(HSW_AUD_PIN_ELD_CP_VLD);
+
+		intel_display_power_put(dev_priv, POWER_DOMAIN_AUDIO);
+
 		if (temp & AUDIO_OUTPUT_ENABLE(intel_crtc->pipe))
 			return true;
 	}
+
 	return false;
 }
 
@@ -3312,7 +3358,6 @@
 	intel_encoder->get_config = intel_ddi_get_config;
 
 	intel_dig_port->port = port;
-	dev_priv->dig_port_map[port] = intel_encoder;
 	intel_dig_port->saved_port_bits = I915_READ(DDI_BUF_CTL(port)) &
 					  (DDI_BUF_PORT_REVERSAL |
 					   DDI_A_4_LANES);

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 5feb657..46947ff 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c

@@ -1351,18 +1351,21 @@
 	bool cur_state;
 	enum transcoder cpu_transcoder = intel_pipe_to_cpu_transcoder(dev_priv,
 								      pipe);
+	enum intel_display_power_domain power_domain;
 
 	/* if we need the pipe quirk it must be always on */
 	if ((pipe == PIPE_A && dev_priv->quirks & QUIRK_PIPEA_FORCE) ||
 	    (pipe == PIPE_B && dev_priv->quirks & QUIRK_PIPEB_FORCE))
 		state = true;
 
-	if (!intel_display_power_is_enabled(dev_priv,
-				POWER_DOMAIN_TRANSCODER(cpu_transcoder))) {
-		cur_state = false;
-	} else {
+	power_domain = POWER_DOMAIN_TRANSCODER(cpu_transcoder);
+	if (intel_display_power_get_if_enabled(dev_priv, power_domain)) {
 		u32 val = I915_READ(PIPECONF(cpu_transcoder));
 		cur_state = !!(val & PIPECONF_ENABLE);
+
+		intel_display_power_put(dev_priv, power_domain);
+	} else {
+		cur_state = false;
 	}
 
 	I915_STATE_WARN(cur_state != state,
@@ -8171,18 +8174,22 @@
 {
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	enum intel_display_power_domain power_domain;
 	uint32_t tmp;
+	bool ret;
 
-	if (!intel_display_power_is_enabled(dev_priv,
-					    POWER_DOMAIN_PIPE(crtc->pipe)))
+	power_domain = POWER_DOMAIN_PIPE(crtc->pipe);
+	if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
 		return false;
 
 	pipe_config->cpu_transcoder = (enum transcoder) crtc->pipe;
 	pipe_config->shared_dpll = DPLL_ID_PRIVATE;
 
+	ret = false;
+
 	tmp = I915_READ(PIPECONF(crtc->pipe));
 	if (!(tmp & PIPECONF_ENABLE))
-		return false;
+		goto out;
 
 	if (IS_G4X(dev) || IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) {
 		switch (tmp & PIPECONF_BPC_MASK) {
@@ -8262,7 +8269,12 @@
 	pipe_config->base.adjusted_mode.crtc_clock =
 		pipe_config->port_clock / pipe_config->pixel_multiplier;
 
-	return true;
+	ret = true;
+
+out:
+	intel_display_power_put(dev_priv, power_domain);
+
+	return ret;
 }
 
 static void ironlake_init_pch_refclk(struct drm_device *dev)
@@ -9366,18 +9378,21 @@
 {
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	enum intel_display_power_domain power_domain;
 	uint32_t tmp;
+	bool ret;
 
-	if (!intel_display_power_is_enabled(dev_priv,
-					    POWER_DOMAIN_PIPE(crtc->pipe)))
+	power_domain = POWER_DOMAIN_PIPE(crtc->pipe);
+	if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
 		return false;
 
 	pipe_config->cpu_transcoder = (enum transcoder) crtc->pipe;
 	pipe_config->shared_dpll = DPLL_ID_PRIVATE;
 
+	ret = false;
 	tmp = I915_READ(PIPECONF(crtc->pipe));
 	if (!(tmp & PIPECONF_ENABLE))
-		return false;
+		goto out;
 
 	switch (tmp & PIPECONF_BPC_MASK) {
 	case PIPECONF_6BPC:
@@ -9440,7 +9455,12 @@
 
 	ironlake_get_pfit_config(crtc, pipe_config);
 
-	return true;
+	ret = true;
+
+out:
+	intel_display_power_put(dev_priv, power_domain);
+
+	return ret;
 }
 
 static void assert_can_disable_lcpll(struct drm_i915_private *dev_priv)
@@ -9950,12 +9970,17 @@
 {
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	enum intel_display_power_domain pfit_domain;
+	enum intel_display_power_domain power_domain;
+	unsigned long power_domain_mask;
 	uint32_t tmp;
+	bool ret;
 
-	if (!intel_display_power_is_enabled(dev_priv,
-					 POWER_DOMAIN_PIPE(crtc->pipe)))
+	power_domain = POWER_DOMAIN_PIPE(crtc->pipe);
+	if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
 		return false;
+	power_domain_mask = BIT(power_domain);
+
+	ret = false;
 
 	pipe_config->cpu_transcoder = (enum transcoder) crtc->pipe;
 	pipe_config->shared_dpll = DPLL_ID_PRIVATE;
@@ -9982,13 +10007,14 @@
 			pipe_config->cpu_transcoder = TRANSCODER_EDP;
 	}
 
-	if (!intel_display_power_is_enabled(dev_priv,
-			POWER_DOMAIN_TRANSCODER(pipe_config->cpu_transcoder)))
-		return false;
+	power_domain = POWER_DOMAIN_TRANSCODER(pipe_config->cpu_transcoder);
+	if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
+		goto out;
+	power_domain_mask |= BIT(power_domain);
 
 	tmp = I915_READ(PIPECONF(pipe_config->cpu_transcoder));
 	if (!(tmp & PIPECONF_ENABLE))
-		return false;
+		goto out;
 
 	haswell_get_ddi_port_state(crtc, pipe_config);
 
@@ -9998,14 +10024,14 @@
 		skl_init_scalers(dev, crtc, pipe_config);
 	}
 
-	pfit_domain = POWER_DOMAIN_PIPE_PANEL_FITTER(crtc->pipe);
-
 	if (INTEL_INFO(dev)->gen >= 9) {
 		pipe_config->scaler_state.scaler_id = -1;
 		pipe_config->scaler_state.scaler_users &= ~(1 << SKL_CRTC_INDEX);
 	}
 
-	if (intel_display_power_is_enabled(dev_priv, pfit_domain)) {
+	power_domain = POWER_DOMAIN_PIPE_PANEL_FITTER(crtc->pipe);
+	if (intel_display_power_get_if_enabled(dev_priv, power_domain)) {
+		power_domain_mask |= BIT(power_domain);
 		if (INTEL_INFO(dev)->gen >= 9)
 			skylake_get_pfit_config(crtc, pipe_config);
 		else
@@ -10023,7 +10049,13 @@
 		pipe_config->pixel_multiplier = 1;
 	}
 
-	return true;
+	ret = true;
+
+out:
+	for_each_power_domain(power_domain, power_domain_mask)
+		intel_display_power_put(dev_priv, power_domain);
+
+	return ret;
 }
 
 static void i845_update_cursor(struct drm_crtc *crtc, u32 base, bool on)
@@ -13630,7 +13662,7 @@
 {
 	uint32_t val;
 
-	if (!intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_PLLS))
+	if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS))
 		return false;
 
 	val = I915_READ(PCH_DPLL(pll->id));
@@ -13638,6 +13670,8 @@
 	hw_state->fp0 = I915_READ(PCH_FP0(pll->id));
 	hw_state->fp1 = I915_READ(PCH_FP1(pll->id));
 
+	intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS);
+
 	return val & DPLL_VCO_ENABLE;
 }
 
@@ -15568,10 +15602,12 @@
 	 * level, just check if the power well is enabled instead of trying to
 	 * follow the "don't touch the power well if we don't need it" policy
 	 * the rest of the driver uses. */
-	if (!intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_VGA))
+	if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_VGA))
 		return;
 
 	i915_redisable_vga_power_on(dev);
+
+	intel_display_power_put(dev_priv, POWER_DOMAIN_VGA);
 }
 
 static bool primary_get_hw_state(struct intel_plane *plane)

diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 1bbd67b..cdc2c15 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c

@@ -2362,15 +2362,18 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	enum intel_display_power_domain power_domain;
 	u32 tmp;
+	bool ret;
 
 	power_domain = intel_display_port_power_domain(encoder);
-	if (!intel_display_power_is_enabled(dev_priv, power_domain))
+	if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
 		return false;
 
+	ret = false;
+
 	tmp = I915_READ(intel_dp->output_reg);
 
 	if (!(tmp & DP_PORT_EN))
-		return false;
+		goto out;
 
 	if (IS_GEN7(dev) && port == PORT_A) {
 		*pipe = PORT_TO_PIPE_CPT(tmp);
@@ -2381,7 +2384,9 @@
 			u32 trans_dp = I915_READ(TRANS_DP_CTL(p));
 			if (TRANS_DP_PIPE_TO_PORT(trans_dp) == port) {
 				*pipe = p;
-				return true;
+				ret = true;
+
+				goto out;
 			}
 		}
 
@@ -2393,7 +2398,12 @@
 		*pipe = PORT_TO_PIPE(tmp);
 	}
 
-	return true;
+	ret = true;
+
+out:
+	intel_display_power_put(dev_priv, power_domain);
+
+	return ret;
 }
 
 static void intel_dp_get_config(struct intel_encoder *encoder,
@@ -6035,7 +6045,6 @@
 	}
 
 	intel_dig_port->port = port;
-	dev_priv->dig_port_map[port] = intel_encoder;
 	intel_dig_port->dp.output_reg = output_reg;
 
 	intel_encoder->type = INTEL_OUTPUT_DISPLAYPORT;

diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index ea54158..df7f3cb 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h

@@ -1428,6 +1428,8 @@
 				      enum intel_display_power_domain domain);
 void intel_display_power_get(struct drm_i915_private *dev_priv,
 			     enum intel_display_power_domain domain);
+bool intel_display_power_get_if_enabled(struct drm_i915_private *dev_priv,
+					enum intel_display_power_domain domain);
 void intel_display_power_put(struct drm_i915_private *dev_priv,
 			     enum intel_display_power_domain domain);
 
@@ -1514,6 +1516,7 @@
 	enable_rpm_wakeref_asserts(dev_priv)
 
 void intel_runtime_pm_get(struct drm_i915_private *dev_priv);
+bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv);
 void intel_runtime_pm_get_noresume(struct drm_i915_private *dev_priv);
 void intel_runtime_pm_put(struct drm_i915_private *dev_priv);
 

diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c
index 44742fa..0193c62a 100644
--- a/drivers/gpu/drm/i915/intel_dsi.c
+++ b/drivers/gpu/drm/i915/intel_dsi.c

@@ -664,13 +664,16 @@
 	struct drm_device *dev = encoder->base.dev;
 	enum intel_display_power_domain power_domain;
 	enum port port;
+	bool ret;
 
 	DRM_DEBUG_KMS("\n");
 
 	power_domain = intel_display_port_power_domain(encoder);
-	if (!intel_display_power_is_enabled(dev_priv, power_domain))
+	if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
 		return false;
 
+	ret = false;
+
 	/* XXX: this only works for one DSI output */
 	for_each_dsi_port(port, intel_dsi->ports) {
 		i915_reg_t ctrl_reg = IS_BROXTON(dev) ?
@@ -691,12 +694,16 @@
 		if (dpi_enabled || (func & CMD_MODE_DATA_WIDTH_MASK)) {
 			if (I915_READ(MIPI_DEVICE_READY(port)) & DEVICE_READY) {
 				*pipe = port == PORT_A ? PIPE_A : PIPE_B;
-				return true;
+				ret = true;
+
+				goto out;
 			}
 		}
 	}
+out:
+	intel_display_power_put(dev_priv, power_domain);
 
-	return false;
+	return ret;
 }
 
 static void intel_dsi_get_config(struct intel_encoder *encoder,

diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c
index 4a77639..616108c 100644
--- a/drivers/gpu/drm/i915/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/intel_hdmi.c

@@ -880,15 +880,18 @@
 	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base);
 	enum intel_display_power_domain power_domain;
 	u32 tmp;
+	bool ret;
 
 	power_domain = intel_display_port_power_domain(encoder);
-	if (!intel_display_power_is_enabled(dev_priv, power_domain))
+	if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
 		return false;
 
+	ret = false;
+
 	tmp = I915_READ(intel_hdmi->hdmi_reg);
 
 	if (!(tmp & SDVO_ENABLE))
-		return false;
+		goto out;
 
 	if (HAS_PCH_CPT(dev))
 		*pipe = PORT_TO_PIPE_CPT(tmp);
@@ -897,7 +900,12 @@
 	else
 		*pipe = PORT_TO_PIPE(tmp);
 
-	return true;
+	ret = true;
+
+out:
+	intel_display_power_put(dev_priv, power_domain);
+
+	return ret;
 }
 
 static void intel_hdmi_get_config(struct intel_encoder *encoder,
@@ -2146,7 +2154,6 @@
 void intel_hdmi_init(struct drm_device *dev,
 		     i915_reg_t hdmi_reg, enum port port)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_digital_port *intel_dig_port;
 	struct intel_encoder *intel_encoder;
 	struct intel_connector *intel_connector;
@@ -2215,7 +2222,6 @@
 		intel_encoder->cloneable |= 1 << INTEL_OUTPUT_HDMI;
 
 	intel_dig_port->port = port;
-	dev_priv->dig_port_map[port] = intel_encoder;
 	intel_dig_port->hdmi.hdmi_reg = hdmi_reg;
 	intel_dig_port->dp.output_reg = INVALID_MMIO_REG;
 

diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c
index deb8282..52fbe53 100644
--- a/drivers/gpu/drm/i915/intel_i2c.c
+++ b/drivers/gpu/drm/i915/intel_i2c.c

@@ -664,6 +664,12 @@
 
 		bus->adapter.algo = &gmbus_algorithm;
 
+		/*
+		 * We wish to retry with bit banging
+		 * after a timed out GMBUS attempt.
+		 */
+		bus->adapter.retries = 1;
+
 		/* By default use a conservative clock rate */
 		bus->reg0 = pin | GMBUS_RATE_100KHZ;
 

diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c
index 0da0240..bc04d8d 100644
--- a/drivers/gpu/drm/i915/intel_lvds.c
+++ b/drivers/gpu/drm/i915/intel_lvds.c

@@ -75,22 +75,30 @@
 	struct intel_lvds_encoder *lvds_encoder = to_lvds_encoder(&encoder->base);
 	enum intel_display_power_domain power_domain;
 	u32 tmp;
+	bool ret;
 
 	power_domain = intel_display_port_power_domain(encoder);
-	if (!intel_display_power_is_enabled(dev_priv, power_domain))
+	if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
 		return false;
 
+	ret = false;
+
 	tmp = I915_READ(lvds_encoder->reg);
 
 	if (!(tmp & LVDS_PORT_EN))
-		return false;
+		goto out;
 
 	if (HAS_PCH_CPT(dev))
 		*pipe = PORT_TO_PIPE_CPT(tmp);
 	else
 		*pipe = PORT_TO_PIPE(tmp);
 
-	return true;
+	ret = true;
+
+out:
+	intel_display_power_put(dev_priv, power_domain);
+
+	return ret;
 }
 
 static void intel_lvds_get_config(struct intel_encoder *encoder,

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index a234687..b28c29f 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c

@@ -2829,7 +2829,10 @@
 	memset(ddb, 0, sizeof(*ddb));
 
 	for_each_pipe(dev_priv, pipe) {
-		if (!intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_PIPE(pipe)))
+		enum intel_display_power_domain power_domain;
+
+		power_domain = POWER_DOMAIN_PIPE(pipe);
+		if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
 			continue;
 
 		for_each_plane(dev_priv, pipe, plane) {
@@ -2841,6 +2844,8 @@
 		val = I915_READ(CUR_BUF_CFG(pipe));
 		skl_ddb_entry_init_from_hw(&ddb->plane[pipe][PLANE_CURSOR],
 					   val);
+
+		intel_display_power_put(dev_priv, power_domain);
 	}
 }
 

diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index ddbdbff..4f43d9b 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c

@@ -470,6 +470,43 @@
 	}
 }
 
+static void gen9_write_dc_state(struct drm_i915_private *dev_priv,
+				u32 state)
+{
+	int rewrites = 0;
+	int rereads = 0;
+	u32 v;
+
+	I915_WRITE(DC_STATE_EN, state);
+
+	/* It has been observed that disabling the dc6 state sometimes
+	 * doesn't stick and dmc keeps returning old value. Make sure
+	 * the write really sticks enough times and also force rewrite until
+	 * we are confident that state is exactly what we want.
+	 */
+	do  {
+		v = I915_READ(DC_STATE_EN);
+
+		if (v != state) {
+			I915_WRITE(DC_STATE_EN, state);
+			rewrites++;
+			rereads = 0;
+		} else if (rereads++ > 5) {
+			break;
+		}
+
+	} while (rewrites < 100);
+
+	if (v != state)
+		DRM_ERROR("Writing dc state to 0x%x failed, now 0x%x\n",
+			  state, v);
+
+	/* Most of the times we need one retry, avoid spam */
+	if (rewrites > 1)
+		DRM_DEBUG_KMS("Rewrote dc state to 0x%x %d times\n",
+			      state, rewrites);
+}
+
 static void gen9_set_dc_state(struct drm_i915_private *dev_priv, uint32_t state)
 {
 	uint32_t val;
@@ -494,10 +531,18 @@
 	val = I915_READ(DC_STATE_EN);
 	DRM_DEBUG_KMS("Setting DC state from %02x to %02x\n",
 		      val & mask, state);
+
+	/* Check if DMC is ignoring our DC state requests */
+	if ((val & mask) != dev_priv->csr.dc_state)
+		DRM_ERROR("DC state mismatch (0x%x -> 0x%x)\n",
+			  dev_priv->csr.dc_state, val & mask);
+
 	val &= ~mask;
 	val |= state;
-	I915_WRITE(DC_STATE_EN, val);
-	POSTING_READ(DC_STATE_EN);
+
+	gen9_write_dc_state(dev_priv, val);
+
+	dev_priv->csr.dc_state = val & mask;
 }
 
 void bxt_enable_dc9(struct drm_i915_private *dev_priv)
@@ -1442,6 +1487,22 @@
 	chv_set_pipe_power_well(dev_priv, power_well, false);
 }
 
+static void
+__intel_display_power_get_domain(struct drm_i915_private *dev_priv,
+				 enum intel_display_power_domain domain)
+{
+	struct i915_power_domains *power_domains = &dev_priv->power_domains;
+	struct i915_power_well *power_well;
+	int i;
+
+	for_each_power_well(i, power_well, BIT(domain), power_domains) {
+		if (!power_well->count++)
+			intel_power_well_enable(dev_priv, power_well);
+	}
+
+	power_domains->domain_use_count[domain]++;
+}
+
 /**
  * intel_display_power_get - grab a power domain reference
  * @dev_priv: i915 device instance
@@ -1457,24 +1518,53 @@
 void intel_display_power_get(struct drm_i915_private *dev_priv,
 			     enum intel_display_power_domain domain)
 {
-	struct i915_power_domains *power_domains;
-	struct i915_power_well *power_well;
-	int i;
+	struct i915_power_domains *power_domains = &dev_priv->power_domains;
 
 	intel_runtime_pm_get(dev_priv);
 
-	power_domains = &dev_priv->power_domains;
+	mutex_lock(&power_domains->lock);
+
+	__intel_display_power_get_domain(dev_priv, domain);
+
+	mutex_unlock(&power_domains->lock);
+}
+
+/**
+ * intel_display_power_get_if_enabled - grab a reference for an enabled display power domain
+ * @dev_priv: i915 device instance
+ * @domain: power domain to reference
+ *
+ * This function grabs a power domain reference for @domain and ensures that the
+ * power domain and all its parents are powered up. Therefore users should only
+ * grab a reference to the innermost power domain they need.
+ *
+ * Any power domain reference obtained by this function must have a symmetric
+ * call to intel_display_power_put() to release the reference again.
+ */
+bool intel_display_power_get_if_enabled(struct drm_i915_private *dev_priv,
+					enum intel_display_power_domain domain)
+{
+	struct i915_power_domains *power_domains = &dev_priv->power_domains;
+	bool is_enabled;
+
+	if (!intel_runtime_pm_get_if_in_use(dev_priv))
+		return false;
 
 	mutex_lock(&power_domains->lock);
 
-	for_each_power_well(i, power_well, BIT(domain), power_domains) {
-		if (!power_well->count++)
-			intel_power_well_enable(dev_priv, power_well);
+	if (__intel_display_power_is_enabled(dev_priv, domain)) {
+		__intel_display_power_get_domain(dev_priv, domain);
+		is_enabled = true;
+	} else {
+		is_enabled = false;
 	}
 
-	power_domains->domain_use_count[domain]++;
-
 	mutex_unlock(&power_domains->lock);
+
+	if (!is_enabled)
+		intel_runtime_pm_put(dev_priv);
+
+	return is_enabled;
 }
 
 /**
@@ -2213,15 +2303,15 @@
  */
 void intel_power_domains_suspend(struct drm_i915_private *dev_priv)
 {
-	if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))
-		skl_display_core_uninit(dev_priv);
-
 	/*
 	 * Even if power well support was disabled we still want to disable
 	 * power wells while we are system suspended.
 	 */
 	if (!i915.disable_power_well)
 		intel_display_power_put(dev_priv, POWER_DOMAIN_INIT);
+
+	if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))
+		skl_display_core_uninit(dev_priv);
 }
 
 /**
@@ -2246,6 +2336,41 @@
 }
 
 /**
+ * intel_runtime_pm_get_if_in_use - grab a runtime pm reference if device in use
+ * @dev_priv: i915 device instance
+ *
+ * This function grabs a device-level runtime pm reference if the device is
+ * already in use and ensures that it is powered up.
+ *
+ * Any runtime pm reference obtained by this function must have a symmetric
+ * call to intel_runtime_pm_put() to release the reference again.
+ */
+bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv)
+{
+	struct drm_device *dev = dev_priv->dev;
+	struct device *device = &dev->pdev->dev;
+
+	if (IS_ENABLED(CONFIG_PM)) {
+		int ret = pm_runtime_get_if_in_use(device);
+
+		/*
+		 * In cases runtime PM is disabled by the RPM core and we get
+		 * an -EINVAL return value we are not supposed to call this
+		 * function, since the power state is undefined. This applies
+		 * atm to the late/early system suspend/resume handlers.
+		 */
+		WARN_ON_ONCE(ret < 0);
+		if (ret <= 0)
+			return false;
+	}
+
+	atomic_inc(&dev_priv->pm.wakeref_count);
+	assert_rpm_wakelock_held(dev_priv);
+
+	return true;
+}
+
+/**
  * intel_runtime_pm_get_noresume - grab a runtime pm reference
  * @dev_priv: i915 device instance
  *

diff --git a/drivers/gpu/drm/imx/ipuv3-crtc.c b/drivers/gpu/drm/imx/ipuv3-crtc.c
index 30a5718..28722631 100644
--- a/drivers/gpu/drm/imx/ipuv3-crtc.c
+++ b/drivers/gpu/drm/imx/ipuv3-crtc.c

@@ -64,6 +64,7 @@
 	/* Start DC channel and DI after IDMAC */
 	ipu_dc_enable_channel(ipu_crtc->dc);
 	ipu_di_enable(ipu_crtc->di);
+	drm_crtc_vblank_on(&ipu_crtc->base);
 
 	ipu_crtc->enabled = 1;
 }
@@ -80,6 +81,7 @@
 	ipu_di_disable(ipu_crtc->di);
 	ipu_plane_disable(ipu_crtc->plane[0]);
 	ipu_dc_disable(ipu);
+	drm_crtc_vblank_off(&ipu_crtc->base);
 
 	ipu_crtc->enabled = 0;
 }

diff --git a/drivers/gpu/drm/imx/ipuv3-plane.c b/drivers/gpu/drm/imx/ipuv3-plane.c
index 591ba2f..26bb1b6 100644
--- a/drivers/gpu/drm/imx/ipuv3-plane.c
+++ b/drivers/gpu/drm/imx/ipuv3-plane.c

@@ -42,6 +42,7 @@
 	DRM_FORMAT_YVYU,
 	DRM_FORMAT_YUV420,
 	DRM_FORMAT_YVU420,
+	DRM_FORMAT_RGB565,
 };
 
 int ipu_plane_irq(struct ipu_plane *ipu_plane)

diff --git a/drivers/gpu/drm/nouveau/nouveau_platform.c b/drivers/gpu/drm/nouveau/nouveau_platform.c
index 8a70cec..2dfe58a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_platform.c
+++ b/drivers/gpu/drm/nouveau/nouveau_platform.c

@@ -24,7 +24,7 @@
 static int nouveau_platform_probe(struct platform_device *pdev)
 {
 	const struct nvkm_device_tegra_func *func;
-	struct nvkm_device *device;
+	struct nvkm_device *device = NULL;
 	struct drm_device *drm;
 	int ret;
 

diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
index 7f8a427..e7e581d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c

@@ -252,32 +252,40 @@
 
 	if (!(tdev = kzalloc(sizeof(*tdev), GFP_KERNEL)))
 		return -ENOMEM;
-	*pdevice = &tdev->device;
+
 	tdev->func = func;
 	tdev->pdev = pdev;
 	tdev->irq = -1;
 
 	tdev->vdd = devm_regulator_get(&pdev->dev, "vdd");
-	if (IS_ERR(tdev->vdd))
-		return PTR_ERR(tdev->vdd);
+	if (IS_ERR(tdev->vdd)) {
+		ret = PTR_ERR(tdev->vdd);
+		goto free;
+	}
 
 	tdev->rst = devm_reset_control_get(&pdev->dev, "gpu");
-	if (IS_ERR(tdev->rst))
-		return PTR_ERR(tdev->rst);
+	if (IS_ERR(tdev->rst)) {
+		ret = PTR_ERR(tdev->rst);
+		goto free;
+	}
 
 	tdev->clk = devm_clk_get(&pdev->dev, "gpu");
-	if (IS_ERR(tdev->clk))
-		return PTR_ERR(tdev->clk);
+	if (IS_ERR(tdev->clk)) {
+		ret = PTR_ERR(tdev->clk);
+		goto free;
+	}
 
 	tdev->clk_pwr = devm_clk_get(&pdev->dev, "pwr");
-	if (IS_ERR(tdev->clk_pwr))
-		return PTR_ERR(tdev->clk_pwr);
+	if (IS_ERR(tdev->clk_pwr)) {
+		ret = PTR_ERR(tdev->clk_pwr);
+		goto free;
+	}
 
 	nvkm_device_tegra_probe_iommu(tdev);
 
 	ret = nvkm_device_tegra_power_up(tdev);
 	if (ret)
-		return ret;
+		goto remove;
 
 	tdev->gpu_speedo = tegra_sku_info.gpu_speedo_value;
 	ret = nvkm_device_ctor(&nvkm_device_tegra_func, NULL, &pdev->dev,
@@ -285,9 +293,19 @@
 			       cfg, dbg, detect, mmio, subdev_mask,
 			       &tdev->device);
 	if (ret)
-		return ret;
+		goto powerdown;
+
+	*pdevice = &tdev->device;
 
 	return 0;
+
+powerdown:
+	nvkm_device_tegra_power_down(tdev);
+remove:
+	nvkm_device_tegra_remove_iommu(tdev);
+free:
+	kfree(tdev);
+	return ret;
 }
 #else
 int

diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dport.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dport.c
index 74e2f7c..9688970 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dport.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dport.c

@@ -328,6 +328,7 @@
 		.outp = outp,
 	}, *dp = &_dp;
 	u32 datarate = 0;
+	u8  pwr;
 	int ret;
 
 	if (!outp->base.info.location && disp->func->sor.magic)
@@ -355,6 +356,15 @@
 	/* disable link interrupt handling during link training */
 	nvkm_notify_put(&outp->irq);
 
+	/* ensure sink is not in a low-power state */
+	if (!nvkm_rdaux(outp->aux, DPCD_SC00, &pwr, 1)) {
+		if ((pwr & DPCD_SC00_SET_POWER) != DPCD_SC00_SET_POWER_D0) {
+			pwr &= ~DPCD_SC00_SET_POWER;
+			pwr |=  DPCD_SC00_SET_POWER_D0;
+			nvkm_wraux(outp->aux, DPCD_SC00, &pwr, 1);
+		}
+	}
+
 	/* enable down-spreading and execute pre-train script from vbios */
 	dp_link_train_init(dp, outp->dpcd[3] & 0x01);
 

diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dport.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dport.h
index 9596290..6e10c5e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dport.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dport.h

@@ -71,5 +71,11 @@
 #define DPCD_LS0C_LANE1_POST_CURSOR2                                       0x0c
 #define DPCD_LS0C_LANE0_POST_CURSOR2                                       0x03
 
+/* DPCD Sink Control */
+#define DPCD_SC00                                                       0x00600
+#define DPCD_SC00_SET_POWER                                                0x03
+#define DPCD_SC00_SET_POWER_D0                                             0x01
+#define DPCD_SC00_SET_POWER_D3                                             0x03
+
 void nvkm_dp_train(struct work_struct *);
 #endif

diff --git a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c
index dfebdc4..85dfe36 100644
--- a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c
+++ b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c

@@ -573,10 +573,9 @@
 
 		kfree(omap_dmm->engines);
 		if (omap_dmm->refill_va)
-			dma_free_writecombine(omap_dmm->dev,
-				REFILL_BUFFER_SIZE * omap_dmm->num_engines,
-				omap_dmm->refill_va,
-				omap_dmm->refill_pa);
+			dma_free_wc(omap_dmm->dev,
+				    REFILL_BUFFER_SIZE * omap_dmm->num_engines,
+				    omap_dmm->refill_va, omap_dmm->refill_pa);
 		if (omap_dmm->dummy_page)
 			__free_page(omap_dmm->dummy_page);
 
@@ -701,9 +700,9 @@
 	omap_dmm->dummy_pa = page_to_phys(omap_dmm->dummy_page);
 
 	/* alloc refill memory */
-	omap_dmm->refill_va = dma_alloc_writecombine(&dev->dev,
-				REFILL_BUFFER_SIZE * omap_dmm->num_engines,
-				&omap_dmm->refill_pa, GFP_KERNEL);
+	omap_dmm->refill_va = dma_alloc_wc(&dev->dev,
+					   REFILL_BUFFER_SIZE * omap_dmm->num_engines,
+					   &omap_dmm->refill_pa, GFP_KERNEL);
 	if (!omap_dmm->refill_va) {
 		dev_err(&dev->dev, "could not allocate refill memory\n");
 		goto fail;

diff --git a/drivers/gpu/drm/omapdrm/omap_gem.c b/drivers/gpu/drm/omapdrm/omap_gem.c
index 8495a1a..359b0d7 100644
--- a/drivers/gpu/drm/omapdrm/omap_gem.c
+++ b/drivers/gpu/drm/omapdrm/omap_gem.c

@@ -1330,8 +1330,8 @@
 			omap_gem_detach_pages(obj);
 
 		if (!is_shmem(obj)) {
-			dma_free_writecombine(dev->dev, obj->size,
-					omap_obj->vaddr, omap_obj->paddr);
+			dma_free_wc(dev->dev, obj->size, omap_obj->vaddr,
+				    omap_obj->paddr);
 		} else if (omap_obj->vaddr) {
 			vunmap(omap_obj->vaddr);
 		}
@@ -1395,8 +1395,8 @@
 		/* attempt to allocate contiguous memory if we don't
 		 * have DMM for remappign discontiguous buffers
 		 */
-		omap_obj->vaddr =  dma_alloc_writecombine(dev->dev, size,
-				&omap_obj->paddr, GFP_KERNEL);
+		omap_obj->vaddr =  dma_alloc_wc(dev->dev, size,
+						&omap_obj->paddr, GFP_KERNEL);
 		if (!omap_obj->vaddr) {
 			kfree(omap_obj);
 

diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c
index 44ee72e..6af8325 100644
--- a/drivers/gpu/drm/radeon/atombios_dp.c
+++ b/drivers/gpu/drm/radeon/atombios_dp.c

@@ -315,15 +315,27 @@
 	unsigned max_lane_num = drm_dp_max_lane_count(dpcd);
 	unsigned lane_num, i, max_pix_clock;
 
-	for (lane_num = 1; lane_num <= max_lane_num; lane_num <<= 1) {
-		for (i = 0; i < ARRAY_SIZE(link_rates) && link_rates[i] <= max_link_rate; i++) {
-			max_pix_clock = (lane_num * link_rates[i] * 8) / bpp;
+	if (radeon_connector_encoder_get_dp_bridge_encoder_id(connector) ==
+	    ENCODER_OBJECT_ID_NUTMEG) {
+		for (lane_num = 1; lane_num <= max_lane_num; lane_num <<= 1) {
+			max_pix_clock = (lane_num * 270000 * 8) / bpp;
 			if (max_pix_clock >= pix_clock) {
 				*dp_lanes = lane_num;
-				*dp_rate = link_rates[i];
+				*dp_rate = 270000;
 				return 0;
 			}
 		}
+	} else {
+		for (lane_num = 1; lane_num <= max_lane_num; lane_num <<= 1) {
+			for (i = 0; i < ARRAY_SIZE(link_rates) && link_rates[i] <= max_link_rate; i++) {
+				max_pix_clock = (lane_num * link_rates[i] * 8) / bpp;
+				if (max_pix_clock >= pix_clock) {
+					*dp_lanes = lane_num;
+					*dp_rate = link_rates[i];
+					return 0;
+				}
+			}
+		}
 	}
 
 	return -EINVAL;

diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 902b59c..4197ca1 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c

@@ -1744,7 +1744,6 @@
 	}
 
 	drm_kms_helper_poll_enable(dev);
-	drm_helper_hpd_irq_event(dev);
 
 	/* set the power state here in case we are a PX system or headless */
 	if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled)

diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
index 298ea1c..2d9196a 100644
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c

@@ -403,7 +403,8 @@
 	struct drm_crtc *crtc = &radeon_crtc->base;
 	unsigned long flags;
 	int r;
-	int vpos, hpos, stat, min_udelay;
+	int vpos, hpos, stat, min_udelay = 0;
+	unsigned repcnt = 4;
 	struct drm_vblank_crtc *vblank = &crtc->dev->vblank[work->crtc_id];
 
         down_read(&rdev->exclusive_lock);
@@ -454,7 +455,7 @@
 	 * In practice this won't execute very often unless on very fast
 	 * machines because the time window for this to happen is very small.
 	 */
-	for (;;) {
+	while (radeon_crtc->enabled && --repcnt) {
 		/* GET_DISTANCE_TO_VBLANKSTART returns distance to real vblank
 		 * start in hpos, and to the "fudged earlier" vblank start in
 		 * vpos.
@@ -470,12 +471,24 @@
 			break;
 
 		/* Sleep at least until estimated real start of hw vblank */
-		spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
 		min_udelay = (-hpos + 1) * max(vblank->linedur_ns / 1000, 5);
+		if (min_udelay > vblank->framedur_ns / 2000) {
+			/* Don't wait ridiculously long - something is wrong */
+			repcnt = 0;
+			break;
+		}
+		spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
 		usleep_range(min_udelay, 2 * min_udelay);
 		spin_lock_irqsave(&crtc->dev->event_lock, flags);
 	};
 
+	if (!repcnt)
+		DRM_DEBUG_DRIVER("Delay problem on crtc %d: min_udelay %d, "
+				 "framedur %d, linedur %d, stat %d, vpos %d, "
+				 "hpos %d\n", work->crtc_id, min_udelay,
+				 vblank->framedur_ns / 1000,
+				 vblank->linedur_ns / 1000, stat, vpos, hpos);
+
 	/* do the flip (mmio) */
 	radeon_page_flip(rdev, radeon_crtc->crtc_id, work->base);
 

diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c
index 248c5a9..7a98823 100644
--- a/drivers/gpu/drm/radeon/radeon_pm.c
+++ b/drivers/gpu/drm/radeon/radeon_pm.c

@@ -1082,10 +1082,6 @@
 	/* update displays */
 	radeon_dpm_display_configuration_changed(rdev);
 
-	rdev->pm.dpm.current_active_crtcs = rdev->pm.dpm.new_active_crtcs;
-	rdev->pm.dpm.current_active_crtc_count = rdev->pm.dpm.new_active_crtc_count;
-	rdev->pm.dpm.single_display = single_display;
-
 	/* wait for the rings to drain */
 	for (i = 0; i < RADEON_NUM_RINGS; i++) {
 		struct radeon_ring *ring = &rdev->ring[i];
@@ -1101,6 +1097,10 @@
 
 	radeon_dpm_post_set_power_state(rdev);
 
+	rdev->pm.dpm.current_active_crtcs = rdev->pm.dpm.new_active_crtcs;
+	rdev->pm.dpm.current_active_crtc_count = rdev->pm.dpm.new_active_crtc_count;
+	rdev->pm.dpm.single_display = single_display;
+
 	if (rdev->asic->dpm.force_performance_level) {
 		if (rdev->pm.dpm.thermal_active) {
 			enum radeon_dpm_forced_level level = rdev->pm.dpm.forced_level;

diff --git a/drivers/gpu/drm/sti/sti_cursor.c b/drivers/gpu/drm/sti/sti_cursor.c
index 8078631..bd736ac 100644
--- a/drivers/gpu/drm/sti/sti_cursor.c
+++ b/drivers/gpu/drm/sti/sti_cursor.c

@@ -157,17 +157,15 @@
 		cursor->height = src_h;
 
 		if (cursor->pixmap.base)
-			dma_free_writecombine(cursor->dev,
-					      cursor->pixmap.size,
-					      cursor->pixmap.base,
-					      cursor->pixmap.paddr);
+			dma_free_wc(cursor->dev, cursor->pixmap.size,
+				    cursor->pixmap.base, cursor->pixmap.paddr);
 
 		cursor->pixmap.size = cursor->width * cursor->height;
 
-		cursor->pixmap.base = dma_alloc_writecombine(cursor->dev,
-							cursor->pixmap.size,
-							&cursor->pixmap.paddr,
-							GFP_KERNEL | GFP_DMA);
+		cursor->pixmap.base = dma_alloc_wc(cursor->dev,
+						   cursor->pixmap.size,
+						   &cursor->pixmap.paddr,
+						   GFP_KERNEL | GFP_DMA);
 		if (!cursor->pixmap.base) {
 			DRM_ERROR("Failed to allocate memory for pixmap\n");
 			return;
@@ -252,8 +250,8 @@
 
 	/* Allocate clut buffer */
 	size = 0x100 * sizeof(unsigned short);
-	cursor->clut = dma_alloc_writecombine(dev, size, &cursor->clut_paddr,
-					      GFP_KERNEL | GFP_DMA);
+	cursor->clut = dma_alloc_wc(dev, size, &cursor->clut_paddr,
+				    GFP_KERNEL | GFP_DMA);
 
 	if (!cursor->clut) {
 		DRM_ERROR("Failed to allocate memory for cursor clut\n");
@@ -286,7 +284,7 @@
 	return &cursor->plane.drm_plane;
 
 err_plane:
-	dma_free_writecombine(dev, size, cursor->clut, cursor->clut_paddr);
+	dma_free_wc(dev, size, cursor->clut, cursor->clut_paddr);
 err_clut:
 	devm_kfree(dev, cursor);
 	return NULL;

diff --git a/drivers/gpu/drm/sti/sti_gdp.c b/drivers/gpu/drm/sti/sti_gdp.c
index f9a1d92..514551c 100644
--- a/drivers/gpu/drm/sti/sti_gdp.c
+++ b/drivers/gpu/drm/sti/sti_gdp.c

@@ -312,8 +312,7 @@
 	/* Allocate all the nodes within a single memory page */
 	size = sizeof(struct sti_gdp_node) *
 	    GDP_NODE_PER_FIELD * GDP_NODE_NB_BANK;
-	base = dma_alloc_writecombine(gdp->dev,
-				      size, &dma_addr, GFP_KERNEL | GFP_DMA);
+	base = dma_alloc_wc(gdp->dev, size, &dma_addr, GFP_KERNEL | GFP_DMA);
 
 	if (!base) {
 		DRM_ERROR("Failed to allocate memory for GDP node\n");

diff --git a/drivers/gpu/drm/sti/sti_hqvdp.c b/drivers/gpu/drm/sti/sti_hqvdp.c
index 43861b5..1d3c3d0 100644
--- a/drivers/gpu/drm/sti/sti_hqvdp.c
+++ b/drivers/gpu/drm/sti/sti_hqvdp.c

@@ -617,9 +617,9 @@
 
 	/* Allocate memory for the VDP commands */
 	size = NB_VDP_CMD * sizeof(struct sti_hqvdp_cmd);
-	hqvdp->hqvdp_cmd = dma_alloc_writecombine(hqvdp->dev, size,
-					 &hqvdp->hqvdp_cmd_paddr,
-					 GFP_KERNEL | GFP_DMA);
+	hqvdp->hqvdp_cmd = dma_alloc_wc(hqvdp->dev, size,
+					&hqvdp->hqvdp_cmd_paddr,
+					GFP_KERNEL | GFP_DMA);
 	if (!hqvdp->hqvdp_cmd) {
 		DRM_ERROR("Failed to allocate memory for VDP cmd\n");
 		return;

diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c
index 33add93..3b0d8c3 100644
--- a/drivers/gpu/drm/tegra/gem.c
+++ b/drivers/gpu/drm/tegra/gem.c

@@ -175,8 +175,7 @@
 		sg_free_table(bo->sgt);
 		kfree(bo->sgt);
 	} else if (bo->vaddr) {
-		dma_free_writecombine(drm->dev, bo->gem.size, bo->vaddr,
-				      bo->paddr);
+		dma_free_wc(drm->dev, bo->gem.size, bo->vaddr, bo->paddr);
 	}
 }
 
@@ -233,8 +232,8 @@
 	} else {
 		size_t size = bo->gem.size;
 
-		bo->vaddr = dma_alloc_writecombine(drm->dev, size, &bo->paddr,
-						   GFP_KERNEL | __GFP_NOWARN);
+		bo->vaddr = dma_alloc_wc(drm->dev, size, &bo->paddr,
+					 GFP_KERNEL | __GFP_NOWARN);
 		if (!bo->vaddr) {
 			dev_err(drm->dev,
 				"failed to allocate buffer of size %zu\n",
@@ -472,8 +471,8 @@
 		vma->vm_flags &= ~VM_PFNMAP;
 		vma->vm_pgoff = 0;
 
-		ret = dma_mmap_writecombine(gem->dev->dev, vma, bo->vaddr,
-					    bo->paddr, gem->size);
+		ret = dma_mmap_wc(gem->dev->dev, vma, bo->vaddr, bo->paddr,
+				  gem->size);
 		if (ret) {
 			drm_gem_vm_close(vma);
 			return ret;

diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c
index 22278bc..034ef2d 100644
--- a/drivers/gpu/drm/vc4/vc4_bo.c
+++ b/drivers/gpu/drm/vc4/vc4_bo.c

@@ -398,9 +398,8 @@
 	vma->vm_flags &= ~VM_PFNMAP;
 	vma->vm_pgoff = 0;
 
-	ret = dma_mmap_writecombine(bo->base.base.dev->dev, vma,
-				    bo->base.vaddr, bo->base.paddr,
-				    vma->vm_end - vma->vm_start);
+	ret = dma_mmap_wc(bo->base.base.dev->dev, vma, bo->base.vaddr,
+			  bo->base.paddr, vma->vm_end - vma->vm_start);
 	if (ret)
 		drm_gem_vm_close(vma);
 

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
index db082be..c5a1a08 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c

@@ -563,6 +563,8 @@
 
 static const struct drm_connector_funcs vmw_sou_connector_funcs = {
 	.dpms = vmw_du_connector_dpms,
+	.detect = vmw_du_connector_detect,
+	.fill_modes = vmw_du_connector_fill_modes,
 	.set_property = vmw_du_connector_set_property,
 	.destroy = vmw_sou_connector_destroy,
 };

diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c
index da462af..dd2dbb9 100644
--- a/drivers/gpu/host1x/bus.c
+++ b/drivers/gpu/host1x/bus.c

@@ -18,6 +18,7 @@
 #include <linux/host1x.h>
 #include <linux/of.h>
 #include <linux/slab.h>
+#include <linux/of_device.h>
 
 #include "bus.h"
 #include "dev.h"
@@ -394,6 +395,7 @@
 	device->dev.coherent_dma_mask = host1x->dev->coherent_dma_mask;
 	device->dev.dma_mask = &device->dev.coherent_dma_mask;
 	dev_set_name(&device->dev, "%s", driver->driver.name);
+	of_dma_configure(&device->dev, host1x->dev->of_node);
 	device->dev.release = host1x_device_release;
 	device->dev.bus = &host1x_bus_type;
 	device->dev.parent = host1x->dev;

diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c
index 5a8c8d5..a18db4d 100644
--- a/drivers/gpu/host1x/cdma.c
+++ b/drivers/gpu/host1x/cdma.c

@@ -52,8 +52,8 @@
 	struct host1x *host1x = cdma_to_host1x(cdma);
 
 	if (pb->phys != 0)
-		dma_free_writecombine(host1x->dev, pb->size_bytes + 4,
-				      pb->mapped, pb->phys);
+		dma_free_wc(host1x->dev, pb->size_bytes + 4, pb->mapped,
+			    pb->phys);
 
 	pb->mapped = NULL;
 	pb->phys = 0;
@@ -76,8 +76,8 @@
 	pb->pos = 0;
 
 	/* allocate and map pushbuffer memory */
-	pb->mapped = dma_alloc_writecombine(host1x->dev, pb->size_bytes + 4,
-					    &pb->phys, GFP_KERNEL);
+	pb->mapped = dma_alloc_wc(host1x->dev, pb->size_bytes + 4, &pb->phys,
+				  GFP_KERNEL);
 	if (!pb->mapped)
 		goto fail;
 

diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index 314bf37..ff34869 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c

@@ -23,6 +23,7 @@
 #include <linux/of_device.h>
 #include <linux/clk.h>
 #include <linux/io.h>
+#include <linux/dma-mapping.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/host1x.h>
@@ -68,6 +69,7 @@
 	.nb_bases	= 8,
 	.init		= host1x01_init,
 	.sync_offset	= 0x3000,
+	.dma_mask	= DMA_BIT_MASK(32),
 };
 
 static const struct host1x_info host1x02_info = {
@@ -77,6 +79,7 @@
 	.nb_bases = 12,
 	.init = host1x02_init,
 	.sync_offset = 0x3000,
+	.dma_mask = DMA_BIT_MASK(32),
 };
 
 static const struct host1x_info host1x04_info = {
@@ -86,6 +89,7 @@
 	.nb_bases = 64,
 	.init = host1x04_init,
 	.sync_offset = 0x2100,
+	.dma_mask = DMA_BIT_MASK(34),
 };
 
 static const struct host1x_info host1x05_info = {
@@ -95,6 +99,7 @@
 	.nb_bases = 64,
 	.init = host1x05_init,
 	.sync_offset = 0x2100,
+	.dma_mask = DMA_BIT_MASK(34),
 };
 
 static struct of_device_id host1x_of_match[] = {
@@ -148,6 +153,8 @@
 	if (IS_ERR(host->regs))
 		return PTR_ERR(host->regs);
 
+	dma_set_mask_and_coherent(host->dev, host->info->dma_mask);
+
 	if (host->info->init) {
 		err = host->info->init(host);
 		if (err)

diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
index 0b6e8e9..dace124 100644
--- a/drivers/gpu/host1x/dev.h
+++ b/drivers/gpu/host1x/dev.h

@@ -96,6 +96,7 @@
 	int	nb_mlocks;		/* host1x: number of mlocks */
 	int	(*init)(struct host1x *); /* initialize per SoC ops */
 	int	sync_offset;
+	u64	dma_mask;		/* mask of addressable memory */
 };
 
 struct host1x {

diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c
index 63bd63f..defa799 100644
--- a/drivers/gpu/host1x/job.c
+++ b/drivers/gpu/host1x/job.c

@@ -467,9 +467,8 @@
 		size += g->words * sizeof(u32);
 	}
 
-	job->gather_copy_mapped = dma_alloc_writecombine(dev, size,
-							 &job->gather_copy,
-							 GFP_KERNEL);
+	job->gather_copy_mapped = dma_alloc_wc(dev, size, &job->gather_copy,
+					       GFP_KERNEL);
 	if (!job->gather_copy_mapped) {
 		job->gather_copy_mapped = NULL;
 		return -ENOMEM;
@@ -578,9 +577,8 @@
 	job->num_unpins = 0;
 
 	if (job->gather_copy_size)
-		dma_free_writecombine(job->channel->dev, job->gather_copy_size,
-				      job->gather_copy_mapped,
-				      job->gather_copy);
+		dma_free_wc(job->channel->dev, job->gather_copy_size,
+		            job->gather_copy_mapped, job->gather_copy);
 }
 EXPORT_SYMBOL(host1x_job_unpin);
 

diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c
index f2e13eb..e00db3f 100644
--- a/drivers/gpu/ipu-v3/ipu-common.c
+++ b/drivers/gpu/ipu-v3/ipu-common.c

@@ -1050,6 +1050,17 @@
 	for (i = 0; i < ARRAY_SIZE(client_reg); i++) {
 		const struct ipu_platform_reg *reg = &client_reg[i];
 		struct platform_device *pdev;
+		struct device_node *of_node;
+
+		/* Associate subdevice with the corresponding port node */
+		of_node = of_graph_get_port_by_id(dev->of_node, i);
+		if (!of_node) {
+			dev_info(dev,
+				 "no port@%d node in %s, not using %s%d\n",
+				 i, dev->of_node->full_name,
+				 (i / 2) ? "DI" : "CSI", i % 2);
+			continue;
+		}
 
 		pdev = platform_device_alloc(reg->name, id++);
 		if (!pdev) {
@@ -1057,17 +1068,9 @@
 			goto err_register;
 		}
 
+		pdev->dev.of_node = of_node;
 		pdev->dev.parent = dev;
 
-		/* Associate subdevice with the corresponding port node */
-		pdev->dev.of_node = of_graph_get_port_by_id(dev->of_node, i);
-		if (!pdev->dev.of_node) {
-			dev_err(dev, "missing port@%d node in %s\n", i,
-				dev->of_node->full_name);
-			ret = -ENODEV;
-			goto err_register;
-		}
-
 		ret = platform_device_add_data(pdev, &reg->pdata,
 					       sizeof(reg->pdata));
 		if (!ret)
@@ -1289,10 +1292,6 @@
 	ipu->irq_sync = irq_sync;
 	ipu->irq_err = irq_err;
 
-	ret = ipu_irq_init(ipu);
-	if (ret)
-		goto out_failed_irq;
-
 	ret = device_reset(&pdev->dev);
 	if (ret) {
 		dev_err(&pdev->dev, "failed to reset: %d\n", ret);
@@ -1302,6 +1301,10 @@
 	if (ret)
 		goto out_failed_reset;
 
+	ret = ipu_irq_init(ipu);
+	if (ret)
+		goto out_failed_irq;
+
 	/* Set MCU_T to divide MCU access window into 2 */
 	ipu_cm_write(ipu, 0x00400000L | (IPU_MCU_T_DEFAULT << 18),
 			IPU_DISP_GEN);
@@ -1324,9 +1327,9 @@
 failed_add_clients:
 	ipu_submodules_exit(ipu);
 failed_submodules_init:
-out_failed_reset:
 	ipu_irq_exit(ipu);
 out_failed_irq:
+out_failed_reset:
 	clk_disable_unprepare(ipu->clk);
 	return ret;
 }

diff --git a/drivers/hwmon/ads1015.c b/drivers/hwmon/ads1015.c
index f155b83..2b3105c 100644
--- a/drivers/hwmon/ads1015.c
+++ b/drivers/hwmon/ads1015.c

@@ -126,7 +126,7 @@
 	struct ads1015_data *data = i2c_get_clientdata(client);
 	unsigned int pga = data->channel_data[channel].pga;
 	int fullscale = fullscale_table[pga];
-	const unsigned mask = data->id == ads1115 ? 0x7fff : 0x7ff0;
+	const int mask = data->id == ads1115 ? 0x7fff : 0x7ff0;
 
 	return DIV_ROUND_CLOSEST(reg * fullscale, mask);
 }

diff --git a/drivers/hwmon/gpio-fan.c b/drivers/hwmon/gpio-fan.c
index 82de3de..685568b 100644
--- a/drivers/hwmon/gpio-fan.c
+++ b/drivers/hwmon/gpio-fan.c

@@ -406,16 +406,11 @@
 				  unsigned long *state)
 {
 	struct gpio_fan_data *fan_data = cdev->devdata;
-	int r;
 
 	if (!fan_data)
 		return -EINVAL;
 
-	r = get_fan_speed_index(fan_data);
-	if (r < 0)
-		return r;
-
-	*state = r;
+	*state = fan_data->speed_index;
 	return 0;
 }
 

diff --git a/drivers/i2c/busses/i2c-brcmstb.c b/drivers/i2c/busses/i2c-brcmstb.c
index 3711df1..4a45408 100644
--- a/drivers/i2c/busses/i2c-brcmstb.c
+++ b/drivers/i2c/busses/i2c-brcmstb.c

@@ -586,8 +586,7 @@
 	if (!dev)
 		return -ENOMEM;
 
-	dev->bsc_regmap = devm_kzalloc(&pdev->dev, sizeof(struct bsc_regs *),
-				       GFP_KERNEL);
+	dev->bsc_regmap = devm_kzalloc(&pdev->dev, sizeof(*dev->bsc_regmap), GFP_KERNEL);
 	if (!dev->bsc_regmap)
 		return -ENOMEM;
 

diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c
index f62d697..27fa0cb 100644
--- a/drivers/i2c/busses/i2c-i801.c
+++ b/drivers/i2c/busses/i2c-i801.c

@@ -1271,6 +1271,8 @@
 	switch (dev->device) {
 	case PCI_DEVICE_ID_INTEL_SUNRISEPOINT_H_SMBUS:
 	case PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_SMBUS:
+	case PCI_DEVICE_ID_INTEL_LEWISBURG_SMBUS:
+	case PCI_DEVICE_ID_INTEL_LEWISBURG_SSKU_SMBUS:
 	case PCI_DEVICE_ID_INTEL_DNV_SMBUS:
 		priv->features |= FEATURE_I2C_BLOCK_READ;
 		priv->features |= FEATURE_IRQ;

diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c
index 08d26ba..13c4529 100644
--- a/drivers/i2c/busses/i2c-omap.c
+++ b/drivers/i2c/busses/i2c-omap.c

@@ -1450,7 +1450,8 @@
 
 err_unuse_clocks:
 	omap_i2c_write_reg(omap, OMAP_I2C_CON_REG, 0);
-	pm_runtime_put(omap->dev);
+	pm_runtime_dont_use_autosuspend(omap->dev);
+	pm_runtime_put_sync(omap->dev);
 	pm_runtime_disable(&pdev->dev);
 err_free_mem:
 
@@ -1468,6 +1469,7 @@
 		return ret;
 
 	omap_i2c_write_reg(omap, OMAP_I2C_CON_REG, 0);
+	pm_runtime_dont_use_autosuspend(&pdev->dev);
 	pm_runtime_put_sync(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
 	return 0;

diff --git a/drivers/i2c/busses/i2c-uniphier-f.c b/drivers/i2c/busses/i2c-uniphier-f.c
index f3e5ff8..213ba55 100644
--- a/drivers/i2c/busses/i2c-uniphier-f.c
+++ b/drivers/i2c/busses/i2c-uniphier-f.c

@@ -467,7 +467,7 @@
 		bus_speed = UNIPHIER_FI2C_DEFAULT_SPEED;
 
 	if (!bus_speed) {
-		dev_err(dev, "clock-freqyency should not be zero\n");
+		dev_err(dev, "clock-frequency should not be zero\n");
 		return -EINVAL;
 	}
 

diff --git a/drivers/i2c/busses/i2c-uniphier.c b/drivers/i2c/busses/i2c-uniphier.c
index 1f4f3f5..89eaa8a 100644
--- a/drivers/i2c/busses/i2c-uniphier.c
+++ b/drivers/i2c/busses/i2c-uniphier.c

@@ -328,7 +328,7 @@
 		bus_speed = UNIPHIER_I2C_DEFAULT_SPEED;
 
 	if (!bus_speed) {
-		dev_err(dev, "clock-freqyency should not be zero\n");
+		dev_err(dev, "clock-frequency should not be zero\n");
 		return -EINVAL;
 	}
 

diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 00da80e..94b80a5 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c

@@ -358,6 +358,7 @@
 	ret = device->query_device(device, &device->attrs, &uhw);
 	if (ret) {
 		printk(KERN_WARNING "Couldn't query the device attributes\n");
+		ib_cache_cleanup_one(device);
 		goto out;
 	}
 

diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index f334090..1e37f35 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c

@@ -1071,7 +1071,7 @@
 		}
 	}
 
-	if (rec->hop_limit > 1 || use_roce) {
+	if (rec->hop_limit > 0 || use_roce) {
 		ah_attr->ah_flags = IB_AH_GRH;
 		ah_attr->grh.dgid = rec->dgid;
 

diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 6ffc9c4..6c6fbff 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c

@@ -1970,7 +1970,8 @@
 		   resp_size);
 	INIT_UDATA(&uhw, buf + sizeof(cmd),
 		   (unsigned long)cmd.response + resp_size,
-		   in_len - sizeof(cmd), out_len - resp_size);
+		   in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
+		   out_len - resp_size);
 
 	memset(&cmd_ex, 0, sizeof(cmd_ex));
 	cmd_ex.user_handle = cmd.user_handle;
@@ -3413,7 +3414,8 @@
 
 	INIT_UDATA(&udata, buf + sizeof cmd,
 		   (unsigned long) cmd.response + sizeof resp,
-		   in_len - sizeof cmd, out_len - sizeof resp);
+		   in_len - sizeof cmd - sizeof(struct ib_uverbs_cmd_hdr),
+		   out_len - sizeof resp);
 
 	ret = __uverbs_create_xsrq(file, ib_dev, &xcmd, &udata);
 	if (ret)
@@ -3439,7 +3441,8 @@
 
 	INIT_UDATA(&udata, buf + sizeof cmd,
 		   (unsigned long) cmd.response + sizeof resp,
-		   in_len - sizeof cmd, out_len - sizeof resp);
+		   in_len - sizeof cmd - sizeof(struct ib_uverbs_cmd_hdr),
+		   out_len - sizeof resp);
 
 	ret = __uverbs_create_xsrq(file, ib_dev, &cmd, &udata);
 	if (ret)

diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index 26833bf..d68f506 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c

@@ -817,17 +817,48 @@
 	return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
 }
 
-static void edit_counter(struct mlx4_counter *cnt,
-					struct ib_pma_portcounters *pma_cnt)
+static void edit_counter(struct mlx4_counter *cnt, void *counters,
+			 __be16 attr_id)
 {
-	ASSIGN_32BIT_COUNTER(pma_cnt->port_xmit_data,
-			     (be64_to_cpu(cnt->tx_bytes) >> 2));
-	ASSIGN_32BIT_COUNTER(pma_cnt->port_rcv_data,
-			     (be64_to_cpu(cnt->rx_bytes) >> 2));
-	ASSIGN_32BIT_COUNTER(pma_cnt->port_xmit_packets,
-			     be64_to_cpu(cnt->tx_frames));
-	ASSIGN_32BIT_COUNTER(pma_cnt->port_rcv_packets,
-			     be64_to_cpu(cnt->rx_frames));
+	switch (attr_id) {
+	case IB_PMA_PORT_COUNTERS:
+	{
+		struct ib_pma_portcounters *pma_cnt =
+			(struct ib_pma_portcounters *)counters;
+
+		ASSIGN_32BIT_COUNTER(pma_cnt->port_xmit_data,
+				     (be64_to_cpu(cnt->tx_bytes) >> 2));
+		ASSIGN_32BIT_COUNTER(pma_cnt->port_rcv_data,
+				     (be64_to_cpu(cnt->rx_bytes) >> 2));
+		ASSIGN_32BIT_COUNTER(pma_cnt->port_xmit_packets,
+				     be64_to_cpu(cnt->tx_frames));
+		ASSIGN_32BIT_COUNTER(pma_cnt->port_rcv_packets,
+				     be64_to_cpu(cnt->rx_frames));
+		break;
+	}
+	case IB_PMA_PORT_COUNTERS_EXT:
+	{
+		struct ib_pma_portcounters_ext *pma_cnt_ext =
+			(struct ib_pma_portcounters_ext *)counters;
+
+		pma_cnt_ext->port_xmit_data =
+			cpu_to_be64(be64_to_cpu(cnt->tx_bytes) >> 2);
+		pma_cnt_ext->port_rcv_data =
+			cpu_to_be64(be64_to_cpu(cnt->rx_bytes) >> 2);
+		pma_cnt_ext->port_xmit_packets = cnt->tx_frames;
+		pma_cnt_ext->port_rcv_packets = cnt->rx_frames;
+		break;
+	}
+	}
+}
+
+static int iboe_process_mad_port_info(void *out_mad)
+{
+	struct ib_class_port_info cpi = {};
+
+	cpi.capability_mask = IB_PMA_CLASS_CAP_EXT_WIDTH;
+	memcpy(out_mad, &cpi, sizeof(cpi));
+	return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
 }
 
 static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
@@ -842,6 +873,9 @@
 	if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_PERF_MGMT)
 		return -EINVAL;
 
+	if (in_mad->mad_hdr.attr_id == IB_PMA_CLASS_PORT_INFO)
+		return iboe_process_mad_port_info((void *)(out_mad->data + 40));
+
 	memset(&counter_stats, 0, sizeof(counter_stats));
 	mutex_lock(&dev->counters_table[port_num - 1].mutex);
 	list_for_each_entry(tmp_counter,
@@ -863,7 +897,8 @@
 		switch (counter_stats.counter_mode & 0xf) {
 		case 0:
 			edit_counter(&counter_stats,
-				     (void *)(out_mad->data + 40));
+				     (void *)(out_mad->data + 40),
+				     in_mad->mad_hdr.attr_id);
 			err = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
 			break;
 		default:
@@ -894,8 +929,10 @@
 	 */
 	if (link == IB_LINK_LAYER_INFINIBAND) {
 		if (mlx4_is_slave(dev->dev) &&
-		    in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT &&
-		    in_mad->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS)
+		    (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT &&
+		     (in_mad->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS ||
+		      in_mad->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS_EXT ||
+		      in_mad->mad_hdr.attr_id == IB_PMA_CLASS_PORT_INFO)))
 			return iboe_process_mad(ibdev, mad_flags, port_num, in_wc,
 						in_grh, in_mad, out_mad);
 

diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index bc5536f..fd97534 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c

@@ -1681,9 +1681,12 @@
 	}
 
 	if (qp->ibqp.uobject)
-		context->usr_page = cpu_to_be32(to_mucontext(ibqp->uobject->context)->uar.index);
+		context->usr_page = cpu_to_be32(
+			mlx4_to_hw_uar_index(dev->dev,
+					     to_mucontext(ibqp->uobject->context)->uar.index));
 	else
-		context->usr_page = cpu_to_be32(dev->priv_uar.index);
+		context->usr_page = cpu_to_be32(
+			mlx4_to_hw_uar_index(dev->dev, dev->priv_uar.index));
 
 	if (attr_mask & IB_QP_DEST_QPN)
 		context->remote_qpn = cpu_to_be32(attr->dest_qp_num);

diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index 4659256..3b2ddd6 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c

@@ -75,7 +75,8 @@
 
 static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
 			   struct mlx5_create_srq_mbox_in **in,
-			   struct ib_udata *udata, int buf_size, int *inlen)
+			   struct ib_udata *udata, int buf_size, int *inlen,
+			   int is_xrc)
 {
 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
 	struct mlx5_ib_create_srq ucmd = {};
@@ -87,13 +88,8 @@
 	int ncont;
 	u32 offset;
 	u32 uidx = MLX5_IB_DEFAULT_UIDX;
-	int drv_data = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr);
 
-	if (drv_data < 0)
-		return -EINVAL;
-
-	ucmdlen = (drv_data < sizeof(ucmd)) ?
-		  drv_data : sizeof(ucmd);
+	ucmdlen = min(udata->inlen, sizeof(ucmd));
 
 	if (ib_copy_from_udata(&ucmd, udata, ucmdlen)) {
 		mlx5_ib_dbg(dev, "failed copy udata\n");
@@ -103,15 +99,17 @@
 	if (ucmd.reserved0 || ucmd.reserved1)
 		return -EINVAL;
 
-	if (drv_data > sizeof(ucmd) &&
+	if (udata->inlen > sizeof(ucmd) &&
 	    !ib_is_udata_cleared(udata, sizeof(ucmd),
-				 drv_data - sizeof(ucmd)))
+				 udata->inlen - sizeof(ucmd)))
 		return -EINVAL;
 
-	err = get_srq_user_index(to_mucontext(pd->uobject->context),
-				 &ucmd, udata->inlen, &uidx);
-	if (err)
-		return err;
+	if (is_xrc) {
+		err = get_srq_user_index(to_mucontext(pd->uobject->context),
+					 &ucmd, udata->inlen, &uidx);
+		if (err)
+			return err;
+	}
 
 	srq->wq_sig = !!(ucmd.flags & MLX5_SRQ_FLAG_SIGNATURE);
 
@@ -151,7 +149,8 @@
 	(*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
 	(*in)->ctx.pgoff_cqn = cpu_to_be32(offset << 26);
 
-	if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) {
+	if ((MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) &&
+	     is_xrc){
 		xsrqc = MLX5_ADDR_OF(create_xrc_srq_in, *in,
 				     xrc_srq_context_entry);
 		MLX5_SET(xrc_srqc, xsrqc, user_index, uidx);
@@ -170,7 +169,7 @@
 
 static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
 			     struct mlx5_create_srq_mbox_in **in, int buf_size,
-			     int *inlen)
+			     int *inlen, int is_xrc)
 {
 	int err;
 	int i;
@@ -224,7 +223,8 @@
 
 	(*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
 
-	if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) {
+	if ((MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) &&
+	     is_xrc){
 		xsrqc = MLX5_ADDR_OF(create_xrc_srq_in, *in,
 				     xrc_srq_context_entry);
 		/* 0xffffff means we ask to work with cqe version 0 */
@@ -302,10 +302,14 @@
 		    desc_size, init_attr->attr.max_wr, srq->msrq.max, srq->msrq.max_gs,
 		    srq->msrq.max_avail_gather);
 
+	is_xrc = (init_attr->srq_type == IB_SRQT_XRC);
+
 	if (pd->uobject)
-		err = create_srq_user(pd, srq, &in, udata, buf_size, &inlen);
+		err = create_srq_user(pd, srq, &in, udata, buf_size, &inlen,
+				      is_xrc);
 	else
-		err = create_srq_kernel(dev, srq, &in, buf_size, &inlen);
+		err = create_srq_kernel(dev, srq, &in, buf_size, &inlen,
+					is_xrc);
 
 	if (err) {
 		mlx5_ib_warn(dev, "create srq %s failed, err %d\n",
@@ -313,7 +317,6 @@
 		goto err_srq;
 	}
 
-	is_xrc = (init_attr->srq_type == IB_SRQT_XRC);
 	in->ctx.state_log_sz = ilog2(srq->msrq.max);
 	flgs = ((srq->msrq.wqe_shift - 4) | (is_xrc << 5) | (srq->wq_sig << 7)) << 24;
 	xrcdn = 0;

diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h
index 040bb8b..12503f1 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma.h

@@ -323,9 +323,6 @@
 			 */
 	u32 max_hw_cqe;
 	bool phase_change;
-	bool deferred_arm, deferred_sol;
-	bool first_arm;
-
 	spinlock_t cq_lock ____cacheline_aligned; /* provide synchronization
 						   * to cq polling
 						   */

diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index 37620b4..12420e4 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c

@@ -1094,7 +1094,6 @@
 	spin_lock_init(&cq->comp_handler_lock);
 	INIT_LIST_HEAD(&cq->sq_head);
 	INIT_LIST_HEAD(&cq->rq_head);
-	cq->first_arm = true;
 
 	if (ib_ctx) {
 		uctx = get_ocrdma_ucontext(ib_ctx);
@@ -2910,12 +2909,9 @@
 	}
 stop_cqe:
 	cq->getp = cur_getp;
-	if (cq->deferred_arm || polled_hw_cqes) {
-		ocrdma_ring_cq_db(dev, cq->id, cq->deferred_arm,
-				  cq->deferred_sol, polled_hw_cqes);
-		cq->deferred_arm = false;
-		cq->deferred_sol = false;
-	}
+
+	if (polled_hw_cqes)
+		ocrdma_ring_cq_db(dev, cq->id, false, false, polled_hw_cqes);
 
 	return i;
 }
@@ -2999,13 +2995,7 @@
 	if (cq_flags & IB_CQ_SOLICITED)
 		sol_needed = true;
 
-	if (cq->first_arm) {
-		ocrdma_ring_cq_db(dev, cq_id, arm_needed, sol_needed, 0);
-		cq->first_arm = false;
-	}
-
-	cq->deferred_arm = true;
-	cq->deferred_sol = sol_needed;
+	ocrdma_ring_cq_db(dev, cq_id, arm_needed, sol_needed, 0);
 	spin_unlock_irqrestore(&cq->cq_lock, flags);
 
 	return 0;

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index e5e2239..374c129 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c

@@ -114,6 +114,7 @@
 
 static void update_domain(struct protection_domain *domain);
 static int protection_domain_init(struct protection_domain *domain);
+static void detach_device(struct device *dev);
 
 /*
  * For dynamic growth the aperture size is split into ranges of 128MB of
@@ -384,6 +385,9 @@
 	if (!dev_data)
 		return;
 
+	if (dev_data->domain)
+		detach_device(dev);
+
 	iommu_device_unlink(amd_iommu_rlookup_table[dev_data->devid]->iommu_dev,
 			    dev);
 

diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 013bdff..bf4959f 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c

@@ -228,6 +228,10 @@
 static int __init iommu_go_to_state(enum iommu_init_state state);
 static void init_device_table_dma(void);
 
+static int iommu_pc_get_set_reg_val(struct amd_iommu *iommu,
+				    u8 bank, u8 cntr, u8 fxn,
+				    u64 *value, bool is_write);
+
 static inline void update_last_devid(u16 devid)
 {
 	if (devid > amd_iommu_last_bdf)
@@ -1016,6 +1020,34 @@
 }
 
 /*
+ * Family15h Model 30h-3fh (IOMMU Mishandles ATS Write Permission)
+ * Workaround:
+ *     BIOS should enable ATS write permission check by setting
+ *     L2_DEBUG_3[AtsIgnoreIWDis](D0F2xF4_x47[0]) = 1b
+ */
+static void amd_iommu_ats_write_check_workaround(struct amd_iommu *iommu)
+{
+	u32 value;
+
+	if ((boot_cpu_data.x86 != 0x15) ||
+	    (boot_cpu_data.x86_model < 0x30) ||
+	    (boot_cpu_data.x86_model > 0x3f))
+		return;
+
+	/* Test L2_DEBUG_3[AtsIgnoreIWDis] == 1 */
+	value = iommu_read_l2(iommu, 0x47);
+
+	if (value & BIT(0))
+		return;
+
+	/* Set L2_DEBUG_3[AtsIgnoreIWDis] = 1 */
+	iommu_write_l2(iommu, 0x47, value | BIT(0));
+
+	pr_info("AMD-Vi: Applying ATS write check workaround for IOMMU at %s\n",
+		dev_name(&iommu->dev->dev));
+}
+
+/*
  * This function clues the initialization function for one IOMMU
  * together and also allocates the command buffer and programs the
  * hardware. It does NOT enable the IOMMU. This is done afterwards.
@@ -1142,8 +1174,8 @@
 	amd_iommu_pc_present = true;
 
 	/* Check if the performance counters can be written to */
-	if ((0 != amd_iommu_pc_get_set_reg_val(0, 0, 0, 0, &val, true)) ||
-	    (0 != amd_iommu_pc_get_set_reg_val(0, 0, 0, 0, &val2, false)) ||
+	if ((0 != iommu_pc_get_set_reg_val(iommu, 0, 0, 0, &val, true)) ||
+	    (0 != iommu_pc_get_set_reg_val(iommu, 0, 0, 0, &val2, false)) ||
 	    (val != val2)) {
 		pr_err("AMD-Vi: Unable to write to IOMMU perf counter.\n");
 		amd_iommu_pc_present = false;
@@ -1284,6 +1316,7 @@
 	}
 
 	amd_iommu_erratum_746_workaround(iommu);
+	amd_iommu_ats_write_check_workaround(iommu);
 
 	iommu->iommu_dev = iommu_device_create(&iommu->dev->dev, iommu,
 					       amd_iommu_groups, "ivhd%d",
@@ -2283,22 +2316,15 @@
 }
 EXPORT_SYMBOL(amd_iommu_pc_get_max_counters);
 
-int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr, u8 fxn,
+static int iommu_pc_get_set_reg_val(struct amd_iommu *iommu,
+				    u8 bank, u8 cntr, u8 fxn,
 				    u64 *value, bool is_write)
 {
-	struct amd_iommu *iommu;
 	u32 offset;
 	u32 max_offset_lim;
 
-	/* Make sure the IOMMU PC resource is available */
-	if (!amd_iommu_pc_present)
-		return -ENODEV;
-
-	/* Locate the iommu associated with the device ID */
-	iommu = amd_iommu_rlookup_table[devid];
-
 	/* Check for valid iommu and pc register indexing */
-	if (WARN_ON((iommu == NULL) || (fxn > 0x28) || (fxn & 7)))
+	if (WARN_ON((fxn > 0x28) || (fxn & 7)))
 		return -ENODEV;
 
 	offset = (u32)(((0x40|bank) << 12) | (cntr << 8) | fxn);
@@ -2322,3 +2348,16 @@
 	return 0;
 }
 EXPORT_SYMBOL(amd_iommu_pc_get_set_reg_val);
+
+int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr, u8 fxn,
+				    u64 *value, bool is_write)
+{
+	struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
+
+	/* Make sure the IOMMU PC resource is available */
+	if (!amd_iommu_pc_present || iommu == NULL)
+		return -ENODEV;
+
+	return iommu_pc_get_set_reg_val(iommu, bank, cntr, fxn,
+					value, is_write);
+}

diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index fb092f3..8ffd756 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c

@@ -329,7 +329,8 @@
 	/* Only care about add/remove events for physical functions */
 	if (pdev->is_virtfn)
 		return NOTIFY_DONE;
-	if (action != BUS_NOTIFY_ADD_DEVICE && action != BUS_NOTIFY_DEL_DEVICE)
+	if (action != BUS_NOTIFY_ADD_DEVICE &&
+	    action != BUS_NOTIFY_REMOVED_DEVICE)
 		return NOTIFY_DONE;
 
 	info = dmar_alloc_pci_notify_info(pdev, action);
@@ -339,7 +340,7 @@
 	down_write(&dmar_global_lock);
 	if (action == BUS_NOTIFY_ADD_DEVICE)
 		dmar_pci_bus_add_dev(info);
-	else if (action == BUS_NOTIFY_DEL_DEVICE)
+	else if (action == BUS_NOTIFY_REMOVED_DEVICE)
 		dmar_pci_bus_del_dev(info);
 	up_write(&dmar_global_lock);
 

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 986a53e..a2e1b7f 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c

@@ -4367,7 +4367,7 @@
 				rmrru->devices_cnt);
 			if(ret < 0)
 				return ret;
-		} else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
+		} else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
 			dmar_remove_dev_scope(info, rmrr->segment,
 				rmrru->devices, rmrru->devices_cnt);
 		}
@@ -4387,7 +4387,7 @@
 				break;
 			else if(ret < 0)
 				return ret;
-		} else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
+		} else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
 			if (dmar_remove_dev_scope(info, atsr->segment,
 					atsru->devices, atsru->devices_cnt))
 				break;

diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index fb50911..7e8c441 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig

@@ -60,6 +60,17 @@
 	  The maximum number of VICs available in the system, for
 	  power management.
 
+config ARMADA_370_XP_IRQ
+	bool
+	select GENERIC_IRQ_CHIP
+	select PCI_MSI_IRQ_DOMAIN if PCI_MSI
+
+config ALPINE_MSI
+	bool
+	depends on PCI && PCI_MSI
+	select GENERIC_IRQ_CHIP
+	select PCI_MSI_IRQ_DOMAIN
+
 config ATMEL_AIC_IRQ
 	bool
 	select GENERIC_IRQ_CHIP
@@ -78,6 +89,11 @@
 	bool
 	select IRQ_DOMAIN
 
+config BCM6345_L1_IRQ
+	bool
+	select GENERIC_IRQ_CHIP
+	select IRQ_DOMAIN
+
 config BCM7038_L1_IRQ
 	bool
 	select GENERIC_IRQ_CHIP
@@ -151,6 +167,11 @@
 	help
 	  Enables SysCfg Controlled IRQs on STi based platforms.
 
+config TANGO_IRQ
+	bool
+	select IRQ_DOMAIN
+	select GENERIC_IRQ_CHIP
+
 config TB10X_IRQC
 	bool
 	select IRQ_DOMAIN
@@ -160,6 +181,7 @@
 	tristate "TS-4800 IRQ controller"
 	select IRQ_DOMAIN
 	depends on HAS_IOMEM
+	depends on SOC_IMX51 || COMPILE_TEST
 	help
 	  Support for the TS-4800 FPGA IRQ controller
 
@@ -193,6 +215,8 @@
 
 config MIPS_GIC
 	bool
+	select GENERIC_IRQ_IPI
+	select IRQ_DOMAIN_HIERARCHY
 	select MIPS_CM
 
 config INGENIC_IRQ
@@ -218,3 +242,7 @@
 	def_bool y if MACH_ASM9260 || ARCH_MXS
 	select IRQ_DOMAIN
 	select STMP_DEVICE
+
+config MVEBU_ODMI
+	bool
+	select GENERIC_MSI_IRQ_DOMAIN

diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index 18caacb..b03cfcb 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile

@@ -1,11 +1,13 @@
 obj-$(CONFIG_IRQCHIP)			+= irqchip.o
 
+obj-$(CONFIG_ALPINE_MSI)		+= irq-alpine-msi.o
+obj-$(CONFIG_ATH79)			+= irq-ath79-cpu.o
+obj-$(CONFIG_ATH79)			+= irq-ath79-misc.o
 obj-$(CONFIG_ARCH_BCM2835)		+= irq-bcm2835.o
 obj-$(CONFIG_ARCH_BCM2835)		+= irq-bcm2836.o
 obj-$(CONFIG_ARCH_EXYNOS)		+= exynos-combiner.o
 obj-$(CONFIG_ARCH_HIP04)		+= irq-hip04.o
 obj-$(CONFIG_ARCH_MMP)			+= irq-mmp.o
-obj-$(CONFIG_ARCH_MVEBU)		+= irq-armada-370-xp.o
 obj-$(CONFIG_IRQ_MXS)			+= irq-mxs.o
 obj-$(CONFIG_ARCH_TEGRA)		+= irq-tegra.o
 obj-$(CONFIG_ARCH_S3C24XX)		+= irq-s3c24xx.o
@@ -28,6 +30,7 @@
 obj-$(CONFIG_HISILICON_IRQ_MBIGEN)	+= irq-mbigen.o
 obj-$(CONFIG_ARM_NVIC)			+= irq-nvic.o
 obj-$(CONFIG_ARM_VIC)			+= irq-vic.o
+obj-$(CONFIG_ARMADA_370_XP_IRQ)		+= irq-armada-370-xp.o
 obj-$(CONFIG_ATMEL_AIC_IRQ)		+= irq-atmel-aic-common.o irq-atmel-aic.o
 obj-$(CONFIG_ATMEL_AIC5_IRQ)	+= irq-atmel-aic-common.o irq-atmel-aic5.o
 obj-$(CONFIG_I8259)			+= irq-i8259.o
@@ -40,12 +43,14 @@
 obj-$(CONFIG_ARCH_NSPIRE)		+= irq-zevio.o
 obj-$(CONFIG_ARCH_VT8500)		+= irq-vt8500.o
 obj-$(CONFIG_ST_IRQCHIP)		+= irq-st.o
+obj-$(CONFIG_TANGO_IRQ)			+= irq-tango.o
 obj-$(CONFIG_TB10X_IRQC)		+= irq-tb10x.o
 obj-$(CONFIG_TS4800_IRQ)		+= irq-ts4800.o
 obj-$(CONFIG_XTENSA)			+= irq-xtensa-pic.o
 obj-$(CONFIG_XTENSA_MX)			+= irq-xtensa-mx.o
 obj-$(CONFIG_IRQ_CROSSBAR)		+= irq-crossbar.o
 obj-$(CONFIG_SOC_VF610)			+= irq-vf610-mscm-ir.o
+obj-$(CONFIG_BCM6345_L1_IRQ)		+= irq-bcm6345-l1.o
 obj-$(CONFIG_BCM7038_L1_IRQ)		+= irq-bcm7038-l1.o
 obj-$(CONFIG_BCM7120_L2_IRQ)		+= irq-bcm7120-l2.o
 obj-$(CONFIG_BRCMSTB_L2_IRQ)		+= irq-brcmstb-l2.o
@@ -59,3 +64,4 @@
 obj-$(CONFIG_INGENIC_IRQ)		+= irq-ingenic.o
 obj-$(CONFIG_IMX_GPCV2)			+= irq-imx-gpcv2.o
 obj-$(CONFIG_PIC32_EVIC)		+= irq-pic32-evic.o
+obj-$(CONFIG_MVEBU_ODMI)		+= irq-mvebu-odmi.o

diff --git a/drivers/irqchip/irq-alpine-msi.c b/drivers/irqchip/irq-alpine-msi.c
new file mode 100644
index 0000000..2538425
--- /dev/null
+++ b/drivers/irqchip/irq-alpine-msi.c

@@ -0,0 +1,293 @@
+/*
+ * Annapurna Labs MSIX support services
+ *
+ * Copyright (C) 2016, Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * Antoine Tenart <antoine.tenart@free-electrons.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/irqchip.h>
+#include <linux/irqchip/arm-gic.h>
+#include <linux/msi.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/of_pci.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+
+#include <asm/irq.h>
+#include <asm-generic/msi.h>
+
+/* MSIX message address format: local GIC target */
+#define ALPINE_MSIX_SPI_TARGET_CLUSTER0		BIT(16)
+
+struct alpine_msix_data {
+	spinlock_t msi_map_lock;
+	phys_addr_t addr;
+	u32 spi_first;		/* The SGI number that MSIs start */
+	u32 num_spis;		/* The number of SGIs for MSIs */
+	unsigned long *msi_map;
+};
+
+static void alpine_msix_mask_msi_irq(struct irq_data *d)
+{
+	pci_msi_mask_irq(d);
+	irq_chip_mask_parent(d);
+}
+
+static void alpine_msix_unmask_msi_irq(struct irq_data *d)
+{
+	pci_msi_unmask_irq(d);
+	irq_chip_unmask_parent(d);
+}
+
+static struct irq_chip alpine_msix_irq_chip = {
+	.name			= "MSIx",
+	.irq_mask		= alpine_msix_mask_msi_irq,
+	.irq_unmask		= alpine_msix_unmask_msi_irq,
+	.irq_eoi		= irq_chip_eoi_parent,
+	.irq_set_affinity	= irq_chip_set_affinity_parent,
+};
+
+static int alpine_msix_allocate_sgi(struct alpine_msix_data *priv, int num_req)
+{
+	int first;
+
+	spin_lock(&priv->msi_map_lock);
+
+	first = bitmap_find_next_zero_area(priv->msi_map, priv->num_spis, 0,
+					   num_req, 0);
+	if (first >= priv->num_spis) {
+		spin_unlock(&priv->msi_map_lock);
+		return -ENOSPC;
+	}
+
+	bitmap_set(priv->msi_map, first, num_req);
+
+	spin_unlock(&priv->msi_map_lock);
+
+	return priv->spi_first + first;
+}
+
+static void alpine_msix_free_sgi(struct alpine_msix_data *priv, unsigned sgi,
+				 int num_req)
+{
+	int first = sgi - priv->spi_first;
+
+	spin_lock(&priv->msi_map_lock);
+
+	bitmap_clear(priv->msi_map, first, num_req);
+
+	spin_unlock(&priv->msi_map_lock);
+}
+
+static void alpine_msix_compose_msi_msg(struct irq_data *data,
+					struct msi_msg *msg)
+{
+	struct alpine_msix_data *priv = irq_data_get_irq_chip_data(data);
+	phys_addr_t msg_addr = priv->addr;
+
+	msg_addr |= (data->hwirq << 3);
+
+	msg->address_hi = upper_32_bits(msg_addr);
+	msg->address_lo = lower_32_bits(msg_addr);
+	msg->data = 0;
+}
+
+static struct msi_domain_info alpine_msix_domain_info = {
+	.flags	= MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
+		  MSI_FLAG_PCI_MSIX,
+	.chip	= &alpine_msix_irq_chip,
+};
+
+static struct irq_chip middle_irq_chip = {
+	.name			= "alpine_msix_middle",
+	.irq_mask		= irq_chip_mask_parent,
+	.irq_unmask		= irq_chip_unmask_parent,
+	.irq_eoi		= irq_chip_eoi_parent,
+	.irq_set_affinity	= irq_chip_set_affinity_parent,
+	.irq_compose_msi_msg	= alpine_msix_compose_msi_msg,
+};
+
+static int alpine_msix_gic_domain_alloc(struct irq_domain *domain,
+					unsigned int virq, int sgi)
+{
+	struct irq_fwspec fwspec;
+	struct irq_data *d;
+	int ret;
+
+	if (!is_of_node(domain->parent->fwnode))
+		return -EINVAL;
+
+	fwspec.fwnode = domain->parent->fwnode;
+	fwspec.param_count = 3;
+	fwspec.param[0] = 0;
+	fwspec.param[1] = sgi;
+	fwspec.param[2] = IRQ_TYPE_EDGE_RISING;
+
+	ret = irq_domain_alloc_irqs_parent(domain, virq, 1, &fwspec);
+	if (ret)
+		return ret;
+
+	d = irq_domain_get_irq_data(domain->parent, virq);
+	d->chip->irq_set_type(d, IRQ_TYPE_EDGE_RISING);
+
+	return 0;
+}
+
+static int alpine_msix_middle_domain_alloc(struct irq_domain *domain,
+					   unsigned int virq,
+					   unsigned int nr_irqs, void *args)
+{
+	struct alpine_msix_data *priv = domain->host_data;
+	int sgi, err, i;
+
+	sgi = alpine_msix_allocate_sgi(priv, nr_irqs);
+	if (sgi < 0)
+		return sgi;
+
+	for (i = 0; i < nr_irqs; i++) {
+		err = alpine_msix_gic_domain_alloc(domain, virq + i, sgi + i);
+		if (err)
+			goto err_sgi;
+
+		irq_domain_set_hwirq_and_chip(domain, virq + i, sgi + i,
+					      &middle_irq_chip, priv);
+	}
+
+	return 0;
+
+err_sgi:
+	while (--i >= 0)
+		irq_domain_free_irqs_parent(domain, virq, i);
+	alpine_msix_free_sgi(priv, sgi, nr_irqs);
+	return err;
+}
+
+static void alpine_msix_middle_domain_free(struct irq_domain *domain,
+					   unsigned int virq,
+					   unsigned int nr_irqs)
+{
+	struct irq_data *d = irq_domain_get_irq_data(domain, virq);
+	struct alpine_msix_data *priv = irq_data_get_irq_chip_data(d);
+
+	irq_domain_free_irqs_parent(domain, virq, nr_irqs);
+	alpine_msix_free_sgi(priv, d->hwirq, nr_irqs);
+}
+
+static const struct irq_domain_ops alpine_msix_middle_domain_ops = {
+	.alloc	= alpine_msix_middle_domain_alloc,
+	.free	= alpine_msix_middle_domain_free,
+};
+
+static int alpine_msix_init_domains(struct alpine_msix_data *priv,
+				    struct device_node *node)
+{
+	struct irq_domain *middle_domain, *msi_domain, *gic_domain;
+	struct device_node *gic_node;
+
+	gic_node = of_irq_find_parent(node);
+	if (!gic_node) {
+		pr_err("Failed to find the GIC node\n");
+		return -ENODEV;
+	}
+
+	gic_domain = irq_find_host(gic_node);
+	if (!gic_domain) {
+		pr_err("Failed to find the GIC domain\n");
+		return -ENXIO;
+	}
+
+	middle_domain = irq_domain_add_tree(NULL,
+					    &alpine_msix_middle_domain_ops,
+					    priv);
+	if (!middle_domain) {
+		pr_err("Failed to create the MSIX middle domain\n");
+		return -ENOMEM;
+	}
+
+	middle_domain->parent = gic_domain;
+
+	msi_domain = pci_msi_create_irq_domain(of_node_to_fwnode(node),
+					       &alpine_msix_domain_info,
+					       middle_domain);
+	if (!msi_domain) {
+		pr_err("Failed to create MSI domain\n");
+		irq_domain_remove(middle_domain);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static int alpine_msix_init(struct device_node *node,
+			    struct device_node *parent)
+{
+	struct alpine_msix_data *priv;
+	struct resource res;
+	int ret;
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	spin_lock_init(&priv->msi_map_lock);
+
+	ret = of_address_to_resource(node, 0, &res);
+	if (ret) {
+		pr_err("Failed to allocate resource\n");
+		goto err_priv;
+	}
+
+	/*
+	 * The 20 least significant bits of addr provide direct information
+	 * regarding the interrupt destination.
+	 *
+	 * To select the primary GIC as the target GIC, bits [18:17] must be set
+	 * to 0x0. In this case, bit 16 (SPI_TARGET_CLUSTER0) must be set.
+	 */
+	priv->addr = res.start & GENMASK_ULL(63,20);
+	priv->addr |= ALPINE_MSIX_SPI_TARGET_CLUSTER0;
+
+	if (of_property_read_u32(node, "al,msi-base-spi", &priv->spi_first)) {
+		pr_err("Unable to parse MSI base\n");
+		ret = -EINVAL;
+		goto err_priv;
+	}
+
+	if (of_property_read_u32(node, "al,msi-num-spis", &priv->num_spis)) {
+		pr_err("Unable to parse MSI numbers\n");
+		ret = -EINVAL;
+		goto err_priv;
+	}
+
+	priv->msi_map = kzalloc(sizeof(*priv->msi_map) * BITS_TO_LONGS(priv->num_spis),
+				GFP_KERNEL);
+	if (!priv->msi_map) {
+		ret = -ENOMEM;
+		goto err_priv;
+	}
+
+	pr_debug("Registering %d msixs, starting at %d\n",
+		 priv->num_spis, priv->spi_first);
+
+	ret = alpine_msix_init_domains(priv, node);
+	if (ret)
+		goto err_map;
+
+	return 0;
+
+err_map:
+	kfree(priv->msi_map);
+err_priv:
+	kfree(priv);
+	return ret;
+}
+IRQCHIP_DECLARE(alpine_msix, "al,alpine-msix", alpine_msix_init);

diff --git a/drivers/irqchip/irq-armada-370-xp.c b/drivers/irqchip/irq-armada-370-xp.c
index 3f3a8c3..e7dc6cb 100644
--- a/drivers/irqchip/irq-armada-370-xp.c
+++ b/drivers/irqchip/irq-armada-370-xp.c

@@ -71,6 +71,7 @@
 static int parent_irq;
 #ifdef CONFIG_PCI_MSI
 static struct irq_domain *armada_370_xp_msi_domain;
+static struct irq_domain *armada_370_xp_msi_inner_domain;
 static DECLARE_BITMAP(msi_used, PCI_MSI_DOORBELL_NR);
 static DEFINE_MUTEX(msi_used_lock);
 static phys_addr_t msi_doorbell_addr;
@@ -115,127 +116,102 @@
 
 #ifdef CONFIG_PCI_MSI
 
-static int armada_370_xp_alloc_msi(void)
-{
-	int hwirq;
-
-	mutex_lock(&msi_used_lock);
-	hwirq = find_first_zero_bit(&msi_used, PCI_MSI_DOORBELL_NR);
-	if (hwirq >= PCI_MSI_DOORBELL_NR)
-		hwirq = -ENOSPC;
-	else
-		set_bit(hwirq, msi_used);
-	mutex_unlock(&msi_used_lock);
-
-	return hwirq;
-}
-
-static void armada_370_xp_free_msi(int hwirq)
-{
-	mutex_lock(&msi_used_lock);
-	if (!test_bit(hwirq, msi_used))
-		pr_err("trying to free unused MSI#%d\n", hwirq);
-	else
-		clear_bit(hwirq, msi_used);
-	mutex_unlock(&msi_used_lock);
-}
-
-static int armada_370_xp_setup_msi_irq(struct msi_controller *chip,
-				       struct pci_dev *pdev,
-				       struct msi_desc *desc)
-{
-	struct msi_msg msg;
-	int virq, hwirq;
-
-	/* We support MSI, but not MSI-X */
-	if (desc->msi_attrib.is_msix)
-		return -EINVAL;
-
-	hwirq = armada_370_xp_alloc_msi();
-	if (hwirq < 0)
-		return hwirq;
-
-	virq = irq_create_mapping(armada_370_xp_msi_domain, hwirq);
-	if (!virq) {
-		armada_370_xp_free_msi(hwirq);
-		return -EINVAL;
-	}
-
-	irq_set_msi_desc(virq, desc);
-
-	msg.address_lo = msi_doorbell_addr;
-	msg.address_hi = 0;
-	msg.data = 0xf00 | (hwirq + 16);
-
-	pci_write_msi_msg(virq, &msg);
-	return 0;
-}
-
-static void armada_370_xp_teardown_msi_irq(struct msi_controller *chip,
-					   unsigned int irq)
-{
-	struct irq_data *d = irq_get_irq_data(irq);
-	unsigned long hwirq = d->hwirq;
-
-	irq_dispose_mapping(irq);
-	armada_370_xp_free_msi(hwirq);
-}
-
 static struct irq_chip armada_370_xp_msi_irq_chip = {
-	.name = "armada_370_xp_msi_irq",
-	.irq_enable = pci_msi_unmask_irq,
-	.irq_disable = pci_msi_mask_irq,
+	.name = "MPIC MSI",
 	.irq_mask = pci_msi_mask_irq,
 	.irq_unmask = pci_msi_unmask_irq,
 };
 
-static int armada_370_xp_msi_map(struct irq_domain *domain, unsigned int virq,
-				 irq_hw_number_t hw)
-{
-	irq_set_chip_and_handler(virq, &armada_370_xp_msi_irq_chip,
-				 handle_simple_irq);
+static struct msi_domain_info armada_370_xp_msi_domain_info = {
+	.flags	= (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
+		   MSI_FLAG_MULTI_PCI_MSI),
+	.chip	= &armada_370_xp_msi_irq_chip,
+};
 
-	return 0;
+static void armada_370_xp_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
+{
+	msg->address_lo = lower_32_bits(msi_doorbell_addr);
+	msg->address_hi = upper_32_bits(msi_doorbell_addr);
+	msg->data = 0xf00 | (data->hwirq + PCI_MSI_DOORBELL_START);
 }
 
-static const struct irq_domain_ops armada_370_xp_msi_irq_ops = {
-	.map = armada_370_xp_msi_map,
+static int armada_370_xp_msi_set_affinity(struct irq_data *irq_data,
+					  const struct cpumask *mask, bool force)
+{
+	 return -EINVAL;
+}
+
+static struct irq_chip armada_370_xp_msi_bottom_irq_chip = {
+	.name			= "MPIC MSI",
+	.irq_compose_msi_msg	= armada_370_xp_compose_msi_msg,
+	.irq_set_affinity	= armada_370_xp_msi_set_affinity,
+};
+
+static int armada_370_xp_msi_alloc(struct irq_domain *domain, unsigned int virq,
+				   unsigned int nr_irqs, void *args)
+{
+	int hwirq, i;
+
+	mutex_lock(&msi_used_lock);
+
+	hwirq = bitmap_find_next_zero_area(msi_used, PCI_MSI_DOORBELL_NR,
+					   0, nr_irqs, 0);
+	if (hwirq >= PCI_MSI_DOORBELL_NR) {
+		mutex_unlock(&msi_used_lock);
+		return -ENOSPC;
+	}
+
+	bitmap_set(msi_used, hwirq, nr_irqs);
+	mutex_unlock(&msi_used_lock);
+
+	for (i = 0; i < nr_irqs; i++) {
+		irq_domain_set_info(domain, virq + i, hwirq + i,
+				    &armada_370_xp_msi_bottom_irq_chip,
+				    domain->host_data, handle_simple_irq,
+				    NULL, NULL);
+	}
+
+	return hwirq;
+}
+
+static void armada_370_xp_msi_free(struct irq_domain *domain,
+				   unsigned int virq, unsigned int nr_irqs)
+{
+	struct irq_data *d = irq_domain_get_irq_data(domain, virq);
+
+	mutex_lock(&msi_used_lock);
+	bitmap_clear(msi_used, d->hwirq, nr_irqs);
+	mutex_unlock(&msi_used_lock);
+}
+
+static const struct irq_domain_ops armada_370_xp_msi_domain_ops = {
+	.alloc	= armada_370_xp_msi_alloc,
+	.free	= armada_370_xp_msi_free,
 };
 
 static int armada_370_xp_msi_init(struct device_node *node,
 				  phys_addr_t main_int_phys_base)
 {
-	struct msi_controller *msi_chip;
 	u32 reg;
-	int ret;
 
 	msi_doorbell_addr = main_int_phys_base +
 		ARMADA_370_XP_SW_TRIG_INT_OFFS;
 
-	msi_chip = kzalloc(sizeof(*msi_chip), GFP_KERNEL);
-	if (!msi_chip)
+	armada_370_xp_msi_inner_domain =
+		irq_domain_add_linear(NULL, PCI_MSI_DOORBELL_NR,
+				      &armada_370_xp_msi_domain_ops, NULL);
+	if (!armada_370_xp_msi_inner_domain)
 		return -ENOMEM;
 
-	msi_chip->setup_irq = armada_370_xp_setup_msi_irq;
-	msi_chip->teardown_irq = armada_370_xp_teardown_msi_irq;
-	msi_chip->of_node = node;
-
 	armada_370_xp_msi_domain =
-		irq_domain_add_linear(NULL, PCI_MSI_DOORBELL_NR,
-				      &armada_370_xp_msi_irq_ops,
-				      NULL);
+		pci_msi_create_irq_domain(of_node_to_fwnode(node),
+					  &armada_370_xp_msi_domain_info,
+					  armada_370_xp_msi_inner_domain);
 	if (!armada_370_xp_msi_domain) {
-		kfree(msi_chip);
+		irq_domain_remove(armada_370_xp_msi_inner_domain);
 		return -ENOMEM;
 	}
 
-	ret = of_pci_msi_chip_add(msi_chip);
-	if (ret < 0) {
-		irq_domain_remove(armada_370_xp_msi_domain);
-		kfree(msi_chip);
-		return ret;
-	}
-
 	reg = readl(per_cpu_int_base + ARMADA_370_XP_IN_DRBEL_MSK_OFFS)
 		| PCI_MSI_DOORBELL_MASK;
 
@@ -280,7 +256,7 @@
 #endif
 
 static struct irq_chip armada_370_xp_irq_chip = {
-	.name		= "armada_370_xp_irq",
+	.name		= "MPIC",
 	.irq_mask       = armada_370_xp_irq_mask,
 	.irq_mask_ack   = armada_370_xp_irq_mask,
 	.irq_unmask     = armada_370_xp_irq_unmask,
@@ -427,12 +403,12 @@
 			continue;
 
 		if (is_chained) {
-			irq = irq_find_mapping(armada_370_xp_msi_domain,
-					       msinr - 16);
+			irq = irq_find_mapping(armada_370_xp_msi_inner_domain,
+					       msinr - PCI_MSI_DOORBELL_START);
 			generic_handle_irq(irq);
 		} else {
-			irq = msinr - 16;
-			handle_domain_irq(armada_370_xp_msi_domain,
+			irq = msinr - PCI_MSI_DOORBELL_START;
+			handle_domain_irq(armada_370_xp_msi_inner_domain,
 					  irq, regs);
 		}
 	}
@@ -604,8 +580,8 @@
 	armada_370_xp_mpic_domain =
 		irq_domain_add_linear(node, nr_irqs,
 				&armada_370_xp_mpic_irq_ops, NULL);
-
 	BUG_ON(!armada_370_xp_mpic_domain);
+	armada_370_xp_mpic_domain->bus_token = DOMAIN_BUS_WIRED;
 
 	/* Setup for the boot CPU */
 	armada_xp_mpic_perf_init();

diff --git a/drivers/irqchip/irq-ath79-cpu.c b/drivers/irqchip/irq-ath79-cpu.c
new file mode 100644
index 0000000..befe93c
--- /dev/null
+++ b/drivers/irqchip/irq-ath79-cpu.c

@@ -0,0 +1,97 @@
+/*
+ *  Atheros AR71xx/AR724x/AR913x specific interrupt handling
+ *
+ *  Copyright (C) 2015 Alban Bedel <albeu@free.fr>
+ *  Copyright (C) 2010-2011 Jaiganesh Narayanan <jnarayanan@atheros.com>
+ *  Copyright (C) 2008-2011 Gabor Juhos <juhosg@openwrt.org>
+ *  Copyright (C) 2008 Imre Kaloz <kaloz@openwrt.org>
+ *
+ *  Parts of this file are based on Atheros' 2.6.15/2.6.31 BSP
+ *
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/irqchip.h>
+#include <linux/of.h>
+
+#include <asm/irq_cpu.h>
+#include <asm/mach-ath79/ath79.h>
+
+/*
+ * The IP2/IP3 lines are tied to a PCI/WMAC/USB device. Drivers for
+ * these devices typically allocate coherent DMA memory, however the
+ * DMA controller may still have some unsynchronized data in the FIFO.
+ * Issue a flush in the handlers to ensure that the driver sees
+ * the update.
+ *
+ * This array map the interrupt lines to the DDR write buffer channels.
+ */
+
+static unsigned irq_wb_chan[8] = {
+	-1, -1, -1, -1, -1, -1, -1, -1,
+};
+
+asmlinkage void plat_irq_dispatch(void)
+{
+	unsigned long pending;
+	int irq;
+
+	pending = read_c0_status() & read_c0_cause() & ST0_IM;
+
+	if (!pending) {
+		spurious_interrupt();
+		return;
+	}
+
+	pending >>= CAUSEB_IP;
+	while (pending) {
+		irq = fls(pending) - 1;
+		if (irq < ARRAY_SIZE(irq_wb_chan) && irq_wb_chan[irq] != -1)
+			ath79_ddr_wb_flush(irq_wb_chan[irq]);
+		do_IRQ(MIPS_CPU_IRQ_BASE + irq);
+		pending &= ~BIT(irq);
+	}
+}
+
+static int __init ar79_cpu_intc_of_init(
+	struct device_node *node, struct device_node *parent)
+{
+	int err, i, count;
+
+	/* Fill the irq_wb_chan table */
+	count = of_count_phandle_with_args(
+		node, "qca,ddr-wb-channels", "#qca,ddr-wb-channel-cells");
+
+	for (i = 0; i < count; i++) {
+		struct of_phandle_args args;
+		u32 irq = i;
+
+		of_property_read_u32_index(
+			node, "qca,ddr-wb-channel-interrupts", i, &irq);
+		if (irq >= ARRAY_SIZE(irq_wb_chan))
+			continue;
+
+		err = of_parse_phandle_with_args(
+			node, "qca,ddr-wb-channels",
+			"#qca,ddr-wb-channel-cells",
+			i, &args);
+		if (err)
+			return err;
+
+		irq_wb_chan[irq] = args.args[0];
+	}
+
+	return mips_cpu_irq_of_init(node, parent);
+}
+IRQCHIP_DECLARE(ar79_cpu_intc, "qca,ar7100-cpu-intc",
+		ar79_cpu_intc_of_init);
+
+void __init ath79_cpu_irq_init(unsigned irq_wb_chan2, unsigned irq_wb_chan3)
+{
+	irq_wb_chan[2] = irq_wb_chan2;
+	irq_wb_chan[3] = irq_wb_chan3;
+	mips_cpu_irq_init();
+}

diff --git a/drivers/irqchip/irq-ath79-misc.c b/drivers/irqchip/irq-ath79-misc.c
new file mode 100644
index 0000000..aa72907
--- /dev/null
+++ b/drivers/irqchip/irq-ath79-misc.c

@@ -0,0 +1,189 @@
+/*
+ *  Atheros AR71xx/AR724x/AR913x MISC interrupt controller
+ *
+ *  Copyright (C) 2015 Alban Bedel <albeu@free.fr>
+ *  Copyright (C) 2010-2011 Jaiganesh Narayanan <jnarayanan@atheros.com>
+ *  Copyright (C) 2008-2011 Gabor Juhos <juhosg@openwrt.org>
+ *  Copyright (C) 2008 Imre Kaloz <kaloz@openwrt.org>
+ *
+ *  Parts of this file are based on Atheros' 2.6.15/2.6.31 BSP
+ *
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ */
+
+#include <linux/irqchip.h>
+#include <linux/irqchip/chained_irq.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+
+#define AR71XX_RESET_REG_MISC_INT_STATUS	0
+#define AR71XX_RESET_REG_MISC_INT_ENABLE	4
+
+#define ATH79_MISC_IRQ_COUNT			32
+
+static void ath79_misc_irq_handler(struct irq_desc *desc)
+{
+	struct irq_domain *domain = irq_desc_get_handler_data(desc);
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	void __iomem *base = domain->host_data;
+	u32 pending;
+
+	chained_irq_enter(chip, desc);
+
+	pending = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_STATUS) &
+		  __raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE);
+
+	if (!pending) {
+		spurious_interrupt();
+		chained_irq_exit(chip, desc);
+		return;
+	}
+
+	while (pending) {
+		int bit = __ffs(pending);
+
+		generic_handle_irq(irq_linear_revmap(domain, bit));
+		pending &= ~BIT(bit);
+	}
+
+	chained_irq_exit(chip, desc);
+}
+
+static void ar71xx_misc_irq_unmask(struct irq_data *d)
+{
+	void __iomem *base = irq_data_get_irq_chip_data(d);
+	unsigned int irq = d->hwirq;
+	u32 t;
+
+	t = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE);
+	__raw_writel(t | BIT(irq), base + AR71XX_RESET_REG_MISC_INT_ENABLE);
+
+	/* flush write */
+	__raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE);
+}
+
+static void ar71xx_misc_irq_mask(struct irq_data *d)
+{
+	void __iomem *base = irq_data_get_irq_chip_data(d);
+	unsigned int irq = d->hwirq;
+	u32 t;
+
+	t = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE);
+	__raw_writel(t & ~BIT(irq), base + AR71XX_RESET_REG_MISC_INT_ENABLE);
+
+	/* flush write */
+	__raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE);
+}
+
+static void ar724x_misc_irq_ack(struct irq_data *d)
+{
+	void __iomem *base = irq_data_get_irq_chip_data(d);
+	unsigned int irq = d->hwirq;
+	u32 t;
+
+	t = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_STATUS);
+	__raw_writel(t & ~BIT(irq), base + AR71XX_RESET_REG_MISC_INT_STATUS);
+
+	/* flush write */
+	__raw_readl(base + AR71XX_RESET_REG_MISC_INT_STATUS);
+}
+
+static struct irq_chip ath79_misc_irq_chip = {
+	.name		= "MISC",
+	.irq_unmask	= ar71xx_misc_irq_unmask,
+	.irq_mask	= ar71xx_misc_irq_mask,
+};
+
+static int misc_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw)
+{
+	irq_set_chip_and_handler(irq, &ath79_misc_irq_chip, handle_level_irq);
+	irq_set_chip_data(irq, d->host_data);
+	return 0;
+}
+
+static const struct irq_domain_ops misc_irq_domain_ops = {
+	.xlate = irq_domain_xlate_onecell,
+	.map = misc_map,
+};
+
+static void __init ath79_misc_intc_domain_init(
+	struct irq_domain *domain, int irq)
+{
+	void __iomem *base = domain->host_data;
+
+	/* Disable and clear all interrupts */
+	__raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_ENABLE);
+	__raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_STATUS);
+
+	irq_set_chained_handler_and_data(irq, ath79_misc_irq_handler, domain);
+}
+
+static int __init ath79_misc_intc_of_init(
+	struct device_node *node, struct device_node *parent)
+{
+	struct irq_domain *domain;
+	void __iomem *base;
+	int irq;
+
+	irq = irq_of_parse_and_map(node, 0);
+	if (!irq) {
+		pr_err("Failed to get MISC IRQ\n");
+		return -EINVAL;
+	}
+
+	base = of_iomap(node, 0);
+	if (!base) {
+		pr_err("Failed to get MISC IRQ registers\n");
+		return -ENOMEM;
+	}
+
+	domain = irq_domain_add_linear(node, ATH79_MISC_IRQ_COUNT,
+				&misc_irq_domain_ops, base);
+	if (!domain) {
+		pr_err("Failed to add MISC irqdomain\n");
+		return -EINVAL;
+	}
+
+	ath79_misc_intc_domain_init(domain, irq);
+	return 0;
+}
+
+static int __init ar7100_misc_intc_of_init(
+	struct device_node *node, struct device_node *parent)
+{
+	ath79_misc_irq_chip.irq_mask_ack = ar71xx_misc_irq_mask;
+	return ath79_misc_intc_of_init(node, parent);
+}
+
+IRQCHIP_DECLARE(ar7100_misc_intc, "qca,ar7100-misc-intc",
+		ar7100_misc_intc_of_init);
+
+static int __init ar7240_misc_intc_of_init(
+	struct device_node *node, struct device_node *parent)
+{
+	ath79_misc_irq_chip.irq_ack = ar724x_misc_irq_ack;
+	return ath79_misc_intc_of_init(node, parent);
+}
+
+IRQCHIP_DECLARE(ar7240_misc_intc, "qca,ar7240-misc-intc",
+		ar7240_misc_intc_of_init);
+
+void __init ath79_misc_irq_init(void __iomem *regs, int irq,
+				int irq_base, bool is_ar71xx)
+{
+	struct irq_domain *domain;
+
+	if (is_ar71xx)
+		ath79_misc_irq_chip.irq_mask_ack = ar71xx_misc_irq_mask;
+	else
+		ath79_misc_irq_chip.irq_ack = ar724x_misc_irq_ack;
+
+	domain = irq_domain_add_legacy(NULL, ATH79_MISC_IRQ_COUNT,
+			irq_base, 0, &misc_irq_domain_ops, regs);
+	if (!domain)
+		panic("Failed to create MISC irqdomain");
+
+	ath79_misc_intc_domain_init(domain, irq);
+}

diff --git a/drivers/irqchip/irq-atmel-aic-common.c b/drivers/irqchip/irq-atmel-aic-common.c
index 37199b9..28b26c8 100644
--- a/drivers/irqchip/irq-atmel-aic-common.c
+++ b/drivers/irqchip/irq-atmel-aic-common.c

@@ -80,16 +80,10 @@
 	return 0;
 }
 
-int aic_common_set_priority(int priority, unsigned *val)
+void aic_common_set_priority(int priority, unsigned *val)
 {
-	if (priority < AT91_AIC_IRQ_MIN_PRIORITY ||
-	    priority > AT91_AIC_IRQ_MAX_PRIORITY)
-		return -EINVAL;
-
 	*val &= ~AT91_AIC_PRIOR;
 	*val |= priority;
-
-	return 0;
 }
 
 int aic_common_irq_domain_xlate(struct irq_domain *d,
@@ -193,7 +187,7 @@
 	}
 }
 
-void __init aic_common_irq_fixup(const struct of_device_id *matches)
+static void __init aic_common_irq_fixup(const struct of_device_id *matches)
 {
 	struct device_node *root = of_find_node_by_path("/");
 	const struct of_device_id *match;
@@ -214,7 +208,8 @@
 
 struct irq_domain *__init aic_common_of_init(struct device_node *node,
 					     const struct irq_domain_ops *ops,
-					     const char *name, int nirqs)
+					     const char *name, int nirqs,
+					     const struct of_device_id *matches)
 {
 	struct irq_chip_generic *gc;
 	struct irq_domain *domain;
@@ -264,6 +259,7 @@
 	}
 
 	aic_common_ext_irq_of_init(domain);
+	aic_common_irq_fixup(matches);
 
 	return domain;
 

diff --git a/drivers/irqchip/irq-atmel-aic-common.h b/drivers/irqchip/irq-atmel-aic-common.h
index 603f0a9..af60376 100644
--- a/drivers/irqchip/irq-atmel-aic-common.h
+++ b/drivers/irqchip/irq-atmel-aic-common.h

@@ -19,7 +19,7 @@
 
 int aic_common_set_type(struct irq_data *d, unsigned type, unsigned *val);
 
-int aic_common_set_priority(int priority, unsigned *val);
+void aic_common_set_priority(int priority, unsigned *val);
 
 int aic_common_irq_domain_xlate(struct irq_domain *d,
 				struct device_node *ctrlr,
@@ -30,12 +30,11 @@
 
 struct irq_domain *__init aic_common_of_init(struct device_node *node,
 					     const struct irq_domain_ops *ops,
-					     const char *name, int nirqs);
+					     const char *name, int nirqs,
+					     const struct of_device_id *matches);
 
 void __init aic_common_rtc_irq_fixup(struct device_node *root);
 
 void __init aic_common_rtt_irq_fixup(struct device_node *root);
 
-void __init aic_common_irq_fixup(const struct of_device_id *matches);
-
 #endif /* __IRQ_ATMEL_AIC_COMMON_H */

diff --git a/drivers/irqchip/irq-atmel-aic.c b/drivers/irqchip/irq-atmel-aic.c
index 8a0c7f2..112e17c 100644
--- a/drivers/irqchip/irq-atmel-aic.c
+++ b/drivers/irqchip/irq-atmel-aic.c

@@ -196,9 +196,8 @@
 
 	irq_gc_lock(gc);
 	smr = irq_reg_readl(gc, AT91_AIC_SMR(*out_hwirq));
-	ret = aic_common_set_priority(intspec[2], &smr);
-	if (!ret)
-		irq_reg_writel(gc, smr, AT91_AIC_SMR(*out_hwirq));
+	aic_common_set_priority(intspec[2], &smr);
+	irq_reg_writel(gc, smr, AT91_AIC_SMR(*out_hwirq));
 	irq_gc_unlock(gc);
 
 	return ret;
@@ -248,12 +247,10 @@
 		return -EEXIST;
 
 	domain = aic_common_of_init(node, &aic_irq_ops, "atmel-aic",
-				    NR_AIC_IRQS);
+				    NR_AIC_IRQS, aic_irq_fixups);
 	if (IS_ERR(domain))
 		return PTR_ERR(domain);
 
-	aic_common_irq_fixup(aic_irq_fixups);
-
 	aic_domain = domain;
 	gc = irq_get_domain_generic_chip(domain, 0);
 

diff --git a/drivers/irqchip/irq-atmel-aic5.c b/drivers/irqchip/irq-atmel-aic5.c
index 62bb840..4f0d068 100644
--- a/drivers/irqchip/irq-atmel-aic5.c
+++ b/drivers/irqchip/irq-atmel-aic5.c

@@ -272,9 +272,8 @@
 	irq_gc_lock(bgc);
 	irq_reg_writel(bgc, *out_hwirq, AT91_AIC5_SSR);
 	smr = irq_reg_readl(bgc, AT91_AIC5_SMR);
-	ret = aic_common_set_priority(intspec[2], &smr);
-	if (!ret)
-		irq_reg_writel(bgc, intspec[2] | smr, AT91_AIC5_SMR);
+	aic_common_set_priority(intspec[2], &smr);
+	irq_reg_writel(bgc, smr, AT91_AIC5_SMR);
 	irq_gc_unlock(bgc);
 
 	return ret;
@@ -312,12 +311,10 @@
 		return -EEXIST;
 
 	domain = aic_common_of_init(node, &aic5_irq_ops, "atmel-aic5",
-				    nirqs);
+				    nirqs, aic5_irq_fixups);
 	if (IS_ERR(domain))
 		return PTR_ERR(domain);
 
-	aic_common_irq_fixup(aic5_irq_fixups);
-
 	aic5_domain = domain;
 	nchips = aic5_domain->revmap_size / 32;
 	for (i = 0; i < nchips; i++) {

diff --git a/drivers/irqchip/irq-bcm2836.c b/drivers/irqchip/irq-bcm2836.c
index 963065a..b6e950d 100644
--- a/drivers/irqchip/irq-bcm2836.c
+++ b/drivers/irqchip/irq-bcm2836.c

@@ -229,7 +229,6 @@
 	unsigned long secondary_startup_phys =
 		(unsigned long)virt_to_phys((void *)secondary_startup);
 
-	dsb();
 	writel(secondary_startup_phys,
 	       intc.base + LOCAL_MAILBOX3_SET0 + 16 * cpu);
 

diff --git a/drivers/irqchip/irq-bcm6345-l1.c b/drivers/irqchip/irq-bcm6345-l1.c
new file mode 100644
index 0000000..b844c89
--- /dev/null
+++ b/drivers/irqchip/irq-bcm6345-l1.c

@@ -0,0 +1,364 @@
+/*
+ * Broadcom BCM6345 style Level 1 interrupt controller driver
+ *
+ * Copyright (C) 2014 Broadcom Corporation
+ * Copyright 2015 Simon Arlott
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This is based on the BCM7038 (which supports SMP) but with a single
+ * enable register instead of separate mask/set/clear registers.
+ *
+ * The BCM3380 has a similar mask/status register layout, but each pair
+ * of words is at separate locations (and SMP is not supported).
+ *
+ * ENABLE/STATUS words are packed next to each other for each CPU:
+ *
+ * BCM6368:
+ *   0x1000_0020: CPU0_W0_ENABLE
+ *   0x1000_0024: CPU0_W1_ENABLE
+ *   0x1000_0028: CPU0_W0_STATUS		IRQs 31-63
+ *   0x1000_002c: CPU0_W1_STATUS		IRQs 0-31
+ *   0x1000_0030: CPU1_W0_ENABLE
+ *   0x1000_0034: CPU1_W1_ENABLE
+ *   0x1000_0038: CPU1_W0_STATUS		IRQs 31-63
+ *   0x1000_003c: CPU1_W1_STATUS		IRQs 0-31
+ *
+ * BCM63168:
+ *   0x1000_0020: CPU0_W0_ENABLE
+ *   0x1000_0024: CPU0_W1_ENABLE
+ *   0x1000_0028: CPU0_W2_ENABLE
+ *   0x1000_002c: CPU0_W3_ENABLE
+ *   0x1000_0030: CPU0_W0_STATUS	IRQs 96-127
+ *   0x1000_0034: CPU0_W1_STATUS	IRQs 64-95
+ *   0x1000_0038: CPU0_W2_STATUS	IRQs 32-63
+ *   0x1000_003c: CPU0_W3_STATUS	IRQs 0-31
+ *   0x1000_0040: CPU1_W0_ENABLE
+ *   0x1000_0044: CPU1_W1_ENABLE
+ *   0x1000_0048: CPU1_W2_ENABLE
+ *   0x1000_004c: CPU1_W3_ENABLE
+ *   0x1000_0050: CPU1_W0_STATUS	IRQs 96-127
+ *   0x1000_0054: CPU1_W1_STATUS	IRQs 64-95
+ *   0x1000_0058: CPU1_W2_STATUS	IRQs 32-63
+ *   0x1000_005c: CPU1_W3_STATUS	IRQs 0-31
+ *
+ * IRQs are numbered in CPU native endian order
+ * (which is big-endian in these examples)
+ */
+
+#define pr_fmt(fmt)	KBUILD_MODNAME	": " fmt
+
+#include <linux/bitops.h>
+#include <linux/cpumask.h>
+#include <linux/kconfig.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/ioport.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+#include <linux/types.h>
+#include <linux/irqchip.h>
+#include <linux/irqchip/chained_irq.h>
+
+#define IRQS_PER_WORD		32
+#define REG_BYTES_PER_IRQ_WORD	(sizeof(u32) * 2)
+
+struct bcm6345_l1_cpu;
+
+struct bcm6345_l1_chip {
+	raw_spinlock_t		lock;
+	unsigned int		n_words;
+	struct irq_domain	*domain;
+	struct cpumask		cpumask;
+	struct bcm6345_l1_cpu	*cpus[NR_CPUS];
+};
+
+struct bcm6345_l1_cpu {
+	void __iomem		*map_base;
+	unsigned int		parent_irq;
+	u32			enable_cache[];
+};
+
+static inline unsigned int reg_enable(struct bcm6345_l1_chip *intc,
+					   unsigned int word)
+{
+#ifdef __BIG_ENDIAN
+	return (1 * intc->n_words - word - 1) * sizeof(u32);
+#else
+	return (0 * intc->n_words + word) * sizeof(u32);
+#endif
+}
+
+static inline unsigned int reg_status(struct bcm6345_l1_chip *intc,
+				      unsigned int word)
+{
+#ifdef __BIG_ENDIAN
+	return (2 * intc->n_words - word - 1) * sizeof(u32);
+#else
+	return (1 * intc->n_words + word) * sizeof(u32);
+#endif
+}
+
+static inline unsigned int cpu_for_irq(struct bcm6345_l1_chip *intc,
+					struct irq_data *d)
+{
+	return cpumask_first_and(&intc->cpumask, irq_data_get_affinity_mask(d));
+}
+
+static void bcm6345_l1_irq_handle(struct irq_desc *desc)
+{
+	struct bcm6345_l1_chip *intc = irq_desc_get_handler_data(desc);
+	struct bcm6345_l1_cpu *cpu;
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	unsigned int idx;
+
+#ifdef CONFIG_SMP
+	cpu = intc->cpus[cpu_logical_map(smp_processor_id())];
+#else
+	cpu = intc->cpus[0];
+#endif
+
+	chained_irq_enter(chip, desc);
+
+	for (idx = 0; idx < intc->n_words; idx++) {
+		int base = idx * IRQS_PER_WORD;
+		unsigned long pending;
+		irq_hw_number_t hwirq;
+		unsigned int irq;
+
+		pending = __raw_readl(cpu->map_base + reg_status(intc, idx));
+		pending &= __raw_readl(cpu->map_base + reg_enable(intc, idx));
+
+		for_each_set_bit(hwirq, &pending, IRQS_PER_WORD) {
+			irq = irq_linear_revmap(intc->domain, base + hwirq);
+			if (irq)
+				do_IRQ(irq);
+			else
+				spurious_interrupt();
+		}
+	}
+
+	chained_irq_exit(chip, desc);
+}
+
+static inline void __bcm6345_l1_unmask(struct irq_data *d)
+{
+	struct bcm6345_l1_chip *intc = irq_data_get_irq_chip_data(d);
+	u32 word = d->hwirq / IRQS_PER_WORD;
+	u32 mask = BIT(d->hwirq % IRQS_PER_WORD);
+	unsigned int cpu_idx = cpu_for_irq(intc, d);
+
+	intc->cpus[cpu_idx]->enable_cache[word] |= mask;
+	__raw_writel(intc->cpus[cpu_idx]->enable_cache[word],
+		intc->cpus[cpu_idx]->map_base + reg_enable(intc, word));
+}
+
+static inline void __bcm6345_l1_mask(struct irq_data *d)
+{
+	struct bcm6345_l1_chip *intc = irq_data_get_irq_chip_data(d);
+	u32 word = d->hwirq / IRQS_PER_WORD;
+	u32 mask = BIT(d->hwirq % IRQS_PER_WORD);
+	unsigned int cpu_idx = cpu_for_irq(intc, d);
+
+	intc->cpus[cpu_idx]->enable_cache[word] &= ~mask;
+	__raw_writel(intc->cpus[cpu_idx]->enable_cache[word],
+		intc->cpus[cpu_idx]->map_base + reg_enable(intc, word));
+}
+
+static void bcm6345_l1_unmask(struct irq_data *d)
+{
+	struct bcm6345_l1_chip *intc = irq_data_get_irq_chip_data(d);
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&intc->lock, flags);
+	__bcm6345_l1_unmask(d);
+	raw_spin_unlock_irqrestore(&intc->lock, flags);
+}
+
+static void bcm6345_l1_mask(struct irq_data *d)
+{
+	struct bcm6345_l1_chip *intc = irq_data_get_irq_chip_data(d);
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&intc->lock, flags);
+	__bcm6345_l1_mask(d);
+	raw_spin_unlock_irqrestore(&intc->lock, flags);
+}
+
+static int bcm6345_l1_set_affinity(struct irq_data *d,
+				   const struct cpumask *dest,
+				   bool force)
+{
+	struct bcm6345_l1_chip *intc = irq_data_get_irq_chip_data(d);
+	u32 word = d->hwirq / IRQS_PER_WORD;
+	u32 mask = BIT(d->hwirq % IRQS_PER_WORD);
+	unsigned int old_cpu = cpu_for_irq(intc, d);
+	unsigned int new_cpu;
+	struct cpumask valid;
+	unsigned long flags;
+	bool enabled;
+
+	if (!cpumask_and(&valid, &intc->cpumask, dest))
+		return -EINVAL;
+
+	new_cpu = cpumask_any_and(&valid, cpu_online_mask);
+	if (new_cpu >= nr_cpu_ids)
+		return -EINVAL;
+
+	dest = cpumask_of(new_cpu);
+
+	raw_spin_lock_irqsave(&intc->lock, flags);
+	if (old_cpu != new_cpu) {
+		enabled = intc->cpus[old_cpu]->enable_cache[word] & mask;
+		if (enabled)
+			__bcm6345_l1_mask(d);
+		cpumask_copy(irq_data_get_affinity_mask(d), dest);
+		if (enabled)
+			__bcm6345_l1_unmask(d);
+	} else {
+		cpumask_copy(irq_data_get_affinity_mask(d), dest);
+	}
+	raw_spin_unlock_irqrestore(&intc->lock, flags);
+
+	return IRQ_SET_MASK_OK_NOCOPY;
+}
+
+static int __init bcm6345_l1_init_one(struct device_node *dn,
+				      unsigned int idx,
+				      struct bcm6345_l1_chip *intc)
+{
+	struct resource res;
+	resource_size_t sz;
+	struct bcm6345_l1_cpu *cpu;
+	unsigned int i, n_words;
+
+	if (of_address_to_resource(dn, idx, &res))
+		return -EINVAL;
+	sz = resource_size(&res);
+	n_words = sz / REG_BYTES_PER_IRQ_WORD;
+
+	if (!intc->n_words)
+		intc->n_words = n_words;
+	else if (intc->n_words != n_words)
+		return -EINVAL;
+
+	cpu = intc->cpus[idx] = kzalloc(sizeof(*cpu) + n_words * sizeof(u32),
+					GFP_KERNEL);
+	if (!cpu)
+		return -ENOMEM;
+
+	cpu->map_base = ioremap(res.start, sz);
+	if (!cpu->map_base)
+		return -ENOMEM;
+
+	for (i = 0; i < n_words; i++) {
+		cpu->enable_cache[i] = 0;
+		__raw_writel(0, cpu->map_base + reg_enable(intc, i));
+	}
+
+	cpu->parent_irq = irq_of_parse_and_map(dn, idx);
+	if (!cpu->parent_irq) {
+		pr_err("failed to map parent interrupt %d\n", cpu->parent_irq);
+		return -EINVAL;
+	}
+	irq_set_chained_handler_and_data(cpu->parent_irq,
+						bcm6345_l1_irq_handle, intc);
+
+	return 0;
+}
+
+static struct irq_chip bcm6345_l1_irq_chip = {
+	.name			= "bcm6345-l1",
+	.irq_mask		= bcm6345_l1_mask,
+	.irq_unmask		= bcm6345_l1_unmask,
+	.irq_set_affinity	= bcm6345_l1_set_affinity,
+};
+
+static int bcm6345_l1_map(struct irq_domain *d, unsigned int virq,
+			  irq_hw_number_t hw_irq)
+{
+	irq_set_chip_and_handler(virq,
+		&bcm6345_l1_irq_chip, handle_percpu_irq);
+	irq_set_chip_data(virq, d->host_data);
+	return 0;
+}
+
+static const struct irq_domain_ops bcm6345_l1_domain_ops = {
+	.xlate			= irq_domain_xlate_onecell,
+	.map			= bcm6345_l1_map,
+};
+
+static int __init bcm6345_l1_of_init(struct device_node *dn,
+			      struct device_node *parent)
+{
+	struct bcm6345_l1_chip *intc;
+	unsigned int idx;
+	int ret;
+
+	intc = kzalloc(sizeof(*intc), GFP_KERNEL);
+	if (!intc)
+		return -ENOMEM;
+
+	for_each_possible_cpu(idx) {
+		ret = bcm6345_l1_init_one(dn, idx, intc);
+		if (ret)
+			pr_err("failed to init intc L1 for cpu %d: %d\n",
+				idx, ret);
+		else
+			cpumask_set_cpu(idx, &intc->cpumask);
+	}
+
+	if (!cpumask_weight(&intc->cpumask)) {
+		ret = -ENODEV;
+		goto out_free;
+	}
+
+	raw_spin_lock_init(&intc->lock);
+
+	intc->domain = irq_domain_add_linear(dn, IRQS_PER_WORD * intc->n_words,
+					     &bcm6345_l1_domain_ops,
+					     intc);
+	if (!intc->domain) {
+		ret = -ENOMEM;
+		goto out_unmap;
+	}
+
+	pr_info("registered BCM6345 L1 intc (IRQs: %d)\n",
+			IRQS_PER_WORD * intc->n_words);
+	for_each_cpu(idx, &intc->cpumask) {
+		struct bcm6345_l1_cpu *cpu = intc->cpus[idx];
+
+		pr_info("  CPU%u at MMIO 0x%p (irq = %d)\n", idx,
+				cpu->map_base, cpu->parent_irq);
+	}
+
+	return 0;
+
+out_unmap:
+	for_each_possible_cpu(idx) {
+		struct bcm6345_l1_cpu *cpu = intc->cpus[idx];
+
+		if (cpu) {
+			if (cpu->map_base)
+				iounmap(cpu->map_base);
+			kfree(cpu);
+		}
+	}
+out_free:
+	kfree(intc);
+	return ret;
+}
+
+IRQCHIP_DECLARE(bcm6345_l1, "brcm,bcm6345-l1-intc", bcm6345_l1_of_init);

diff --git a/drivers/irqchip/irq-gic-realview.c b/drivers/irqchip/irq-gic-realview.c
index aa46eb2..54c2964 100644
--- a/drivers/irqchip/irq-gic-realview.c
+++ b/drivers/irqchip/irq-gic-realview.c

@@ -10,7 +10,8 @@
 #include <linux/irqchip/arm-gic.h>
 
 #define REALVIEW_SYS_LOCK_OFFSET	0x20
-#define REALVIEW_PB11MP_SYS_PLD_CTRL1	0x74
+#define REALVIEW_SYS_PLD_CTRL1		0x74
+#define REALVIEW_EB_REVB_SYS_PLD_CTRL1	0xD8
 #define VERSATILE_LOCK_VAL		0xA05F
 #define PLD_INTMODE_MASK		BIT(22)|BIT(23)|BIT(24)
 #define PLD_INTMODE_LEGACY		0x0
@@ -18,26 +19,57 @@
 #define PLD_INTMODE_NEW_NO_DCC		BIT(23)
 #define PLD_INTMODE_FIQ_ENABLE		BIT(24)
 
+/* For some reason RealView EB Rev B moved this register */
+static const struct of_device_id syscon_pldset_of_match[] = {
+	{
+		.compatible = "arm,realview-eb11mp-revb-syscon",
+		.data = (void *)REALVIEW_EB_REVB_SYS_PLD_CTRL1,
+	},
+	{
+		.compatible = "arm,realview-eb11mp-revc-syscon",
+		.data = (void *)REALVIEW_SYS_PLD_CTRL1,
+	},
+	{
+		.compatible = "arm,realview-eb-syscon",
+		.data = (void *)REALVIEW_SYS_PLD_CTRL1,
+	},
+	{
+		.compatible = "arm,realview-pb11mp-syscon",
+		.data = (void *)REALVIEW_SYS_PLD_CTRL1,
+	},
+	{},
+};
+
 static int __init
 realview_gic_of_init(struct device_node *node, struct device_node *parent)
 {
 	static struct regmap *map;
+	struct device_node *np;
+	const struct of_device_id *gic_id;
+	u32 pld1_ctrl;
+
+	np = of_find_matching_node_and_match(NULL, syscon_pldset_of_match,
+					     &gic_id);
+	if (!np)
+		return -ENODEV;
+	pld1_ctrl = (u32)gic_id->data;
 
 	/* The PB11MPCore GIC needs to be configured in the syscon */
-	map = syscon_regmap_lookup_by_compatible("arm,realview-pb11mp-syscon");
+	map = syscon_node_to_regmap(np);
 	if (!IS_ERR(map)) {
 		/* new irq mode with no DCC */
 		regmap_write(map, REALVIEW_SYS_LOCK_OFFSET,
 			     VERSATILE_LOCK_VAL);
-		regmap_update_bits(map, REALVIEW_PB11MP_SYS_PLD_CTRL1,
+		regmap_update_bits(map, pld1_ctrl,
 				   PLD_INTMODE_NEW_NO_DCC,
 				   PLD_INTMODE_MASK);
 		regmap_write(map, REALVIEW_SYS_LOCK_OFFSET, 0x0000);
-		pr_info("TC11MP GIC: set up interrupt controller to NEW mode, no DCC\n");
+		pr_info("RealView GIC: set up interrupt controller to NEW mode, no DCC\n");
 	} else {
-		pr_err("TC11MP GIC setup: could not find syscon\n");
-		return -ENXIO;
+		pr_err("RealView GIC setup: could not find syscon\n");
+		return -ENODEV;
 	}
 	return gic_of_init(node, parent);
 }
 IRQCHIP_DECLARE(armtc11mp_gic, "arm,tc11mp-gic", realview_gic_of_init);
+IRQCHIP_DECLARE(armeb11mp_gic, "arm,eb11mp-gic", realview_gic_of_init);

diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c
index c779f83..28f047c 100644
--- a/drivers/irqchip/irq-gic-v2m.c
+++ b/drivers/irqchip/irq-gic-v2m.c

@@ -92,18 +92,6 @@
 	.chip	= &gicv2m_msi_irq_chip,
 };
 
-static int gicv2m_set_affinity(struct irq_data *irq_data,
-			       const struct cpumask *mask, bool force)
-{
-	int ret;
-
-	ret = irq_chip_set_affinity_parent(irq_data, mask, force);
-	if (ret == IRQ_SET_MASK_OK)
-		ret = IRQ_SET_MASK_OK_DONE;
-
-	return ret;
-}
-
 static void gicv2m_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
 {
 	struct v2m_data *v2m = irq_data_get_irq_chip_data(data);
@@ -122,7 +110,7 @@
 	.irq_mask		= irq_chip_mask_parent,
 	.irq_unmask		= irq_chip_unmask_parent,
 	.irq_eoi		= irq_chip_eoi_parent,
-	.irq_set_affinity	= gicv2m_set_affinity,
+	.irq_set_affinity	= irq_chip_set_affinity_parent,
 	.irq_compose_msi_msg	= gicv2m_compose_msi_msg,
 };
 

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 0a73632..3926179 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c

@@ -78,6 +78,9 @@
 
 #define ITS_ITT_ALIGN		SZ_256
 
+/* Convert page order to size in bytes */
+#define PAGE_ORDER_TO_SIZE(o)	(PAGE_SIZE << (o))
+
 struct event_lpi_map {
 	unsigned long		*lpi_map;
 	u16			*col_map;
@@ -100,7 +103,6 @@
 
 static LIST_HEAD(its_nodes);
 static DEFINE_SPINLOCK(its_lock);
-static struct device_node *gic_root_node;
 static struct rdists *gic_rdists;
 
 #define gic_data_rdist()		(raw_cpu_ptr(gic_rdists->rdist))
@@ -600,11 +602,6 @@
 	lpi_set_config(d, true);
 }
 
-static void its_eoi_irq(struct irq_data *d)
-{
-	gic_write_eoir(d->hwirq);
-}
-
 static int its_set_affinity(struct irq_data *d, const struct cpumask *mask_val,
 			    bool force)
 {
@@ -641,7 +638,7 @@
 	.name			= "ITS",
 	.irq_mask		= its_mask_irq,
 	.irq_unmask		= its_unmask_irq,
-	.irq_eoi		= its_eoi_irq,
+	.irq_eoi		= irq_chip_eoi_parent,
 	.irq_set_affinity	= its_set_affinity,
 	.irq_compose_msi_msg	= its_irq_compose_msi_msg,
 };
@@ -673,7 +670,7 @@
 	return (chunk << IRQS_PER_CHUNK_SHIFT) + 8192;
 }
 
-static int its_lpi_init(u32 id_bits)
+static int __init its_lpi_init(u32 id_bits)
 {
 	lpi_chunks = its_lpi_to_chunk(1UL << id_bits);
 
@@ -846,7 +843,6 @@
 		u64 type = GITS_BASER_TYPE(val);
 		u64 entry_size = GITS_BASER_ENTRY_SIZE(val);
 		int order = get_order(psz);
-		int alloc_size;
 		int alloc_pages;
 		u64 tmp;
 		void *base;
@@ -878,9 +874,8 @@
 			}
 		}
 
-		alloc_size = (1 << order) * PAGE_SIZE;
 retry_alloc_baser:
-		alloc_pages = (alloc_size / psz);
+		alloc_pages = (PAGE_ORDER_TO_SIZE(order) / psz);
 		if (alloc_pages > GITS_BASER_PAGES_MAX) {
 			alloc_pages = GITS_BASER_PAGES_MAX;
 			order = get_order(GITS_BASER_PAGES_MAX * psz);
@@ -933,7 +928,7 @@
 			shr = tmp & GITS_BASER_SHAREABILITY_MASK;
 			if (!shr) {
 				cache = GITS_BASER_nC;
-				__flush_dcache_area(base, alloc_size);
+				__flush_dcache_area(base, PAGE_ORDER_TO_SIZE(order));
 			}
 			goto retry_baser;
 		}
@@ -966,7 +961,7 @@
 		}
 
 		pr_info("ITS: allocated %d %s @%lx (psz %dK, shr %d)\n",
-			(int)(alloc_size / entry_size),
+			(int)(PAGE_ORDER_TO_SIZE(order) / entry_size),
 			its_base_type_string[type],
 			(unsigned long)virt_to_phys(base),
 			psz / SZ_1K, (int)shr >> GITS_BASER_SHAREABILITY_SHIFT);
@@ -1434,7 +1429,8 @@
 	gic_enable_quirks(iidr, its_quirks, its);
 }
 
-static int its_probe(struct device_node *node, struct irq_domain *parent)
+static int __init its_probe(struct device_node *node,
+			    struct irq_domain *parent)
 {
 	struct resource res;
 	struct its_node *its;
@@ -1595,7 +1591,7 @@
 	{},
 };
 
-int its_init(struct device_node *node, struct rdists *rdists,
+int __init its_init(struct device_node *node, struct rdists *rdists,
 	     struct irq_domain *parent_domain)
 {
 	struct device_node *np;
@@ -1611,8 +1607,6 @@
 	}
 
 	gic_rdists = rdists;
-	gic_root_node = node;
-
 	its_alloc_lpi_tables();
 	its_lpi_init(rdists->id_bits);
 

diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index d7be6dd..5b7d3c2 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c

@@ -15,10 +15,12 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <linux/acpi.h>
 #include <linux/cpu.h>
 #include <linux/cpu_pm.h>
 #include <linux/delay.h>
 #include <linux/interrupt.h>
+#include <linux/irqdomain.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
@@ -38,6 +40,7 @@
 struct redist_region {
 	void __iomem		*redist_base;
 	phys_addr_t		phys_base;
+	bool			single_redist;
 };
 
 struct gic_chip_data {
@@ -434,6 +437,9 @@
 				return 0;
 			}
 
+			if (gic_data.redist_regions[i].single_redist)
+				break;
+
 			if (gic_data.redist_stride) {
 				ptr += gic_data.redist_stride;
 			} else {
@@ -634,7 +640,7 @@
 	else
 		gic_dist_wait_for_rwp();
 
-	return IRQ_SET_MASK_OK;
+	return IRQ_SET_MASK_OK_DONE;
 }
 #else
 #define gic_set_affinity	NULL
@@ -764,6 +770,15 @@
 		return 0;
 	}
 
+	if (is_fwnode_irqchip(fwspec->fwnode)) {
+		if(fwspec->param_count != 2)
+			return -EINVAL;
+
+		*hwirq = fwspec->param[0];
+		*type = fwspec->param[1];
+		return 0;
+	}
+
 	return -EINVAL;
 }
 
@@ -811,17 +826,88 @@
 #endif
 }
 
+static int __init gic_init_bases(void __iomem *dist_base,
+				 struct redist_region *rdist_regs,
+				 u32 nr_redist_regions,
+				 u64 redist_stride,
+				 struct fwnode_handle *handle)
+{
+	struct device_node *node;
+	u32 typer;
+	int gic_irqs;
+	int err;
+
+	if (!is_hyp_mode_available())
+		static_key_slow_dec(&supports_deactivate);
+
+	if (static_key_true(&supports_deactivate))
+		pr_info("GIC: Using split EOI/Deactivate mode\n");
+
+	gic_data.dist_base = dist_base;
+	gic_data.redist_regions = rdist_regs;
+	gic_data.nr_redist_regions = nr_redist_regions;
+	gic_data.redist_stride = redist_stride;
+
+	gicv3_enable_quirks();
+
+	/*
+	 * Find out how many interrupts are supported.
+	 * The GIC only supports up to 1020 interrupt sources (SGI+PPI+SPI)
+	 */
+	typer = readl_relaxed(gic_data.dist_base + GICD_TYPER);
+	gic_data.rdists.id_bits = GICD_TYPER_ID_BITS(typer);
+	gic_irqs = GICD_TYPER_IRQS(typer);
+	if (gic_irqs > 1020)
+		gic_irqs = 1020;
+	gic_data.irq_nr = gic_irqs;
+
+	gic_data.domain = irq_domain_create_tree(handle, &gic_irq_domain_ops,
+						 &gic_data);
+	gic_data.rdists.rdist = alloc_percpu(typeof(*gic_data.rdists.rdist));
+
+	if (WARN_ON(!gic_data.domain) || WARN_ON(!gic_data.rdists.rdist)) {
+		err = -ENOMEM;
+		goto out_free;
+	}
+
+	set_handle_irq(gic_handle_irq);
+
+	node = to_of_node(handle);
+	if (IS_ENABLED(CONFIG_ARM_GIC_V3_ITS) && gic_dist_supports_lpis() &&
+	    node) /* Temp hack to prevent ITS init for ACPI */
+		its_init(node, &gic_data.rdists, gic_data.domain);
+
+	gic_smp_init();
+	gic_dist_init();
+	gic_cpu_init();
+	gic_cpu_pm_init();
+
+	return 0;
+
+out_free:
+	if (gic_data.domain)
+		irq_domain_remove(gic_data.domain);
+	free_percpu(gic_data.rdists.rdist);
+	return err;
+}
+
+static int __init gic_validate_dist_version(void __iomem *dist_base)
+{
+	u32 reg = readl_relaxed(dist_base + GICD_PIDR2) & GIC_PIDR2_ARCH_MASK;
+
+	if (reg != GIC_PIDR2_ARCH_GICv3 && reg != GIC_PIDR2_ARCH_GICv4)
+		return -ENODEV;
+
+	return 0;
+}
+
 static int __init gic_of_init(struct device_node *node, struct device_node *parent)
 {
 	void __iomem *dist_base;
 	struct redist_region *rdist_regs;
 	u64 redist_stride;
 	u32 nr_redist_regions;
-	u32 typer;
-	u32 reg;
-	int gic_irqs;
-	int err;
-	int i;
+	int err, i;
 
 	dist_base = of_iomap(node, 0);
 	if (!dist_base) {
@@ -830,11 +916,10 @@
 		return -ENXIO;
 	}
 
-	reg = readl_relaxed(dist_base + GICD_PIDR2) & GIC_PIDR2_ARCH_MASK;
-	if (reg != GIC_PIDR2_ARCH_GICv3 && reg != GIC_PIDR2_ARCH_GICv4) {
+	err = gic_validate_dist_version(dist_base);
+	if (err) {
 		pr_err("%s: no distributor detected, giving up\n",
 			node->full_name);
-		err = -ENODEV;
 		goto out_unmap_dist;
 	}
 
@@ -865,55 +950,11 @@
 	if (of_property_read_u64(node, "redistributor-stride", &redist_stride))
 		redist_stride = 0;
 
-	if (!is_hyp_mode_available())
-		static_key_slow_dec(&supports_deactivate);
+	err = gic_init_bases(dist_base, rdist_regs, nr_redist_regions,
+			     redist_stride, &node->fwnode);
+	if (!err)
+		return 0;
 
-	if (static_key_true(&supports_deactivate))
-		pr_info("GIC: Using split EOI/Deactivate mode\n");
-
-	gic_data.dist_base = dist_base;
-	gic_data.redist_regions = rdist_regs;
-	gic_data.nr_redist_regions = nr_redist_regions;
-	gic_data.redist_stride = redist_stride;
-
-	gicv3_enable_quirks();
-
-	/*
-	 * Find out how many interrupts are supported.
-	 * The GIC only supports up to 1020 interrupt sources (SGI+PPI+SPI)
-	 */
-	typer = readl_relaxed(gic_data.dist_base + GICD_TYPER);
-	gic_data.rdists.id_bits = GICD_TYPER_ID_BITS(typer);
-	gic_irqs = GICD_TYPER_IRQS(typer);
-	if (gic_irqs > 1020)
-		gic_irqs = 1020;
-	gic_data.irq_nr = gic_irqs;
-
-	gic_data.domain = irq_domain_add_tree(node, &gic_irq_domain_ops,
-					      &gic_data);
-	gic_data.rdists.rdist = alloc_percpu(typeof(*gic_data.rdists.rdist));
-
-	if (WARN_ON(!gic_data.domain) || WARN_ON(!gic_data.rdists.rdist)) {
-		err = -ENOMEM;
-		goto out_free;
-	}
-
-	set_handle_irq(gic_handle_irq);
-
-	if (IS_ENABLED(CONFIG_ARM_GIC_V3_ITS) && gic_dist_supports_lpis())
-		its_init(node, &gic_data.rdists, gic_data.domain);
-
-	gic_smp_init();
-	gic_dist_init();
-	gic_cpu_init();
-	gic_cpu_pm_init();
-
-	return 0;
-
-out_free:
-	if (gic_data.domain)
-		irq_domain_remove(gic_data.domain);
-	free_percpu(gic_data.rdists.rdist);
 out_unmap_rdist:
 	for (i = 0; i < nr_redist_regions; i++)
 		if (rdist_regs[i].redist_base)
@@ -925,3 +966,213 @@
 }
 
 IRQCHIP_DECLARE(gic_v3, "arm,gic-v3", gic_of_init);
+
+#ifdef CONFIG_ACPI
+static void __iomem *dist_base;
+static struct redist_region *redist_regs __initdata;
+static u32 nr_redist_regions __initdata;
+static bool single_redist;
+
+static void __init
+gic_acpi_register_redist(phys_addr_t phys_base, void __iomem *redist_base)
+{
+	static int count = 0;
+
+	redist_regs[count].phys_base = phys_base;
+	redist_regs[count].redist_base = redist_base;
+	redist_regs[count].single_redist = single_redist;
+	count++;
+}
+
+static int __init
+gic_acpi_parse_madt_redist(struct acpi_subtable_header *header,
+			   const unsigned long end)
+{
+	struct acpi_madt_generic_redistributor *redist =
+			(struct acpi_madt_generic_redistributor *)header;
+	void __iomem *redist_base;
+
+	redist_base = ioremap(redist->base_address, redist->length);
+	if (!redist_base) {
+		pr_err("Couldn't map GICR region @%llx\n", redist->base_address);
+		return -ENOMEM;
+	}
+
+	gic_acpi_register_redist(redist->base_address, redist_base);
+	return 0;
+}
+
+static int __init
+gic_acpi_parse_madt_gicc(struct acpi_subtable_header *header,
+			 const unsigned long end)
+{
+	struct acpi_madt_generic_interrupt *gicc =
+				(struct acpi_madt_generic_interrupt *)header;
+	u32 reg = readl_relaxed(dist_base + GICD_PIDR2) & GIC_PIDR2_ARCH_MASK;
+	u32 size = reg == GIC_PIDR2_ARCH_GICv4 ? SZ_64K * 4 : SZ_64K * 2;
+	void __iomem *redist_base;
+
+	redist_base = ioremap(gicc->gicr_base_address, size);
+	if (!redist_base)
+		return -ENOMEM;
+
+	gic_acpi_register_redist(gicc->gicr_base_address, redist_base);
+	return 0;
+}
+
+static int __init gic_acpi_collect_gicr_base(void)
+{
+	acpi_tbl_entry_handler redist_parser;
+	enum acpi_madt_type type;
+
+	if (single_redist) {
+		type = ACPI_MADT_TYPE_GENERIC_INTERRUPT;
+		redist_parser = gic_acpi_parse_madt_gicc;
+	} else {
+		type = ACPI_MADT_TYPE_GENERIC_REDISTRIBUTOR;
+		redist_parser = gic_acpi_parse_madt_redist;
+	}
+
+	/* Collect redistributor base addresses in GICR entries */
+	if (acpi_table_parse_madt(type, redist_parser, 0) > 0)
+		return 0;
+
+	pr_info("No valid GICR entries exist\n");
+	return -ENODEV;
+}
+
+static int __init gic_acpi_match_gicr(struct acpi_subtable_header *header,
+				  const unsigned long end)
+{
+	/* Subtable presence means that redist exists, that's it */
+	return 0;
+}
+
+static int __init gic_acpi_match_gicc(struct acpi_subtable_header *header,
+				      const unsigned long end)
+{
+	struct acpi_madt_generic_interrupt *gicc =
+				(struct acpi_madt_generic_interrupt *)header;
+
+	/*
+	 * If GICC is enabled and has valid gicr base address, then it means
+	 * GICR base is presented via GICC
+	 */
+	if ((gicc->flags & ACPI_MADT_ENABLED) && gicc->gicr_base_address)
+		return 0;
+
+	return -ENODEV;
+}
+
+static int __init gic_acpi_count_gicr_regions(void)
+{
+	int count;
+
+	/*
+	 * Count how many redistributor regions we have. It is not allowed
+	 * to mix redistributor description, GICR and GICC subtables have to be
+	 * mutually exclusive.
+	 */
+	count = acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_REDISTRIBUTOR,
+				      gic_acpi_match_gicr, 0);
+	if (count > 0) {
+		single_redist = false;
+		return count;
+	}
+
+	count = acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT,
+				      gic_acpi_match_gicc, 0);
+	if (count > 0)
+		single_redist = true;
+
+	return count;
+}
+
+static bool __init acpi_validate_gic_table(struct acpi_subtable_header *header,
+					   struct acpi_probe_entry *ape)
+{
+	struct acpi_madt_generic_distributor *dist;
+	int count;
+
+	dist = (struct acpi_madt_generic_distributor *)header;
+	if (dist->version != ape->driver_data)
+		return false;
+
+	/* We need to do that exercise anyway, the sooner the better */
+	count = gic_acpi_count_gicr_regions();
+	if (count <= 0)
+		return false;
+
+	nr_redist_regions = count;
+	return true;
+}
+
+#define ACPI_GICV3_DIST_MEM_SIZE (SZ_64K)
+
+static int __init
+gic_acpi_init(struct acpi_subtable_header *header, const unsigned long end)
+{
+	struct acpi_madt_generic_distributor *dist;
+	struct fwnode_handle *domain_handle;
+	int i, err;
+
+	/* Get distributor base address */
+	dist = (struct acpi_madt_generic_distributor *)header;
+	dist_base = ioremap(dist->base_address, ACPI_GICV3_DIST_MEM_SIZE);
+	if (!dist_base) {
+		pr_err("Unable to map GICD registers\n");
+		return -ENOMEM;
+	}
+
+	err = gic_validate_dist_version(dist_base);
+	if (err) {
+		pr_err("No distributor detected at @%p, giving up", dist_base);
+		goto out_dist_unmap;
+	}
+
+	redist_regs = kzalloc(sizeof(*redist_regs) * nr_redist_regions,
+			      GFP_KERNEL);
+	if (!redist_regs) {
+		err = -ENOMEM;
+		goto out_dist_unmap;
+	}
+
+	err = gic_acpi_collect_gicr_base();
+	if (err)
+		goto out_redist_unmap;
+
+	domain_handle = irq_domain_alloc_fwnode(dist_base);
+	if (!domain_handle) {
+		err = -ENOMEM;
+		goto out_redist_unmap;
+	}
+
+	err = gic_init_bases(dist_base, redist_regs, nr_redist_regions, 0,
+			     domain_handle);
+	if (err)
+		goto out_fwhandle_free;
+
+	acpi_set_irq_model(ACPI_IRQ_MODEL_GIC, domain_handle);
+	return 0;
+
+out_fwhandle_free:
+	irq_domain_free_fwnode(domain_handle);
+out_redist_unmap:
+	for (i = 0; i < nr_redist_regions; i++)
+		if (redist_regs[i].redist_base)
+			iounmap(redist_regs[i].redist_base);
+	kfree(redist_regs);
+out_dist_unmap:
+	iounmap(dist_base);
+	return err;
+}
+IRQCHIP_ACPI_DECLARE(gic_v3, ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR,
+		     acpi_validate_gic_table, ACPI_MADT_GIC_VERSION_V3,
+		     gic_acpi_init);
+IRQCHIP_ACPI_DECLARE(gic_v4, ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR,
+		     acpi_validate_gic_table, ACPI_MADT_GIC_VERSION_V4,
+		     gic_acpi_init);
+IRQCHIP_ACPI_DECLARE(gic_v3_or_v4, ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR,
+		     acpi_validate_gic_table, ACPI_MADT_GIC_VERSION_NONE,
+		     gic_acpi_init);
+#endif

diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 8f9ebf7..282344b 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c

@@ -319,7 +319,7 @@
 	writel_relaxed(val | bit, reg);
 	raw_spin_unlock_irqrestore(&irq_controller_lock, flags);
 
-	return IRQ_SET_MASK_OK;
+	return IRQ_SET_MASK_OK_DONE;
 }
 #endif
 

diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c
index 9e17ef2..94a30da 100644
--- a/drivers/irqchip/irq-mips-gic.c
+++ b/drivers/irqchip/irq-mips-gic.c

@@ -29,16 +29,32 @@
 	DECLARE_BITMAP(pcpu_mask, GIC_MAX_INTRS);
 };
 
+struct gic_irq_spec {
+	enum {
+		GIC_DEVICE,
+		GIC_IPI
+	} type;
+
+	union {
+		struct cpumask *ipimask;
+		unsigned int hwirq;
+	};
+};
+
 static unsigned long __gic_base_addr;
+
 static void __iomem *gic_base;
 static struct gic_pcpu_mask pcpu_masks[NR_CPUS];
 static DEFINE_SPINLOCK(gic_lock);
 static struct irq_domain *gic_irq_domain;
+static struct irq_domain *gic_dev_domain;
+static struct irq_domain *gic_ipi_domain;
 static int gic_shared_intrs;
 static int gic_vpes;
 static unsigned int gic_cpu_pin;
 static unsigned int timer_cpu_pin;
 static struct irq_chip gic_level_irq_controller, gic_edge_irq_controller;
+DECLARE_BITMAP(ipi_resrv, GIC_MAX_INTRS);
 
 static void __gic_irq_dispatch(void);
 
@@ -264,9 +280,11 @@
 		  GIC_VPE_EIC_SS(irq), set);
 }
 
-void gic_send_ipi(unsigned int intr)
+static void gic_send_ipi(struct irq_data *d, unsigned int cpu)
 {
-	gic_write(GIC_REG(SHARED, GIC_SH_WEDGE), GIC_SH_WEDGE_SET(intr));
+	irq_hw_number_t hwirq = GIC_HWIRQ_TO_SHARED(irqd_to_hwirq(d));
+
+	gic_write(GIC_REG(SHARED, GIC_SH_WEDGE), GIC_SH_WEDGE_SET(hwirq));
 }
 
 int gic_get_c0_compare_int(void)
@@ -449,7 +467,7 @@
 	gic_map_to_vpe(irq, mips_cm_vp_id(cpumask_first(&tmp)));
 
 	/* Update the pcpu_masks */
-	for (i = 0; i < NR_CPUS; i++)
+	for (i = 0; i < gic_vpes; i++)
 		clear_bit(irq, pcpu_masks[i].pcpu_mask);
 	set_bit(irq, pcpu_masks[cpumask_first(&tmp)].pcpu_mask);
 
@@ -479,6 +497,7 @@
 #ifdef CONFIG_SMP
 	.irq_set_affinity	=	gic_set_affinity,
 #endif
+	.ipi_send_single	=	gic_send_ipi,
 };
 
 static void gic_handle_local_int(bool chained)
@@ -572,83 +591,6 @@
 	gic_handle_shared_int(true);
 }
 
-#ifdef CONFIG_MIPS_GIC_IPI
-static int gic_resched_int_base;
-static int gic_call_int_base;
-
-unsigned int plat_ipi_resched_int_xlate(unsigned int cpu)
-{
-	return gic_resched_int_base + cpu;
-}
-
-unsigned int plat_ipi_call_int_xlate(unsigned int cpu)
-{
-	return gic_call_int_base + cpu;
-}
-
-static irqreturn_t ipi_resched_interrupt(int irq, void *dev_id)
-{
-	scheduler_ipi();
-
-	return IRQ_HANDLED;
-}
-
-static irqreturn_t ipi_call_interrupt(int irq, void *dev_id)
-{
-	generic_smp_call_function_interrupt();
-
-	return IRQ_HANDLED;
-}
-
-static struct irqaction irq_resched = {
-	.handler	= ipi_resched_interrupt,
-	.flags		= IRQF_PERCPU,
-	.name		= "IPI resched"
-};
-
-static struct irqaction irq_call = {
-	.handler	= ipi_call_interrupt,
-	.flags		= IRQF_PERCPU,
-	.name		= "IPI call"
-};
-
-static __init void gic_ipi_init_one(unsigned int intr, int cpu,
-				    struct irqaction *action)
-{
-	int virq = irq_create_mapping(gic_irq_domain,
-				      GIC_SHARED_TO_HWIRQ(intr));
-	int i;
-
-	gic_map_to_vpe(intr, mips_cm_vp_id(cpu));
-	for (i = 0; i < NR_CPUS; i++)
-		clear_bit(intr, pcpu_masks[i].pcpu_mask);
-	set_bit(intr, pcpu_masks[cpu].pcpu_mask);
-
-	irq_set_irq_type(virq, IRQ_TYPE_EDGE_RISING);
-
-	irq_set_handler(virq, handle_percpu_irq);
-	setup_irq(virq, action);
-}
-
-static __init void gic_ipi_init(void)
-{
-	int i;
-
-	/* Use last 2 * NR_CPUS interrupts as IPIs */
-	gic_resched_int_base = gic_shared_intrs - nr_cpu_ids;
-	gic_call_int_base = gic_resched_int_base - nr_cpu_ids;
-
-	for (i = 0; i < nr_cpu_ids; i++) {
-		gic_ipi_init_one(gic_call_int_base + i, i, &irq_call);
-		gic_ipi_init_one(gic_resched_int_base + i, i, &irq_resched);
-	}
-}
-#else
-static inline void gic_ipi_init(void)
-{
-}
-#endif
-
 static void __init gic_basic_init(void)
 {
 	unsigned int i;
@@ -753,19 +695,21 @@
 }
 
 static int gic_shared_irq_domain_map(struct irq_domain *d, unsigned int virq,
-				     irq_hw_number_t hw)
+				     irq_hw_number_t hw, unsigned int vpe)
 {
 	int intr = GIC_HWIRQ_TO_SHARED(hw);
 	unsigned long flags;
+	int i;
 
 	irq_set_chip_and_handler(virq, &gic_level_irq_controller,
 				 handle_level_irq);
 
 	spin_lock_irqsave(&gic_lock, flags);
 	gic_map_to_pin(intr, gic_cpu_pin);
-	/* Map to VPE 0 by default */
-	gic_map_to_vpe(intr, 0);
-	set_bit(intr, pcpu_masks[0].pcpu_mask);
+	gic_map_to_vpe(intr, vpe);
+	for (i = 0; i < gic_vpes; i++)
+		clear_bit(intr, pcpu_masks[i].pcpu_mask);
+	set_bit(intr, pcpu_masks[vpe].pcpu_mask);
 	spin_unlock_irqrestore(&gic_lock, flags);
 
 	return 0;
@@ -776,10 +720,93 @@
 {
 	if (GIC_HWIRQ_TO_LOCAL(hw) < GIC_NUM_LOCAL_INTRS)
 		return gic_local_irq_domain_map(d, virq, hw);
-	return gic_shared_irq_domain_map(d, virq, hw);
+	return gic_shared_irq_domain_map(d, virq, hw, 0);
 }
 
-static int gic_irq_domain_xlate(struct irq_domain *d, struct device_node *ctrlr,
+static int gic_irq_domain_alloc(struct irq_domain *d, unsigned int virq,
+				unsigned int nr_irqs, void *arg)
+{
+	struct gic_irq_spec *spec = arg;
+	irq_hw_number_t hwirq, base_hwirq;
+	int cpu, ret, i;
+
+	if (spec->type == GIC_DEVICE) {
+		/* verify that it doesn't conflict with an IPI irq */
+		if (test_bit(spec->hwirq, ipi_resrv))
+			return -EBUSY;
+	} else {
+		base_hwirq = find_first_bit(ipi_resrv, gic_shared_intrs);
+		if (base_hwirq == gic_shared_intrs) {
+			return -ENOMEM;
+		}
+
+		/* check that we have enough space */
+		for (i = base_hwirq; i < nr_irqs; i++) {
+			if (!test_bit(i, ipi_resrv))
+				return -EBUSY;
+		}
+		bitmap_clear(ipi_resrv, base_hwirq, nr_irqs);
+
+		/* map the hwirq for each cpu consecutively */
+		i = 0;
+		for_each_cpu(cpu, spec->ipimask) {
+			hwirq = GIC_SHARED_TO_HWIRQ(base_hwirq + i);
+
+			ret = irq_domain_set_hwirq_and_chip(d, virq + i, hwirq,
+							    &gic_edge_irq_controller,
+							    NULL);
+			if (ret)
+				goto error;
+
+			ret = gic_shared_irq_domain_map(d, virq + i, hwirq, cpu);
+			if (ret)
+				goto error;
+
+			i++;
+		}
+
+		/*
+		 * tell the parent about the base hwirq we allocated so it can
+		 * set its own domain data
+		 */
+		spec->hwirq = base_hwirq;
+	}
+
+	return 0;
+error:
+	bitmap_set(ipi_resrv, base_hwirq, nr_irqs);
+	return ret;
+}
+
+void gic_irq_domain_free(struct irq_domain *d, unsigned int virq,
+			 unsigned int nr_irqs)
+{
+	irq_hw_number_t base_hwirq;
+	struct irq_data *data;
+
+	data = irq_get_irq_data(virq);
+	if (!data)
+		return;
+
+	base_hwirq = GIC_HWIRQ_TO_SHARED(irqd_to_hwirq(data));
+	bitmap_set(ipi_resrv, base_hwirq, nr_irqs);
+}
+
+int gic_irq_domain_match(struct irq_domain *d, struct device_node *node,
+			 enum irq_domain_bus_token bus_token)
+{
+	/* this domain should'nt be accessed directly */
+	return 0;
+}
+
+static const struct irq_domain_ops gic_irq_domain_ops = {
+	.map = gic_irq_domain_map,
+	.alloc = gic_irq_domain_alloc,
+	.free = gic_irq_domain_free,
+	.match = gic_irq_domain_match,
+};
+
+static int gic_dev_domain_xlate(struct irq_domain *d, struct device_node *ctrlr,
 				const u32 *intspec, unsigned int intsize,
 				irq_hw_number_t *out_hwirq,
 				unsigned int *out_type)
@@ -798,9 +825,130 @@
 	return 0;
 }
 
-static const struct irq_domain_ops gic_irq_domain_ops = {
-	.map = gic_irq_domain_map,
-	.xlate = gic_irq_domain_xlate,
+static int gic_dev_domain_alloc(struct irq_domain *d, unsigned int virq,
+				unsigned int nr_irqs, void *arg)
+{
+	struct irq_fwspec *fwspec = arg;
+	struct gic_irq_spec spec = {
+		.type = GIC_DEVICE,
+		.hwirq = fwspec->param[1],
+	};
+	int i, ret;
+	bool is_shared = fwspec->param[0] == GIC_SHARED;
+
+	if (is_shared) {
+		ret = irq_domain_alloc_irqs_parent(d, virq, nr_irqs, &spec);
+		if (ret)
+			return ret;
+	}
+
+	for (i = 0; i < nr_irqs; i++) {
+		irq_hw_number_t hwirq;
+
+		if (is_shared)
+			hwirq = GIC_SHARED_TO_HWIRQ(spec.hwirq + i);
+		else
+			hwirq = GIC_LOCAL_TO_HWIRQ(spec.hwirq + i);
+
+		ret = irq_domain_set_hwirq_and_chip(d, virq + i,
+						    hwirq,
+						    &gic_level_irq_controller,
+						    NULL);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+void gic_dev_domain_free(struct irq_domain *d, unsigned int virq,
+			 unsigned int nr_irqs)
+{
+	/* no real allocation is done for dev irqs, so no need to free anything */
+	return;
+}
+
+static struct irq_domain_ops gic_dev_domain_ops = {
+	.xlate = gic_dev_domain_xlate,
+	.alloc = gic_dev_domain_alloc,
+	.free = gic_dev_domain_free,
+};
+
+static int gic_ipi_domain_xlate(struct irq_domain *d, struct device_node *ctrlr,
+				const u32 *intspec, unsigned int intsize,
+				irq_hw_number_t *out_hwirq,
+				unsigned int *out_type)
+{
+	/*
+	 * There's nothing to translate here. hwirq is dynamically allocated and
+	 * the irq type is always edge triggered.
+	 * */
+	*out_hwirq = 0;
+	*out_type = IRQ_TYPE_EDGE_RISING;
+
+	return 0;
+}
+
+static int gic_ipi_domain_alloc(struct irq_domain *d, unsigned int virq,
+				unsigned int nr_irqs, void *arg)
+{
+	struct cpumask *ipimask = arg;
+	struct gic_irq_spec spec = {
+		.type = GIC_IPI,
+		.ipimask = ipimask
+	};
+	int ret, i;
+
+	ret = irq_domain_alloc_irqs_parent(d, virq, nr_irqs, &spec);
+	if (ret)
+		return ret;
+
+	/* the parent should have set spec.hwirq to the base_hwirq it allocated */
+	for (i = 0; i < nr_irqs; i++) {
+		ret = irq_domain_set_hwirq_and_chip(d, virq + i,
+						    GIC_SHARED_TO_HWIRQ(spec.hwirq + i),
+						    &gic_edge_irq_controller,
+						    NULL);
+		if (ret)
+			goto error;
+
+		ret = irq_set_irq_type(virq + i, IRQ_TYPE_EDGE_RISING);
+		if (ret)
+			goto error;
+	}
+
+	return 0;
+error:
+	irq_domain_free_irqs_parent(d, virq, nr_irqs);
+	return ret;
+}
+
+void gic_ipi_domain_free(struct irq_domain *d, unsigned int virq,
+			 unsigned int nr_irqs)
+{
+	irq_domain_free_irqs_parent(d, virq, nr_irqs);
+}
+
+int gic_ipi_domain_match(struct irq_domain *d, struct device_node *node,
+			 enum irq_domain_bus_token bus_token)
+{
+	bool is_ipi;
+
+	switch (bus_token) {
+	case DOMAIN_BUS_IPI:
+		is_ipi = d->bus_token == bus_token;
+		return to_of_node(d->fwnode) == node && is_ipi;
+		break;
+	default:
+		return 0;
+	}
+}
+
+static struct irq_domain_ops gic_ipi_domain_ops = {
+	.xlate = gic_ipi_domain_xlate,
+	.alloc = gic_ipi_domain_alloc,
+	.free = gic_ipi_domain_free,
+	.match = gic_ipi_domain_match,
 };
 
 static void __init __gic_init(unsigned long gic_base_addr,
@@ -809,6 +957,7 @@
 			      struct device_node *node)
 {
 	unsigned int gicconfig;
+	unsigned int v[2];
 
 	__gic_base_addr = gic_base_addr;
 
@@ -864,9 +1013,32 @@
 	if (!gic_irq_domain)
 		panic("Failed to add GIC IRQ domain");
 
-	gic_basic_init();
+	gic_dev_domain = irq_domain_add_hierarchy(gic_irq_domain, 0,
+						  GIC_NUM_LOCAL_INTRS + gic_shared_intrs,
+						  node, &gic_dev_domain_ops, NULL);
+	if (!gic_dev_domain)
+		panic("Failed to add GIC DEV domain");
 
-	gic_ipi_init();
+	gic_ipi_domain = irq_domain_add_hierarchy(gic_irq_domain,
+						  IRQ_DOMAIN_FLAG_IPI_PER_CPU,
+						  GIC_NUM_LOCAL_INTRS + gic_shared_intrs,
+						  node, &gic_ipi_domain_ops, NULL);
+	if (!gic_ipi_domain)
+		panic("Failed to add GIC IPI domain");
+
+	gic_ipi_domain->bus_token = DOMAIN_BUS_IPI;
+
+	if (node &&
+	    !of_property_read_u32_array(node, "mti,reserved-ipi-vectors", v, 2)) {
+		bitmap_set(ipi_resrv, v[0], v[1]);
+	} else {
+		/* Make the last 2 * gic_vpes available for IPIs */
+		bitmap_set(ipi_resrv,
+			   gic_shared_intrs - 2 * gic_vpes,
+			   2 * gic_vpes);
+	}
+
+	gic_basic_init();
 }
 
 void __init gic_init(unsigned long gic_base_addr,

diff --git a/drivers/irqchip/irq-mvebu-odmi.c b/drivers/irqchip/irq-mvebu-odmi.c
new file mode 100644
index 0000000..b4d3678
--- /dev/null
+++ b/drivers/irqchip/irq-mvebu-odmi.c

@@ -0,0 +1,236 @@
+/*
+ * Copyright (C) 2016 Marvell
+ *
+ * Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#define pr_fmt(fmt) "GIC-ODMI: " fmt
+
+#include <linux/irq.h>
+#include <linux/irqchip.h>
+#include <linux/irqdomain.h>
+#include <linux/kernel.h>
+#include <linux/msi.h>
+#include <linux/of_address.h>
+#include <linux/slab.h>
+#include <dt-bindings/interrupt-controller/arm-gic.h>
+
+#define GICP_ODMIN_SET			0x40
+#define   GICP_ODMI_INT_NUM_SHIFT	12
+#define GICP_ODMIN_GM_EP_R0		0x110
+#define GICP_ODMIN_GM_EP_R1		0x114
+#define GICP_ODMIN_GM_EA_R0		0x108
+#define GICP_ODMIN_GM_EA_R1		0x118
+
+/*
+ * We don't support the group events, so we simply have 8 interrupts
+ * per frame.
+ */
+#define NODMIS_SHIFT		3
+#define NODMIS_PER_FRAME	(1 << NODMIS_SHIFT)
+#define NODMIS_MASK		(NODMIS_PER_FRAME - 1)
+
+struct odmi_data {
+	struct resource res;
+	void __iomem *base;
+	unsigned int spi_base;
+};
+
+static struct odmi_data *odmis;
+static unsigned long *odmis_bm;
+static unsigned int odmis_count;
+
+/* Protects odmis_bm */
+static DEFINE_SPINLOCK(odmis_bm_lock);
+
+static void odmi_compose_msi_msg(struct irq_data *d, struct msi_msg *msg)
+{
+	struct odmi_data *odmi;
+	phys_addr_t addr;
+	unsigned int odmin;
+
+	if (WARN_ON(d->hwirq >= odmis_count * NODMIS_PER_FRAME))
+		return;
+
+	odmi = &odmis[d->hwirq >> NODMIS_SHIFT];
+	odmin = d->hwirq & NODMIS_MASK;
+
+	addr = odmi->res.start + GICP_ODMIN_SET;
+
+	msg->address_hi = upper_32_bits(addr);
+	msg->address_lo = lower_32_bits(addr);
+	msg->data = odmin << GICP_ODMI_INT_NUM_SHIFT;
+}
+
+static struct irq_chip odmi_irq_chip = {
+	.name			= "ODMI",
+	.irq_mask		= irq_chip_mask_parent,
+	.irq_unmask		= irq_chip_unmask_parent,
+	.irq_eoi		= irq_chip_eoi_parent,
+	.irq_set_affinity	= irq_chip_set_affinity_parent,
+	.irq_compose_msi_msg	= odmi_compose_msi_msg,
+};
+
+static int odmi_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
+				 unsigned int nr_irqs, void *args)
+{
+	struct odmi_data *odmi = NULL;
+	struct irq_fwspec fwspec;
+	struct irq_data *d;
+	unsigned int hwirq, odmin;
+	int ret;
+
+	spin_lock(&odmis_bm_lock);
+	hwirq = find_first_zero_bit(odmis_bm, NODMIS_PER_FRAME * odmis_count);
+	if (hwirq >= NODMIS_PER_FRAME * odmis_count) {
+		spin_unlock(&odmis_bm_lock);
+		return -ENOSPC;
+	}
+
+	__set_bit(hwirq, odmis_bm);
+	spin_unlock(&odmis_bm_lock);
+
+	odmi = &odmis[hwirq >> NODMIS_SHIFT];
+	odmin = hwirq & NODMIS_MASK;
+
+	fwspec.fwnode = domain->parent->fwnode;
+	fwspec.param_count = 3;
+	fwspec.param[0] = GIC_SPI;
+	fwspec.param[1] = odmi->spi_base - 32 + odmin;
+	fwspec.param[2] = IRQ_TYPE_EDGE_RISING;
+
+	ret = irq_domain_alloc_irqs_parent(domain, virq, 1, &fwspec);
+	if (ret) {
+		pr_err("Cannot allocate parent IRQ\n");
+		spin_lock(&odmis_bm_lock);
+		__clear_bit(odmin, odmis_bm);
+		spin_unlock(&odmis_bm_lock);
+		return ret;
+	}
+
+	/* Configure the interrupt line to be edge */
+	d = irq_domain_get_irq_data(domain->parent, virq);
+	d->chip->irq_set_type(d, IRQ_TYPE_EDGE_RISING);
+
+	irq_domain_set_hwirq_and_chip(domain, virq, hwirq,
+				      &odmi_irq_chip, NULL);
+
+	return 0;
+}
+
+static void odmi_irq_domain_free(struct irq_domain *domain,
+				 unsigned int virq, unsigned int nr_irqs)
+{
+	struct irq_data *d = irq_domain_get_irq_data(domain, virq);
+
+	if (d->hwirq >= odmis_count * NODMIS_PER_FRAME) {
+		pr_err("Failed to teardown msi. Invalid hwirq %lu\n", d->hwirq);
+		return;
+	}
+
+	irq_domain_free_irqs_parent(domain, virq, nr_irqs);
+
+	/* Actually free the MSI */
+	spin_lock(&odmis_bm_lock);
+	__clear_bit(d->hwirq, odmis_bm);
+	spin_unlock(&odmis_bm_lock);
+}
+
+static const struct irq_domain_ops odmi_domain_ops = {
+	.alloc	= odmi_irq_domain_alloc,
+	.free	= odmi_irq_domain_free,
+};
+
+static struct irq_chip odmi_msi_irq_chip = {
+	.name	= "ODMI",
+};
+
+static struct msi_domain_ops odmi_msi_ops = {
+};
+
+static struct msi_domain_info odmi_msi_domain_info = {
+	.flags	= (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS),
+	.ops	= &odmi_msi_ops,
+	.chip	= &odmi_msi_irq_chip,
+};
+
+static int __init mvebu_odmi_init(struct device_node *node,
+				  struct device_node *parent)
+{
+	struct irq_domain *inner_domain, *plat_domain;
+	int ret, i;
+
+	if (of_property_read_u32(node, "marvell,odmi-frames", &odmis_count))
+		return -EINVAL;
+
+	odmis = kcalloc(odmis_count, sizeof(struct odmi_data), GFP_KERNEL);
+	if (!odmis)
+		return -ENOMEM;
+
+	odmis_bm = kcalloc(BITS_TO_LONGS(odmis_count * NODMIS_PER_FRAME),
+			   sizeof(long), GFP_KERNEL);
+	if (!odmis_bm) {
+		ret = -ENOMEM;
+		goto err_alloc;
+	}
+
+	for (i = 0; i < odmis_count; i++) {
+		struct odmi_data *odmi = &odmis[i];
+
+		ret = of_address_to_resource(node, i, &odmi->res);
+		if (ret)
+			goto err_unmap;
+
+		odmi->base = of_io_request_and_map(node, i, "odmi");
+		if (IS_ERR(odmi->base)) {
+			ret = PTR_ERR(odmi->base);
+			goto err_unmap;
+		}
+
+		if (of_property_read_u32_index(node, "marvell,spi-base",
+					       i, &odmi->spi_base)) {
+			ret = -EINVAL;
+			goto err_unmap;
+		}
+	}
+
+	inner_domain = irq_domain_create_linear(of_node_to_fwnode(node),
+						odmis_count * NODMIS_PER_FRAME,
+						&odmi_domain_ops, NULL);
+	if (!inner_domain) {
+		ret = -ENOMEM;
+		goto err_unmap;
+	}
+
+	inner_domain->parent = irq_find_host(parent);
+
+	plat_domain = platform_msi_create_irq_domain(of_node_to_fwnode(node),
+						     &odmi_msi_domain_info,
+						     inner_domain);
+	if (!plat_domain) {
+		ret = -ENOMEM;
+		goto err_remove_inner;
+	}
+
+	return 0;
+
+err_remove_inner:
+	irq_domain_remove(inner_domain);
+err_unmap:
+	for (i = 0; i < odmis_count; i++) {
+		struct odmi_data *odmi = &odmis[i];
+
+		if (odmi->base && !IS_ERR(odmi->base))
+			iounmap(odmis[i].base);
+	}
+	kfree(odmis_bm);
+err_alloc:
+	kfree(odmis);
+	return ret;
+}
+
+IRQCHIP_DECLARE(mvebu_odmi, "marvell,odmi-controller", mvebu_odmi_init);

diff --git a/drivers/irqchip/irq-mxs.c b/drivers/irqchip/irq-mxs.c
index efe5084..1730470 100644
--- a/drivers/irqchip/irq-mxs.c
+++ b/drivers/irqchip/irq-mxs.c

@@ -183,7 +183,7 @@
 	void __iomem *icoll_base;
 
 	icoll_base = of_io_request_and_map(np, 0, np->name);
-	if (!icoll_base)
+	if (IS_ERR(icoll_base))
 		panic("%s: unable to map resource", np->full_name);
 	return icoll_base;
 }

diff --git a/drivers/irqchip/irq-sunxi-nmi.c b/drivers/irqchip/irq-sunxi-nmi.c
index 0820f67..668730c 100644
--- a/drivers/irqchip/irq-sunxi-nmi.c
+++ b/drivers/irqchip/irq-sunxi-nmi.c

@@ -160,9 +160,9 @@
 
 	gc = irq_get_domain_generic_chip(domain, 0);
 	gc->reg_base = of_io_request_and_map(node, 0, of_node_full_name(node));
-	if (!gc->reg_base) {
+	if (IS_ERR(gc->reg_base)) {
 		pr_err("unable to map resource\n");
-		ret = -ENOMEM;
+		ret = PTR_ERR(gc->reg_base);
 		goto fail_irqd_remove;
 	}
 

diff --git a/drivers/irqchip/irq-tango.c b/drivers/irqchip/irq-tango.c
new file mode 100644
index 0000000..bdbb5c0
--- /dev/null
+++ b/drivers/irqchip/irq-tango.c

@@ -0,0 +1,232 @@
+/*
+ * Copyright (C) 2014 Mans Rullgard <mans@mansr.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/irqchip.h>
+#include <linux/irqchip/chained_irq.h>
+#include <linux/ioport.h>
+#include <linux/io.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/slab.h>
+
+#define IRQ0_CTL_BASE		0x0000
+#define IRQ1_CTL_BASE		0x0100
+#define EDGE_CTL_BASE		0x0200
+#define IRQ2_CTL_BASE		0x0300
+
+#define IRQ_CTL_HI		0x18
+#define EDGE_CTL_HI		0x20
+
+#define IRQ_STATUS		0x00
+#define IRQ_RAWSTAT		0x04
+#define IRQ_EN_SET		0x08
+#define IRQ_EN_CLR		0x0c
+#define IRQ_SOFT_SET		0x10
+#define IRQ_SOFT_CLR		0x14
+
+#define EDGE_STATUS		0x00
+#define EDGE_RAWSTAT		0x04
+#define EDGE_CFG_RISE		0x08
+#define EDGE_CFG_FALL		0x0c
+#define EDGE_CFG_RISE_SET	0x10
+#define EDGE_CFG_RISE_CLR	0x14
+#define EDGE_CFG_FALL_SET	0x18
+#define EDGE_CFG_FALL_CLR	0x1c
+
+struct tangox_irq_chip {
+	void __iomem *base;
+	unsigned long ctl;
+};
+
+static inline u32 intc_readl(struct tangox_irq_chip *chip, int reg)
+{
+	return readl_relaxed(chip->base + reg);
+}
+
+static inline void intc_writel(struct tangox_irq_chip *chip, int reg, u32 val)
+{
+	writel_relaxed(val, chip->base + reg);
+}
+
+static void tangox_dispatch_irqs(struct irq_domain *dom, unsigned int status,
+				 int base)
+{
+	unsigned int hwirq;
+	unsigned int virq;
+
+	while (status) {
+		hwirq = __ffs(status);
+		virq = irq_find_mapping(dom, base + hwirq);
+		if (virq)
+			generic_handle_irq(virq);
+		status &= ~BIT(hwirq);
+	}
+}
+
+static void tangox_irq_handler(struct irq_desc *desc)
+{
+	struct irq_domain *dom = irq_desc_get_handler_data(desc);
+	struct irq_chip *host_chip = irq_desc_get_chip(desc);
+	struct tangox_irq_chip *chip = dom->host_data;
+	unsigned int status_lo, status_hi;
+
+	chained_irq_enter(host_chip, desc);
+
+	status_lo = intc_readl(chip, chip->ctl + IRQ_STATUS);
+	status_hi = intc_readl(chip, chip->ctl + IRQ_CTL_HI + IRQ_STATUS);
+
+	tangox_dispatch_irqs(dom, status_lo, 0);
+	tangox_dispatch_irqs(dom, status_hi, 32);
+
+	chained_irq_exit(host_chip, desc);
+}
+
+static int tangox_irq_set_type(struct irq_data *d, unsigned int flow_type)
+{
+	struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
+	struct tangox_irq_chip *chip = gc->domain->host_data;
+	struct irq_chip_regs *regs = &gc->chip_types[0].regs;
+
+	switch (flow_type & IRQ_TYPE_SENSE_MASK) {
+	case IRQ_TYPE_EDGE_RISING:
+		intc_writel(chip, regs->type + EDGE_CFG_RISE_SET, d->mask);
+		intc_writel(chip, regs->type + EDGE_CFG_FALL_CLR, d->mask);
+		break;
+
+	case IRQ_TYPE_EDGE_FALLING:
+		intc_writel(chip, regs->type + EDGE_CFG_RISE_CLR, d->mask);
+		intc_writel(chip, regs->type + EDGE_CFG_FALL_SET, d->mask);
+		break;
+
+	case IRQ_TYPE_LEVEL_HIGH:
+		intc_writel(chip, regs->type + EDGE_CFG_RISE_CLR, d->mask);
+		intc_writel(chip, regs->type + EDGE_CFG_FALL_CLR, d->mask);
+		break;
+
+	case IRQ_TYPE_LEVEL_LOW:
+		intc_writel(chip, regs->type + EDGE_CFG_RISE_SET, d->mask);
+		intc_writel(chip, regs->type + EDGE_CFG_FALL_SET, d->mask);
+		break;
+
+	default:
+		pr_err("Invalid trigger mode %x for IRQ %d\n",
+		       flow_type, d->irq);
+		return -EINVAL;
+	}
+
+	return irq_setup_alt_chip(d, flow_type);
+}
+
+static void __init tangox_irq_init_chip(struct irq_chip_generic *gc,
+					unsigned long ctl_offs,
+					unsigned long edge_offs)
+{
+	struct tangox_irq_chip *chip = gc->domain->host_data;
+	struct irq_chip_type *ct = gc->chip_types;
+	unsigned long ctl_base = chip->ctl + ctl_offs;
+	unsigned long edge_base = EDGE_CTL_BASE + edge_offs;
+	int i;
+
+	gc->reg_base = chip->base;
+	gc->unused = 0;
+
+	for (i = 0; i < 2; i++) {
+		ct[i].chip.irq_ack = irq_gc_ack_set_bit;
+		ct[i].chip.irq_mask = irq_gc_mask_disable_reg;
+		ct[i].chip.irq_mask_ack = irq_gc_mask_disable_reg_and_ack;
+		ct[i].chip.irq_unmask = irq_gc_unmask_enable_reg;
+		ct[i].chip.irq_set_type = tangox_irq_set_type;
+		ct[i].chip.name = gc->domain->name;
+
+		ct[i].regs.enable = ctl_base + IRQ_EN_SET;
+		ct[i].regs.disable = ctl_base + IRQ_EN_CLR;
+		ct[i].regs.ack = edge_base + EDGE_RAWSTAT;
+		ct[i].regs.type = edge_base;
+	}
+
+	ct[0].type = IRQ_TYPE_LEVEL_MASK;
+	ct[0].handler = handle_level_irq;
+
+	ct[1].type = IRQ_TYPE_EDGE_BOTH;
+	ct[1].handler = handle_edge_irq;
+
+	intc_writel(chip, ct->regs.disable, 0xffffffff);
+	intc_writel(chip, ct->regs.ack, 0xffffffff);
+}
+
+static void __init tangox_irq_domain_init(struct irq_domain *dom)
+{
+	struct irq_chip_generic *gc;
+	int i;
+
+	for (i = 0; i < 2; i++) {
+		gc = irq_get_domain_generic_chip(dom, i * 32);
+		tangox_irq_init_chip(gc, i * IRQ_CTL_HI, i * EDGE_CTL_HI);
+	}
+}
+
+static int __init tangox_irq_init(void __iomem *base, struct resource *baseres,
+				  struct device_node *node)
+{
+	struct tangox_irq_chip *chip;
+	struct irq_domain *dom;
+	struct resource res;
+	int irq;
+	int err;
+
+	irq = irq_of_parse_and_map(node, 0);
+	if (!irq)
+		panic("%s: failed to get IRQ", node->name);
+
+	err = of_address_to_resource(node, 0, &res);
+	if (err)
+		panic("%s: failed to get address", node->name);
+
+	chip = kzalloc(sizeof(*chip), GFP_KERNEL);
+	chip->ctl = res.start - baseres->start;
+	chip->base = base;
+
+	dom = irq_domain_add_linear(node, 64, &irq_generic_chip_ops, chip);
+	if (!dom)
+		panic("%s: failed to create irqdomain", node->name);
+
+	err = irq_alloc_domain_generic_chips(dom, 32, 2, node->name,
+					     handle_level_irq, 0, 0, 0);
+	if (err)
+		panic("%s: failed to allocate irqchip", node->name);
+
+	tangox_irq_domain_init(dom);
+
+	irq_set_chained_handler(irq, tangox_irq_handler);
+	irq_set_handler_data(irq, dom);
+
+	return 0;
+}
+
+static int __init tangox_of_irq_init(struct device_node *node,
+				     struct device_node *parent)
+{
+	struct device_node *c;
+	struct resource res;
+	void __iomem *base;
+
+	base = of_iomap(node, 0);
+	if (!base)
+		panic("%s: of_iomap failed", node->name);
+
+	of_address_to_resource(node, 0, &res);
+
+	for_each_child_of_node(node, c)
+		tangox_irq_init(base, &res, c);
+
+	return 0;
+}
+IRQCHIP_DECLARE(tangox_intc, "sigma,smp8642-intc", tangox_of_irq_init);

diff --git a/drivers/irqchip/irq-ts4800.c b/drivers/irqchip/irq-ts4800.c
index 4192bdc..2325fb3 100644
--- a/drivers/irqchip/irq-ts4800.c
+++ b/drivers/irqchip/irq-ts4800.c

@@ -59,7 +59,7 @@
 	return 0;
 }
 
-struct irq_domain_ops ts4800_ic_ops = {
+static const struct irq_domain_ops ts4800_ic_ops = {
 	.map = ts4800_irqdomain_map,
 	.xlate = irq_domain_xlate_onecell,
 };

diff --git a/drivers/isdn/gigaset/ser-gigaset.c b/drivers/isdn/gigaset/ser-gigaset.c
index 2a506fe..d1f8ab9 100644
--- a/drivers/isdn/gigaset/ser-gigaset.c
+++ b/drivers/isdn/gigaset/ser-gigaset.c

@@ -373,13 +373,7 @@
 
 static void gigaset_device_release(struct device *dev)
 {
-	struct cardstate *cs = dev_get_drvdata(dev);
-
-	if (!cs)
-		return;
-	dev_set_drvdata(dev, NULL);
-	kfree(cs->hw.ser);
-	cs->hw.ser = NULL;
+	kfree(container_of(dev, struct ser_cardstate, dev.dev));
 }
 
 /*
@@ -408,7 +402,6 @@
 		cs->hw.ser = NULL;
 		return rc;
 	}
-	dev_set_drvdata(&cs->hw.ser->dev.dev, cs);
 
 	tasklet_init(&cs->write_tasklet,
 		     gigaset_modem_fill, (unsigned long) cs);

diff --git a/drivers/isdn/hardware/mISDN/netjet.c b/drivers/isdn/hardware/mISDN/netjet.c
index 8e29447..afde4ed 100644
--- a/drivers/isdn/hardware/mISDN/netjet.c
+++ b/drivers/isdn/hardware/mISDN/netjet.c

@@ -392,7 +392,7 @@
 	}
 	stat = bchannel_get_rxbuf(&bc->bch, cnt);
 	/* only transparent use the count here, HDLC overun is detected later */
-	if (stat == ENOMEM) {
+	if (stat == -ENOMEM) {
 		pr_warning("%s.B%d: No memory for %d bytes\n",
 			   card->name, bc->bch.nr, cnt);
 		return;

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 5df4048..dd83492 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c

@@ -1191,6 +1191,8 @@
 
 	if (clone)
 		free_rq_clone(clone);
+	else if (!tio->md->queue->mq_ops)
+		free_rq_tio(tio);
 }
 
 /*

diff --git a/drivers/media/i2c/adp1653.c b/drivers/media/i2c/adp1653.c
index 7e9cbf7..fb7ed73 100644
--- a/drivers/media/i2c/adp1653.c
+++ b/drivers/media/i2c/adp1653.c

@@ -497,7 +497,7 @@
 		if (!client->dev.platform_data) {
 			dev_err(&client->dev,
 				"Neither DT not platform data provided\n");
-			return EINVAL;
+			return -EINVAL;
 		}
 		flash->platform_data = client->dev.platform_data;
 	}

diff --git a/drivers/media/i2c/adv7604.c b/drivers/media/i2c/adv7604.c
index f8dd750..e1719ff 100644
--- a/drivers/media/i2c/adv7604.c
+++ b/drivers/media/i2c/adv7604.c

@@ -1960,10 +1960,9 @@
 	}
 
 	/* tx 5v detect */
-	tx_5v = io_read(sd, 0x70) & info->cable_det_mask;
+	tx_5v = irq_reg_0x70 & info->cable_det_mask;
 	if (tx_5v) {
 		v4l2_dbg(1, debug, sd, "%s: tx_5v: 0x%x\n", __func__, tx_5v);
-		io_write(sd, 0x71, tx_5v);
 		adv76xx_s_detect_tx_5v_ctrl(sd);
 		if (handled)
 			*handled = true;

diff --git a/drivers/media/media-device.c b/drivers/media/media-device.c
index 7dae0ac..e9219f5 100644
--- a/drivers/media/media-device.c
+++ b/drivers/media/media-device.c

@@ -20,6 +20,9 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
+/* We need to access legacy defines from linux/media.h */
+#define __NEED_MEDIA_LEGACY_API
+
 #include <linux/compat.h>
 #include <linux/export.h>
 #include <linux/idr.h>
@@ -115,6 +118,26 @@
 	u_ent.group_id = 0;		/* Unused */
 	u_ent.pads = ent->num_pads;
 	u_ent.links = ent->num_links - ent->num_backlinks;
+
+	/*
+	 * Workaround for a bug at media-ctl <= v1.10 that makes it to
+	 * do the wrong thing if the entity function doesn't belong to
+	 * either MEDIA_ENT_F_OLD_BASE or MEDIA_ENT_F_OLD_SUBDEV_BASE
+	 * Ranges.
+	 *
+	 * Non-subdevices are expected to be at the MEDIA_ENT_F_OLD_BASE,
+	 * or, otherwise, will be silently ignored by media-ctl when
+	 * printing the graphviz diagram. So, map them into the devnode
+	 * old range.
+	 */
+	if (ent->function < MEDIA_ENT_F_OLD_BASE ||
+	    ent->function > MEDIA_ENT_T_DEVNODE_UNKNOWN) {
+		if (is_media_entity_v4l2_subdev(ent))
+			u_ent.type = MEDIA_ENT_F_V4L2_SUBDEV_UNKNOWN;
+		else if (ent->function != MEDIA_ENT_F_IO_V4L)
+			u_ent.type = MEDIA_ENT_T_DEVNODE_UNKNOWN;
+	}
+
 	memcpy(&u_ent.raw, &ent->info, sizeof(ent->info));
 	if (copy_to_user(uent, &u_ent, sizeof(u_ent)))
 		return -EFAULT;

diff --git a/drivers/media/platform/coda/coda-bit.c b/drivers/media/platform/coda/coda-bit.c
index 7d28899..38aacc7 100644
--- a/drivers/media/platform/coda/coda-bit.c
+++ b/drivers/media/platform/coda/coda-bit.c

@@ -1455,9 +1455,9 @@
 		return 0;
 
 	ctx->bitstream.size = roundup_pow_of_two(q_data->sizeimage * 2);
-	ctx->bitstream.vaddr = dma_alloc_writecombine(
-			&ctx->dev->plat_dev->dev, ctx->bitstream.size,
-			&ctx->bitstream.paddr, GFP_KERNEL);
+	ctx->bitstream.vaddr = dma_alloc_wc(&ctx->dev->plat_dev->dev,
+					    ctx->bitstream.size,
+					    &ctx->bitstream.paddr, GFP_KERNEL);
 	if (!ctx->bitstream.vaddr) {
 		v4l2_err(&ctx->dev->v4l2_dev,
 			 "failed to allocate bitstream ringbuffer");
@@ -1474,8 +1474,8 @@
 	if (ctx->bitstream.vaddr == NULL)
 		return;
 
-	dma_free_writecombine(&ctx->dev->plat_dev->dev, ctx->bitstream.size,
-			      ctx->bitstream.vaddr, ctx->bitstream.paddr);
+	dma_free_wc(&ctx->dev->plat_dev->dev, ctx->bitstream.size,
+		    ctx->bitstream.vaddr, ctx->bitstream.paddr);
 	ctx->bitstream.vaddr = NULL;
 	kfifo_init(&ctx->bitstream_fifo, NULL, 0);
 }

diff --git a/drivers/media/usb/au0828/au0828-video.c b/drivers/media/usb/au0828/au0828-video.c
index 8c54fd2..a136257 100644
--- a/drivers/media/usb/au0828/au0828-video.c
+++ b/drivers/media/usb/au0828/au0828-video.c

@@ -1843,8 +1843,7 @@
 			ent->function = MEDIA_ENT_F_CONN_RF;
 			break;
 		default: /* AU0828_VMUX_DEBUG */
-			ent->function = MEDIA_ENT_F_CONN_TEST;
-			break;
+			continue;
 		}
 
 		ret = media_entity_pads_init(ent, 1, &dev->input_pad[i]);

diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 4c1903f..0c6c17a1 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c

@@ -415,7 +415,7 @@
 		delta = mftb() - psl_tb;
 		if (delta < 0)
 			delta = -delta;
-	} while (cputime_to_usecs(delta) > 16);
+	} while (tb_to_ns(delta) > 16000);
 
 	return 0;
 }

diff --git a/drivers/misc/lkdtm.c b/drivers/misc/lkdtm.c
index 11fdadc..2a6eaf1 100644
--- a/drivers/misc/lkdtm.c
+++ b/drivers/misc/lkdtm.c

@@ -103,6 +103,7 @@
 	CT_EXEC_USERSPACE,
 	CT_ACCESS_USERSPACE,
 	CT_WRITE_RO,
+	CT_WRITE_RO_AFTER_INIT,
 	CT_WRITE_KERN,
 };
 
@@ -140,6 +141,7 @@
 	"EXEC_USERSPACE",
 	"ACCESS_USERSPACE",
 	"WRITE_RO",
+	"WRITE_RO_AFTER_INIT",
 	"WRITE_KERN",
 };
 
@@ -162,6 +164,7 @@
 static u8 data_area[EXEC_SIZE];
 
 static const unsigned long rodata = 0xAA55AA55;
+static unsigned long ro_after_init __ro_after_init = 0x55AA5500;
 
 module_param(recur_count, int, 0644);
 MODULE_PARM_DESC(recur_count, " Recursion level for the stack overflow test");
@@ -503,11 +506,28 @@
 		break;
 	}
 	case CT_WRITE_RO: {
-		unsigned long *ptr;
+		/* Explicitly cast away "const" for the test. */
+		unsigned long *ptr = (unsigned long *)&rodata;
 
-		ptr = (unsigned long *)&rodata;
+		pr_info("attempting bad rodata write at %p\n", ptr);
+		*ptr ^= 0xabcd1234;
 
-		pr_info("attempting bad write at %p\n", ptr);
+		break;
+	}
+	case CT_WRITE_RO_AFTER_INIT: {
+		unsigned long *ptr = &ro_after_init;
+
+		/*
+		 * Verify we were written to during init. Since an Oops
+		 * is considered a "success", a failure is to just skip the
+		 * real test.
+		 */
+		if ((*ptr & 0xAA) != 0xAA) {
+			pr_info("%p was NOT written during init!?\n", ptr);
+			break;
+		}
+
+		pr_info("attempting bad ro_after_init write at %p\n", ptr);
 		*ptr ^= 0xabcd1234;
 
 		break;
@@ -817,6 +837,9 @@
 	int n_debugfs_entries = 1; /* Assume only the direct entry */
 	int i;
 
+	/* Make sure we can write to __ro_after_init values during __init */
+	ro_after_init |= 0xAA;
+
 	/* Register debugfs interface */
 	lkdtm_debugfs_root = debugfs_create_dir("provoke-crash", NULL);
 	if (!lkdtm_debugfs_root) {

diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index b6639ea..f6e4d97 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c

@@ -2232,6 +2232,7 @@
 		dma_release_channel(host->tx_chan);
 	if (host->rx_chan)
 		dma_release_channel(host->rx_chan);
+	pm_runtime_dont_use_autosuspend(host->dev);
 	pm_runtime_put_sync(host->dev);
 	pm_runtime_disable(host->dev);
 	if (host->dbclk)
@@ -2253,6 +2254,7 @@
 	dma_release_channel(host->tx_chan);
 	dma_release_channel(host->rx_chan);
 
+	pm_runtime_dont_use_autosuspend(host->dev);
 	pm_runtime_put_sync(host->dev);
 	pm_runtime_disable(host->dev);
 	device_init_wakeup(&pdev->dev, false);

diff --git a/drivers/mtd/tests/mtd_nandecctest.c b/drivers/mtd/tests/mtd_nandecctest.c
index 7931615..88b6c81 100644
--- a/drivers/mtd/tests/mtd_nandecctest.c
+++ b/drivers/mtd/tests/mtd_nandecctest.c

@@ -187,7 +187,7 @@
 	__nand_calculate_ecc(error_data, size, calc_ecc);
 	ret = __nand_correct_data(error_data, error_ecc, calc_ecc, size);
 
-	return (ret == -1) ? 0 : -EINVAL;
+	return (ret == -EBADMSG) ? 0 : -EINVAL;
 }
 
 static const struct nand_ecc_test nand_ecc_test[] = {

diff --git a/drivers/mtd/ubi/upd.c b/drivers/mtd/ubi/upd.c
index 2a1b6e0..0134ba3 100644
--- a/drivers/mtd/ubi/upd.c
+++ b/drivers/mtd/ubi/upd.c

@@ -193,7 +193,7 @@
 	vol->changing_leb = 1;
 	vol->ch_lnum = req->lnum;
 
-	vol->upd_buf = vmalloc(req->bytes);
+	vol->upd_buf = vmalloc(ALIGN((int)req->bytes, ubi->min_io_size));
 	if (!vol->upd_buf)
 		return -ENOMEM;
 

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 56b5605..b7f1a99 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c

@@ -214,6 +214,8 @@
 static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev,
 						struct rtnl_link_stats64 *stats);
 static void bond_slave_arr_handler(struct work_struct *work);
+static bool bond_time_in_interval(struct bonding *bond, unsigned long last_act,
+				  int mod);
 
 /*---------------------------- General routines -----------------------------*/
 
@@ -2127,6 +2129,7 @@
 			continue;
 
 		case BOND_LINK_UP:
+			bond_update_speed_duplex(slave);
 			bond_set_slave_link_state(slave, BOND_LINK_UP,
 						  BOND_SLAVE_NOTIFY_NOW);
 			slave->last_link_up = jiffies;
@@ -2459,7 +2462,7 @@
 		 struct slave *slave)
 {
 	struct arphdr *arp = (struct arphdr *)skb->data;
-	struct slave *curr_active_slave;
+	struct slave *curr_active_slave, *curr_arp_slave;
 	unsigned char *arp_ptr;
 	__be32 sip, tip;
 	int alen, is_arp = skb->protocol == __cpu_to_be16(ETH_P_ARP);
@@ -2506,26 +2509,41 @@
 		     &sip, &tip);
 
 	curr_active_slave = rcu_dereference(bond->curr_active_slave);
+	curr_arp_slave = rcu_dereference(bond->current_arp_slave);
 
-	/* Backup slaves won't see the ARP reply, but do come through
-	 * here for each ARP probe (so we swap the sip/tip to validate
-	 * the probe).  In a "redundant switch, common router" type of
-	 * configuration, the ARP probe will (hopefully) travel from
-	 * the active, through one switch, the router, then the other
-	 * switch before reaching the backup.
+	/* We 'trust' the received ARP enough to validate it if:
 	 *
-	 * We 'trust' the arp requests if there is an active slave and
-	 * it received valid arp reply(s) after it became active. This
-	 * is done to avoid endless looping when we can't reach the
+	 * (a) the slave receiving the ARP is active (which includes the
+	 * current ARP slave, if any), or
+	 *
+	 * (b) the receiving slave isn't active, but there is a currently
+	 * active slave and it received valid arp reply(s) after it became
+	 * the currently active slave, or
+	 *
+	 * (c) there is an ARP slave that sent an ARP during the prior ARP
+	 * interval, and we receive an ARP reply on any slave.  We accept
+	 * these because switch FDB update delays may deliver the ARP
+	 * reply to a slave other than the sender of the ARP request.
+	 *
+	 * Note: for (b), backup slaves are receiving the broadcast ARP
+	 * request, not a reply.  This request passes from the sending
+	 * slave through the L2 switch(es) to the receiving slave.  Since
+	 * this is checking the request, sip/tip are swapped for
+	 * validation.
+	 *
+	 * This is done to avoid endless looping when we can't reach the
 	 * arp_ip_target and fool ourselves with our own arp requests.
 	 */
-
 	if (bond_is_active_slave(slave))
 		bond_validate_arp(bond, slave, sip, tip);
 	else if (curr_active_slave &&
 		 time_after(slave_last_rx(bond, curr_active_slave),
 			    curr_active_slave->last_link_up))
 		bond_validate_arp(bond, slave, tip, sip);
+	else if (curr_arp_slave && (arp->ar_op == htons(ARPOP_REPLY)) &&
+		 bond_time_in_interval(bond,
+				       dev_trans_start(curr_arp_slave->dev), 1))
+		bond_validate_arp(bond, slave, sip, tip);
 
 out_unlock:
 	if (arp != (struct arphdr *)skb->data)

diff --git a/drivers/net/can/spi/mcp251x.c b/drivers/net/can/spi/mcp251x.c
index 575790e..74a7dfe 100644
--- a/drivers/net/can/spi/mcp251x.c
+++ b/drivers/net/can/spi/mcp251x.c

@@ -843,7 +843,7 @@
 		if (clear_intf)
 			mcp251x_write_bits(spi, CANINTF, clear_intf, 0x00);
 
-		if (eflag)
+		if (eflag & (EFLG_RX0OVR | EFLG_RX1OVR))
 			mcp251x_write_bits(spi, EFLG, eflag, 0x00);
 
 		/* Update can state */

diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c
index fc5b756..eb7192f 100644
--- a/drivers/net/can/usb/ems_usb.c
+++ b/drivers/net/can/usb/ems_usb.c

@@ -117,6 +117,9 @@
  */
 #define EMS_USB_ARM7_CLOCK 8000000
 
+#define CPC_TX_QUEUE_TRIGGER_LOW	25
+#define CPC_TX_QUEUE_TRIGGER_HIGH	35
+
 /*
  * CAN-Message representation in a CPC_MSG. Message object type is
  * CPC_MSG_TYPE_CAN_FRAME or CPC_MSG_TYPE_RTR_FRAME or
@@ -278,6 +281,11 @@
 	switch (urb->status) {
 	case 0:
 		dev->free_slots = dev->intr_in_buffer[1];
+		if(dev->free_slots > CPC_TX_QUEUE_TRIGGER_HIGH){
+			if (netif_queue_stopped(netdev)){
+				netif_wake_queue(netdev);
+			}
+		}
 		break;
 
 	case -ECONNRESET: /* unlink */
@@ -526,8 +534,6 @@
 	/* Release context */
 	context->echo_index = MAX_TX_URBS;
 
-	if (netif_queue_stopped(netdev))
-		netif_wake_queue(netdev);
 }
 
 /*
@@ -587,7 +593,7 @@
 	int err, i;
 
 	dev->intr_in_buffer[0] = 0;
-	dev->free_slots = 15; /* initial size */
+	dev->free_slots = 50; /* initial size */
 
 	for (i = 0; i < MAX_RX_URBS; i++) {
 		struct urb *urb = NULL;
@@ -835,7 +841,7 @@
 
 		/* Slow down tx path */
 		if (atomic_read(&dev->active_tx_urbs) >= MAX_TX_URBS ||
-		    dev->free_slots < 5) {
+		    dev->free_slots < CPC_TX_QUEUE_TRIGGER_LOW) {
 			netif_stop_queue(netdev);
 		}
 	}

diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c
index 5eee62b..cbc99d5 100644
--- a/drivers/net/can/usb/gs_usb.c
+++ b/drivers/net/can/usb/gs_usb.c

@@ -826,9 +826,8 @@
 static void gs_destroy_candev(struct gs_can *dev)
 {
 	unregister_candev(dev->netdev);
-	free_candev(dev->netdev);
 	usb_kill_anchored_urbs(&dev->tx_submitted);
-	kfree(dev);
+	free_candev(dev->netdev);
 }
 
 static int gs_usb_probe(struct usb_interface *intf, const struct usb_device_id *id)
@@ -913,12 +912,15 @@
 	for (i = 0; i < icount; i++) {
 		dev->canch[i] = gs_make_candev(i, intf);
 		if (IS_ERR_OR_NULL(dev->canch[i])) {
+			/* save error code to return later */
+			rc = PTR_ERR(dev->canch[i]);
+
 			/* on failure destroy previously created candevs */
 			icount = i;
-			for (i = 0; i < icount; i++) {
+			for (i = 0; i < icount; i++)
 				gs_destroy_candev(dev->canch[i]);
-				dev->canch[i] = NULL;
-			}
+
+			usb_kill_anchored_urbs(&dev->rx_submitted);
 			kfree(dev);
 			return rc;
 		}
@@ -939,16 +941,12 @@
 		return;
 	}
 
-	for (i = 0; i < GS_MAX_INTF; i++) {
-		struct gs_can *can = dev->canch[i];
-
-		if (!can)
-			continue;
-
-		gs_destroy_candev(can);
-	}
+	for (i = 0; i < GS_MAX_INTF; i++)
+		if (dev->canch[i])
+			gs_destroy_candev(dev->canch[i]);
 
 	usb_kill_anchored_urbs(&dev->rx_submitted);
+	kfree(dev);
 }
 
 static const struct usb_device_id gs_usb_table[] = {

diff --git a/drivers/net/dsa/mv88e6352.c b/drivers/net/dsa/mv88e6352.c
index cc6c545..a47f52f 100644
--- a/drivers/net/dsa/mv88e6352.c
+++ b/drivers/net/dsa/mv88e6352.c

@@ -25,6 +25,7 @@
 static const struct mv88e6xxx_switch_id mv88e6352_table[] = {
 	{ PORT_SWITCH_ID_6172, "Marvell 88E6172" },
 	{ PORT_SWITCH_ID_6176, "Marvell 88E6176" },
+	{ PORT_SWITCH_ID_6240, "Marvell 88E6240" },
 	{ PORT_SWITCH_ID_6320, "Marvell 88E6320" },
 	{ PORT_SWITCH_ID_6320_A1, "Marvell 88E6320 (A1)" },
 	{ PORT_SWITCH_ID_6320_A2, "Marvell 88e6320 (A2)" },

diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c
index cf34681..512c8c0 100644
--- a/drivers/net/dsa/mv88e6xxx.c
+++ b/drivers/net/dsa/mv88e6xxx.c

@@ -1555,7 +1555,7 @@
 
 	if (vlan.vid != vid || !vlan.valid ||
 	    vlan.data[port] == GLOBAL_VTU_DATA_MEMBER_TAG_NON_MEMBER)
-		return -ENOENT;
+		return -EOPNOTSUPP;
 
 	vlan.data[port] = GLOBAL_VTU_DATA_MEMBER_TAG_NON_MEMBER;
 
@@ -1582,6 +1582,7 @@
 			    const struct switchdev_obj_port_vlan *vlan)
 {
 	struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
+	const u16 defpvid = 4000 + ds->index * DSA_MAX_PORTS + port;
 	u16 pvid, vid;
 	int err = 0;
 
@@ -1597,7 +1598,8 @@
 			goto unlock;
 
 		if (vid == pvid) {
-			err = _mv88e6xxx_port_pvid_set(ds, port, 0);
+			/* restore reserved VLAN ID */
+			err = _mv88e6xxx_port_pvid_set(ds, port, defpvid);
 			if (err)
 				goto unlock;
 		}
@@ -1889,26 +1891,20 @@
 
 int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, int port, u32 members)
 {
-	struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
-	const u16 pvid = 4000 + ds->index * DSA_MAX_PORTS + port;
-	int err;
-
-	/* The port joined a bridge, so leave its reserved VLAN */
-	mutex_lock(&ps->smi_mutex);
-	err = _mv88e6xxx_port_vlan_del(ds, port, pvid);
-	if (!err)
-		err = _mv88e6xxx_port_pvid_set(ds, port, 0);
-	mutex_unlock(&ps->smi_mutex);
-	return err;
+	return 0;
 }
 
 int mv88e6xxx_port_bridge_leave(struct dsa_switch *ds, int port, u32 members)
 {
+	return 0;
+}
+
+static int mv88e6xxx_setup_port_default_vlan(struct dsa_switch *ds, int port)
+{
 	struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
 	const u16 pvid = 4000 + ds->index * DSA_MAX_PORTS + port;
 	int err;
 
-	/* The port left the bridge, so join its reserved VLAN */
 	mutex_lock(&ps->smi_mutex);
 	err = _mv88e6xxx_port_vlan_add(ds, port, pvid, true);
 	if (!err)
@@ -2192,8 +2188,7 @@
 		if (dsa_is_cpu_port(ds, i) || dsa_is_dsa_port(ds, i))
 			continue;
 
-		/* setup the unbridged state */
-		ret = mv88e6xxx_port_bridge_leave(ds, i, 0);
+		ret = mv88e6xxx_setup_port_default_vlan(ds, i);
 		if (ret < 0)
 			return ret;
 	}

diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c
index 79e1a02..17b2126 100644
--- a/drivers/net/ethernet/3com/3c59x.c
+++ b/drivers/net/ethernet/3com/3c59x.c

@@ -2461,7 +2461,7 @@
 					int i;
 					pci_unmap_single(VORTEX_PCI(vp),
 							le32_to_cpu(vp->tx_ring[entry].frag[0].addr),
-							le32_to_cpu(vp->tx_ring[entry].frag[0].length),
+							le32_to_cpu(vp->tx_ring[entry].frag[0].length)&0xFFF,
 							PCI_DMA_TODEVICE);
 
 					for (i=1; i<=skb_shinfo(skb)->nr_frags; i++)

diff --git a/drivers/net/ethernet/8390/pcnet_cs.c b/drivers/net/ethernet/8390/pcnet_cs.c
index 2777289..2f79d29 100644
--- a/drivers/net/ethernet/8390/pcnet_cs.c
+++ b/drivers/net/ethernet/8390/pcnet_cs.c

@@ -1501,6 +1501,7 @@
 	PCMCIA_DEVICE_MANF_CARD(0x026f, 0x030a),
 	PCMCIA_DEVICE_MANF_CARD(0x0274, 0x1103),
 	PCMCIA_DEVICE_MANF_CARD(0x0274, 0x1121),
+	PCMCIA_DEVICE_MANF_CARD(0xc001, 0x0009),
 	PCMCIA_DEVICE_PROD_ID12("2408LAN", "Ethernet", 0x352fff7f, 0x00b2e941),
 	PCMCIA_DEVICE_PROD_ID1234("Socket", "CF 10/100 Ethernet Card", "Revision B", "05/11/06", 0xb38bcc2e, 0x4de88352, 0xeaca6c8d, 0x7e57c22e),
 	PCMCIA_DEVICE_PROD_ID123("Cardwell", "PCMCIA", "ETHERNET", 0x9533672e, 0x281f1c5d, 0x3ff7175b),

diff --git a/drivers/net/ethernet/agere/et131x.c b/drivers/net/ethernet/agere/et131x.c
index 3f3bcbe..0907ab6 100644
--- a/drivers/net/ethernet/agere/et131x.c
+++ b/drivers/net/ethernet/agere/et131x.c

@@ -2380,7 +2380,7 @@
 						    sizeof(u32),
 						    &tx_ring->tx_status_pa,
 						    GFP_KERNEL);
-	if (!tx_ring->tx_status_pa) {
+	if (!tx_ring->tx_status) {
 		dev_err(&adapter->pdev->dev,
 			"Cannot alloc memory for Tx status block\n");
 		return -ENOMEM;

diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c
index 1747285..f749e4d 100644
--- a/drivers/net/ethernet/altera/altera_tse_main.c
+++ b/drivers/net/ethernet/altera/altera_tse_main.c

@@ -193,7 +193,6 @@
 			    priv->mdio->id);
 
 	mdiobus_unregister(priv->mdio);
-	kfree(priv->mdio->irq);
 	mdiobus_free(priv->mdio);
 	priv->mdio = NULL;
 }

diff --git a/drivers/net/ethernet/amd/am79c961a.c b/drivers/net/ethernet/amd/am79c961a.c
index 87e727b..fcdf5dd 100644
--- a/drivers/net/ethernet/amd/am79c961a.c
+++ b/drivers/net/ethernet/amd/am79c961a.c

@@ -50,8 +50,8 @@
 static void write_rreg(u_long base, u_int reg, u_int val)
 {
 	asm volatile(
-	"str%?h	%1, [%2]	@ NET_RAP\n\t"
-	"str%?h	%0, [%2, #-4]	@ NET_RDP"
+	"strh	%1, [%2]	@ NET_RAP\n\t"
+	"strh	%0, [%2, #-4]	@ NET_RDP"
 	:
 	: "r" (val), "r" (reg), "r" (ISAIO_BASE + 0x0464));
 }
@@ -60,8 +60,8 @@
 {
 	unsigned short v;
 	asm volatile(
-	"str%?h	%1, [%2]	@ NET_RAP\n\t"
-	"ldr%?h	%0, [%2, #-4]	@ NET_RDP"
+	"strh	%1, [%2]	@ NET_RAP\n\t"
+	"ldrh	%0, [%2, #-4]	@ NET_RDP"
 	: "=r" (v)
 	: "r" (reg), "r" (ISAIO_BASE + 0x0464));
 	return v;
@@ -70,8 +70,8 @@
 static inline void write_ireg(u_long base, u_int reg, u_int val)
 {
 	asm volatile(
-	"str%?h	%1, [%2]	@ NET_RAP\n\t"
-	"str%?h	%0, [%2, #8]	@ NET_IDP"
+	"strh	%1, [%2]	@ NET_RAP\n\t"
+	"strh	%0, [%2, #8]	@ NET_IDP"
 	:
 	: "r" (val), "r" (reg), "r" (ISAIO_BASE + 0x0464));
 }
@@ -80,8 +80,8 @@
 {
 	u_short v;
 	asm volatile(
-	"str%?h	%1, [%2]	@ NAT_RAP\n\t"
-	"ldr%?h	%0, [%2, #8]	@ NET_IDP\n\t"
+	"strh	%1, [%2]	@ NAT_RAP\n\t"
+	"ldrh	%0, [%2, #8]	@ NET_IDP\n\t"
 	: "=r" (v)
 	: "r" (reg), "r" (ISAIO_BASE + 0x0464));
 	return v;
@@ -96,7 +96,7 @@
 	offset = ISAMEM_BASE + (offset << 1);
 	length = (length + 1) & ~1;
 	if ((int)buf & 2) {
-		asm volatile("str%?h	%2, [%0], #4"
+		asm volatile("strh	%2, [%0], #4"
 		 : "=&r" (offset) : "0" (offset), "r" (buf[0] | (buf[1] << 8)));
 		buf += 2;
 		length -= 2;
@@ -104,20 +104,20 @@
 	while (length > 8) {
 		register unsigned int tmp asm("r2"), tmp2 asm("r3");
 		asm volatile(
-			"ldm%?ia	%0!, {%1, %2}"
+			"ldmia	%0!, {%1, %2}"
 			: "+r" (buf), "=&r" (tmp), "=&r" (tmp2));
 		length -= 8;
 		asm volatile(
-			"str%?h	%1, [%0], #4\n\t"
-			"mov%?	%1, %1, lsr #16\n\t"
-			"str%?h	%1, [%0], #4\n\t"
-			"str%?h	%2, [%0], #4\n\t"
-			"mov%?	%2, %2, lsr #16\n\t"
-			"str%?h	%2, [%0], #4"
+			"strh	%1, [%0], #4\n\t"
+			"mov	%1, %1, lsr #16\n\t"
+			"strh	%1, [%0], #4\n\t"
+			"strh	%2, [%0], #4\n\t"
+			"mov	%2, %2, lsr #16\n\t"
+			"strh	%2, [%0], #4"
 		: "+r" (offset), "=&r" (tmp), "=&r" (tmp2));
 	}
 	while (length > 0) {
-		asm volatile("str%?h	%2, [%0], #4"
+		asm volatile("strh	%2, [%0], #4"
 		 : "=&r" (offset) : "0" (offset), "r" (buf[0] | (buf[1] << 8)));
 		buf += 2;
 		length -= 2;
@@ -132,23 +132,23 @@
 	if ((int)buf & 2) {
 		unsigned int tmp;
 		asm volatile(
-			"ldr%?h	%2, [%0], #4\n\t"
-			"str%?b	%2, [%1], #1\n\t"
-			"mov%?	%2, %2, lsr #8\n\t"
-			"str%?b	%2, [%1], #1"
+			"ldrh	%2, [%0], #4\n\t"
+			"strb	%2, [%1], #1\n\t"
+			"mov	%2, %2, lsr #8\n\t"
+			"strb	%2, [%1], #1"
 		: "=&r" (offset), "=&r" (buf), "=r" (tmp): "0" (offset), "1" (buf));
 		length -= 2;
 	}
 	while (length > 8) {
 		register unsigned int tmp asm("r2"), tmp2 asm("r3"), tmp3;
 		asm volatile(
-			"ldr%?h	%2, [%0], #4\n\t"
-			"ldr%?h	%4, [%0], #4\n\t"
-			"ldr%?h	%3, [%0], #4\n\t"
-			"orr%?	%2, %2, %4, lsl #16\n\t"
-			"ldr%?h	%4, [%0], #4\n\t"
-			"orr%?	%3, %3, %4, lsl #16\n\t"
-			"stm%?ia	%1!, {%2, %3}"
+			"ldrh	%2, [%0], #4\n\t"
+			"ldrh	%4, [%0], #4\n\t"
+			"ldrh	%3, [%0], #4\n\t"
+			"orr	%2, %2, %4, lsl #16\n\t"
+			"ldrh	%4, [%0], #4\n\t"
+			"orr	%3, %3, %4, lsl #16\n\t"
+			"stmia	%1!, {%2, %3}"
 		: "=&r" (offset), "=&r" (buf), "=r" (tmp), "=r" (tmp2), "=r" (tmp3)
 		: "0" (offset), "1" (buf));
 		length -= 8;
@@ -156,10 +156,10 @@
 	while (length > 0) {
 		unsigned int tmp;
 		asm volatile(
-			"ldr%?h	%2, [%0], #4\n\t"
-			"str%?b	%2, [%1], #1\n\t"
-			"mov%?	%2, %2, lsr #8\n\t"
-			"str%?b	%2, [%1], #1"
+			"ldrh	%2, [%0], #4\n\t"
+			"strb	%2, [%1], #1\n\t"
+			"mov	%2, %2, lsr #8\n\t"
+			"strb	%2, [%1], #1"
 		: "=&r" (offset), "=&r" (buf), "=r" (tmp) : "0" (offset), "1" (buf));
 		length -= 2;
 	}

diff --git a/drivers/net/ethernet/amd/lance.c b/drivers/net/ethernet/amd/lance.c
index 256f590..3a7ebfd 100644
--- a/drivers/net/ethernet/amd/lance.c
+++ b/drivers/net/ethernet/amd/lance.c

@@ -547,8 +547,8 @@
 	/* Make certain the data structures used by the LANCE are aligned and DMAble. */
 
 	lp = kzalloc(sizeof(*lp), GFP_DMA | GFP_KERNEL);
-	if(lp==NULL)
-		return -ENODEV;
+	if (!lp)
+		return -ENOMEM;
 	if (lance_debug > 6) printk(" (#0x%05lx)", (unsigned long)lp);
 	dev->ml_priv = lp;
 	lp->name = chipname;

diff --git a/drivers/net/ethernet/arc/emac_main.c b/drivers/net/ethernet/arc/emac_main.c
index abe1eab..6446af1 100644
--- a/drivers/net/ethernet/arc/emac_main.c
+++ b/drivers/net/ethernet/arc/emac_main.c

@@ -163,7 +163,7 @@
 		struct sk_buff *skb = tx_buff->skb;
 		unsigned int info = le32_to_cpu(txbd->info);
 
-		if ((info & FOR_EMAC) || !txbd->data)
+		if ((info & FOR_EMAC) || !txbd->data || !skb)
 			break;
 
 		if (unlikely(info & (DROP | DEFR | LTCL | UFLO))) {
@@ -191,6 +191,7 @@
 
 		txbd->data = 0;
 		txbd->info = 0;
+		tx_buff->skb = NULL;
 
 		*txbd_dirty = (*txbd_dirty + 1) % TX_BD_NUM;
 	}
@@ -446,6 +447,9 @@
 		*last_rx_bd = (*last_rx_bd + 1) % RX_BD_NUM;
 	}
 
+	priv->txbd_curr = 0;
+	priv->txbd_dirty = 0;
+
 	/* Clean Tx BD's */
 	memset(priv->txbd, 0, TX_RING_SZ);
 
@@ -514,6 +518,64 @@
 }
 
 /**
+ * arc_free_tx_queue - free skb from tx queue
+ * @ndev:	Pointer to the network device.
+ *
+ * This function must be called while EMAC disable
+ */
+static void arc_free_tx_queue(struct net_device *ndev)
+{
+	struct arc_emac_priv *priv = netdev_priv(ndev);
+	unsigned int i;
+
+	for (i = 0; i < TX_BD_NUM; i++) {
+		struct arc_emac_bd *txbd = &priv->txbd[i];
+		struct buffer_state *tx_buff = &priv->tx_buff[i];
+
+		if (tx_buff->skb) {
+			dma_unmap_single(&ndev->dev, dma_unmap_addr(tx_buff, addr),
+					 dma_unmap_len(tx_buff, len), DMA_TO_DEVICE);
+
+			/* return the sk_buff to system */
+			dev_kfree_skb_irq(tx_buff->skb);
+		}
+
+		txbd->info = 0;
+		txbd->data = 0;
+		tx_buff->skb = NULL;
+	}
+}
+
+/**
+ * arc_free_rx_queue - free skb from rx queue
+ * @ndev:	Pointer to the network device.
+ *
+ * This function must be called while EMAC disable
+ */
+static void arc_free_rx_queue(struct net_device *ndev)
+{
+	struct arc_emac_priv *priv = netdev_priv(ndev);
+	unsigned int i;
+
+	for (i = 0; i < RX_BD_NUM; i++) {
+		struct arc_emac_bd *rxbd = &priv->rxbd[i];
+		struct buffer_state *rx_buff = &priv->rx_buff[i];
+
+		if (rx_buff->skb) {
+			dma_unmap_single(&ndev->dev, dma_unmap_addr(rx_buff, addr),
+					dma_unmap_len(rx_buff, len), DMA_FROM_DEVICE);
+
+			/* return the sk_buff to system */
+			dev_kfree_skb_irq(rx_buff->skb);
+		}
+
+		rxbd->info = 0;
+		rxbd->data = 0;
+		rx_buff->skb = NULL;
+	}
+}
+
+/**
  * arc_emac_stop - Close the network device.
  * @ndev:	Pointer to the network device.
  *
@@ -534,6 +596,10 @@
 	/* Disable EMAC */
 	arc_reg_clr(priv, R_CTRL, EN_MASK);
 
+	/* Return the sk_buff to system */
+	arc_free_tx_queue(ndev);
+	arc_free_rx_queue(ndev);
+
 	return 0;
 }
 
@@ -610,7 +676,6 @@
 	dma_unmap_addr_set(&priv->tx_buff[*txbd_curr], addr, addr);
 	dma_unmap_len_set(&priv->tx_buff[*txbd_curr], len, len);
 
-	priv->tx_buff[*txbd_curr].skb = skb;
 	priv->txbd[*txbd_curr].data = cpu_to_le32(addr);
 
 	/* Make sure pointer to data buffer is set */
@@ -620,6 +685,11 @@
 
 	*info = cpu_to_le32(FOR_EMAC | FIRST_OR_LAST_MASK | len);
 
+	/* Make sure info word is set */
+	wmb();
+
+	priv->tx_buff[*txbd_curr].skb = skb;
+
 	/* Increment index to point to the next BD */
 	*txbd_curr = (*txbd_curr + 1) % TX_BD_NUM;
 

diff --git a/drivers/net/ethernet/aurora/nb8800.c b/drivers/net/ethernet/aurora/nb8800.c
index f71ab26..08a23e6 100644
--- a/drivers/net/ethernet/aurora/nb8800.c
+++ b/drivers/net/ethernet/aurora/nb8800.c

@@ -1460,7 +1460,19 @@
 		goto err_disable_clk;
 	}
 
-	priv->phy_node = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0);
+	if (of_phy_is_fixed_link(pdev->dev.of_node)) {
+		ret = of_phy_register_fixed_link(pdev->dev.of_node);
+		if (ret < 0) {
+			dev_err(&pdev->dev, "bad fixed-link spec\n");
+			goto err_free_bus;
+		}
+		priv->phy_node = of_node_get(pdev->dev.of_node);
+	}
+
+	if (!priv->phy_node)
+		priv->phy_node = of_parse_phandle(pdev->dev.of_node,
+						  "phy-handle", 0);
+
 	if (!priv->phy_node) {
 		dev_err(&pdev->dev, "no PHY specified\n");
 		ret = -ENODEV;

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
index 27aa080..91874d2 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h

@@ -4896,9 +4896,9 @@
  * cfc delete event data
  */
 struct cfc_del_event_data {
-	u32 cid;
-	u32 reserved0;
-	u32 reserved1;
+	__le32 cid;
+	__le32 reserved0;
+	__le32 reserved1;
 };
 
 
@@ -5114,15 +5114,9 @@
  * zone that triggers the in-bound interrupt
  */
 struct trigger_vf_zone {
-#if defined(__BIG_ENDIAN)
-	u16 reserved1;
-	u8 reserved0;
-	struct vf_pf_channel_zone_trigger vf_pf_channel;
-#elif defined(__LITTLE_ENDIAN)
 	struct vf_pf_channel_zone_trigger vf_pf_channel;
 	u8 reserved0;
 	u16 reserved1;
-#endif
 	u32 reserved2;
 };
 
@@ -5207,9 +5201,9 @@
  * set mac event data
  */
 struct eth_event_data {
-	u32 echo;
-	u32 reserved0;
-	u32 reserved1;
+	__le32 echo;
+	__le32 reserved0;
+	__le32 reserved1;
 };
 
 
@@ -5219,9 +5213,9 @@
 struct vf_pf_event_data {
 	u8 vf_id;
 	u8 reserved0;
-	u16 reserved1;
-	u32 msg_addr_lo;
-	u32 msg_addr_hi;
+	__le16 reserved1;
+	__le32 msg_addr_lo;
+	__le32 msg_addr_hi;
 };
 
 /*
@@ -5230,9 +5224,9 @@
 struct vf_flr_event_data {
 	u8 vf_id;
 	u8 reserved0;
-	u16 reserved1;
-	u32 reserved2;
-	u32 reserved3;
+	__le16 reserved1;
+	__le32 reserved2;
+	__le32 reserved3;
 };
 
 /*
@@ -5241,9 +5235,9 @@
 struct malicious_vf_event_data {
 	u8 vf_id;
 	u8 err_id;
-	u16 reserved1;
-	u32 reserved2;
-	u32 reserved3;
+	__le16 reserved1;
+	__le32 reserved2;
+	__le32 reserved3;
 };
 
 /*

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
index d946bba..1fb8010 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c

@@ -6185,26 +6185,80 @@
 		shift -= 4;
 		digit = ((num & mask) >> shift);
 		if (digit == 0 && remove_leading_zeros) {
-			mask = mask >> 4;
-			continue;
-		} else if (digit < 0xa)
-			*str_ptr = digit + '0';
-		else
-			*str_ptr = digit - 0xa + 'a';
-		remove_leading_zeros = 0;
-		str_ptr++;
-		(*len)--;
+			*str_ptr = '0';
+		} else {
+			if (digit < 0xa)
+				*str_ptr = digit + '0';
+			else
+				*str_ptr = digit - 0xa + 'a';
+
+			remove_leading_zeros = 0;
+			str_ptr++;
+			(*len)--;
+		}
 		mask = mask >> 4;
 		if (shift == 4*4) {
+			if (remove_leading_zeros) {
+				str_ptr++;
+				(*len)--;
+			}
 			*str_ptr = '.';
 			str_ptr++;
 			(*len)--;
 			remove_leading_zeros = 1;
 		}
 	}
+	if (remove_leading_zeros)
+		(*len)--;
 	return 0;
 }
 
+static int bnx2x_3_seq_format_ver(u32 num, u8 *str, u16 *len)
+{
+	u8 *str_ptr = str;
+	u32 mask = 0x00f00000;
+	u8 shift = 8*3;
+	u8 digit;
+	u8 remove_leading_zeros = 1;
+
+	if (*len < 10) {
+		/* Need more than 10chars for this format */
+		*str_ptr = '\0';
+		(*len)--;
+		return -EINVAL;
+	}
+
+	while (shift > 0) {
+		shift -= 4;
+		digit = ((num & mask) >> shift);
+		if (digit == 0 && remove_leading_zeros) {
+			*str_ptr = '0';
+		} else {
+			if (digit < 0xa)
+				*str_ptr = digit + '0';
+			else
+				*str_ptr = digit - 0xa + 'a';
+
+			remove_leading_zeros = 0;
+			str_ptr++;
+			(*len)--;
+		}
+		mask = mask >> 4;
+		if ((shift == 4*4) || (shift == 4*2)) {
+			if (remove_leading_zeros) {
+				str_ptr++;
+				(*len)--;
+			}
+			*str_ptr = '.';
+			str_ptr++;
+			(*len)--;
+			remove_leading_zeros = 1;
+		}
+	}
+	if (remove_leading_zeros)
+		(*len)--;
+	return 0;
+}
 
 static int bnx2x_null_format_ver(u32 spirom_ver, u8 *str, u16 *len)
 {
@@ -9677,8 +9731,9 @@
 
 	if (bnx2x_is_8483x_8485x(phy)) {
 		bnx2x_cl45_read(bp, phy, MDIO_CTL_DEVAD, 0x400f, &fw_ver1);
-		bnx2x_save_spirom_version(bp, port, fw_ver1 & 0xfff,
-				phy->ver_addr);
+		if (phy->type != PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84858)
+			fw_ver1 &= 0xfff;
+		bnx2x_save_spirom_version(bp, port, fw_ver1, phy->ver_addr);
 	} else {
 		/* For 32-bit registers in 848xx, access via MDIO2ARM i/f. */
 		/* (1) set reg 0xc200_0014(SPI_BRIDGE_CTRL_2) to 0x03000000 */
@@ -9732,16 +9787,32 @@
 static void bnx2x_848xx_set_led(struct bnx2x *bp,
 				struct bnx2x_phy *phy)
 {
-	u16 val, offset, i;
+	u16 val, led3_blink_rate, offset, i;
 	static struct bnx2x_reg_set reg_set[] = {
 		{MDIO_PMA_DEVAD, MDIO_PMA_REG_8481_LED1_MASK, 0x0080},
 		{MDIO_PMA_DEVAD, MDIO_PMA_REG_8481_LED2_MASK, 0x0018},
 		{MDIO_PMA_DEVAD, MDIO_PMA_REG_8481_LED3_MASK, 0x0006},
-		{MDIO_PMA_DEVAD, MDIO_PMA_REG_8481_LED3_BLINK, 0x0000},
 		{MDIO_PMA_DEVAD, MDIO_PMA_REG_84823_CTL_SLOW_CLK_CNT_HIGH,
 			MDIO_PMA_REG_84823_BLINK_RATE_VAL_15P9HZ},
 		{MDIO_AN_DEVAD, 0xFFFB, 0xFFFD}
 	};
+
+	if (phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84858) {
+		/* Set LED5 source */
+		bnx2x_cl45_write(bp, phy,
+				 MDIO_PMA_DEVAD,
+				 MDIO_PMA_REG_8481_LED5_MASK,
+				 0x90);
+		led3_blink_rate = 0x000f;
+	} else {
+		led3_blink_rate = 0x0000;
+	}
+	/* Set LED3 BLINK */
+	bnx2x_cl45_write(bp, phy,
+			 MDIO_PMA_DEVAD,
+			 MDIO_PMA_REG_8481_LED3_BLINK,
+			 led3_blink_rate);
+
 	/* PHYC_CTL_LED_CTL */
 	bnx2x_cl45_read(bp, phy,
 			MDIO_PMA_DEVAD,
@@ -9749,6 +9820,9 @@
 	val &= 0xFE00;
 	val |= 0x0092;
 
+	if (phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84858)
+		val |= 2 << 12; /* LED5 ON based on source */
+
 	bnx2x_cl45_write(bp, phy,
 			 MDIO_PMA_DEVAD,
 			 MDIO_PMA_REG_8481_LINK_SIGNAL, val);
@@ -9762,10 +9836,17 @@
 	else
 		offset = MDIO_PMA_REG_84823_CTL_LED_CTL_1;
 
-	/* stretch_en for LED3*/
+	if (phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84858)
+		val = MDIO_PMA_REG_84858_ALLOW_GPHY_ACT |
+		      MDIO_PMA_REG_84823_LED3_STRETCH_EN;
+	else
+		val = MDIO_PMA_REG_84823_LED3_STRETCH_EN;
+
+	/* stretch_en for LEDs */
 	bnx2x_cl45_read_or_write(bp, phy,
-				 MDIO_PMA_DEVAD, offset,
-				 MDIO_PMA_REG_84823_LED3_STRETCH_EN);
+				 MDIO_PMA_DEVAD,
+				 offset,
+				 val);
 }
 
 static void bnx2x_848xx_specific_func(struct bnx2x_phy *phy,
@@ -9775,7 +9856,7 @@
 	struct bnx2x *bp = params->bp;
 	switch (action) {
 	case PHY_INIT:
-		if (!bnx2x_is_8483x_8485x(phy)) {
+		if (bnx2x_is_8483x_8485x(phy)) {
 			/* Save spirom version */
 			bnx2x_save_848xx_spirom_version(phy, bp, params->port);
 		}
@@ -10036,15 +10117,20 @@
 
 static int bnx2x_84833_cmd_hdlr(struct bnx2x_phy *phy,
 				struct link_params *params, u16 fw_cmd,
-				u16 cmd_args[], int argc)
+				u16 cmd_args[], int argc, int process)
 {
 	int idx;
 	u16 val;
 	struct bnx2x *bp = params->bp;
-	/* Write CMD_OPEN_OVERRIDE to STATUS reg */
-	bnx2x_cl45_write(bp, phy, MDIO_CTL_DEVAD,
-			MDIO_848xx_CMD_HDLR_STATUS,
-			PHY84833_STATUS_CMD_OPEN_OVERRIDE);
+	int rc = 0;
+
+	if (process == PHY84833_MB_PROCESS2) {
+		/* Write CMD_OPEN_OVERRIDE to STATUS reg */
+		bnx2x_cl45_write(bp, phy, MDIO_CTL_DEVAD,
+				 MDIO_848xx_CMD_HDLR_STATUS,
+				 PHY84833_STATUS_CMD_OPEN_OVERRIDE);
+	}
+
 	for (idx = 0; idx < PHY848xx_CMDHDLR_WAIT; idx++) {
 		bnx2x_cl45_read(bp, phy, MDIO_CTL_DEVAD,
 				MDIO_848xx_CMD_HDLR_STATUS, &val);
@@ -10054,15 +10140,27 @@
 	}
 	if (idx >= PHY848xx_CMDHDLR_WAIT) {
 		DP(NETIF_MSG_LINK, "FW cmd: FW not ready.\n");
+		/* if the status is CMD_COMPLETE_PASS or CMD_COMPLETE_ERROR
+		 * clear the status to CMD_CLEAR_COMPLETE
+		 */
+		if (val == PHY84833_STATUS_CMD_COMPLETE_PASS ||
+		    val == PHY84833_STATUS_CMD_COMPLETE_ERROR) {
+			bnx2x_cl45_write(bp, phy, MDIO_CTL_DEVAD,
+					 MDIO_848xx_CMD_HDLR_STATUS,
+					 PHY84833_STATUS_CMD_CLEAR_COMPLETE);
+		}
 		return -EINVAL;
 	}
-
-	/* Prepare argument(s) and issue command */
-	for (idx = 0; idx < argc; idx++) {
-		bnx2x_cl45_write(bp, phy, MDIO_CTL_DEVAD,
-				MDIO_848xx_CMD_HDLR_DATA1 + idx,
-				cmd_args[idx]);
+	if (process == PHY84833_MB_PROCESS1 ||
+	    process == PHY84833_MB_PROCESS2) {
+		/* Prepare argument(s) */
+		for (idx = 0; idx < argc; idx++) {
+			bnx2x_cl45_write(bp, phy, MDIO_CTL_DEVAD,
+					 MDIO_848xx_CMD_HDLR_DATA1 + idx,
+					 cmd_args[idx]);
+		}
 	}
+
 	bnx2x_cl45_write(bp, phy, MDIO_CTL_DEVAD,
 			MDIO_848xx_CMD_HDLR_COMMAND, fw_cmd);
 	for (idx = 0; idx < PHY848xx_CMDHDLR_WAIT; idx++) {
@@ -10076,24 +10174,30 @@
 	if ((idx >= PHY848xx_CMDHDLR_WAIT) ||
 	    (val == PHY84833_STATUS_CMD_COMPLETE_ERROR)) {
 		DP(NETIF_MSG_LINK, "FW cmd failed.\n");
-		return -EINVAL;
+		rc = -EINVAL;
 	}
-	/* Gather returning data */
-	for (idx = 0; idx < argc; idx++) {
-		bnx2x_cl45_read(bp, phy, MDIO_CTL_DEVAD,
-				MDIO_848xx_CMD_HDLR_DATA1 + idx,
-				&cmd_args[idx]);
+	if (process == PHY84833_MB_PROCESS3 && rc == 0) {
+		/* Gather returning data */
+		for (idx = 0; idx < argc; idx++) {
+			bnx2x_cl45_read(bp, phy, MDIO_CTL_DEVAD,
+					MDIO_848xx_CMD_HDLR_DATA1 + idx,
+					&cmd_args[idx]);
+		}
 	}
-	bnx2x_cl45_write(bp, phy, MDIO_CTL_DEVAD,
-			MDIO_848xx_CMD_HDLR_STATUS,
-			PHY84833_STATUS_CMD_CLEAR_COMPLETE);
-	return 0;
+	if (val == PHY84833_STATUS_CMD_COMPLETE_ERROR ||
+	    val == PHY84833_STATUS_CMD_COMPLETE_PASS) {
+		bnx2x_cl45_write(bp, phy, MDIO_CTL_DEVAD,
+				 MDIO_848xx_CMD_HDLR_STATUS,
+				 PHY84833_STATUS_CMD_CLEAR_COMPLETE);
+	}
+	return rc;
 }
 
 static int bnx2x_848xx_cmd_hdlr(struct bnx2x_phy *phy,
 				struct link_params *params,
 				u16 fw_cmd,
-				u16 cmd_args[], int argc)
+					   u16 cmd_args[], int argc,
+					   int process)
 {
 	struct bnx2x *bp = params->bp;
 
@@ -10106,7 +10210,7 @@
 					    argc);
 	} else {
 		return bnx2x_84833_cmd_hdlr(phy, params, fw_cmd, cmd_args,
-					    argc);
+					    argc, process);
 	}
 }
 
@@ -10133,7 +10237,7 @@
 
 	status = bnx2x_848xx_cmd_hdlr(phy, params,
 				      PHY848xx_CMD_SET_PAIR_SWAP, data,
-				      PHY848xx_CMDHDLR_MAX_ARGS);
+				      2, PHY84833_MB_PROCESS2);
 	if (status == 0)
 		DP(NETIF_MSG_LINK, "Pairswap OK, val=0x%x\n", data[1]);
 
@@ -10222,8 +10326,8 @@
 	DP(NETIF_MSG_LINK, "Don't Advertise 10GBase-T EEE\n");
 
 	/* Prevent Phy from working in EEE and advertising it */
-	rc = bnx2x_848xx_cmd_hdlr(phy, params,
-				  PHY848xx_CMD_SET_EEE_MODE, &cmd_args, 1);
+	rc = bnx2x_848xx_cmd_hdlr(phy, params, PHY848xx_CMD_SET_EEE_MODE,
+				  &cmd_args, 1, PHY84833_MB_PROCESS1);
 	if (rc) {
 		DP(NETIF_MSG_LINK, "EEE disable failed.\n");
 		return rc;
@@ -10240,8 +10344,8 @@
 	struct bnx2x *bp = params->bp;
 	u16 cmd_args = 1;
 
-	rc = bnx2x_848xx_cmd_hdlr(phy, params,
-				  PHY848xx_CMD_SET_EEE_MODE, &cmd_args, 1);
+	rc = bnx2x_848xx_cmd_hdlr(phy, params, PHY848xx_CMD_SET_EEE_MODE,
+				  &cmd_args, 1, PHY84833_MB_PROCESS1);
 	if (rc) {
 		DP(NETIF_MSG_LINK, "EEE enable failed.\n");
 		return rc;
@@ -10362,7 +10466,7 @@
 		cmd_args[3] = PHY84833_CONSTANT_LATENCY;
 		rc = bnx2x_848xx_cmd_hdlr(phy, params,
 					  PHY848xx_CMD_SET_EEE_MODE, cmd_args,
-					  PHY848xx_CMDHDLR_MAX_ARGS);
+					  4, PHY84833_MB_PROCESS1);
 		if (rc)
 			DP(NETIF_MSG_LINK, "Cfg AutogrEEEn failed.\n");
 	}
@@ -10416,6 +10520,32 @@
 		vars->eee_status &= ~SHMEM_EEE_SUPPORTED_MASK;
 	}
 
+	if (phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84833) {
+		/* Additional settings for jumbo packets in 1000BASE-T mode */
+		/* Allow rx extended length */
+		bnx2x_cl45_read(bp, phy, MDIO_AN_DEVAD,
+				MDIO_AN_REG_8481_AUX_CTRL, &val);
+		val |= 0x4000;
+		bnx2x_cl45_write(bp, phy, MDIO_AN_DEVAD,
+				 MDIO_AN_REG_8481_AUX_CTRL, val);
+		/* TX FIFO Elasticity LSB */
+		bnx2x_cl45_read(bp, phy, MDIO_AN_DEVAD,
+				MDIO_AN_REG_8481_1G_100T_EXT_CTRL, &val);
+		val |= 0x1;
+		bnx2x_cl45_write(bp, phy, MDIO_AN_DEVAD,
+				 MDIO_AN_REG_8481_1G_100T_EXT_CTRL, val);
+		/* TX FIFO Elasticity MSB */
+		/* Enable expansion register 0x46 (Pattern Generator status) */
+		bnx2x_cl45_write(bp, phy, MDIO_AN_DEVAD,
+				 MDIO_AN_REG_8481_EXPANSION_REG_ACCESS, 0xf46);
+
+		bnx2x_cl45_read(bp, phy, MDIO_AN_DEVAD,
+				MDIO_AN_REG_8481_EXPANSION_REG_RD_RW, &val);
+		val |= 0x4000;
+		bnx2x_cl45_write(bp, phy, MDIO_AN_DEVAD,
+				 MDIO_AN_REG_8481_EXPANSION_REG_RD_RW, val);
+	}
+
 	if (bnx2x_is_8483x_8485x(phy)) {
 		/* Bring PHY out of super isolate mode as the final step. */
 		bnx2x_cl45_read_and_write(bp, phy,
@@ -10555,6 +10685,17 @@
 	return link_up;
 }
 
+static int bnx2x_8485x_format_ver(u32 raw_ver, u8 *str, u16 *len)
+{
+	int status = 0;
+	u32 num;
+
+	num = ((raw_ver & 0xF80) >> 7) << 16 | ((raw_ver & 0x7F) << 8) |
+	      ((raw_ver & 0xF000) >> 12);
+	status = bnx2x_3_seq_format_ver(num, str, len);
+	return status;
+}
+
 static int bnx2x_848xx_format_ver(u32 raw_ver, u8 *str, u16 *len)
 {
 	int status = 0;
@@ -10651,10 +10792,25 @@
 					0x0);
 
 		} else {
+			/* LED 1 OFF */
 			bnx2x_cl45_write(bp, phy,
 					 MDIO_PMA_DEVAD,
 					 MDIO_PMA_REG_8481_LED1_MASK,
 					 0x0);
+
+			if (phy->type ==
+				PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84858) {
+				/* LED 2 OFF */
+				bnx2x_cl45_write(bp, phy,
+						 MDIO_PMA_DEVAD,
+						 MDIO_PMA_REG_8481_LED2_MASK,
+						 0x0);
+				/* LED 3 OFF */
+				bnx2x_cl45_write(bp, phy,
+						 MDIO_PMA_DEVAD,
+						 MDIO_PMA_REG_8481_LED3_MASK,
+						 0x0);
+			}
 		}
 		break;
 	case LED_MODE_FRONT_PANEL_OFF:
@@ -10713,6 +10869,19 @@
 						 MDIO_PMA_REG_8481_SIGNAL_MASK,
 						 0x0);
 			}
+			if (phy->type ==
+				PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84858) {
+				/* LED 2 OFF */
+				bnx2x_cl45_write(bp, phy,
+						 MDIO_PMA_DEVAD,
+						 MDIO_PMA_REG_8481_LED2_MASK,
+						 0x0);
+				/* LED 3 OFF */
+				bnx2x_cl45_write(bp, phy,
+						 MDIO_PMA_DEVAD,
+						 MDIO_PMA_REG_8481_LED3_MASK,
+						 0x0);
+			}
 		}
 		break;
 	case LED_MODE_ON:
@@ -10776,6 +10945,25 @@
 						params->port*4,
 						NIG_MASK_MI_INT);
 				}
+			}
+			if (phy->type ==
+			    PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84858) {
+				/* Tell LED3 to constant on */
+				bnx2x_cl45_read(bp, phy,
+						MDIO_PMA_DEVAD,
+						MDIO_PMA_REG_8481_LINK_SIGNAL,
+						&val);
+				val &= ~(7<<6);
+				val |= (2<<6);  /* A83B[8:6]= 2 */
+				bnx2x_cl45_write(bp, phy,
+						 MDIO_PMA_DEVAD,
+						 MDIO_PMA_REG_8481_LINK_SIGNAL,
+						 val);
+				bnx2x_cl45_write(bp, phy,
+						 MDIO_PMA_DEVAD,
+						 MDIO_PMA_REG_8481_LED3_MASK,
+						 0x20);
+			} else {
 				bnx2x_cl45_write(bp, phy,
 						 MDIO_PMA_DEVAD,
 						 MDIO_PMA_REG_8481_SIGNAL_MASK,
@@ -10854,6 +11042,17 @@
 					 MDIO_PMA_REG_8481_LINK_SIGNAL,
 					 val);
 			if (phy->type ==
+			    PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84858) {
+				bnx2x_cl45_write(bp, phy,
+						 MDIO_PMA_DEVAD,
+						 MDIO_PMA_REG_8481_LED2_MASK,
+						 0x18);
+				bnx2x_cl45_write(bp, phy,
+						 MDIO_PMA_DEVAD,
+						 MDIO_PMA_REG_8481_LED3_MASK,
+						 0x06);
+			}
+			if (phy->type ==
 			    PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84834) {
 				/* Restore LED4 source to external link,
 				 * and re-enable interrupts.
@@ -11982,7 +12181,7 @@
 	.read_status	= (read_status_t)bnx2x_848xx_read_status,
 	.link_reset	= (link_reset_t)bnx2x_848x3_link_reset,
 	.config_loopback = (config_loopback_t)NULL,
-	.format_fw_ver	= (format_fw_ver_t)bnx2x_848xx_format_ver,
+	.format_fw_ver	= (format_fw_ver_t)bnx2x_8485x_format_ver,
 	.hw_reset	= (hw_reset_t)bnx2x_84833_hw_reset_phy,
 	.set_link_led	= (set_link_led_t)bnx2x_848xx_set_link_led,
 	.phy_specific_func = (phy_specific_func_t)bnx2x_848xx_specific_func
@@ -13807,8 +14006,10 @@
 	if (CHIP_IS_E3(bp)) {
 		struct bnx2x_phy *phy = &params->phy[INT_PHY];
 		bnx2x_set_aer_mmd(params, phy);
-		if ((phy->supported & SUPPORTED_20000baseKR2_Full) &&
-		    (phy->speed_cap_mask & PORT_HW_CFG_SPEED_CAPABILITY_D0_20G))
+		if (((phy->req_line_speed == SPEED_AUTO_NEG) &&
+		     (phy->speed_cap_mask &
+		      PORT_HW_CFG_SPEED_CAPABILITY_D0_20G)) ||
+		    (phy->req_line_speed == SPEED_20000))
 			bnx2x_check_kr2_wa(params, vars, phy);
 		bnx2x_check_over_curr(params, vars);
 		if (vars->rx_tx_asic_rst)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 6c4e3a6..2bf9c87 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c

@@ -5280,14 +5280,14 @@
 {
 	unsigned long ramrod_flags = 0;
 	int rc = 0;
-	u32 cid = elem->message.data.eth_event.echo & BNX2X_SWCID_MASK;
+	u32 echo = le32_to_cpu(elem->message.data.eth_event.echo);
+	u32 cid = echo & BNX2X_SWCID_MASK;
 	struct bnx2x_vlan_mac_obj *vlan_mac_obj;
 
 	/* Always push next commands out, don't wait here */
 	__set_bit(RAMROD_CONT, &ramrod_flags);
 
-	switch (le32_to_cpu((__force __le32)elem->message.data.eth_event.echo)
-			    >> BNX2X_SWCID_SHIFT) {
+	switch (echo >> BNX2X_SWCID_SHIFT) {
 	case BNX2X_FILTER_MAC_PENDING:
 		DP(BNX2X_MSG_SP, "Got SETUP_MAC completions\n");
 		if (CNIC_LOADED(bp) && (cid == BNX2X_ISCSI_ETH_CID(bp)))
@@ -5308,8 +5308,7 @@
 		bnx2x_handle_mcast_eqe(bp);
 		return;
 	default:
-		BNX2X_ERR("Unsupported classification command: %d\n",
-			  elem->message.data.eth_event.echo);
+		BNX2X_ERR("Unsupported classification command: 0x%x\n", echo);
 		return;
 	}
 
@@ -5478,9 +5477,6 @@
 			goto next_spqe;
 		}
 
-		/* elem CID originates from FW; actually LE */
-		cid = SW_CID((__force __le32)
-			     elem->message.data.cfc_del_event.cid);
 		opcode = elem->message.opcode;
 
 		/* handle eq element */
@@ -5503,6 +5499,10 @@
 			 * we may want to verify here that the bp state is
 			 * HALTING
 			 */
+
+			/* elem CID originates from FW; actually LE */
+			cid = SW_CID(elem->message.data.cfc_del_event.cid);
+
 			DP(BNX2X_MSG_SP,
 			   "got delete ramrod for MULTI[%d]\n", cid);
 
@@ -5596,10 +5596,8 @@
 		      BNX2X_STATE_OPENING_WAIT4_PORT):
 		case (EVENT_RING_OPCODE_RSS_UPDATE_RULES |
 		      BNX2X_STATE_CLOSING_WAIT4_HALT):
-			cid = elem->message.data.eth_event.echo &
-				BNX2X_SWCID_MASK;
 			DP(BNX2X_MSG_SP, "got RSS_UPDATE ramrod. CID %d\n",
-			   cid);
+			   SW_CID(elem->message.data.eth_event.echo));
 			rss_raw->clear_pending(rss_raw);
 			break;
 
@@ -5684,7 +5682,7 @@
 		if (status & BNX2X_DEF_SB_IDX) {
 			struct bnx2x_fastpath *fp = bnx2x_fcoe_fp(bp);
 
-		if (FCOE_INIT(bp) &&
+			if (FCOE_INIT(bp) &&
 			    (bnx2x_has_rx_work(fp) || bnx2x_has_tx_work(fp))) {
 				/* Prevent local bottom-halves from running as
 				 * we are going to change the local NAPI list.

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h
index 4dead49..a43dea2 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h

@@ -7296,6 +7296,8 @@
 #define MDIO_PMA_REG_84823_CTL_LED_CTL_1			0xa8e3
 #define MDIO_PMA_REG_84833_CTL_LED_CTL_1			0xa8ec
 #define MDIO_PMA_REG_84823_LED3_STRETCH_EN			0x0080
+/* BCM84858 only */
+#define MDIO_PMA_REG_84858_ALLOW_GPHY_ACT			0x8000
 
 /* BCM84833 only */
 #define MDIO_84833_TOP_CFG_FW_REV			0x400f
@@ -7337,6 +7339,10 @@
 #define PHY84833_STATUS_CMD_NOT_OPEN_FOR_CMDS		0x0040
 #define PHY84833_STATUS_CMD_CLEAR_COMPLETE		0x0080
 #define PHY84833_STATUS_CMD_OPEN_OVERRIDE		0xa5a5
+/* Mailbox Process */
+#define PHY84833_MB_PROCESS1				1
+#define PHY84833_MB_PROCESS2				2
+#define PHY84833_MB_PROCESS3				3
 
 /* Mailbox status set used by 84858 only */
 #define PHY84858_STATUS_CMD_RECEIVED			0x0001

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
index 9d02734..632daff 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c

@@ -1672,11 +1672,12 @@
 {
 	unsigned long ramrod_flags = 0;
 	int rc = 0;
+	u32 echo = le32_to_cpu(elem->message.data.eth_event.echo);
 
 	/* Always push next commands out, don't wait here */
 	set_bit(RAMROD_CONT, &ramrod_flags);
 
-	switch (elem->message.data.eth_event.echo >> BNX2X_SWCID_SHIFT) {
+	switch (echo >> BNX2X_SWCID_SHIFT) {
 	case BNX2X_FILTER_MAC_PENDING:
 		rc = vfq->mac_obj.complete(bp, &vfq->mac_obj, elem,
 					   &ramrod_flags);
@@ -1686,8 +1687,7 @@
 					    &ramrod_flags);
 		break;
 	default:
-		BNX2X_ERR("Unsupported classification command: %d\n",
-			  elem->message.data.eth_event.echo);
+		BNX2X_ERR("Unsupported classification command: 0x%x\n", echo);
 		return;
 	}
 	if (rc < 0)
@@ -1747,16 +1747,14 @@
 
 	switch (opcode) {
 	case EVENT_RING_OPCODE_CFC_DEL:
-		cid = SW_CID((__force __le32)
-			     elem->message.data.cfc_del_event.cid);
+		cid = SW_CID(elem->message.data.cfc_del_event.cid);
 		DP(BNX2X_MSG_IOV, "checking cfc-del comp cid=%d\n", cid);
 		break;
 	case EVENT_RING_OPCODE_CLASSIFICATION_RULES:
 	case EVENT_RING_OPCODE_MULTICAST_RULES:
 	case EVENT_RING_OPCODE_FILTERS_RULES:
 	case EVENT_RING_OPCODE_RSS_UPDATE_RULES:
-		cid = (elem->message.data.eth_event.echo &
-		       BNX2X_SWCID_MASK);
+		cid = SW_CID(elem->message.data.eth_event.echo);
 		DP(BNX2X_MSG_IOV, "checking filtering comp cid=%d\n", cid);
 		break;
 	case EVENT_RING_OPCODE_VF_FLR:

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
index 1374e53..bfae300 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c

@@ -2187,8 +2187,10 @@
 
 	/* Update VFDB with current message and schedule its handling */
 	mutex_lock(&BP_VFDB(bp)->event_mutex);
-	BP_VF_MBX(bp, vf_idx)->vf_addr_hi = vfpf_event->msg_addr_hi;
-	BP_VF_MBX(bp, vf_idx)->vf_addr_lo = vfpf_event->msg_addr_lo;
+	BP_VF_MBX(bp, vf_idx)->vf_addr_hi =
+		le32_to_cpu(vfpf_event->msg_addr_hi);
+	BP_VF_MBX(bp, vf_idx)->vf_addr_lo =
+		le32_to_cpu(vfpf_event->msg_addr_lo);
 	BP_VFDB(bp)->event_occur |= (1ULL << vf_idx);
 	mutex_unlock(&BP_VFDB(bp)->event_mutex);
 

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 5dc89e5..82f1913 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c

@@ -69,7 +69,7 @@
 #define BNXT_RX_DMA_OFFSET NET_SKB_PAD
 #define BNXT_RX_COPY_THRESH 256
 
-#define BNXT_TX_PUSH_THRESH 92
+#define BNXT_TX_PUSH_THRESH 164
 
 enum board_idx {
 	BCM57301,
@@ -223,11 +223,12 @@
 	}
 
 	if (free_size == bp->tx_ring_size && length <= bp->tx_push_thresh) {
-		struct tx_push_bd *push = txr->tx_push;
-		struct tx_bd *tx_push = &push->txbd1;
-		struct tx_bd_ext *tx_push1 = &push->txbd2;
-		void *pdata = tx_push1 + 1;
-		int j;
+		struct tx_push_buffer *tx_push_buf = txr->tx_push;
+		struct tx_push_bd *tx_push = &tx_push_buf->push_bd;
+		struct tx_bd_ext *tx_push1 = &tx_push->txbd2;
+		void *pdata = tx_push_buf->data;
+		u64 *end;
+		int j, push_len;
 
 		/* Set COAL_NOW to be ready quickly for the next push */
 		tx_push->tx_bd_len_flags_type =
@@ -247,6 +248,10 @@
 		tx_push1->tx_bd_cfa_meta = cpu_to_le32(vlan_tag_flags);
 		tx_push1->tx_bd_cfa_action = cpu_to_le32(cfa_action);
 
+		end = pdata + length;
+		end = PTR_ALIGN(end, 8) - 1;
+		*end = 0;
+
 		skb_copy_from_linear_data(skb, pdata, len);
 		pdata += len;
 		for (j = 0; j < last_frag; j++) {
@@ -261,22 +266,29 @@
 			pdata += skb_frag_size(frag);
 		}
 
-		memcpy(txbd, tx_push, sizeof(*txbd));
+		txbd->tx_bd_len_flags_type = tx_push->tx_bd_len_flags_type;
+		txbd->tx_bd_haddr = txr->data_mapping;
 		prod = NEXT_TX(prod);
 		txbd = &txr->tx_desc_ring[TX_RING(prod)][TX_IDX(prod)];
 		memcpy(txbd, tx_push1, sizeof(*txbd));
 		prod = NEXT_TX(prod);
-		push->doorbell =
+		tx_push->doorbell =
 			cpu_to_le32(DB_KEY_TX_PUSH | DB_LONG_TX_PUSH | prod);
 		txr->tx_prod = prod;
 
 		netdev_tx_sent_queue(txq, skb->len);
 
-		__iowrite64_copy(txr->tx_doorbell, push,
-				 (length + sizeof(*push) + 8) / 8);
+		push_len = (length + sizeof(*tx_push) + 7) / 8;
+		if (push_len > 16) {
+			__iowrite64_copy(txr->tx_doorbell, tx_push_buf, 16);
+			__iowrite64_copy(txr->tx_doorbell + 4, tx_push_buf + 1,
+					 push_len - 16);
+		} else {
+			__iowrite64_copy(txr->tx_doorbell, tx_push_buf,
+					 push_len);
+		}
 
 		tx_buf->is_push = 1;
-
 		goto tx_done;
 	}
 
@@ -1753,7 +1765,7 @@
 		push_size  = L1_CACHE_ALIGN(sizeof(struct tx_push_bd) +
 					bp->tx_push_thresh);
 
-		if (push_size > 128) {
+		if (push_size > 256) {
 			push_size = 0;
 			bp->tx_push_thresh = 0;
 		}
@@ -1772,7 +1784,6 @@
 			return rc;
 
 		if (bp->tx_push_size) {
-			struct tx_bd *txbd;
 			dma_addr_t mapping;
 
 			/* One pre-allocated DMA buffer to backup
@@ -1786,13 +1797,11 @@
 			if (!txr->tx_push)
 				return -ENOMEM;
 
-			txbd = &txr->tx_push->txbd1;
-
 			mapping = txr->tx_push_mapping +
 				sizeof(struct tx_push_bd);
-			txbd->tx_bd_haddr = cpu_to_le64(mapping);
+			txr->data_mapping = cpu_to_le64(mapping);
 
-			memset(txbd + 1, 0, sizeof(struct tx_bd_ext));
+			memset(txr->tx_push, 0, sizeof(struct tx_push_bd));
 		}
 		ring->queue_id = bp->q_info[j].queue_id;
 		if (i % bp->tx_nr_rings_per_tc == (bp->tx_nr_rings_per_tc - 1))
@@ -4546,20 +4555,18 @@
 	if (!(link_info->autoneg & BNXT_AUTONEG_FLOW_CTRL) &&
 	    link_info->force_pause_setting != link_info->req_flow_ctrl)
 		update_pause = true;
-	if (link_info->req_duplex != link_info->duplex_setting)
-		update_link = true;
 	if (!(link_info->autoneg & BNXT_AUTONEG_SPEED)) {
 		if (BNXT_AUTO_MODE(link_info->auto_mode))
 			update_link = true;
 		if (link_info->req_link_speed != link_info->force_link_speed)
 			update_link = true;
+		if (link_info->req_duplex != link_info->duplex_setting)
+			update_link = true;
 	} else {
 		if (link_info->auto_mode == BNXT_LINK_AUTO_NONE)
 			update_link = true;
 		if (link_info->advertising != link_info->auto_link_speeds)
 			update_link = true;
-		if (link_info->req_link_speed != link_info->auto_link_speed)
-			update_link = true;
 	}
 
 	if (update_link)
@@ -4636,7 +4643,7 @@
 	if (link_re_init) {
 		rc = bnxt_update_phy_setting(bp);
 		if (rc)
-			goto open_err;
+			netdev_warn(bp->dev, "failed to update phy settings\n");
 	}
 
 	if (irq_re_init) {
@@ -4654,6 +4661,7 @@
 	/* Enable TX queues */
 	bnxt_tx_enable(bp);
 	mod_timer(&bp->timer, jiffies + bp->current_interval);
+	bnxt_update_link(bp, true);
 
 	return 0;
 
@@ -5670,22 +5678,16 @@
 	}
 
 	/*initialize the ethool setting copy with NVM settings */
-	if (BNXT_AUTO_MODE(link_info->auto_mode))
-		link_info->autoneg |= BNXT_AUTONEG_SPEED;
-
-	if (link_info->auto_pause_setting & BNXT_LINK_PAUSE_BOTH) {
-		if (link_info->auto_pause_setting == BNXT_LINK_PAUSE_BOTH)
-			link_info->autoneg |= BNXT_AUTONEG_FLOW_CTRL;
+	if (BNXT_AUTO_MODE(link_info->auto_mode)) {
+		link_info->autoneg = BNXT_AUTONEG_SPEED |
+				     BNXT_AUTONEG_FLOW_CTRL;
+		link_info->advertising = link_info->auto_link_speeds;
 		link_info->req_flow_ctrl = link_info->auto_pause_setting;
-	} else if (link_info->force_pause_setting & BNXT_LINK_PAUSE_BOTH) {
+	} else {
+		link_info->req_link_speed = link_info->force_link_speed;
+		link_info->req_duplex = link_info->duplex_setting;
 		link_info->req_flow_ctrl = link_info->force_pause_setting;
 	}
-	link_info->req_duplex = link_info->duplex_setting;
-	if (link_info->autoneg & BNXT_AUTONEG_SPEED)
-		link_info->req_link_speed = link_info->auto_link_speed;
-	else
-		link_info->req_link_speed = link_info->force_link_speed;
-	link_info->advertising = link_info->auto_link_speeds;
 	snprintf(phy_ver, PHY_VER_STR_LEN, " ph %d.%d.%d",
 		 link_info->phy_ver[0],
 		 link_info->phy_ver[1],

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 8af3ca8..2be51b3 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h

@@ -411,8 +411,8 @@
 
 #define BNXT_NUM_TESTS(bp)	0
 
-#define BNXT_DEFAULT_RX_RING_SIZE	1023
-#define BNXT_DEFAULT_TX_RING_SIZE	512
+#define BNXT_DEFAULT_RX_RING_SIZE	511
+#define BNXT_DEFAULT_TX_RING_SIZE	511
 
 #define MAX_TPA		64
 
@@ -523,10 +523,16 @@
 
 struct tx_push_bd {
 	__le32			doorbell;
-	struct tx_bd		txbd1;
+	__le32			tx_bd_len_flags_type;
+	u32			tx_bd_opaque;
 	struct tx_bd_ext	txbd2;
 };
 
+struct tx_push_buffer {
+	struct tx_push_bd	push_bd;
+	u32			data[25];
+};
+
 struct bnxt_tx_ring_info {
 	struct bnxt_napi	*bnapi;
 	u16			tx_prod;
@@ -538,8 +544,9 @@
 
 	dma_addr_t		tx_desc_mapping[MAX_TX_PAGES];
 
-	struct tx_push_bd	*tx_push;
+	struct tx_push_buffer	*tx_push;
 	dma_addr_t		tx_push_mapping;
+	__le64			data_mapping;
 
 #define BNXT_DEV_STATE_CLOSING	0x1
 	u32			dev_state;

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index 922b898..3238817 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c

@@ -486,15 +486,8 @@
 		speed_mask |= SUPPORTED_2500baseX_Full;
 	if (fw_speeds & BNXT_LINK_SPEED_MSK_10GB)
 		speed_mask |= SUPPORTED_10000baseT_Full;
-	/* TODO: support 25GB, 50GB with different cable type */
-	if (fw_speeds & BNXT_LINK_SPEED_MSK_20GB)
-		speed_mask |= SUPPORTED_20000baseMLD2_Full |
-			SUPPORTED_20000baseKR2_Full;
 	if (fw_speeds & BNXT_LINK_SPEED_MSK_40GB)
-		speed_mask |= SUPPORTED_40000baseKR4_Full |
-			SUPPORTED_40000baseCR4_Full |
-			SUPPORTED_40000baseSR4_Full |
-			SUPPORTED_40000baseLR4_Full;
+		speed_mask |= SUPPORTED_40000baseCR4_Full;
 
 	return speed_mask;
 }
@@ -514,15 +507,8 @@
 		speed_mask |= ADVERTISED_2500baseX_Full;
 	if (fw_speeds & BNXT_LINK_SPEED_MSK_10GB)
 		speed_mask |= ADVERTISED_10000baseT_Full;
-	/* TODO: how to advertise 20, 25, 40, 50GB with different cable type ?*/
-	if (fw_speeds & BNXT_LINK_SPEED_MSK_20GB)
-		speed_mask |= ADVERTISED_20000baseMLD2_Full |
-			      ADVERTISED_20000baseKR2_Full;
 	if (fw_speeds & BNXT_LINK_SPEED_MSK_40GB)
-		speed_mask |= ADVERTISED_40000baseKR4_Full |
-			      ADVERTISED_40000baseCR4_Full |
-			      ADVERTISED_40000baseSR4_Full |
-			      ADVERTISED_40000baseLR4_Full;
+		speed_mask |= ADVERTISED_40000baseCR4_Full;
 	return speed_mask;
 }
 
@@ -557,11 +543,12 @@
 	u16 ethtool_speed;
 
 	cmd->supported = bnxt_fw_to_ethtool_support_spds(link_info);
+	cmd->supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause;
 
 	if (link_info->auto_link_speeds)
 		cmd->supported |= SUPPORTED_Autoneg;
 
-	if (BNXT_AUTO_MODE(link_info->auto_mode)) {
+	if (link_info->autoneg) {
 		cmd->advertising =
 			bnxt_fw_to_ethtool_advertised_spds(link_info);
 		cmd->advertising |= ADVERTISED_Autoneg;
@@ -570,28 +557,16 @@
 		cmd->autoneg = AUTONEG_DISABLE;
 		cmd->advertising = 0;
 	}
-	if (link_info->auto_pause_setting & BNXT_LINK_PAUSE_BOTH) {
+	if (link_info->autoneg & BNXT_AUTONEG_FLOW_CTRL) {
 		if ((link_info->auto_pause_setting & BNXT_LINK_PAUSE_BOTH) ==
 		    BNXT_LINK_PAUSE_BOTH) {
 			cmd->advertising |= ADVERTISED_Pause;
-			cmd->supported |= SUPPORTED_Pause;
 		} else {
 			cmd->advertising |= ADVERTISED_Asym_Pause;
-			cmd->supported |= SUPPORTED_Asym_Pause;
 			if (link_info->auto_pause_setting &
 			    BNXT_LINK_PAUSE_RX)
 				cmd->advertising |= ADVERTISED_Pause;
 		}
-	} else if (link_info->force_pause_setting & BNXT_LINK_PAUSE_BOTH) {
-		if ((link_info->force_pause_setting & BNXT_LINK_PAUSE_BOTH) ==
-		    BNXT_LINK_PAUSE_BOTH) {
-			cmd->supported |= SUPPORTED_Pause;
-		} else {
-			cmd->supported |= SUPPORTED_Asym_Pause;
-			if (link_info->force_pause_setting &
-			    BNXT_LINK_PAUSE_RX)
-				cmd->supported |= SUPPORTED_Pause;
-		}
 	}
 
 	cmd->port = PORT_NONE;
@@ -670,6 +645,9 @@
 	if (advertising & ADVERTISED_10000baseT_Full)
 		fw_speed_mask |= BNXT_LINK_SPEED_MSK_10GB;
 
+	if (advertising & ADVERTISED_40000baseCR4_Full)
+		fw_speed_mask |= BNXT_LINK_SPEED_MSK_40GB;
+
 	return fw_speed_mask;
 }
 
@@ -729,7 +707,7 @@
 		speed = ethtool_cmd_speed(cmd);
 		link_info->req_link_speed = bnxt_get_fw_speed(dev, speed);
 		link_info->req_duplex = BNXT_LINK_DUPLEX_FULL;
-		link_info->autoneg &= ~BNXT_AUTONEG_SPEED;
+		link_info->autoneg = 0;
 		link_info->advertising = 0;
 	}
 
@@ -748,8 +726,7 @@
 
 	if (BNXT_VF(bp))
 		return;
-	epause->autoneg = !!(link_info->auto_pause_setting &
-			     BNXT_LINK_PAUSE_BOTH);
+	epause->autoneg = !!(link_info->autoneg & BNXT_AUTONEG_FLOW_CTRL);
 	epause->rx_pause = ((link_info->pause & BNXT_LINK_PAUSE_RX) != 0);
 	epause->tx_pause = ((link_info->pause & BNXT_LINK_PAUSE_TX) != 0);
 }
@@ -765,6 +742,9 @@
 		return rc;
 
 	if (epause->autoneg) {
+		if (!(link_info->autoneg & BNXT_AUTONEG_SPEED))
+			return -EINVAL;
+
 		link_info->autoneg |= BNXT_AUTONEG_FLOW_CTRL;
 		link_info->req_flow_ctrl |= BNXT_LINK_PAUSE_BOTH;
 	} else {

diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index b15a60d..d7e01a7 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c

@@ -2445,8 +2445,7 @@
 	}
 
 	/* Link UP/DOWN event */
-	if ((priv->hw_params->flags & GENET_HAS_MDIO_INTR) &&
-	    (priv->irq0_stat & UMAC_IRQ_LINK_EVENT)) {
+	if (priv->irq0_stat & UMAC_IRQ_LINK_EVENT) {
 		phy_mac_interrupt(priv->phydev,
 				  !!(priv->irq0_stat & UMAC_IRQ_LINK_UP));
 		priv->irq0_stat &= ~UMAC_IRQ_LINK_EVENT;

diff --git a/drivers/net/ethernet/brocade/bna/bna_tx_rx.c b/drivers/net/ethernet/brocade/bna/bna_tx_rx.c
index 04b0d16..95bc470 100644
--- a/drivers/net/ethernet/brocade/bna/bna_tx_rx.c
+++ b/drivers/net/ethernet/brocade/bna/bna_tx_rx.c

@@ -987,7 +987,7 @@
 	if (!list_empty(&rxf->ucast_pending_add_q)) {
 		mac = list_first_entry(&rxf->ucast_pending_add_q,
 				       struct bna_mac, qe);
-		list_add_tail(&mac->qe, &rxf->ucast_active_q);
+		list_move_tail(&mac->qe, &rxf->ucast_active_q);
 		bna_bfi_ucast_req(rxf, mac, BFI_ENET_H2I_MAC_UCAST_ADD_REQ);
 		return 1;
 	}

diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index 8727655..34d269c 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c

@@ -1683,7 +1683,7 @@
 	dev_dbg(&oct->pci_dev->dev, "Creating Droq: %d\n", q_no);
 	/* droq creation and local register settings. */
 	ret_val = octeon_create_droq(oct, q_no, num_descs, desc_size, app_ctx);
-	if (ret_val == -1)
+	if (ret_val < 0)
 		return ret_val;
 
 	if (ret_val == 1) {
@@ -2524,7 +2524,7 @@
 
 	octeon_swap_8B_data(&resp->timestamp, 1);
 
-	if (unlikely((skb_shinfo(skb)->tx_flags | SKBTX_IN_PROGRESS) != 0)) {
+	if (unlikely((skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS) != 0)) {
 		struct skb_shared_hwtstamps ts;
 		u64 ns = resp->timestamp;
 

diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
index 4dba86e..174072b 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c

@@ -983,5 +983,5 @@
 
 create_droq_fail:
 	octeon_delete_droq(oct, q_no);
-	return -1;
+	return -ENOMEM;
 }

diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h
index 6888288..34e9ace 100644
--- a/drivers/net/ethernet/cavium/thunder/nic.h
+++ b/drivers/net/ethernet/cavium/thunder/nic.h

@@ -116,6 +116,15 @@
 #define NIC_PF_INTR_ID_MBOX0		8
 #define NIC_PF_INTR_ID_MBOX1		9
 
+/* Minimum FIFO level before all packets for the CQ are dropped
+ *
+ * This value ensures that once a packet has been "accepted"
+ * for reception it will not get dropped due to non-availability
+ * of CQ descriptor. An errata in HW mandates this value to be
+ * atleast 0x100.
+ */
+#define NICPF_CQM_MIN_DROP_LEVEL       0x100
+
 /* Global timer for CQ timer thresh interrupts
  * Calculated for SCLK of 700Mhz
  * value written should be a 1/16th of what is expected

diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c
index 4dded90..95f17f8 100644
--- a/drivers/net/ethernet/cavium/thunder/nic_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nic_main.c

@@ -304,6 +304,7 @@
 static void nic_init_hw(struct nicpf *nic)
 {
 	int i;
+	u64 cqm_cfg;
 
 	/* Enable NIC HW block */
 	nic_reg_write(nic, NIC_PF_CFG, 0x3);
@@ -340,6 +341,11 @@
 	/* Enable VLAN ethertype matching and stripping */
 	nic_reg_write(nic, NIC_PF_RX_ETYPE_0_7,
 		      (2 << 19) | (ETYPE_ALG_VLAN_STRIP << 16) | ETH_P_8021Q);
+
+	/* Check if HW expected value is higher (could be in future chips) */
+	cqm_cfg = nic_reg_read(nic, NIC_PF_CQM_CFG);
+	if (cqm_cfg < NICPF_CQM_MIN_DROP_LEVEL)
+		nic_reg_write(nic, NIC_PF_CQM_CFG, NICPF_CQM_MIN_DROP_LEVEL);
 }
 
 /* Channel parse index configuration */

diff --git a/drivers/net/ethernet/cavium/thunder/nic_reg.h b/drivers/net/ethernet/cavium/thunder/nic_reg.h
index dd536be..afb10e3 100644
--- a/drivers/net/ethernet/cavium/thunder/nic_reg.h
+++ b/drivers/net/ethernet/cavium/thunder/nic_reg.h

@@ -21,7 +21,7 @@
 #define   NIC_PF_TCP_TIMER			(0x0060)
 #define   NIC_PF_BP_CFG				(0x0080)
 #define   NIC_PF_RRM_CFG			(0x0088)
-#define   NIC_PF_CQM_CF				(0x00A0)
+#define   NIC_PF_CQM_CFG			(0x00A0)
 #define   NIC_PF_CNM_CF				(0x00A8)
 #define   NIC_PF_CNM_STATUS			(0x00B0)
 #define   NIC_PF_CQ_AVG_CFG			(0x00C0)

diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index c24cb2a..a009bc3 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c

@@ -574,8 +574,7 @@
 
 static void nicvf_rcv_pkt_handler(struct net_device *netdev,
 				  struct napi_struct *napi,
-				  struct cmp_queue *cq,
-				  struct cqe_rx_t *cqe_rx, int cqe_type)
+				  struct cqe_rx_t *cqe_rx)
 {
 	struct sk_buff *skb;
 	struct nicvf *nic = netdev_priv(netdev);
@@ -591,7 +590,7 @@
 	}
 
 	/* Check for errors */
-	err = nicvf_check_cqe_rx_errs(nic, cq, cqe_rx);
+	err = nicvf_check_cqe_rx_errs(nic, cqe_rx);
 	if (err && !cqe_rx->rb_cnt)
 		return;
 
@@ -682,8 +681,7 @@
 			   cq_idx, cq_desc->cqe_type);
 		switch (cq_desc->cqe_type) {
 		case CQE_TYPE_RX:
-			nicvf_rcv_pkt_handler(netdev, napi, cq,
-					      cq_desc, CQE_TYPE_RX);
+			nicvf_rcv_pkt_handler(netdev, napi, cq_desc);
 			work_done++;
 		break;
 		case CQE_TYPE_SEND:
@@ -1125,7 +1123,6 @@
 
 	/* Clear multiqset info */
 	nic->pnicvf = nic;
-	nic->sqs_count = 0;
 
 	return 0;
 }
@@ -1354,6 +1351,9 @@
 	drv_stats->tx_frames_ok = stats->tx_ucast_frames_ok +
 				  stats->tx_bcast_frames_ok +
 				  stats->tx_mcast_frames_ok;
+	drv_stats->rx_frames_ok = stats->rx_ucast_frames +
+				  stats->rx_bcast_frames +
+				  stats->rx_mcast_frames;
 	drv_stats->rx_drops = stats->rx_drop_red +
 			      stats->rx_drop_overrun;
 	drv_stats->tx_drops = stats->tx_drops;
@@ -1538,6 +1538,9 @@
 
 	nicvf_send_vf_struct(nic);
 
+	if (!pass1_silicon(nic->pdev))
+		nic->hw_tso = true;
+
 	/* Check if this VF is in QS only mode */
 	if (nic->sqs_mode)
 		return 0;
@@ -1557,9 +1560,6 @@
 
 	netdev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO;
 
-	if (!pass1_silicon(nic->pdev))
-		nic->hw_tso = true;
-
 	netdev->netdev_ops = &nicvf_netdev_ops;
 	netdev->watchdog_timeo = NICVF_TX_TIMEOUT;
 

diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
index d0d1b54..767347b 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c

@@ -1329,16 +1329,12 @@
 }
 
 /* Check for errors in the receive cmp.queue entry */
-int nicvf_check_cqe_rx_errs(struct nicvf *nic,
-			    struct cmp_queue *cq, struct cqe_rx_t *cqe_rx)
+int nicvf_check_cqe_rx_errs(struct nicvf *nic, struct cqe_rx_t *cqe_rx)
 {
 	struct nicvf_hw_stats *stats = &nic->hw_stats;
-	struct nicvf_drv_stats *drv_stats = &nic->drv_stats;
 
-	if (!cqe_rx->err_level && !cqe_rx->err_opcode) {
-		drv_stats->rx_frames_ok++;
+	if (!cqe_rx->err_level && !cqe_rx->err_opcode)
 		return 0;
-	}
 
 	if (netif_msg_rx_err(nic))
 		netdev_err(nic->netdev,

diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
index c5030a7..6673e11 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h

@@ -338,8 +338,7 @@
 /* Stats */
 void nicvf_update_rq_stats(struct nicvf *nic, int rq_idx);
 void nicvf_update_sq_stats(struct nicvf *nic, int sq_idx);
-int nicvf_check_cqe_rx_errs(struct nicvf *nic,
-			    struct cmp_queue *cq, struct cqe_rx_t *cqe_rx);
+int nicvf_check_cqe_rx_errs(struct nicvf *nic, struct cqe_rx_t *cqe_rx);
 int nicvf_check_cqe_tx_errs(struct nicvf *nic,
 			    struct cmp_queue *cq, struct cqe_send_t *cqe_tx);
 #endif /* NICVF_QUEUES_H */

diff --git a/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c b/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c
index ee04caa..a89721f 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c

@@ -681,6 +681,24 @@
 	return t3_seeprom_write(adapter, EEPROM_STAT_ADDR, enable ? 0xc : 0);
 }
 
+static int vpdstrtouint(char *s, int len, unsigned int base, unsigned int *val)
+{
+	char tok[len + 1];
+
+	memcpy(tok, s, len);
+	tok[len] = 0;
+	return kstrtouint(strim(tok), base, val);
+}
+
+static int vpdstrtou16(char *s, int len, unsigned int base, u16 *val)
+{
+	char tok[len + 1];
+
+	memcpy(tok, s, len);
+	tok[len] = 0;
+	return kstrtou16(strim(tok), base, val);
+}
+
 /**
  *	get_vpd_params - read VPD parameters from VPD EEPROM
  *	@adapter: adapter to read
@@ -709,19 +727,19 @@
 			return ret;
 	}
 
-	ret = kstrtouint(vpd.cclk_data, 10, &p->cclk);
+	ret = vpdstrtouint(vpd.cclk_data, vpd.cclk_len, 10, &p->cclk);
 	if (ret)
 		return ret;
-	ret = kstrtouint(vpd.mclk_data, 10, &p->mclk);
+	ret = vpdstrtouint(vpd.mclk_data, vpd.mclk_len, 10, &p->mclk);
 	if (ret)
 		return ret;
-	ret = kstrtouint(vpd.uclk_data, 10, &p->uclk);
+	ret = vpdstrtouint(vpd.uclk_data, vpd.uclk_len, 10, &p->uclk);
 	if (ret)
 		return ret;
-	ret = kstrtouint(vpd.mdc_data, 10, &p->mdc);
+	ret = vpdstrtouint(vpd.mdc_data, vpd.mdc_len, 10, &p->mdc);
 	if (ret)
 		return ret;
-	ret = kstrtouint(vpd.mt_data, 10, &p->mem_timing);
+	ret = vpdstrtouint(vpd.mt_data, vpd.mt_len, 10, &p->mem_timing);
 	if (ret)
 		return ret;
 	memcpy(p->sn, vpd.sn_data, SERNUM_LEN);
@@ -733,10 +751,12 @@
 	} else {
 		p->port_type[0] = hex_to_bin(vpd.port0_data[0]);
 		p->port_type[1] = hex_to_bin(vpd.port1_data[0]);
-		ret = kstrtou16(vpd.xaui0cfg_data, 16, &p->xauicfg[0]);
+		ret = vpdstrtou16(vpd.xaui0cfg_data, vpd.xaui0cfg_len, 16,
+				  &p->xauicfg[0]);
 		if (ret)
 			return ret;
-		ret = kstrtou16(vpd.xaui1cfg_data, 16, &p->xauicfg[1]);
+		ret = vpdstrtou16(vpd.xaui1cfg_data, vpd.xaui1cfg_len, 16,
+				  &p->xauicfg[1]);
 		if (ret)
 			return ret;
 	}

diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h
index a8dda63..06bc2d2 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h

@@ -165,6 +165,7 @@
 	CH_PCI_ID_TABLE_FENTRY(0x5098),	/* Custom 2x40G QSFP */
 	CH_PCI_ID_TABLE_FENTRY(0x5099),	/* Custom 2x40G QSFP */
 	CH_PCI_ID_TABLE_FENTRY(0x509a),	/* Custom T520-CR */
+	CH_PCI_ID_TABLE_FENTRY(0x509b),	/* Custom T540-CR LOM */
 
 	/* T6 adapters:
 	 */

diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c
index cf94b72..48d9194 100644
--- a/drivers/net/ethernet/davicom/dm9000.c
+++ b/drivers/net/ethernet/davicom/dm9000.c

@@ -128,7 +128,6 @@
 	struct resource *data_res;
 	struct resource	*addr_req;   /* resources requested */
 	struct resource *data_req;
-	struct resource *irq_res;
 
 	int		 irq_wake;
 
@@ -1300,22 +1299,16 @@
 dm9000_open(struct net_device *dev)
 {
 	struct board_info *db = netdev_priv(dev);
-	unsigned long irqflags = db->irq_res->flags & IRQF_TRIGGER_MASK;
 
 	if (netif_msg_ifup(db))
 		dev_dbg(db->dev, "enabling %s\n", dev->name);
 
-	/* If there is no IRQ type specified, default to something that
-	 * may work, and tell the user that this is a problem */
-
-	if (irqflags == IRQF_TRIGGER_NONE)
-		irqflags = irq_get_trigger_type(dev->irq);
-
-	if (irqflags == IRQF_TRIGGER_NONE)
+	/* If there is no IRQ type specified, tell the user that this is a
+	 * problem
+	 */
+	if (irq_get_trigger_type(dev->irq) == IRQF_TRIGGER_NONE)
 		dev_warn(db->dev, "WARNING: no IRQ resource flags set.\n");
 
-	irqflags |= IRQF_SHARED;
-
 	/* GPIO0 on pre-activate PHY, Reg 1F is not set by reset */
 	iow(db, DM9000_GPR, 0);	/* REG_1F bit0 activate phyxcer */
 	mdelay(1); /* delay needs by DM9000B */
@@ -1323,7 +1316,8 @@
 	/* Initialize DM9000 board */
 	dm9000_init_dm9000(dev);
 
-	if (request_irq(dev->irq, dm9000_interrupt, irqflags, dev->name, dev))
+	if (request_irq(dev->irq, dm9000_interrupt, IRQF_SHARED,
+			dev->name, dev))
 		return -EAGAIN;
 	/* Now that we have an interrupt handler hooked up we can unmask
 	 * our interrupts
@@ -1500,15 +1494,22 @@
 
 	db->addr_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	db->data_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-	db->irq_res  = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
 
-	if (db->addr_res == NULL || db->data_res == NULL ||
-	    db->irq_res == NULL) {
-		dev_err(db->dev, "insufficient resources\n");
+	if (!db->addr_res || !db->data_res) {
+		dev_err(db->dev, "insufficient resources addr=%p data=%p\n",
+			db->addr_res, db->data_res);
 		ret = -ENOENT;
 		goto out;
 	}
 
+	ndev->irq = platform_get_irq(pdev, 0);
+	if (ndev->irq < 0) {
+		dev_err(db->dev, "interrupt resource unavailable: %d\n",
+			ndev->irq);
+		ret = ndev->irq;
+		goto out;
+	}
+
 	db->irq_wake = platform_get_irq(pdev, 1);
 	if (db->irq_wake >= 0) {
 		dev_dbg(db->dev, "wakeup irq %d\n", db->irq_wake);
@@ -1570,7 +1571,6 @@
 
 	/* fill in parameters for net-dev structure */
 	ndev->base_addr = (unsigned long)db->io_addr;
-	ndev->irq	= db->irq_res->start;
 
 	/* ensure at least we have a default set of IO routines */
 	dm9000_set_io(db, iosize);

diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h
index cf83783..f975129 100644
--- a/drivers/net/ethernet/emulex/benet/be.h
+++ b/drivers/net/ethernet/emulex/benet/be.h

@@ -531,6 +531,7 @@
 
 	struct delayed_work be_err_detection_work;
 	u8 err_flags;
+	bool pcicfg_mapped;	/* pcicfg obtained via pci_iomap() */
 	u32 flags;
 	u32 cmd_privileges;
 	/* Ethtool knobs and info */

diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h
index 241819b..6d9a8d7 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.h
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.h

@@ -622,10 +622,13 @@
 					 BE_IF_FLAGS_VLAN_PROMISCUOUS |\
 					 BE_IF_FLAGS_MCAST_PROMISCUOUS)
 
-#define BE_IF_EN_FLAGS	(BE_IF_FLAGS_BROADCAST | BE_IF_FLAGS_PASS_L3L4_ERRORS |\
-			BE_IF_FLAGS_MULTICAST | BE_IF_FLAGS_UNTAGGED)
+#define BE_IF_FILT_FLAGS_BASIC (BE_IF_FLAGS_BROADCAST | \
+				BE_IF_FLAGS_PASS_L3L4_ERRORS | \
+				BE_IF_FLAGS_UNTAGGED)
 
-#define BE_IF_ALL_FILT_FLAGS	(BE_IF_EN_FLAGS | BE_IF_FLAGS_ALL_PROMISCUOUS)
+#define BE_IF_ALL_FILT_FLAGS	(BE_IF_FILT_FLAGS_BASIC | \
+				 BE_IF_FLAGS_MULTICAST | \
+				 BE_IF_FLAGS_ALL_PROMISCUOUS)
 
 /* An RX interface is an object with one or more MAC addresses and
  * filtering capabilities. */

diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index f99de36..d1cf127 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c

@@ -125,6 +125,11 @@
 	"Unknown"
 };
 
+#define BE_VF_IF_EN_FLAGS	(BE_IF_FLAGS_UNTAGGED | \
+				 BE_IF_FLAGS_BROADCAST | \
+				 BE_IF_FLAGS_MULTICAST | \
+				 BE_IF_FLAGS_PASS_L3L4_ERRORS)
+
 static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
 {
 	struct be_dma_mem *mem = &q->dma_mem;
@@ -3537,7 +3542,7 @@
 {
 	int status;
 
-	status = be_cmd_rx_filter(adapter, BE_IF_EN_FLAGS, ON);
+	status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
 	if (status)
 		return status;
 
@@ -3857,8 +3862,7 @@
 	int status;
 
 	/* If a FW profile exists, then cap_flags are updated */
-	cap_flags = BE_IF_FLAGS_UNTAGGED | BE_IF_FLAGS_BROADCAST |
-		    BE_IF_FLAGS_MULTICAST | BE_IF_FLAGS_PASS_L3L4_ERRORS;
+	cap_flags = BE_VF_IF_EN_FLAGS;
 
 	for_all_vfs(adapter, vf_cfg, vf) {
 		if (!BE3_chip(adapter)) {
@@ -3874,10 +3878,8 @@
 			}
 		}
 
-		en_flags = cap_flags & (BE_IF_FLAGS_UNTAGGED |
-					BE_IF_FLAGS_BROADCAST |
-					BE_IF_FLAGS_MULTICAST |
-					BE_IF_FLAGS_PASS_L3L4_ERRORS);
+		/* PF should enable IF flags during proxy if_create call */
+		en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
 		status = be_cmd_if_create(adapter, cap_flags, en_flags,
 					  &vf_cfg->if_handle, vf + 1);
 		if (status)
@@ -4968,6 +4970,8 @@
 		pci_iounmap(adapter->pdev, adapter->csr);
 	if (adapter->db)
 		pci_iounmap(adapter->pdev, adapter->db);
+	if (adapter->pcicfg && adapter->pcicfg_mapped)
+		pci_iounmap(adapter->pdev, adapter->pcicfg);
 }
 
 static int db_bar(struct be_adapter *adapter)
@@ -5019,8 +5023,10 @@
 			if (!addr)
 				goto pci_map_err;
 			adapter->pcicfg = addr;
+			adapter->pcicfg_mapped = true;
 		} else {
 			adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
+			adapter->pcicfg_mapped = false;
 		}
 	}
 

diff --git a/drivers/net/ethernet/ethoc.c b/drivers/net/ethernet/ethoc.c
index 62fa136..41b01064 100644
--- a/drivers/net/ethernet/ethoc.c
+++ b/drivers/net/ethernet/ethoc.c

@@ -1265,7 +1265,6 @@
 
 		if (priv->mdio) {
 			mdiobus_unregister(priv->mdio);
-			kfree(priv->mdio->irq);
 			mdiobus_free(priv->mdio);
 		}
 		if (priv->clk)

diff --git a/drivers/net/ethernet/freescale/fman/fman.c b/drivers/net/ethernet/freescale/fman/fman.c
index 623aa1c..79a210a 100644
--- a/drivers/net/ethernet/freescale/fman/fman.c
+++ b/drivers/net/ethernet/freescale/fman/fman.c

@@ -2791,6 +2791,8 @@
 		goto fman_free;
 	}
 
+	fman->dev = &of_dev->dev;
+
 	return fman;
 
 fman_node_put:
@@ -2845,8 +2847,6 @@
 
 	dev_set_drvdata(dev, fman);
 
-	fman->dev = dev;
-
 	dev_dbg(dev, "FMan%d probed\n", fman->dts_params.id);
 
 	return 0;

diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index 2aa7b40..b9ecf19 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c

@@ -1111,8 +1111,10 @@
 
 	if ((SVR_SOC_VER(svr) == SVR_8548) && (SVR_REV(svr) == 0x20))
 		priv->errata |= GFAR_ERRATA_12;
+	/* P2020/P1010 Rev 1; MPC8548 Rev 2 */
 	if (((SVR_SOC_VER(svr) == SVR_P2020) && (SVR_REV(svr) < 0x20)) ||
-	    ((SVR_SOC_VER(svr) == SVR_P2010) && (SVR_REV(svr) < 0x20)))
+	    ((SVR_SOC_VER(svr) == SVR_P2010) && (SVR_REV(svr) < 0x20)) ||
+	    ((SVR_SOC_VER(svr) == SVR_8548) && (SVR_REV(svr) < 0x31)))
 		priv->errata |= GFAR_ERRATA_76; /* aka eTSEC 20 */
 }
 #endif

diff --git a/drivers/net/ethernet/fujitsu/fmvj18x_cs.c b/drivers/net/ethernet/fujitsu/fmvj18x_cs.c
index a7139f5..678f501 100644
--- a/drivers/net/ethernet/fujitsu/fmvj18x_cs.c
+++ b/drivers/net/ethernet/fujitsu/fmvj18x_cs.c

@@ -469,8 +469,8 @@
 		    goto failed;
 	    }
 	    /* Read MACID from CIS */
-	    for (i = 5; i < 11; i++)
-		    dev->dev_addr[i] = buf[i];
+	    for (i = 0; i < 6; i++)
+		    dev->dev_addr[i] = buf[i + 5];
 	    kfree(buf);
 	} else {
 	    if (pcmcia_get_mac_from_cis(link, dev))

diff --git a/drivers/net/ethernet/hisilicon/Kconfig b/drivers/net/ethernet/hisilicon/Kconfig
index 74beb18..4ccc032 100644
--- a/drivers/net/ethernet/hisilicon/Kconfig
+++ b/drivers/net/ethernet/hisilicon/Kconfig

@@ -25,6 +25,7 @@
 
 config HIP04_ETH
 	tristate "HISILICON P04 Ethernet support"
+	depends on HAS_IOMEM	# For MFD_SYSCON
 	select MARVELL_PHY
 	select MFD_SYSCON
 	select HNS_MDIO

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
index a0070d0..d4f92ed 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c

@@ -675,8 +675,12 @@
 {
 	int ret;
 	struct hnae_vf_cb *vf_cb = hns_ae_get_vf_cb(handle);
+	struct hns_mac_cb *mac_cb = hns_get_mac_cb(handle);
 
 	switch (loop) {
+	case MAC_INTERNALLOOP_PHY:
+		ret = 0;
+		break;
 	case MAC_INTERNALLOOP_SERDES:
 		ret = hns_mac_config_sds_loopback(vf_cb->mac_cb, en);
 		break;
@@ -686,6 +690,10 @@
 	default:
 		ret = -EINVAL;
 	}
+
+	if (!ret)
+		hns_dsaf_set_inner_lb(mac_cb->dsaf_dev, mac_cb->mac_id, en);
+
 	return ret;
 }
 

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
index 9439f04..38fc5be 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c

@@ -230,6 +230,30 @@
 	}
 }
 
+static void hns_dsaf_inner_qid_cfg(struct dsaf_device *dsaf_dev)
+{
+	u16 max_q_per_vf, max_vfn;
+	u32 q_id, q_num_per_port;
+	u32 mac_id;
+
+	if (AE_IS_VER1(dsaf_dev->dsaf_ver))
+		return;
+
+	hns_rcb_get_queue_mode(dsaf_dev->dsaf_mode,
+			       HNS_DSAF_COMM_SERVICE_NW_IDX,
+			       &max_vfn, &max_q_per_vf);
+	q_num_per_port = max_vfn * max_q_per_vf;
+
+	for (mac_id = 0, q_id = 0; mac_id < DSAF_SERVICE_NW_NUM; mac_id++) {
+		dsaf_set_dev_field(dsaf_dev,
+				   DSAFV2_SERDES_LBK_0_REG + 4 * mac_id,
+				   DSAFV2_SERDES_LBK_QID_M,
+				   DSAFV2_SERDES_LBK_QID_S,
+				   q_id);
+		q_id += q_num_per_port;
+	}
+}
+
 /**
  * hns_dsaf_sw_port_type_cfg - cfg sw type
  * @dsaf_id: dsa fabric id
@@ -691,6 +715,16 @@
 	dsaf_set_dev_bit(dsaf_dev, DSAF_CFG_0_REG, DSAF_CFG_MIX_MODE_S, !!en);
 }
 
+void hns_dsaf_set_inner_lb(struct dsaf_device *dsaf_dev, u32 mac_id, u32 en)
+{
+	if (AE_IS_VER1(dsaf_dev->dsaf_ver) ||
+	    dsaf_dev->mac_cb[mac_id].mac_type == HNAE_PORT_DEBUG)
+		return;
+
+	dsaf_set_dev_bit(dsaf_dev, DSAFV2_SERDES_LBK_0_REG + 4 * mac_id,
+			 DSAFV2_SERDES_LBK_EN_B, !!en);
+}
+
 /**
  * hns_dsaf_tbl_stat_en - tbl
  * @dsaf_id: dsa fabric id
@@ -1022,6 +1056,9 @@
 	/* set promisc def queue id */
 	hns_dsaf_mix_def_qid_cfg(dsaf_dev);
 
+	/* set inner loopback queue id */
+	hns_dsaf_inner_qid_cfg(dsaf_dev);
+
 	/* in non switch mode, set all port to access mode */
 	hns_dsaf_sw_port_type_cfg(dsaf_dev, DSAF_SW_PORT_TYPE_NON_VLAN);
 

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h
index 40205b9..5fea226 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h

@@ -417,5 +417,6 @@
 void hns_dsaf_get_regs(struct dsaf_device *ddev, u32 port, void *data);
 int hns_dsaf_get_regs_count(void);
 void hns_dsaf_set_promisc_mode(struct dsaf_device *dsaf_dev, u32 en);
+void hns_dsaf_set_inner_lb(struct dsaf_device *dsaf_dev, u32 mac_id, u32 en);
 
 #endif /* __HNS_DSAF_MAIN_H__ */

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h
index f0c4f9b0..60d695d 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h

@@ -134,6 +134,7 @@
 #define DSAF_XGE_INT_STS_0_REG		0x1C0
 #define DSAF_PPE_INT_STS_0_REG		0x1E0
 #define DSAF_ROCEE_INT_STS_0_REG	0x200
+#define DSAFV2_SERDES_LBK_0_REG         0x220
 #define DSAF_PPE_QID_CFG_0_REG		0x300
 #define DSAF_SW_PORT_TYPE_0_REG		0x320
 #define DSAF_STP_PORT_TYPE_0_REG	0x340
@@ -857,6 +858,10 @@
 #define PPEV2_CFG_RSS_TBL_4N3_S	24
 #define PPEV2_CFG_RSS_TBL_4N3_M	(((1UL << 5) - 1) << PPEV2_CFG_RSS_TBL_4N3_S)
 
+#define DSAFV2_SERDES_LBK_EN_B  8
+#define DSAFV2_SERDES_LBK_QID_S 0
+#define DSAFV2_SERDES_LBK_QID_M	(((1UL << 8) - 1) << DSAFV2_SERDES_LBK_QID_S)
+
 #define PPE_CNT_CLR_CE_B	0
 #define PPE_CNT_CLR_SNAP_EN_B	1
 

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
index 3df2284..3c4a3bc 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c

@@ -295,8 +295,10 @@
 
 	switch (loop) {
 	case MAC_INTERNALLOOP_PHY:
-		if ((phy_dev) && (!phy_dev->is_c45))
+		if ((phy_dev) && (!phy_dev->is_c45)) {
 			ret = hns_nic_config_phy_loopback(phy_dev, 0x1);
+			ret |= h->dev->ops->set_loopback(h, loop, 0x1);
+		}
 		break;
 	case MAC_INTERNALLOOP_MAC:
 		if ((h->dev->ops->set_loopback) &&
@@ -376,6 +378,7 @@
 			       struct sk_buff *skb)
 {
 	struct net_device *ndev;
+	struct hns_nic_priv *priv;
 	struct hnae_ring *ring;
 	struct netdev_queue *dev_queue;
 	struct sk_buff *new_skb;
@@ -385,8 +388,17 @@
 	char buff[33]; /* 32B data and the last character '\0' */
 
 	if (!ring_data) { /* Just for doing create frame*/
+		ndev = skb->dev;
+		priv = netdev_priv(ndev);
+
 		frame_size = skb->len;
 		memset(skb->data, 0xFF, frame_size);
+		if ((!AE_IS_VER1(priv->enet_ver)) &&
+		    (priv->ae_handle->port_type == HNAE_PORT_SERVICE)) {
+			memcpy(skb->data, ndev->dev_addr, 6);
+			skb->data[5] += 0x1f;
+		}
+
 		frame_size &= ~1ul;
 		memset(&skb->data[frame_size / 2], 0xAA, frame_size / 2 - 1);
 		memset(&skb->data[frame_size / 2 + 10], 0xBE,
@@ -486,6 +498,7 @@
 
 	/* place data into test skb */
 	(void)skb_put(skb, size);
+	skb->dev = ndev;
 	__lb_other_process(NULL, skb);
 	skb->queue_mapping = NIC_LB_TEST_RING_ID;
 

diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c
index 335417b..ebe6071 100644
--- a/drivers/net/ethernet/ibm/ibmveth.c
+++ b/drivers/net/ethernet/ibm/ibmveth.c

@@ -1166,7 +1166,10 @@
 	if (!firmware_has_feature(FW_FEATURE_CMO))
 		netdev_err(netdev, "tx: unable to map xmit buffer\n");
 	adapter->tx_map_failed++;
-	skb_linearize(skb);
+	if (skb_linearize(skb)) {
+		netdev->stats.tx_dropped++;
+		goto out;
+	}
 	force_bounce = 1;
 	goto retry_bounce;
 }

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 7d657084..6e9e16ee 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c

@@ -1348,44 +1348,44 @@
 	crq.request_capability.cmd = REQUEST_CAPABILITY;
 
 	crq.request_capability.capability = cpu_to_be16(REQ_TX_QUEUES);
-	crq.request_capability.number = cpu_to_be32(adapter->req_tx_queues);
+	crq.request_capability.number = cpu_to_be64(adapter->req_tx_queues);
 	ibmvnic_send_crq(adapter, &crq);
 
 	crq.request_capability.capability = cpu_to_be16(REQ_RX_QUEUES);
-	crq.request_capability.number = cpu_to_be32(adapter->req_rx_queues);
+	crq.request_capability.number = cpu_to_be64(adapter->req_rx_queues);
 	ibmvnic_send_crq(adapter, &crq);
 
 	crq.request_capability.capability = cpu_to_be16(REQ_RX_ADD_QUEUES);
-	crq.request_capability.number = cpu_to_be32(adapter->req_rx_add_queues);
+	crq.request_capability.number = cpu_to_be64(adapter->req_rx_add_queues);
 	ibmvnic_send_crq(adapter, &crq);
 
 	crq.request_capability.capability =
 	    cpu_to_be16(REQ_TX_ENTRIES_PER_SUBCRQ);
 	crq.request_capability.number =
-	    cpu_to_be32(adapter->req_tx_entries_per_subcrq);
+	    cpu_to_be64(adapter->req_tx_entries_per_subcrq);
 	ibmvnic_send_crq(adapter, &crq);
 
 	crq.request_capability.capability =
 	    cpu_to_be16(REQ_RX_ADD_ENTRIES_PER_SUBCRQ);
 	crq.request_capability.number =
-	    cpu_to_be32(adapter->req_rx_add_entries_per_subcrq);
+	    cpu_to_be64(adapter->req_rx_add_entries_per_subcrq);
 	ibmvnic_send_crq(adapter, &crq);
 
 	crq.request_capability.capability = cpu_to_be16(REQ_MTU);
-	crq.request_capability.number = cpu_to_be32(adapter->req_mtu);
+	crq.request_capability.number = cpu_to_be64(adapter->req_mtu);
 	ibmvnic_send_crq(adapter, &crq);
 
 	if (adapter->netdev->flags & IFF_PROMISC) {
 		if (adapter->promisc_supported) {
 			crq.request_capability.capability =
 			    cpu_to_be16(PROMISC_REQUESTED);
-			crq.request_capability.number = cpu_to_be32(1);
+			crq.request_capability.number = cpu_to_be64(1);
 			ibmvnic_send_crq(adapter, &crq);
 		}
 	} else {
 		crq.request_capability.capability =
 		    cpu_to_be16(PROMISC_REQUESTED);
-		crq.request_capability.number = cpu_to_be32(0);
+		crq.request_capability.number = cpu_to_be64(0);
 		ibmvnic_send_crq(adapter, &crq);
 	}
 
@@ -2312,93 +2312,93 @@
 	switch (be16_to_cpu(crq->query_capability.capability)) {
 	case MIN_TX_QUEUES:
 		adapter->min_tx_queues =
-		    be32_to_cpu(crq->query_capability.number);
+		    be64_to_cpu(crq->query_capability.number);
 		netdev_dbg(netdev, "min_tx_queues = %lld\n",
 			   adapter->min_tx_queues);
 		break;
 	case MIN_RX_QUEUES:
 		adapter->min_rx_queues =
-		    be32_to_cpu(crq->query_capability.number);
+		    be64_to_cpu(crq->query_capability.number);
 		netdev_dbg(netdev, "min_rx_queues = %lld\n",
 			   adapter->min_rx_queues);
 		break;
 	case MIN_RX_ADD_QUEUES:
 		adapter->min_rx_add_queues =
-		    be32_to_cpu(crq->query_capability.number);
+		    be64_to_cpu(crq->query_capability.number);
 		netdev_dbg(netdev, "min_rx_add_queues = %lld\n",
 			   adapter->min_rx_add_queues);
 		break;
 	case MAX_TX_QUEUES:
 		adapter->max_tx_queues =
-		    be32_to_cpu(crq->query_capability.number);
+		    be64_to_cpu(crq->query_capability.number);
 		netdev_dbg(netdev, "max_tx_queues = %lld\n",
 			   adapter->max_tx_queues);
 		break;
 	case MAX_RX_QUEUES:
 		adapter->max_rx_queues =
-		    be32_to_cpu(crq->query_capability.number);
+		    be64_to_cpu(crq->query_capability.number);
 		netdev_dbg(netdev, "max_rx_queues = %lld\n",
 			   adapter->max_rx_queues);
 		break;
 	case MAX_RX_ADD_QUEUES:
 		adapter->max_rx_add_queues =
-		    be32_to_cpu(crq->query_capability.number);
+		    be64_to_cpu(crq->query_capability.number);
 		netdev_dbg(netdev, "max_rx_add_queues = %lld\n",
 			   adapter->max_rx_add_queues);
 		break;
 	case MIN_TX_ENTRIES_PER_SUBCRQ:
 		adapter->min_tx_entries_per_subcrq =
-		    be32_to_cpu(crq->query_capability.number);
+		    be64_to_cpu(crq->query_capability.number);
 		netdev_dbg(netdev, "min_tx_entries_per_subcrq = %lld\n",
 			   adapter->min_tx_entries_per_subcrq);
 		break;
 	case MIN_RX_ADD_ENTRIES_PER_SUBCRQ:
 		adapter->min_rx_add_entries_per_subcrq =
-		    be32_to_cpu(crq->query_capability.number);
+		    be64_to_cpu(crq->query_capability.number);
 		netdev_dbg(netdev, "min_rx_add_entrs_per_subcrq = %lld\n",
 			   adapter->min_rx_add_entries_per_subcrq);
 		break;
 	case MAX_TX_ENTRIES_PER_SUBCRQ:
 		adapter->max_tx_entries_per_subcrq =
-		    be32_to_cpu(crq->query_capability.number);
+		    be64_to_cpu(crq->query_capability.number);
 		netdev_dbg(netdev, "max_tx_entries_per_subcrq = %lld\n",
 			   adapter->max_tx_entries_per_subcrq);
 		break;
 	case MAX_RX_ADD_ENTRIES_PER_SUBCRQ:
 		adapter->max_rx_add_entries_per_subcrq =
-		    be32_to_cpu(crq->query_capability.number);
+		    be64_to_cpu(crq->query_capability.number);
 		netdev_dbg(netdev, "max_rx_add_entrs_per_subcrq = %lld\n",
 			   adapter->max_rx_add_entries_per_subcrq);
 		break;
 	case TCP_IP_OFFLOAD:
 		adapter->tcp_ip_offload =
-		    be32_to_cpu(crq->query_capability.number);
+		    be64_to_cpu(crq->query_capability.number);
 		netdev_dbg(netdev, "tcp_ip_offload = %lld\n",
 			   adapter->tcp_ip_offload);
 		break;
 	case PROMISC_SUPPORTED:
 		adapter->promisc_supported =
-		    be32_to_cpu(crq->query_capability.number);
+		    be64_to_cpu(crq->query_capability.number);
 		netdev_dbg(netdev, "promisc_supported = %lld\n",
 			   adapter->promisc_supported);
 		break;
 	case MIN_MTU:
-		adapter->min_mtu = be32_to_cpu(crq->query_capability.number);
+		adapter->min_mtu = be64_to_cpu(crq->query_capability.number);
 		netdev_dbg(netdev, "min_mtu = %lld\n", adapter->min_mtu);
 		break;
 	case MAX_MTU:
-		adapter->max_mtu = be32_to_cpu(crq->query_capability.number);
+		adapter->max_mtu = be64_to_cpu(crq->query_capability.number);
 		netdev_dbg(netdev, "max_mtu = %lld\n", adapter->max_mtu);
 		break;
 	case MAX_MULTICAST_FILTERS:
 		adapter->max_multicast_filters =
-		    be32_to_cpu(crq->query_capability.number);
+		    be64_to_cpu(crq->query_capability.number);
 		netdev_dbg(netdev, "max_multicast_filters = %lld\n",
 			   adapter->max_multicast_filters);
 		break;
 	case VLAN_HEADER_INSERTION:
 		adapter->vlan_header_insertion =
-		    be32_to_cpu(crq->query_capability.number);
+		    be64_to_cpu(crq->query_capability.number);
 		if (adapter->vlan_header_insertion)
 			netdev->features |= NETIF_F_HW_VLAN_STAG_TX;
 		netdev_dbg(netdev, "vlan_header_insertion = %lld\n",
@@ -2406,43 +2406,43 @@
 		break;
 	case MAX_TX_SG_ENTRIES:
 		adapter->max_tx_sg_entries =
-		    be32_to_cpu(crq->query_capability.number);
+		    be64_to_cpu(crq->query_capability.number);
 		netdev_dbg(netdev, "max_tx_sg_entries = %lld\n",
 			   adapter->max_tx_sg_entries);
 		break;
 	case RX_SG_SUPPORTED:
 		adapter->rx_sg_supported =
-		    be32_to_cpu(crq->query_capability.number);
+		    be64_to_cpu(crq->query_capability.number);
 		netdev_dbg(netdev, "rx_sg_supported = %lld\n",
 			   adapter->rx_sg_supported);
 		break;
 	case OPT_TX_COMP_SUB_QUEUES:
 		adapter->opt_tx_comp_sub_queues =
-		    be32_to_cpu(crq->query_capability.number);
+		    be64_to_cpu(crq->query_capability.number);
 		netdev_dbg(netdev, "opt_tx_comp_sub_queues = %lld\n",
 			   adapter->opt_tx_comp_sub_queues);
 		break;
 	case OPT_RX_COMP_QUEUES:
 		adapter->opt_rx_comp_queues =
-		    be32_to_cpu(crq->query_capability.number);
+		    be64_to_cpu(crq->query_capability.number);
 		netdev_dbg(netdev, "opt_rx_comp_queues = %lld\n",
 			   adapter->opt_rx_comp_queues);
 		break;
 	case OPT_RX_BUFADD_Q_PER_RX_COMP_Q:
 		adapter->opt_rx_bufadd_q_per_rx_comp_q =
-		    be32_to_cpu(crq->query_capability.number);
+		    be64_to_cpu(crq->query_capability.number);
 		netdev_dbg(netdev, "opt_rx_bufadd_q_per_rx_comp_q = %lld\n",
 			   adapter->opt_rx_bufadd_q_per_rx_comp_q);
 		break;
 	case OPT_TX_ENTRIES_PER_SUBCRQ:
 		adapter->opt_tx_entries_per_subcrq =
-		    be32_to_cpu(crq->query_capability.number);
+		    be64_to_cpu(crq->query_capability.number);
 		netdev_dbg(netdev, "opt_tx_entries_per_subcrq = %lld\n",
 			   adapter->opt_tx_entries_per_subcrq);
 		break;
 	case OPT_RXBA_ENTRIES_PER_SUBCRQ:
 		adapter->opt_rxba_entries_per_subcrq =
-		    be32_to_cpu(crq->query_capability.number);
+		    be64_to_cpu(crq->query_capability.number);
 		netdev_dbg(netdev, "opt_rxba_entries_per_subcrq = %lld\n",
 			   adapter->opt_rxba_entries_per_subcrq);
 		break;

diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
index 1242925..1a9993c 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h

@@ -319,10 +319,8 @@
 	u8 first;
 	u8 cmd;
 	__be16 capability; /* one of ibmvnic_capabilities */
+	__be64 number;
 	struct ibmvnic_rc rc;
-	__be32 number; /*FIX: should be __be64, but I'm getting the least
-			* significant word first
-			*/
 } __packed __aligned(8);
 
 struct ibmvnic_login {

diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig
index fa593dd..3772f3a 100644
--- a/drivers/net/ethernet/intel/Kconfig
+++ b/drivers/net/ethernet/intel/Kconfig

@@ -83,6 +83,15 @@
 	  To compile this driver as a module, choose M here. The module
 	  will be called e1000e.
 
+config E1000E_HWTS
+	bool "Support HW cross-timestamp on PCH devices"
+	default y
+	depends on E1000E && X86
+	---help---
+	 Say Y to enable hardware supported cross-timestamping on PCH
+	 devices. The cross-timestamp is available through the PTP clock
+	 driver precise cross-timestamp ioctl (PTP_SYS_OFFSET_PRECISE).
+
 config IGB
 	tristate "Intel(R) 82575/82576 PCI-Express Gigabit Ethernet support"
 	depends on PCI

diff --git a/drivers/net/ethernet/intel/e1000e/defines.h b/drivers/net/ethernet/intel/e1000e/defines.h
index f7c7804..0641c00 100644
--- a/drivers/net/ethernet/intel/e1000e/defines.h
+++ b/drivers/net/ethernet/intel/e1000e/defines.h

@@ -528,6 +528,11 @@
 #define E1000_RXCW_C          0x20000000        /* Receive config */
 #define E1000_RXCW_SYNCH      0x40000000        /* Receive config synch */
 
+/* HH Time Sync */
+#define E1000_TSYNCTXCTL_MAX_ALLOWED_DLY_MASK	0x0000F000 /* max delay */
+#define E1000_TSYNCTXCTL_SYNC_COMP		0x40000000 /* sync complete */
+#define E1000_TSYNCTXCTL_START_SYNC		0x80000000 /* initiate sync */
+
 #define E1000_TSYNCTXCTL_VALID		0x00000001 /* Tx timestamp valid */
 #define E1000_TSYNCTXCTL_ENABLED	0x00000010 /* enable Tx timestamping */
 

diff --git a/drivers/net/ethernet/intel/e1000e/ptp.c b/drivers/net/ethernet/intel/e1000e/ptp.c
index 25a0ad5..e2ff3ef 100644
--- a/drivers/net/ethernet/intel/e1000e/ptp.c
+++ b/drivers/net/ethernet/intel/e1000e/ptp.c

@@ -26,6 +26,12 @@
 
 #include "e1000.h"
 
+#ifdef CONFIG_E1000E_HWTS
+#include <linux/clocksource.h>
+#include <linux/ktime.h>
+#include <asm/tsc.h>
+#endif
+
 /**
  * e1000e_phc_adjfreq - adjust the frequency of the hardware clock
  * @ptp: ptp clock structure
@@ -98,6 +104,78 @@
 	return 0;
 }
 
+#ifdef CONFIG_E1000E_HWTS
+#define MAX_HW_WAIT_COUNT (3)
+
+/**
+ * e1000e_phc_get_syncdevicetime - Callback given to timekeeping code reads system/device registers
+ * @device: current device time
+ * @system: system counter value read synchronously with device time
+ * @ctx: context provided by timekeeping code
+ *
+ * Read device and system (ART) clock simultaneously and return the corrected
+ * clock values in ns.
+ **/
+static int e1000e_phc_get_syncdevicetime(ktime_t *device,
+					 struct system_counterval_t *system,
+					 void *ctx)
+{
+	struct e1000_adapter *adapter = (struct e1000_adapter *)ctx;
+	struct e1000_hw *hw = &adapter->hw;
+	unsigned long flags;
+	int i;
+	u32 tsync_ctrl;
+	cycle_t dev_cycles;
+	cycle_t sys_cycles;
+
+	tsync_ctrl = er32(TSYNCTXCTL);
+	tsync_ctrl |= E1000_TSYNCTXCTL_START_SYNC |
+		E1000_TSYNCTXCTL_MAX_ALLOWED_DLY_MASK;
+	ew32(TSYNCTXCTL, tsync_ctrl);
+	for (i = 0; i < MAX_HW_WAIT_COUNT; ++i) {
+		udelay(1);
+		tsync_ctrl = er32(TSYNCTXCTL);
+		if (tsync_ctrl & E1000_TSYNCTXCTL_SYNC_COMP)
+			break;
+	}
+
+	if (i == MAX_HW_WAIT_COUNT)
+		return -ETIMEDOUT;
+
+	dev_cycles = er32(SYSSTMPH);
+	dev_cycles <<= 32;
+	dev_cycles |= er32(SYSSTMPL);
+	spin_lock_irqsave(&adapter->systim_lock, flags);
+	*device = ns_to_ktime(timecounter_cyc2time(&adapter->tc, dev_cycles));
+	spin_unlock_irqrestore(&adapter->systim_lock, flags);
+
+	sys_cycles = er32(PLTSTMPH);
+	sys_cycles <<= 32;
+	sys_cycles |= er32(PLTSTMPL);
+	*system = convert_art_to_tsc(sys_cycles);
+
+	return 0;
+}
+
+/**
+ * e1000e_phc_getsynctime - Reads the current system/device cross timestamp
+ * @ptp: ptp clock structure
+ * @cts: structure containing timestamp
+ *
+ * Read device and system (ART) clock simultaneously and return the scaled
+ * clock values in ns.
+ **/
+static int e1000e_phc_getcrosststamp(struct ptp_clock_info *ptp,
+				     struct system_device_crosststamp *xtstamp)
+{
+	struct e1000_adapter *adapter = container_of(ptp, struct e1000_adapter,
+						     ptp_clock_info);
+
+	return get_device_system_crosststamp(e1000e_phc_get_syncdevicetime,
+						adapter, NULL, xtstamp);
+}
+#endif/*CONFIG_E1000E_HWTS*/
+
 /**
  * e1000e_phc_gettime - Reads the current time from the hardware clock
  * @ptp: ptp clock structure
@@ -236,6 +314,13 @@
 		break;
 	}
 
+#ifdef CONFIG_E1000E_HWTS
+	/* CPU must have ART and GBe must be from Sunrise Point or greater */
+	if (hw->mac.type >= e1000_pch_spt && boot_cpu_has(X86_FEATURE_ART))
+		adapter->ptp_clock_info.getcrosststamp =
+			e1000e_phc_getcrosststamp;
+#endif/*CONFIG_E1000E_HWTS*/
+
 	INIT_DELAYED_WORK(&adapter->systim_overflow_work,
 			  e1000e_systim_overflow_work);
 

diff --git a/drivers/net/ethernet/intel/e1000e/regs.h b/drivers/net/ethernet/intel/e1000e/regs.h
index 1d5e0b7..0cb4d36 100644
--- a/drivers/net/ethernet/intel/e1000e/regs.h
+++ b/drivers/net/ethernet/intel/e1000e/regs.h

@@ -245,6 +245,10 @@
 #define E1000_SYSTIML	0x0B600	/* System time register Low - RO */
 #define E1000_SYSTIMH	0x0B604	/* System time register High - RO */
 #define E1000_TIMINCA	0x0B608	/* Increment attributes register - RW */
+#define E1000_SYSSTMPL  0x0B648 /* HH Timesync system stamp low register */
+#define E1000_SYSSTMPH  0x0B64C /* HH Timesync system stamp hi register */
+#define E1000_PLTSTMPL  0x0B640 /* HH Timesync platform stamp low register */
+#define E1000_PLTSTMPH  0x0B644 /* HH Timesync platform stamp hi register */
 #define E1000_RXMTRL	0x0B634	/* Time sync Rx EtherType and Msg Type - RW */
 #define E1000_RXUDP	0x0B638	/* Time Sync Rx UDP Port - RW */
 

diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c
index b1de7af..3ddf657 100644
--- a/drivers/net/ethernet/jme.c
+++ b/drivers/net/ethernet/jme.c

@@ -270,11 +270,17 @@
 }
 
 static inline void
-jme_clear_pm(struct jme_adapter *jme)
+jme_clear_pm_enable_wol(struct jme_adapter *jme)
 {
 	jwrite32(jme, JME_PMCS, PMCS_STMASK | jme->reg_pmcs);
 }
 
+static inline void
+jme_clear_pm_disable_wol(struct jme_adapter *jme)
+{
+	jwrite32(jme, JME_PMCS, PMCS_STMASK);
+}
+
 static int
 jme_reload_eeprom(struct jme_adapter *jme)
 {
@@ -1853,7 +1859,7 @@
 	struct jme_adapter *jme = netdev_priv(netdev);
 	int rc;
 
-	jme_clear_pm(jme);
+	jme_clear_pm_disable_wol(jme);
 	JME_NAPI_ENABLE(jme);
 
 	tasklet_init(&jme->linkch_task, jme_link_change_tasklet,
@@ -1925,11 +1931,11 @@
 static void
 jme_powersave_phy(struct jme_adapter *jme)
 {
-	if (jme->reg_pmcs) {
+	if (jme->reg_pmcs && device_may_wakeup(&jme->pdev->dev)) {
 		jme_set_100m_half(jme);
 		if (jme->reg_pmcs & (PMCS_LFEN | PMCS_LREN))
 			jme_wait_link(jme);
-		jme_clear_pm(jme);
+		jme_clear_pm_enable_wol(jme);
 	} else {
 		jme_phy_off(jme);
 	}
@@ -2646,9 +2652,6 @@
 	if (wol->wolopts & WAKE_MAGIC)
 		jme->reg_pmcs |= PMCS_MFEN;
 
-	jwrite32(jme, JME_PMCS, jme->reg_pmcs);
-	device_set_wakeup_enable(&jme->pdev->dev, !!(jme->reg_pmcs));
-
 	return 0;
 }
 
@@ -3172,8 +3175,8 @@
 	jme->mii_if.mdio_read = jme_mdio_read;
 	jme->mii_if.mdio_write = jme_mdio_write;
 
-	jme_clear_pm(jme);
-	device_set_wakeup_enable(&pdev->dev, true);
+	jme_clear_pm_disable_wol(jme);
+	device_init_wakeup(&pdev->dev, true);
 
 	jme_set_phyfifo_5level(jme);
 	jme->pcirev = pdev->revision;
@@ -3304,7 +3307,7 @@
 	if (!netif_running(netdev))
 		return 0;
 
-	jme_clear_pm(jme);
+	jme_clear_pm_disable_wol(jme);
 	jme_phy_on(jme);
 	if (test_bit(JME_FLAG_SSET, &jme->flags))
 		jme_set_settings(netdev, &jme->old_ecmd);
@@ -3312,13 +3315,14 @@
 		jme_reset_phy_processor(jme);
 	jme_phy_calibration(jme);
 	jme_phy_setEA(jme);
-	jme_start_irq(jme);
 	netif_device_attach(netdev);
 
 	atomic_inc(&jme->link_changing);
 
 	jme_reset_link(jme);
 
+	jme_start_irq(jme);
+
 	return 0;
 }
 

diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 662c2ee..b0ae69f 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c

@@ -370,6 +370,11 @@
 	struct net_device *dev;
 	struct notifier_block cpu_notifier;
 	int rxq_def;
+	/* Protect the access to the percpu interrupt registers,
+	 * ensuring that the configuration remains coherent.
+	 */
+	spinlock_t lock;
+	bool is_stopped;
 
 	/* Core clock */
 	struct clk *clk;
@@ -1038,6 +1043,43 @@
 	}
 }
 
+static void mvneta_percpu_unmask_interrupt(void *arg)
+{
+	struct mvneta_port *pp = arg;
+
+	/* All the queue are unmasked, but actually only the ones
+	 * mapped to this CPU will be unmasked
+	 */
+	mvreg_write(pp, MVNETA_INTR_NEW_MASK,
+		    MVNETA_RX_INTR_MASK_ALL |
+		    MVNETA_TX_INTR_MASK_ALL |
+		    MVNETA_MISCINTR_INTR_MASK);
+}
+
+static void mvneta_percpu_mask_interrupt(void *arg)
+{
+	struct mvneta_port *pp = arg;
+
+	/* All the queue are masked, but actually only the ones
+	 * mapped to this CPU will be masked
+	 */
+	mvreg_write(pp, MVNETA_INTR_NEW_MASK, 0);
+	mvreg_write(pp, MVNETA_INTR_OLD_MASK, 0);
+	mvreg_write(pp, MVNETA_INTR_MISC_MASK, 0);
+}
+
+static void mvneta_percpu_clear_intr_cause(void *arg)
+{
+	struct mvneta_port *pp = arg;
+
+	/* All the queue are cleared, but actually only the ones
+	 * mapped to this CPU will be cleared
+	 */
+	mvreg_write(pp, MVNETA_INTR_NEW_CAUSE, 0);
+	mvreg_write(pp, MVNETA_INTR_MISC_CAUSE, 0);
+	mvreg_write(pp, MVNETA_INTR_OLD_CAUSE, 0);
+}
+
 /* This method sets defaults to the NETA port:
  *	Clears interrupt Cause and Mask registers.
  *	Clears all MAC tables.
@@ -1055,14 +1097,10 @@
 	int max_cpu = num_present_cpus();
 
 	/* Clear all Cause registers */
-	mvreg_write(pp, MVNETA_INTR_NEW_CAUSE, 0);
-	mvreg_write(pp, MVNETA_INTR_OLD_CAUSE, 0);
-	mvreg_write(pp, MVNETA_INTR_MISC_CAUSE, 0);
+	on_each_cpu(mvneta_percpu_clear_intr_cause, pp, true);
 
 	/* Mask all interrupts */
-	mvreg_write(pp, MVNETA_INTR_NEW_MASK, 0);
-	mvreg_write(pp, MVNETA_INTR_OLD_MASK, 0);
-	mvreg_write(pp, MVNETA_INTR_MISC_MASK, 0);
+	on_each_cpu(mvneta_percpu_mask_interrupt, pp, true);
 	mvreg_write(pp, MVNETA_INTR_ENABLE, 0);
 
 	/* Enable MBUS Retry bit16 */
@@ -2528,34 +2566,9 @@
 	return 0;
 }
 
-static void mvneta_percpu_unmask_interrupt(void *arg)
-{
-	struct mvneta_port *pp = arg;
-
-	/* All the queue are unmasked, but actually only the ones
-	 * maped to this CPU will be unmasked
-	 */
-	mvreg_write(pp, MVNETA_INTR_NEW_MASK,
-		    MVNETA_RX_INTR_MASK_ALL |
-		    MVNETA_TX_INTR_MASK_ALL |
-		    MVNETA_MISCINTR_INTR_MASK);
-}
-
-static void mvneta_percpu_mask_interrupt(void *arg)
-{
-	struct mvneta_port *pp = arg;
-
-	/* All the queue are masked, but actually only the ones
-	 * maped to this CPU will be masked
-	 */
-	mvreg_write(pp, MVNETA_INTR_NEW_MASK, 0);
-	mvreg_write(pp, MVNETA_INTR_OLD_MASK, 0);
-	mvreg_write(pp, MVNETA_INTR_MISC_MASK, 0);
-}
-
 static void mvneta_start_dev(struct mvneta_port *pp)
 {
-	unsigned int cpu;
+	int cpu;
 
 	mvneta_max_rx_size_set(pp, pp->pkt_size);
 	mvneta_txq_max_tx_size_set(pp, pp->pkt_size);
@@ -2564,16 +2577,15 @@
 	mvneta_port_enable(pp);
 
 	/* Enable polling on the port */
-	for_each_present_cpu(cpu) {
+	for_each_online_cpu(cpu) {
 		struct mvneta_pcpu_port *port = per_cpu_ptr(pp->ports, cpu);
 
 		napi_enable(&port->napi);
 	}
 
 	/* Unmask interrupts. It has to be done from each CPU */
-	for_each_online_cpu(cpu)
-		smp_call_function_single(cpu, mvneta_percpu_unmask_interrupt,
-					 pp, true);
+	on_each_cpu(mvneta_percpu_unmask_interrupt, pp, true);
+
 	mvreg_write(pp, MVNETA_INTR_MISC_MASK,
 		    MVNETA_CAUSE_PHY_STATUS_CHANGE |
 		    MVNETA_CAUSE_LINK_CHANGE |
@@ -2589,7 +2601,7 @@
 
 	phy_stop(pp->phy_dev);
 
-	for_each_present_cpu(cpu) {
+	for_each_online_cpu(cpu) {
 		struct mvneta_pcpu_port *port = per_cpu_ptr(pp->ports, cpu);
 
 		napi_disable(&port->napi);
@@ -2604,13 +2616,10 @@
 	mvneta_port_disable(pp);
 
 	/* Clear all ethernet port interrupts */
-	mvreg_write(pp, MVNETA_INTR_MISC_CAUSE, 0);
-	mvreg_write(pp, MVNETA_INTR_OLD_CAUSE, 0);
+	on_each_cpu(mvneta_percpu_clear_intr_cause, pp, true);
 
 	/* Mask all ethernet port interrupts */
-	mvreg_write(pp, MVNETA_INTR_NEW_MASK, 0);
-	mvreg_write(pp, MVNETA_INTR_OLD_MASK, 0);
-	mvreg_write(pp, MVNETA_INTR_MISC_MASK, 0);
+	on_each_cpu(mvneta_percpu_mask_interrupt, pp, true);
 
 	mvneta_tx_reset(pp);
 	mvneta_rx_reset(pp);
@@ -2847,11 +2856,20 @@
 	disable_percpu_irq(pp->dev->irq);
 }
 
+/* Electing a CPU must be done in an atomic way: it should be done
+ * after or before the removal/insertion of a CPU and this function is
+ * not reentrant.
+ */
 static void mvneta_percpu_elect(struct mvneta_port *pp)
 {
-	int online_cpu_idx, max_cpu, cpu, i = 0;
+	int elected_cpu = 0, max_cpu, cpu, i = 0;
 
-	online_cpu_idx = pp->rxq_def % num_online_cpus();
+	/* Use the cpu associated to the rxq when it is online, in all
+	 * the other cases, use the cpu 0 which can't be offline.
+	 */
+	if (cpu_online(pp->rxq_def))
+		elected_cpu = pp->rxq_def;
+
 	max_cpu = num_present_cpus();
 
 	for_each_online_cpu(cpu) {
@@ -2862,7 +2880,7 @@
 			if ((rxq % max_cpu) == cpu)
 				rxq_map |= MVNETA_CPU_RXQ_ACCESS(rxq);
 
-		if (i == online_cpu_idx)
+		if (cpu == elected_cpu)
 			/* Map the default receive queue queue to the
 			 * elected CPU
 			 */
@@ -2873,7 +2891,7 @@
 		 * the CPU bound to the default RX queue
 		 */
 		if (txq_number == 1)
-			txq_map = (i == online_cpu_idx) ?
+			txq_map = (cpu == elected_cpu) ?
 				MVNETA_CPU_TXQ_ACCESS(1) : 0;
 		else
 			txq_map = mvreg_read(pp, MVNETA_CPU_MAP(cpu)) &
@@ -2902,6 +2920,14 @@
 	switch (action) {
 	case CPU_ONLINE:
 	case CPU_ONLINE_FROZEN:
+		spin_lock(&pp->lock);
+		/* Configuring the driver for a new CPU while the
+		 * driver is stopping is racy, so just avoid it.
+		 */
+		if (pp->is_stopped) {
+			spin_unlock(&pp->lock);
+			break;
+		}
 		netif_tx_stop_all_queues(pp->dev);
 
 		/* We have to synchronise on tha napi of each CPU
@@ -2917,9 +2943,7 @@
 		}
 
 		/* Mask all ethernet port interrupts */
-		mvreg_write(pp, MVNETA_INTR_NEW_MASK, 0);
-		mvreg_write(pp, MVNETA_INTR_OLD_MASK, 0);
-		mvreg_write(pp, MVNETA_INTR_MISC_MASK, 0);
+		on_each_cpu(mvneta_percpu_mask_interrupt, pp, true);
 		napi_enable(&port->napi);
 
 
@@ -2934,27 +2958,25 @@
 		 */
 		mvneta_percpu_elect(pp);
 
-		/* Unmask all ethernet port interrupts, as this
-		 * notifier is called for each CPU then the CPU to
-		 * Queue mapping is applied
-		 */
-		mvreg_write(pp, MVNETA_INTR_NEW_MASK,
-			MVNETA_RX_INTR_MASK(rxq_number) |
-			MVNETA_TX_INTR_MASK(txq_number) |
-			MVNETA_MISCINTR_INTR_MASK);
+		/* Unmask all ethernet port interrupts */
+		on_each_cpu(mvneta_percpu_unmask_interrupt, pp, true);
 		mvreg_write(pp, MVNETA_INTR_MISC_MASK,
 			MVNETA_CAUSE_PHY_STATUS_CHANGE |
 			MVNETA_CAUSE_LINK_CHANGE |
 			MVNETA_CAUSE_PSC_SYNC_CHANGE);
 		netif_tx_start_all_queues(pp->dev);
+		spin_unlock(&pp->lock);
 		break;
 	case CPU_DOWN_PREPARE:
 	case CPU_DOWN_PREPARE_FROZEN:
 		netif_tx_stop_all_queues(pp->dev);
+		/* Thanks to this lock we are sure that any pending
+		 * cpu election is done
+		 */
+		spin_lock(&pp->lock);
 		/* Mask all ethernet port interrupts */
-		mvreg_write(pp, MVNETA_INTR_NEW_MASK, 0);
-		mvreg_write(pp, MVNETA_INTR_OLD_MASK, 0);
-		mvreg_write(pp, MVNETA_INTR_MISC_MASK, 0);
+		on_each_cpu(mvneta_percpu_mask_interrupt, pp, true);
+		spin_unlock(&pp->lock);
 
 		napi_synchronize(&port->napi);
 		napi_disable(&port->napi);
@@ -2968,12 +2990,11 @@
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN:
 		/* Check if a new CPU must be elected now this on is down */
+		spin_lock(&pp->lock);
 		mvneta_percpu_elect(pp);
+		spin_unlock(&pp->lock);
 		/* Unmask all ethernet port interrupts */
-		mvreg_write(pp, MVNETA_INTR_NEW_MASK,
-			MVNETA_RX_INTR_MASK(rxq_number) |
-			MVNETA_TX_INTR_MASK(txq_number) |
-			MVNETA_MISCINTR_INTR_MASK);
+		on_each_cpu(mvneta_percpu_unmask_interrupt, pp, true);
 		mvreg_write(pp, MVNETA_INTR_MISC_MASK,
 			MVNETA_CAUSE_PHY_STATUS_CHANGE |
 			MVNETA_CAUSE_LINK_CHANGE |
@@ -2988,7 +3009,7 @@
 static int mvneta_open(struct net_device *dev)
 {
 	struct mvneta_port *pp = netdev_priv(dev);
-	int ret, cpu;
+	int ret;
 
 	pp->pkt_size = MVNETA_RX_PKT_SIZE(pp->dev->mtu);
 	pp->frag_size = SKB_DATA_ALIGN(MVNETA_RX_BUF_SIZE(pp->pkt_size)) +
@@ -3010,22 +3031,12 @@
 		goto err_cleanup_txqs;
 	}
 
-	/* Even though the documentation says that request_percpu_irq
-	 * doesn't enable the interrupts automatically, it actually
-	 * does so on the local CPU.
-	 *
-	 * Make sure it's disabled.
-	 */
-	mvneta_percpu_disable(pp);
-
 	/* Enable per-CPU interrupt on all the CPU to handle our RX
 	 * queue interrupts
 	 */
-	for_each_online_cpu(cpu)
-		smp_call_function_single(cpu, mvneta_percpu_enable,
-					 pp, true);
+	on_each_cpu(mvneta_percpu_enable, pp, true);
 
-
+	pp->is_stopped = false;
 	/* Register a CPU notifier to handle the case where our CPU
 	 * might be taken offline.
 	 */
@@ -3057,13 +3068,20 @@
 static int mvneta_stop(struct net_device *dev)
 {
 	struct mvneta_port *pp = netdev_priv(dev);
-	int cpu;
 
+	/* Inform that we are stopping so we don't want to setup the
+	 * driver for new CPUs in the notifiers
+	 */
+	spin_lock(&pp->lock);
+	pp->is_stopped = true;
 	mvneta_stop_dev(pp);
 	mvneta_mdio_remove(pp);
 	unregister_cpu_notifier(&pp->cpu_notifier);
-	for_each_present_cpu(cpu)
-		smp_call_function_single(cpu, mvneta_percpu_disable, pp, true);
+	/* Now that the notifier are unregistered, we can release le
+	 * lock
+	 */
+	spin_unlock(&pp->lock);
+	on_each_cpu(mvneta_percpu_disable, pp, true);
 	free_percpu_irq(dev->irq, pp->ports);
 	mvneta_cleanup_rxqs(pp);
 	mvneta_cleanup_txqs(pp);
@@ -3312,9 +3330,7 @@
 
 	netif_tx_stop_all_queues(pp->dev);
 
-	for_each_online_cpu(cpu)
-		smp_call_function_single(cpu, mvneta_percpu_mask_interrupt,
-					 pp, true);
+	on_each_cpu(mvneta_percpu_mask_interrupt, pp, true);
 
 	/* We have to synchronise on the napi of each CPU */
 	for_each_online_cpu(cpu) {
@@ -3335,7 +3351,9 @@
 	mvreg_write(pp, MVNETA_PORT_CONFIG, val);
 
 	/* Update the elected CPU matching the new rxq_def */
+	spin_lock(&pp->lock);
 	mvneta_percpu_elect(pp);
+	spin_unlock(&pp->lock);
 
 	/* We have to synchronise on the napi of each CPU */
 	for_each_online_cpu(cpu) {

diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index a4beccf..c797971a 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c

@@ -3061,7 +3061,7 @@
 
 		pe = kzalloc(sizeof(*pe), GFP_KERNEL);
 		if (!pe)
-			return -1;
+			return -ENOMEM;
 		mvpp2_prs_tcam_lu_set(pe, MVPP2_PRS_LU_MAC);
 		pe->index = tid;
 
@@ -3077,7 +3077,7 @@
 	if (pmap == 0) {
 		if (add) {
 			kfree(pe);
-			return -1;
+			return -EINVAL;
 		}
 		mvpp2_prs_hw_inv(priv, pe->index);
 		priv->prs_shadow[pe->index].valid = false;

diff --git a/drivers/net/ethernet/mellanox/mlx4/catas.c b/drivers/net/ethernet/mellanox/mlx4/catas.c
index 715de8a..c7e9399 100644
--- a/drivers/net/ethernet/mellanox/mlx4/catas.c
+++ b/drivers/net/ethernet/mellanox/mlx4/catas.c

@@ -182,10 +182,17 @@
 		err = mlx4_reset_slave(dev);
 	else
 		err = mlx4_reset_master(dev);
-	BUG_ON(err != 0);
 
+	if (!err) {
+		mlx4_err(dev, "device was reset successfully\n");
+	} else {
+		/* EEH could have disabled the PCI channel during reset. That's
+		 * recoverable and the PCI error flow will handle it.
+		 */
+		if (!pci_channel_offline(dev->persist->pdev))
+			BUG_ON(1);
+	}
 	dev->persist->state |= MLX4_DEVICE_STATE_INTERNAL_ERROR;
-	mlx4_err(dev, "device was reset successfully\n");
 	mutex_unlock(&persist->device_state_mutex);
 
 	/* At that step HW was already reset, now notify clients */

diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c
index 3348e64..a849da9 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cq.c

@@ -318,7 +318,9 @@
 	if (timestamp_en)
 		cq_context->flags  |= cpu_to_be32(1 << 19);
 
-	cq_context->logsize_usrpage = cpu_to_be32((ilog2(nent) << 24) | uar->index);
+	cq_context->logsize_usrpage =
+		cpu_to_be32((ilog2(nent) << 24) |
+			    mlx4_to_hw_uar_index(dev, uar->index));
 	cq_context->comp_eqn	    = priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(vector)].eqn;
 	cq_context->log_page_size   = mtt->page_shift - MLX4_ICM_PAGE_SHIFT;
 

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_clock.c b/drivers/net/ethernet/mellanox/mlx4/en_clock.c
index 038f9ce..1494997 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_clock.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_clock.c

@@ -236,6 +236,24 @@
 	.enable		= mlx4_en_phc_enable,
 };
 
+#define MLX4_EN_WRAP_AROUND_SEC	10ULL
+
+/* This function calculates the max shift that enables the user range
+ * of MLX4_EN_WRAP_AROUND_SEC values in the cycles register.
+ */
+static u32 freq_to_shift(u16 freq)
+{
+	u32 freq_khz = freq * 1000;
+	u64 max_val_cycles = freq_khz * 1000 * MLX4_EN_WRAP_AROUND_SEC;
+	u64 max_val_cycles_rounded = is_power_of_2(max_val_cycles + 1) ?
+		max_val_cycles : roundup_pow_of_two(max_val_cycles) - 1;
+	/* calculate max possible multiplier in order to fit in 64bit */
+	u64 max_mul = div_u64(0xffffffffffffffffULL, max_val_cycles_rounded);
+
+	/* This comes from the reverse of clocksource_khz2mult */
+	return ilog2(div_u64(max_mul * freq_khz, 1000000));
+}
+
 void mlx4_en_init_timestamp(struct mlx4_en_dev *mdev)
 {
 	struct mlx4_dev *dev = mdev->dev;
@@ -254,12 +272,7 @@
 	memset(&mdev->cycles, 0, sizeof(mdev->cycles));
 	mdev->cycles.read = mlx4_en_read_clock;
 	mdev->cycles.mask = CLOCKSOURCE_MASK(48);
-	/* Using shift to make calculation more accurate. Since current HW
-	 * clock frequency is 427 MHz, and cycles are given using a 48 bits
-	 * register, the biggest shift when calculating using u64, is 14
-	 * (max_cycles * multiplier < 2^64)
-	 */
-	mdev->cycles.shift = 14;
+	mdev->cycles.shift = freq_to_shift(dev->caps.hca_core_clock);
 	mdev->cycles.mult =
 		clocksource_khz2mult(1000 * dev->caps.hca_core_clock, mdev->cycles.shift);
 	mdev->nominal_c_mult = mdev->cycles.mult;

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 0c7e3f6..21e2c09 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c

@@ -2245,7 +2245,7 @@
 	struct mlx4_en_dev *mdev = en_priv->mdev;
 	u64 mac_u64 = mlx4_mac_to_u64(mac);
 
-	if (!is_valid_ether_addr(mac))
+	if (is_multicast_ether_addr(mac))
 		return -EINVAL;
 
 	return mlx4_set_vf_mac(mdev->dev, en_priv->port, queue, mac_u64);
@@ -2344,8 +2344,6 @@
 	/* set offloads */
 	priv->dev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
 				      NETIF_F_TSO | NETIF_F_GSO_UDP_TUNNEL;
-	priv->dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
-	priv->dev->features    |= NETIF_F_GSO_UDP_TUNNEL;
 }
 
 static void mlx4_en_del_vxlan_offloads(struct work_struct *work)
@@ -2356,8 +2354,6 @@
 	/* unset offloads */
 	priv->dev->hw_enc_features &= ~(NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
 				      NETIF_F_TSO | NETIF_F_GSO_UDP_TUNNEL);
-	priv->dev->hw_features &= ~NETIF_F_GSO_UDP_TUNNEL;
-	priv->dev->features    &= ~NETIF_F_GSO_UDP_TUNNEL;
 
 	ret = mlx4_SET_PORT_VXLAN(priv->mdev->dev, priv->port,
 				  VXLAN_STEER_BY_OUTER_MAC, 0);
@@ -2980,6 +2976,11 @@
 		priv->rss_hash_fn = ETH_RSS_HASH_TOP;
 	}
 
+	if (mdev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) {
+		dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
+		dev->features    |= NETIF_F_GSO_UDP_TUNNEL;
+	}
+
 	mdev->pndev[port] = dev;
 	mdev->upper[port] = NULL;
 

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_port.c b/drivers/net/ethernet/mellanox/mlx4/en_port.c
index ee99e67..3904b5f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_port.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_port.c

@@ -238,11 +238,11 @@
 	stats->collisions = 0;
 	stats->rx_dropped = be32_to_cpu(mlx4_en_stats->RDROP);
 	stats->rx_length_errors = be32_to_cpu(mlx4_en_stats->RdropLength);
-	stats->rx_over_errors = be32_to_cpu(mlx4_en_stats->RdropOvflw);
+	stats->rx_over_errors = 0;
 	stats->rx_crc_errors = be32_to_cpu(mlx4_en_stats->RCRC);
 	stats->rx_frame_errors = 0;
 	stats->rx_fifo_errors = be32_to_cpu(mlx4_en_stats->RdropOvflw);
-	stats->rx_missed_errors = be32_to_cpu(mlx4_en_stats->RdropOvflw);
+	stats->rx_missed_errors = 0;
 	stats->tx_aborted_errors = 0;
 	stats->tx_carrier_errors = 0;
 	stats->tx_fifo_errors = 0;

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_resources.c b/drivers/net/ethernet/mellanox/mlx4/en_resources.c
index 12aab5a..02e925d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_resources.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_resources.c

@@ -58,7 +58,8 @@
 	} else {
 		context->sq_size_stride = ilog2(TXBB_SIZE) - 4;
 	}
-	context->usr_page = cpu_to_be32(mdev->priv_uar.index);
+	context->usr_page = cpu_to_be32(mlx4_to_hw_uar_index(mdev->dev,
+					mdev->priv_uar.index));
 	context->local_qpn = cpu_to_be32(qpn);
 	context->pri_path.ackto = 1 & 0x07;
 	context->pri_path.sched_queue = 0x83 | (priv->port - 1) << 6;

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 4421bf5..e0946ab 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c

@@ -213,7 +213,9 @@
 	mlx4_en_fill_qp_context(priv, ring->size, ring->stride, 1, 0, ring->qpn,
 				ring->cqn, user_prio, &ring->context);
 	if (ring->bf_alloced)
-		ring->context.usr_page = cpu_to_be32(ring->bf.uar->index);
+		ring->context.usr_page =
+			cpu_to_be32(mlx4_to_hw_uar_index(mdev->dev,
+							 ring->bf.uar->index));
 
 	err = mlx4_qp_to_ready(mdev->dev, &ring->wqres.mtt, &ring->context,
 			       &ring->qp, &ring->qp_state);

diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c
index 4696053..f613977 100644
--- a/drivers/net/ethernet/mellanox/mlx4/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/eq.c

@@ -940,9 +940,10 @@
 
 	if (!priv->eq_table.uar_map[index]) {
 		priv->eq_table.uar_map[index] =
-			ioremap(pci_resource_start(dev->persist->pdev, 2) +
-				((eq->eqn / 4) << PAGE_SHIFT),
-				PAGE_SIZE);
+			ioremap(
+				pci_resource_start(dev->persist->pdev, 2) +
+				((eq->eqn / 4) << (dev->uar_page_shift)),
+				(1 << (dev->uar_page_shift)));
 		if (!priv->eq_table.uar_map[index]) {
 			mlx4_err(dev, "Couldn't map EQ doorbell for EQN 0x%06x\n",
 				 eq->eqn);

diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index f1b6d21..f8674ae 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c

@@ -168,6 +168,20 @@
 
 static atomic_t pf_loading = ATOMIC_INIT(0);
 
+static inline void mlx4_set_num_reserved_uars(struct mlx4_dev *dev,
+					      struct mlx4_dev_cap *dev_cap)
+{
+	/* The reserved_uars is calculated by system page size unit.
+	 * Therefore, adjustment is added when the uar page size is less
+	 * than the system page size
+	 */
+	dev->caps.reserved_uars	=
+		max_t(int,
+		      mlx4_get_num_reserved_uar(dev),
+		      dev_cap->reserved_uars /
+			(1 << (PAGE_SHIFT - dev->uar_page_shift)));
+}
+
 int mlx4_check_port_params(struct mlx4_dev *dev,
 			   enum mlx4_port_type *port_type)
 {
@@ -386,8 +400,6 @@
 	dev->caps.reserved_mtts      = dev_cap->reserved_mtts;
 	dev->caps.reserved_mrws	     = dev_cap->reserved_mrws;
 
-	/* The first 128 UARs are used for EQ doorbells */
-	dev->caps.reserved_uars	     = max_t(int, 128, dev_cap->reserved_uars);
 	dev->caps.reserved_pds	     = dev_cap->reserved_pds;
 	dev->caps.reserved_xrcds     = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
 					dev_cap->reserved_xrcds : 0;
@@ -405,6 +417,15 @@
 	dev->caps.max_gso_sz	     = dev_cap->max_gso_sz;
 	dev->caps.max_rss_tbl_sz     = dev_cap->max_rss_tbl_sz;
 
+	/* Save uar page shift */
+	if (!mlx4_is_slave(dev)) {
+		/* Virtual PCI function needs to determine UAR page size from
+		 * firmware. Only master PCI function can set the uar page size
+		 */
+		dev->uar_page_shift = DEFAULT_UAR_PAGE_SHIFT;
+		mlx4_set_num_reserved_uars(dev, dev_cap);
+	}
+
 	if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PHV_EN) {
 		struct mlx4_init_hca_param hca_param;
 
@@ -815,16 +836,25 @@
 		return -ENODEV;
 	}
 
-	/* slave gets uar page size from QUERY_HCA fw command */
-	dev->caps.uar_page_size = 1 << (hca_param.uar_page_sz + 12);
+	/* Set uar_page_shift for VF */
+	dev->uar_page_shift = hca_param.uar_page_sz + 12;
 
-	/* TODO: relax this assumption */
-	if (dev->caps.uar_page_size != PAGE_SIZE) {
-		mlx4_err(dev, "UAR size:%d != kernel PAGE_SIZE of %ld\n",
-			 dev->caps.uar_page_size, PAGE_SIZE);
-		return -ENODEV;
+	/* Make sure the master uar page size is valid */
+	if (dev->uar_page_shift > PAGE_SHIFT) {
+		mlx4_err(dev,
+			 "Invalid configuration: uar page size is larger than system page size\n");
+		return  -ENODEV;
 	}
 
+	/* Set reserved_uars based on the uar_page_shift */
+	mlx4_set_num_reserved_uars(dev, &dev_cap);
+
+	/* Although uar page size in FW differs from system page size,
+	 * upper software layers (mlx4_ib, mlx4_en and part of mlx4_core)
+	 * still works with assumption that uar page size == system page size
+	 */
+	dev->caps.uar_page_size = PAGE_SIZE;
+
 	memset(&func_cap, 0, sizeof(func_cap));
 	err = mlx4_QUERY_FUNC_CAP(dev, 0, &func_cap);
 	if (err) {
@@ -1226,6 +1256,7 @@
 static int mlx4_mf_bond(struct mlx4_dev *dev)
 {
 	int err = 0;
+	int nvfs;
 	struct mlx4_slaves_pport slaves_port1;
 	struct mlx4_slaves_pport slaves_port2;
 	DECLARE_BITMAP(slaves_port_1_2, MLX4_MFUNC_MAX);
@@ -1242,11 +1273,18 @@
 		return -EINVAL;
 	}
 
+	/* number of virtual functions is number of total functions minus one
+	 * physical function for each port.
+	 */
+	nvfs = bitmap_weight(slaves_port1.slaves, dev->persist->num_vfs + 1) +
+		bitmap_weight(slaves_port2.slaves, dev->persist->num_vfs + 1) - 2;
+
 	/* limit on maximum allowed VFs */
-	if ((bitmap_weight(slaves_port1.slaves, dev->persist->num_vfs + 1) +
-	    bitmap_weight(slaves_port2.slaves, dev->persist->num_vfs + 1)) >
-	    MAX_MF_BOND_ALLOWED_SLAVES)
+	if (nvfs > MAX_MF_BOND_ALLOWED_SLAVES) {
+		mlx4_warn(dev, "HA mode is not supported for %d VFs (max %d are allowed)\n",
+			  nvfs, MAX_MF_BOND_ALLOWED_SLAVES);
 		return -EINVAL;
+	}
 
 	if (dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) {
 		mlx4_warn(dev, "HA mode unsupported for NON DMFS steering\n");
@@ -2179,8 +2217,12 @@
 
 		dev->caps.max_fmr_maps = (1 << (32 - ilog2(dev->caps.num_mpts))) - 1;
 
-		init_hca.log_uar_sz = ilog2(dev->caps.num_uars);
-		init_hca.uar_page_sz = PAGE_SHIFT - 12;
+		/* Always set UAR page size 4KB, set log_uar_sz accordingly */
+		init_hca.log_uar_sz = ilog2(dev->caps.num_uars) +
+				      PAGE_SHIFT -
+				      DEFAULT_UAR_PAGE_SHIFT;
+		init_hca.uar_page_sz = DEFAULT_UAR_PAGE_SHIFT - 12;
+
 		init_hca.mw_enabled = 0;
 		if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW ||
 		    dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN)

diff --git a/drivers/net/ethernet/mellanox/mlx4/pd.c b/drivers/net/ethernet/mellanox/mlx4/pd.c
index 609c59d..b3cc3ab 100644
--- a/drivers/net/ethernet/mellanox/mlx4/pd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/pd.c

@@ -269,9 +269,15 @@
 
 int mlx4_init_uar_table(struct mlx4_dev *dev)
 {
-	if (dev->caps.num_uars <= 128) {
-		mlx4_err(dev, "Only %d UAR pages (need more than 128)\n",
-			 dev->caps.num_uars);
+	int num_reserved_uar = mlx4_get_num_reserved_uar(dev);
+
+	mlx4_dbg(dev, "uar_page_shift = %d", dev->uar_page_shift);
+	mlx4_dbg(dev, "Effective reserved_uars=%d", dev->caps.reserved_uars);
+
+	if (dev->caps.num_uars <= num_reserved_uar) {
+		mlx4_err(
+			dev, "Only %d UAR pages (need more than %d)\n",
+			dev->caps.num_uars, num_reserved_uar);
 		mlx4_err(dev, "Increase firmware log2_uar_bar_megabytes?\n");
 		return -ENODEV;
 	}

diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c
index 787b7bb..211c650 100644
--- a/drivers/net/ethernet/mellanox/mlx4/port.c
+++ b/drivers/net/ethernet/mellanox/mlx4/port.c

@@ -193,10 +193,10 @@
 	if (need_mf_bond) {
 		if (port == 1) {
 			mutex_lock(&table->mutex);
-			mutex_lock(&dup_table->mutex);
+			mutex_lock_nested(&dup_table->mutex, SINGLE_DEPTH_NESTING);
 		} else {
 			mutex_lock(&dup_table->mutex);
-			mutex_lock(&table->mutex);
+			mutex_lock_nested(&table->mutex, SINGLE_DEPTH_NESTING);
 		}
 	} else {
 		mutex_lock(&table->mutex);
@@ -389,10 +389,10 @@
 	if (dup) {
 		if (port == 1) {
 			mutex_lock(&table->mutex);
-			mutex_lock(&dup_table->mutex);
+			mutex_lock_nested(&dup_table->mutex, SINGLE_DEPTH_NESTING);
 		} else {
 			mutex_lock(&dup_table->mutex);
-			mutex_lock(&table->mutex);
+			mutex_lock_nested(&table->mutex, SINGLE_DEPTH_NESTING);
 		}
 	} else {
 		mutex_lock(&table->mutex);
@@ -479,10 +479,10 @@
 	if (dup) {
 		if (port == 1) {
 			mutex_lock(&table->mutex);
-			mutex_lock(&dup_table->mutex);
+			mutex_lock_nested(&dup_table->mutex, SINGLE_DEPTH_NESTING);
 		} else {
 			mutex_lock(&dup_table->mutex);
-			mutex_lock(&table->mutex);
+			mutex_lock_nested(&table->mutex, SINGLE_DEPTH_NESTING);
 		}
 	} else {
 		mutex_lock(&table->mutex);
@@ -588,10 +588,10 @@
 	if (need_mf_bond) {
 		if (port == 1) {
 			mutex_lock(&table->mutex);
-			mutex_lock(&dup_table->mutex);
+			mutex_lock_nested(&dup_table->mutex, SINGLE_DEPTH_NESTING);
 		} else {
 			mutex_lock(&dup_table->mutex);
-			mutex_lock(&table->mutex);
+			mutex_lock_nested(&table->mutex, SINGLE_DEPTH_NESTING);
 		}
 	} else {
 		mutex_lock(&table->mutex);
@@ -764,10 +764,10 @@
 	if (dup) {
 		if (port == 1) {
 			mutex_lock(&table->mutex);
-			mutex_lock(&dup_table->mutex);
+			mutex_lock_nested(&dup_table->mutex, SINGLE_DEPTH_NESTING);
 		} else {
 			mutex_lock(&dup_table->mutex);
-			mutex_lock(&table->mutex);
+			mutex_lock_nested(&table->mutex, SINGLE_DEPTH_NESTING);
 		}
 	} else {
 		mutex_lock(&table->mutex);

diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index b46dbe2..25ce1b0 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c

@@ -915,11 +915,13 @@
 
 	spin_lock_irq(mlx4_tlock(dev));
 	r = find_res(dev, counter_index, RES_COUNTER);
-	if (!r || r->owner != slave)
+	if (!r || r->owner != slave) {
 		ret = -EINVAL;
-	counter = container_of(r, struct res_counter, com);
-	if (!counter->port)
-		counter->port = port;
+	} else {
+		counter = container_of(r, struct res_counter, com);
+		if (!counter->port)
+			counter->port = port;
+	}
 
 	spin_unlock_irq(mlx4_tlock(dev));
 	return ret;

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index aac071a..5b17532 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h

@@ -223,6 +223,7 @@
 
 static const char rq_stats_strings[][ETH_GSTRING_LEN] = {
 	"packets",
+	"bytes",
 	"csum_none",
 	"csum_sw",
 	"lro_packets",
@@ -232,16 +233,18 @@
 
 struct mlx5e_rq_stats {
 	u64 packets;
+	u64 bytes;
 	u64 csum_none;
 	u64 csum_sw;
 	u64 lro_packets;
 	u64 lro_bytes;
 	u64 wqe_err;
-#define NUM_RQ_STATS 6
+#define NUM_RQ_STATS 7
 };
 
 static const char sq_stats_strings[][ETH_GSTRING_LEN] = {
 	"packets",
+	"bytes",
 	"tso_packets",
 	"tso_bytes",
 	"csum_offload_none",
@@ -253,6 +256,7 @@
 
 struct mlx5e_sq_stats {
 	u64 packets;
+	u64 bytes;
 	u64 tso_packets;
 	u64 tso_bytes;
 	u64 csum_offload_none;
@@ -260,7 +264,7 @@
 	u64 wake;
 	u64 dropped;
 	u64 nop;
-#define NUM_SQ_STATS 8
+#define NUM_SQ_STATS 9
 };
 
 struct mlx5e_stats {
@@ -304,14 +308,9 @@
 	MLX5E_RQ_STATE_POST_WQES_ENABLE,
 };
 
-enum cq_flags {
-	MLX5E_CQ_HAS_CQES = 1,
-};
-
 struct mlx5e_cq {
 	/* data path - accessed per cqe */
 	struct mlx5_cqwq           wq;
-	unsigned long              flags;
 
 	/* data path - accessed per napi poll */
 	struct napi_struct        *napi;
@@ -452,6 +451,8 @@
 	MLX5E_NUM_TT,
 };
 
+#define IS_HASHING_TT(tt) (tt != MLX5E_TT_ANY)
+
 enum mlx5e_rqt_ix {
 	MLX5E_INDIRECTION_RQT,
 	MLX5E_SINGLE_RQ_RQT,
@@ -618,9 +619,12 @@
 void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv);
 
 int mlx5e_redirect_rqt(struct mlx5e_priv *priv, enum mlx5e_rqt_ix rqt_ix);
+void mlx5e_build_tir_ctx_hash(void *tirc, struct mlx5e_priv *priv);
 
 int mlx5e_open_locked(struct net_device *netdev);
 int mlx5e_close_locked(struct net_device *netdev);
+void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len,
+				   int num_channels);
 
 static inline void mlx5e_tx_notify_hw(struct mlx5e_sq *sq,
 				      struct mlx5e_tx_wqe *wqe, int bf_sz)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
index be65435..2018eeb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c

@@ -62,10 +62,11 @@
 	struct delayed_work *dwork = to_delayed_work(work);
 	struct mlx5e_tstamp *tstamp = container_of(dwork, struct mlx5e_tstamp,
 						   overflow_work);
+	unsigned long flags;
 
-	write_lock(&tstamp->lock);
+	write_lock_irqsave(&tstamp->lock, flags);
 	timecounter_read(&tstamp->clock);
-	write_unlock(&tstamp->lock);
+	write_unlock_irqrestore(&tstamp->lock, flags);
 	schedule_delayed_work(&tstamp->overflow_work, tstamp->overflow_period);
 }
 
@@ -136,10 +137,11 @@
 	struct mlx5e_tstamp *tstamp = container_of(ptp, struct mlx5e_tstamp,
 						   ptp_info);
 	u64 ns = timespec64_to_ns(ts);
+	unsigned long flags;
 
-	write_lock(&tstamp->lock);
+	write_lock_irqsave(&tstamp->lock, flags);
 	timecounter_init(&tstamp->clock, &tstamp->cycles, ns);
-	write_unlock(&tstamp->lock);
+	write_unlock_irqrestore(&tstamp->lock, flags);
 
 	return 0;
 }
@@ -150,10 +152,11 @@
 	struct mlx5e_tstamp *tstamp = container_of(ptp, struct mlx5e_tstamp,
 						   ptp_info);
 	u64 ns;
+	unsigned long flags;
 
-	write_lock(&tstamp->lock);
+	write_lock_irqsave(&tstamp->lock, flags);
 	ns = timecounter_read(&tstamp->clock);
-	write_unlock(&tstamp->lock);
+	write_unlock_irqrestore(&tstamp->lock, flags);
 
 	*ts = ns_to_timespec64(ns);
 
@@ -164,10 +167,11 @@
 {
 	struct mlx5e_tstamp *tstamp = container_of(ptp, struct mlx5e_tstamp,
 						   ptp_info);
+	unsigned long flags;
 
-	write_lock(&tstamp->lock);
+	write_lock_irqsave(&tstamp->lock, flags);
 	timecounter_adjtime(&tstamp->clock, delta);
-	write_unlock(&tstamp->lock);
+	write_unlock_irqrestore(&tstamp->lock, flags);
 
 	return 0;
 }
@@ -176,6 +180,7 @@
 {
 	u64 adj;
 	u32 diff;
+	unsigned long flags;
 	int neg_adj = 0;
 	struct mlx5e_tstamp *tstamp = container_of(ptp, struct mlx5e_tstamp,
 						  ptp_info);
@@ -189,11 +194,11 @@
 	adj *= delta;
 	diff = div_u64(adj, 1000000000ULL);
 
-	write_lock(&tstamp->lock);
+	write_lock_irqsave(&tstamp->lock, flags);
 	timecounter_read(&tstamp->clock);
 	tstamp->cycles.mult = neg_adj ? tstamp->nominal_c_mult - diff :
 					tstamp->nominal_c_mult + diff;
-	write_unlock(&tstamp->lock);
+	write_unlock_irqrestore(&tstamp->lock, flags);
 
 	return 0;
 }

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 65624ac..5abeb00 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c

@@ -385,6 +385,8 @@
 		mlx5e_close_locked(dev);
 
 	priv->params.num_channels = count;
+	mlx5e_build_default_indir_rqt(priv->params.indirection_rqt,
+				      MLX5E_INDIR_RQT_SIZE, count);
 
 	if (was_opened)
 		err = mlx5e_open_locked(dev);
@@ -703,18 +705,36 @@
 	return 0;
 }
 
+static void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen)
+{
+	struct mlx5_core_dev *mdev = priv->mdev;
+	void *tirc = MLX5_ADDR_OF(modify_tir_in, in, ctx);
+	int i;
+
+	MLX5_SET(modify_tir_in, in, bitmask.hash, 1);
+	mlx5e_build_tir_ctx_hash(tirc, priv);
+
+	for (i = 0; i < MLX5E_NUM_TT; i++)
+		if (IS_HASHING_TT(i))
+			mlx5_core_modify_tir(mdev, priv->tirn[i], in, inlen);
+}
+
 static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir,
 			  const u8 *key, const u8 hfunc)
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
-	bool close_open;
-	int err = 0;
+	int inlen = MLX5_ST_SZ_BYTES(modify_tir_in);
+	void *in;
 
 	if ((hfunc != ETH_RSS_HASH_NO_CHANGE) &&
 	    (hfunc != ETH_RSS_HASH_XOR) &&
 	    (hfunc != ETH_RSS_HASH_TOP))
 		return -EINVAL;
 
+	in = mlx5_vzalloc(inlen);
+	if (!in)
+		return -ENOMEM;
+
 	mutex_lock(&priv->state_lock);
 
 	if (indir) {
@@ -723,11 +743,6 @@
 		mlx5e_redirect_rqt(priv, MLX5E_INDIRECTION_RQT);
 	}
 
-	close_open = (key || (hfunc != ETH_RSS_HASH_NO_CHANGE)) &&
-		     test_bit(MLX5E_STATE_OPENED, &priv->state);
-	if (close_open)
-		mlx5e_close_locked(dev);
-
 	if (key)
 		memcpy(priv->params.toeplitz_hash_key, key,
 		       sizeof(priv->params.toeplitz_hash_key));
@@ -735,12 +750,13 @@
 	if (hfunc != ETH_RSS_HASH_NO_CHANGE)
 		priv->params.rss_hfunc = hfunc;
 
-	if (close_open)
-		err = mlx5e_open_locked(priv->netdev);
+	mlx5e_modify_tirs_hash(priv, in, inlen);
 
 	mutex_unlock(&priv->state_lock);
 
-	return err;
+	kvfree(in);
+
+	return 0;
 }
 
 static int mlx5e_get_rxnfc(struct net_device *netdev,

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 6a3e430..402994b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c

@@ -141,6 +141,10 @@
 		return;
 
 	/* Collect firts the SW counters and then HW for consistency */
+	s->rx_packets		= 0;
+	s->rx_bytes		= 0;
+	s->tx_packets		= 0;
+	s->tx_bytes		= 0;
 	s->tso_packets		= 0;
 	s->tso_bytes		= 0;
 	s->tx_queue_stopped	= 0;
@@ -155,6 +159,8 @@
 	for (i = 0; i < priv->params.num_channels; i++) {
 		rq_stats = &priv->channel[i]->rq.stats;
 
+		s->rx_packets	+= rq_stats->packets;
+		s->rx_bytes	+= rq_stats->bytes;
 		s->lro_packets	+= rq_stats->lro_packets;
 		s->lro_bytes	+= rq_stats->lro_bytes;
 		s->rx_csum_none	+= rq_stats->csum_none;
@@ -164,6 +170,8 @@
 		for (j = 0; j < priv->params.num_tc; j++) {
 			sq_stats = &priv->channel[i]->sq[j].stats;
 
+			s->tx_packets		+= sq_stats->packets;
+			s->tx_bytes		+= sq_stats->bytes;
 			s->tso_packets		+= sq_stats->tso_packets;
 			s->tso_bytes		+= sq_stats->tso_bytes;
 			s->tx_queue_stopped	+= sq_stats->stopped;
@@ -225,23 +233,6 @@
 	s->tx_broadcast_bytes   =
 		MLX5_GET_CTR(out, transmitted_eth_broadcast.octets);
 
-	s->rx_packets =
-		s->rx_unicast_packets +
-		s->rx_multicast_packets +
-		s->rx_broadcast_packets;
-	s->rx_bytes =
-		s->rx_unicast_bytes +
-		s->rx_multicast_bytes +
-		s->rx_broadcast_bytes;
-	s->tx_packets =
-		s->tx_unicast_packets +
-		s->tx_multicast_packets +
-		s->tx_broadcast_packets;
-	s->tx_bytes =
-		s->tx_unicast_bytes +
-		s->tx_multicast_bytes +
-		s->tx_broadcast_bytes;
-
 	/* Update calculated offload counters */
 	s->tx_csum_offload = s->tx_packets - tx_offload_none;
 	s->rx_csum_good    = s->rx_packets - s->rx_csum_none -
@@ -1199,7 +1190,6 @@
 			ix = mlx5e_bits_invert(i, MLX5E_LOG_INDIR_RQT_SIZE);
 
 		ix = priv->params.indirection_rqt[ix];
-		ix = ix % priv->params.num_channels;
 		MLX5_SET(rqtc, rqtc, rq_num[i],
 			 test_bit(MLX5E_STATE_OPENED, &priv->state) ?
 			 priv->channel[ix]->rq.rqn :
@@ -1317,7 +1307,22 @@
 			      lro_timer_supported_periods[2]));
 }
 
-static int mlx5e_modify_tir_lro(struct mlx5e_priv *priv, int tt)
+void mlx5e_build_tir_ctx_hash(void *tirc, struct mlx5e_priv *priv)
+{
+	MLX5_SET(tirc, tirc, rx_hash_fn,
+		 mlx5e_rx_hash_fn(priv->params.rss_hfunc));
+	if (priv->params.rss_hfunc == ETH_RSS_HASH_TOP) {
+		void *rss_key = MLX5_ADDR_OF(tirc, tirc,
+					     rx_hash_toeplitz_key);
+		size_t len = MLX5_FLD_SZ_BYTES(tirc,
+					       rx_hash_toeplitz_key);
+
+		MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
+		memcpy(rss_key, priv->params.toeplitz_hash_key, len);
+	}
+}
+
+static int mlx5e_modify_tirs_lro(struct mlx5e_priv *priv)
 {
 	struct mlx5_core_dev *mdev = priv->mdev;
 
@@ -1325,6 +1330,7 @@
 	void *tirc;
 	int inlen;
 	int err;
+	int tt;
 
 	inlen = MLX5_ST_SZ_BYTES(modify_tir_in);
 	in = mlx5_vzalloc(inlen);
@@ -1336,7 +1342,11 @@
 
 	mlx5e_build_tir_ctx_lro(tirc, priv);
 
-	err = mlx5_core_modify_tir(mdev, priv->tirn[tt], in, inlen);
+	for (tt = 0; tt < MLX5E_NUM_TT; tt++) {
+		err = mlx5_core_modify_tir(mdev, priv->tirn[tt], in, inlen);
+		if (err)
+			break;
+	}
 
 	kvfree(in);
 
@@ -1672,17 +1682,7 @@
 	default:
 		MLX5_SET(tirc, tirc, indirect_table,
 			 priv->rqtn[MLX5E_INDIRECTION_RQT]);
-		MLX5_SET(tirc, tirc, rx_hash_fn,
-			 mlx5e_rx_hash_fn(priv->params.rss_hfunc));
-		if (priv->params.rss_hfunc == ETH_RSS_HASH_TOP) {
-			void *rss_key = MLX5_ADDR_OF(tirc, tirc,
-						     rx_hash_toeplitz_key);
-			size_t len = MLX5_FLD_SZ_BYTES(tirc,
-						       rx_hash_toeplitz_key);
-
-			MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
-			memcpy(rss_key, priv->params.toeplitz_hash_key, len);
-		}
+		mlx5e_build_tir_ctx_hash(tirc, priv);
 		break;
 	}
 
@@ -1885,8 +1885,10 @@
 			mlx5e_close_locked(priv->netdev);
 
 		priv->params.lro_en = !!(features & NETIF_F_LRO);
-		mlx5e_modify_tir_lro(priv, MLX5E_TT_IPV4_TCP);
-		mlx5e_modify_tir_lro(priv, MLX5E_TT_IPV6_TCP);
+		err = mlx5e_modify_tirs_lro(priv);
+		if (err)
+			mlx5_core_warn(priv->mdev, "lro modify failed, %d\n",
+				       err);
 
 		if (was_opened)
 			err = mlx5e_open_locked(priv->netdev);
@@ -2024,18 +2026,37 @@
 					    vf_stats);
 }
 
-static struct net_device_ops mlx5e_netdev_ops = {
+static const struct net_device_ops mlx5e_netdev_ops_basic = {
 	.ndo_open                = mlx5e_open,
 	.ndo_stop                = mlx5e_close,
 	.ndo_start_xmit          = mlx5e_xmit,
 	.ndo_get_stats64         = mlx5e_get_stats,
 	.ndo_set_rx_mode         = mlx5e_set_rx_mode,
 	.ndo_set_mac_address     = mlx5e_set_mac,
-	.ndo_vlan_rx_add_vid	 = mlx5e_vlan_rx_add_vid,
-	.ndo_vlan_rx_kill_vid	 = mlx5e_vlan_rx_kill_vid,
+	.ndo_vlan_rx_add_vid     = mlx5e_vlan_rx_add_vid,
+	.ndo_vlan_rx_kill_vid    = mlx5e_vlan_rx_kill_vid,
 	.ndo_set_features        = mlx5e_set_features,
-	.ndo_change_mtu		 = mlx5e_change_mtu,
-	.ndo_do_ioctl		 = mlx5e_ioctl,
+	.ndo_change_mtu          = mlx5e_change_mtu,
+	.ndo_do_ioctl            = mlx5e_ioctl,
+};
+
+static const struct net_device_ops mlx5e_netdev_ops_sriov = {
+	.ndo_open                = mlx5e_open,
+	.ndo_stop                = mlx5e_close,
+	.ndo_start_xmit          = mlx5e_xmit,
+	.ndo_get_stats64         = mlx5e_get_stats,
+	.ndo_set_rx_mode         = mlx5e_set_rx_mode,
+	.ndo_set_mac_address     = mlx5e_set_mac,
+	.ndo_vlan_rx_add_vid     = mlx5e_vlan_rx_add_vid,
+	.ndo_vlan_rx_kill_vid    = mlx5e_vlan_rx_kill_vid,
+	.ndo_set_features        = mlx5e_set_features,
+	.ndo_change_mtu          = mlx5e_change_mtu,
+	.ndo_do_ioctl            = mlx5e_ioctl,
+	.ndo_set_vf_mac          = mlx5e_set_vf_mac,
+	.ndo_set_vf_vlan         = mlx5e_set_vf_vlan,
+	.ndo_get_vf_config       = mlx5e_get_vf_config,
+	.ndo_set_vf_link_state   = mlx5e_set_vf_link_state,
+	.ndo_get_vf_stats        = mlx5e_get_vf_stats,
 };
 
 static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)
@@ -2070,12 +2091,20 @@
 	       2 /*sizeof(mlx5e_tx_wqe.inline_hdr_start)*/;
 }
 
+void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len,
+				   int num_channels)
+{
+	int i;
+
+	for (i = 0; i < len; i++)
+		indirection_rqt[i] = i % num_channels;
+}
+
 static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev,
 				    struct net_device *netdev,
 				    int num_channels)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
-	int i;
 
 	priv->params.log_sq_size           =
 		MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
@@ -2099,8 +2128,8 @@
 	netdev_rss_key_fill(priv->params.toeplitz_hash_key,
 			    sizeof(priv->params.toeplitz_hash_key));
 
-	for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++)
-		priv->params.indirection_rqt[i] = i % num_channels;
+	mlx5e_build_default_indir_rqt(priv->params.indirection_rqt,
+				      MLX5E_INDIR_RQT_SIZE, num_channels);
 
 	priv->params.lro_wqe_sz            =
 		MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
@@ -2137,18 +2166,11 @@
 
 	SET_NETDEV_DEV(netdev, &mdev->pdev->dev);
 
-	if (priv->params.num_tc > 1)
-		mlx5e_netdev_ops.ndo_select_queue = mlx5e_select_queue;
+	if (MLX5_CAP_GEN(mdev, vport_group_manager))
+		netdev->netdev_ops = &mlx5e_netdev_ops_sriov;
+	else
+		netdev->netdev_ops = &mlx5e_netdev_ops_basic;
 
-	if (MLX5_CAP_GEN(mdev, vport_group_manager)) {
-		mlx5e_netdev_ops.ndo_set_vf_mac = mlx5e_set_vf_mac;
-		mlx5e_netdev_ops.ndo_set_vf_vlan = mlx5e_set_vf_vlan;
-		mlx5e_netdev_ops.ndo_get_vf_config = mlx5e_get_vf_config;
-		mlx5e_netdev_ops.ndo_set_vf_link_state = mlx5e_set_vf_link_state;
-		mlx5e_netdev_ops.ndo_get_vf_stats = mlx5e_get_vf_stats;
-	}
-
-	netdev->netdev_ops        = &mlx5e_netdev_ops;
 	netdev->watchdog_timeo    = 15 * HZ;
 
 	netdev->ethtool_ops	  = &mlx5e_ethtool_ops;

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index dd959d9..59658b9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c

@@ -230,10 +230,6 @@
 	struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq);
 	int work_done;
 
-	/* avoid accessing cq (dma coherent memory) if not needed */
-	if (!test_and_clear_bit(MLX5E_CQ_HAS_CQES, &cq->flags))
-		return 0;
-
 	for (work_done = 0; work_done < budget; work_done++) {
 		struct mlx5e_rx_wqe *wqe;
 		struct mlx5_cqe64 *cqe;
@@ -267,6 +263,7 @@
 
 		mlx5e_build_rx_skb(cqe, rq, skb);
 		rq->stats.packets++;
+		rq->stats.bytes += be32_to_cpu(cqe->byte_cnt);
 		napi_gro_receive(cq->napi, skb);
 
 wq_ll_pop:
@@ -279,8 +276,5 @@
 	/* ensure cq space is freed before enabling more cqes */
 	wmb();
 
-	if (work_done == budget)
-		set_bit(MLX5E_CQ_HAS_CQES, &cq->flags);
-
 	return work_done;
 }

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 2c3fba0..bb4eeeb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c

@@ -179,6 +179,7 @@
 	unsigned int skb_len = skb->len;
 	u8  opcode = MLX5_OPCODE_SEND;
 	dma_addr_t dma_addr = 0;
+	unsigned int num_bytes;
 	bool bf = false;
 	u16 headlen;
 	u16 ds_cnt;
@@ -204,8 +205,7 @@
 		opcode       = MLX5_OPCODE_LSO;
 		ihs          = skb_transport_offset(skb) + tcp_hdrlen(skb);
 		payload_len  = skb->len - ihs;
-		wi->num_bytes = skb->len +
-				(skb_shinfo(skb)->gso_segs - 1) * ihs;
+		num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs;
 		sq->stats.tso_packets++;
 		sq->stats.tso_bytes += payload_len;
 	} else {
@@ -213,9 +213,11 @@
 		     !skb->xmit_more &&
 		     !skb_shinfo(skb)->nr_frags;
 		ihs = mlx5e_get_inline_hdr_size(sq, skb, bf);
-		wi->num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN);
+		num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN);
 	}
 
+	wi->num_bytes = num_bytes;
+
 	if (skb_vlan_tag_present(skb)) {
 		mlx5e_insert_vlan(eseg->inline_hdr_start, skb, ihs, &skb_data,
 				  &skb_len);
@@ -307,6 +309,7 @@
 	sq->bf_budget = bf ? sq->bf_budget - 1 : 0;
 
 	sq->stats.packets++;
+	sq->stats.bytes += num_bytes;
 	return NETDEV_TX_OK;
 
 dma_unmap_wqe_err:
@@ -335,10 +338,6 @@
 	u16 sqcc;
 	int i;
 
-	/* avoid accessing cq (dma coherent memory) if not needed */
-	if (!test_and_clear_bit(MLX5E_CQ_HAS_CQES, &cq->flags))
-		return false;
-
 	sq = container_of(cq, struct mlx5e_sq, cq);
 
 	npkts = 0;
@@ -422,10 +421,6 @@
 				netif_tx_wake_queue(sq->txq);
 				sq->stats.wake++;
 	}
-	if (i == MLX5E_TX_CQ_POLL_BUDGET) {
-		set_bit(MLX5E_CQ_HAS_CQES, &cq->flags);
-		return true;
-	}
 
-	return false;
+	return (i == MLX5E_TX_CQ_POLL_BUDGET);
 }

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
index 4ac8d71..66d51a7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c

@@ -88,7 +88,6 @@
 {
 	struct mlx5e_cq *cq = container_of(mcq, struct mlx5e_cq, mcq);
 
-	set_bit(MLX5E_CQ_HAS_CQES, &cq->flags);
 	set_bit(MLX5E_CHANNEL_NAPI_SCHED, &cq->channel->flags);
 	barrier();
 	napi_schedule(cq->napi);

diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c
index c071077..7992c55 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c

@@ -215,7 +215,7 @@
 {
 	int index = q->producer_counter & (q->count - 1);
 
-	if ((q->producer_counter - q->consumer_counter) == q->count)
+	if ((u16) (q->producer_counter - q->consumer_counter) == q->count)
 		return NULL;
 	return mlxsw_pci_queue_elem_info_get(q, index);
 }

diff --git a/drivers/net/ethernet/mellanox/mlxsw/port.h b/drivers/net/ethernet/mellanox/mlxsw/port.h
index 726f543..ae65b99 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/port.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/port.h

@@ -49,7 +49,7 @@
 #define MLXSW_PORT_MID			0xd000
 
 #define MLXSW_PORT_MAX_PHY_PORTS	0x40
-#define MLXSW_PORT_MAX_PORTS		MLXSW_PORT_MAX_PHY_PORTS
+#define MLXSW_PORT_MAX_PORTS		(MLXSW_PORT_MAX_PHY_PORTS + 1)
 
 #define MLXSW_PORT_DEVID_BITS_OFFSET	10
 #define MLXSW_PORT_PHY_BITS_OFFSET	4

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index bb77e22..ffe4c03 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h

@@ -873,6 +873,62 @@
 	}
 }
 
+/* SPAFT - Switch Port Acceptable Frame Types
+ * ------------------------------------------
+ * The Switch Port Acceptable Frame Types register configures the frame
+ * admittance of the port.
+ */
+#define MLXSW_REG_SPAFT_ID 0x2010
+#define MLXSW_REG_SPAFT_LEN 0x08
+
+static const struct mlxsw_reg_info mlxsw_reg_spaft = {
+	.id = MLXSW_REG_SPAFT_ID,
+	.len = MLXSW_REG_SPAFT_LEN,
+};
+
+/* reg_spaft_local_port
+ * Local port number.
+ * Access: Index
+ *
+ * Note: CPU port is not supported (all tag types are allowed).
+ */
+MLXSW_ITEM32(reg, spaft, local_port, 0x00, 16, 8);
+
+/* reg_spaft_sub_port
+ * Virtual port within the physical port.
+ * Should be set to 0 when virtual ports are not enabled on the port.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, spaft, sub_port, 0x00, 8, 8);
+
+/* reg_spaft_allow_untagged
+ * When set, untagged frames on the ingress are allowed (default).
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, spaft, allow_untagged, 0x04, 31, 1);
+
+/* reg_spaft_allow_prio_tagged
+ * When set, priority tagged frames on the ingress are allowed (default).
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, spaft, allow_prio_tagged, 0x04, 30, 1);
+
+/* reg_spaft_allow_tagged
+ * When set, tagged frames on the ingress are allowed (default).
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, spaft, allow_tagged, 0x04, 29, 1);
+
+static inline void mlxsw_reg_spaft_pack(char *payload, u8 local_port,
+					bool allow_untagged)
+{
+	MLXSW_REG_ZERO(spaft, payload);
+	mlxsw_reg_spaft_local_port_set(payload, local_port);
+	mlxsw_reg_spaft_allow_untagged_set(payload, allow_untagged);
+	mlxsw_reg_spaft_allow_prio_tagged_set(payload, true);
+	mlxsw_reg_spaft_allow_tagged_set(payload, true);
+}
+
 /* SFGC - Switch Flooding Group Configuration
  * ------------------------------------------
  * The following register controls the association of flooding tables and MIDs
@@ -3203,6 +3259,8 @@
 		return "SPVID";
 	case MLXSW_REG_SPVM_ID:
 		return "SPVM";
+	case MLXSW_REG_SPAFT_ID:
+		return "SPAFT";
 	case MLXSW_REG_SFGC_ID:
 		return "SFGC";
 	case MLXSW_REG_SFTR_ID:

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 217856b..a94daa8 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c

@@ -2123,6 +2123,8 @@
 	if (flush_fdb && mlxsw_sp_port_fdb_flush(mlxsw_sp_port))
 		netdev_err(mlxsw_sp_port->dev, "Failed to flush FDB\n");
 
+	mlxsw_sp_port_pvid_set(mlxsw_sp_port, 1);
+
 	mlxsw_sp_port->learning = 0;
 	mlxsw_sp_port->learning_sync = 0;
 	mlxsw_sp_port->uc_flood = 0;
@@ -2356,9 +2358,7 @@
 	if (mlxsw_sp_port->bridged) {
 		mlxsw_sp_port_active_vlans_del(mlxsw_sp_port);
 		mlxsw_sp_port_bridge_leave(mlxsw_sp_port, false);
-
-		if (lag->ref_count == 1)
-			mlxsw_sp_master_bridge_dec(mlxsw_sp, NULL);
+		mlxsw_sp_master_bridge_dec(mlxsw_sp, NULL);
 	}
 
 	if (lag->ref_count == 1) {
@@ -2746,6 +2746,13 @@
 		goto err_vport_flood_set;
 	}
 
+	err = mlxsw_sp_port_stp_state_set(mlxsw_sp_vport, vid,
+					  MLXSW_REG_SPMS_STATE_FORWARDING);
+	if (err) {
+		netdev_err(dev, "Failed to set STP state\n");
+		goto err_port_stp_state_set;
+	}
+
 	if (flush_fdb && mlxsw_sp_vport_fdb_flush(mlxsw_sp_vport))
 		netdev_err(dev, "Failed to flush FDB\n");
 
@@ -2763,6 +2770,7 @@
 
 	return 0;
 
+err_port_stp_state_set:
 err_vport_flood_set:
 err_port_vid_learning_set:
 err_port_vid_to_fid_validate:

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 7f42eb1..3b89ed2 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h

@@ -254,5 +254,6 @@
 int mlxsw_sp_vport_flood_set(struct mlxsw_sp_port *mlxsw_sp_vport, u16 vfid,
 			     bool set, bool only_uc);
 void mlxsw_sp_port_active_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port);
+int mlxsw_sp_port_pvid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid);
 
 #endif

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index e492ca2..7b56098 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c

@@ -370,7 +370,8 @@
 	return err;
 }
 
-static int mlxsw_sp_port_pvid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid)
+static int __mlxsw_sp_port_pvid_set(struct mlxsw_sp_port *mlxsw_sp_port,
+				    u16 vid)
 {
 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
 	char spvid_pl[MLXSW_REG_SPVID_LEN];
@@ -379,6 +380,53 @@
 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spvid), spvid_pl);
 }
 
+static int mlxsw_sp_port_allow_untagged_set(struct mlxsw_sp_port *mlxsw_sp_port,
+					    bool allow)
+{
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+	char spaft_pl[MLXSW_REG_SPAFT_LEN];
+
+	mlxsw_reg_spaft_pack(spaft_pl, mlxsw_sp_port->local_port, allow);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spaft), spaft_pl);
+}
+
+int mlxsw_sp_port_pvid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid)
+{
+	struct net_device *dev = mlxsw_sp_port->dev;
+	int err;
+
+	if (!vid) {
+		err = mlxsw_sp_port_allow_untagged_set(mlxsw_sp_port, false);
+		if (err) {
+			netdev_err(dev, "Failed to disallow untagged traffic\n");
+			return err;
+		}
+	} else {
+		err = __mlxsw_sp_port_pvid_set(mlxsw_sp_port, vid);
+		if (err) {
+			netdev_err(dev, "Failed to set PVID\n");
+			return err;
+		}
+
+		/* Only allow if not already allowed. */
+		if (!mlxsw_sp_port->pvid) {
+			err = mlxsw_sp_port_allow_untagged_set(mlxsw_sp_port,
+							       true);
+			if (err) {
+				netdev_err(dev, "Failed to allow untagged traffic\n");
+				goto err_port_allow_untagged_set;
+			}
+		}
+	}
+
+	mlxsw_sp_port->pvid = vid;
+	return 0;
+
+err_port_allow_untagged_set:
+	__mlxsw_sp_port_pvid_set(mlxsw_sp_port, mlxsw_sp_port->pvid);
+	return err;
+}
+
 static int mlxsw_sp_fid_create(struct mlxsw_sp *mlxsw_sp, u16 fid)
 {
 	char sfmr_pl[MLXSW_REG_SFMR_LEN];
@@ -540,7 +588,12 @@
 			netdev_err(dev, "Unable to add PVID %d\n", vid_begin);
 			goto err_port_pvid_set;
 		}
-		mlxsw_sp_port->pvid = vid_begin;
+	} else if (!flag_pvid && old_pvid >= vid_begin && old_pvid <= vid_end) {
+		err = mlxsw_sp_port_pvid_set(mlxsw_sp_port, 0);
+		if (err) {
+			netdev_err(dev, "Unable to del PVID\n");
+			goto err_port_pvid_set;
+		}
 	}
 
 	/* Changing activity bits only if HW operation succeded */
@@ -892,20 +945,18 @@
 		return err;
 	}
 
+	if (init)
+		goto out;
+
 	pvid = mlxsw_sp_port->pvid;
-	if (pvid >= vid_begin && pvid <= vid_end && pvid != 1) {
-		/* Default VLAN is always 1 */
-		err = mlxsw_sp_port_pvid_set(mlxsw_sp_port, 1);
+	if (pvid >= vid_begin && pvid <= vid_end) {
+		err = mlxsw_sp_port_pvid_set(mlxsw_sp_port, 0);
 		if (err) {
 			netdev_err(dev, "Unable to del PVID %d\n", pvid);
 			return err;
 		}
-		mlxsw_sp_port->pvid = 1;
 	}
 
-	if (init)
-		goto out;
-
 	err = __mlxsw_sp_port_flood_set(mlxsw_sp_port, vid_begin, vid_end,
 					false, false);
 	if (err) {

diff --git a/drivers/net/ethernet/moxa/moxart_ether.c b/drivers/net/ethernet/moxa/moxart_ether.c
index 00cfd95..3e67f45 100644
--- a/drivers/net/ethernet/moxa/moxart_ether.c
+++ b/drivers/net/ethernet/moxa/moxart_ether.c

@@ -474,9 +474,9 @@
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	ndev->base_addr = res->start;
 	priv->base = devm_ioremap_resource(p_dev, res);
-	ret = IS_ERR(priv->base);
-	if (ret) {
+	if (IS_ERR(priv->base)) {
 		dev_err(p_dev, "devm_ioremap_resource failed\n");
+		ret = PTR_ERR(priv->base);
 		goto init_fail;
 	}
 

diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c
index 689a4a5..1ef0393 100644
--- a/drivers/net/ethernet/qualcomm/qca_spi.c
+++ b/drivers/net/ethernet/qualcomm/qca_spi.c

@@ -811,7 +811,7 @@
 	dev->netdev_ops = &qcaspi_netdev_ops;
 	qcaspi_set_ethtool_ops(dev);
 	dev->watchdog_timeo = QCASPI_TX_TIMEOUT;
-	dev->flags = IFF_MULTICAST;
+	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
 	dev->tx_queue_len = 100;
 
 	qca = netdev_priv(dev);

diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index 17d5571..dd2cf37 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c

@@ -4933,8 +4933,6 @@
 		RTL_W32(RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST);
 		break;
 	case RTL_GIGA_MAC_VER_40:
-		RTL_W32(RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST | RX_EARLY_OFF);
-		break;
 	case RTL_GIGA_MAC_VER_41:
 	case RTL_GIGA_MAC_VER_42:
 	case RTL_GIGA_MAC_VER_43:
@@ -4943,8 +4941,6 @@
 	case RTL_GIGA_MAC_VER_46:
 	case RTL_GIGA_MAC_VER_47:
 	case RTL_GIGA_MAC_VER_48:
-		RTL_W32(RxConfig, RX128_INT_EN | RX_DMA_BURST | RX_EARLY_OFF);
-		break;
 	case RTL_GIGA_MAC_VER_49:
 	case RTL_GIGA_MAC_VER_50:
 	case RTL_GIGA_MAC_VER_51:
@@ -6137,28 +6133,28 @@
 		sw_cnt_1ms_ini = 16000000/rg_saw_cnt;
 		sw_cnt_1ms_ini &= 0x0fff;
 		data = r8168_mac_ocp_read(tp, 0xd412);
-		data &= 0x0fff;
+		data &= ~0x0fff;
 		data |= sw_cnt_1ms_ini;
 		r8168_mac_ocp_write(tp, 0xd412, data);
 	}
 
 	data = r8168_mac_ocp_read(tp, 0xe056);
-	data &= 0xf0;
-	data |= 0x07;
+	data &= ~0xf0;
+	data |= 0x70;
 	r8168_mac_ocp_write(tp, 0xe056, data);
 
 	data = r8168_mac_ocp_read(tp, 0xe052);
-	data &= 0x8008;
-	data |= 0x6000;
+	data &= ~0x6000;
+	data |= 0x8008;
 	r8168_mac_ocp_write(tp, 0xe052, data);
 
 	data = r8168_mac_ocp_read(tp, 0xe0d6);
-	data &= 0x01ff;
+	data &= ~0x01ff;
 	data |= 0x017f;
 	r8168_mac_ocp_write(tp, 0xe0d6, data);
 
 	data = r8168_mac_ocp_read(tp, 0xd420);
-	data &= 0x0fff;
+	data &= ~0x0fff;
 	data |= 0x047f;
 	r8168_mac_ocp_write(tp, 0xd420, data);
 
@@ -7730,10 +7726,13 @@
 {
 	struct rtl8169_private *tp = netdev_priv(dev);
 	void __iomem *ioaddr = tp->mmio_addr;
+	struct pci_dev *pdev = tp->pci_dev;
 	struct rtl8169_counters *counters = tp->counters;
 	unsigned int start;
 
-	if (netif_running(dev))
+	pm_runtime_get_noresume(&pdev->dev);
+
+	if (netif_running(dev) && pm_runtime_active(&pdev->dev))
 		rtl8169_rx_missed(dev, ioaddr);
 
 	do {
@@ -7761,7 +7760,8 @@
 	 * Fetch additonal counter values missing in stats collected by driver
 	 * from tally counters.
 	 */
-	rtl8169_update_counters(dev);
+	if (pm_runtime_active(&pdev->dev))
+		rtl8169_update_counters(dev);
 
 	/*
 	 * Subtract values fetched during initalization.
@@ -7774,6 +7774,8 @@
 	stats->tx_aborted_errors = le16_to_cpu(counters->tx_aborted) -
 		le16_to_cpu(tp->tc_offset.tx_aborted);
 
+	pm_runtime_put_noidle(&pdev->dev);
+
 	return stats;
 }
 
@@ -7853,6 +7855,10 @@
 
 	rtl8169_net_suspend(dev);
 
+	/* Update counters before going runtime suspend */
+	rtl8169_rx_missed(dev, tp->mmio_addr);
+	rtl8169_update_counters(dev);
+
 	return 0;
 }
 

diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index ac43ed9..86449c3 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c

@@ -1139,7 +1139,8 @@
 	if (netif_running(ndev)) {
 		netif_device_detach(ndev);
 		/* Stop PTP Clock driver */
-		ravb_ptp_stop(ndev);
+		if (priv->chip_id == RCAR_GEN2)
+			ravb_ptp_stop(ndev);
 		/* Wait for DMA stopping */
 		error = ravb_stop_dma(ndev);
 		if (error) {
@@ -1170,7 +1171,8 @@
 		ravb_emac_init(ndev);
 
 		/* Initialise PTP Clock driver */
-		ravb_ptp_init(ndev, priv->pdev);
+		if (priv->chip_id == RCAR_GEN2)
+			ravb_ptp_init(ndev, priv->pdev);
 
 		netif_device_attach(ndev);
 	}
@@ -1298,7 +1300,8 @@
 	netif_tx_stop_all_queues(ndev);
 
 	/* Stop PTP Clock driver */
-	ravb_ptp_stop(ndev);
+	if (priv->chip_id == RCAR_GEN2)
+		ravb_ptp_stop(ndev);
 
 	/* Wait for DMA stopping */
 	ravb_stop_dma(ndev);
@@ -1311,7 +1314,8 @@
 	ravb_emac_init(ndev);
 
 	/* Initialise PTP Clock driver */
-	ravb_ptp_init(ndev, priv->pdev);
+	if (priv->chip_id == RCAR_GEN2)
+		ravb_ptp_init(ndev, priv->pdev);
 
 	netif_tx_start_all_queues(ndev);
 }
@@ -1718,7 +1722,6 @@
 static int ravb_probe(struct platform_device *pdev)
 {
 	struct device_node *np = pdev->dev.of_node;
-	const struct of_device_id *match;
 	struct ravb_private *priv;
 	enum ravb_chip_id chip_id;
 	struct net_device *ndev;
@@ -1750,8 +1753,7 @@
 	ndev->base_addr = res->start;
 	ndev->dma = -1;
 
-	match = of_match_device(of_match_ptr(ravb_match_table), &pdev->dev);
-	chip_id = (enum ravb_chip_id)match->data;
+	chip_id = (enum ravb_chip_id)of_device_get_match_data(&pdev->dev);
 
 	if (chip_id == RCAR_GEN3)
 		irq = platform_get_irq_byname(pdev, "ch22");
@@ -1814,10 +1816,6 @@
 			   CCC_OPC_CONFIG | CCC_GAC | CCC_CSEL_HPB, CCC);
 	}
 
-	/* Set CSEL value */
-	ravb_write(ndev, (ravb_read(ndev, CCC) & ~CCC_CSEL) | CCC_CSEL_HPB,
-		   CCC);
-
 	/* Set GTI value */
 	error = ravb_set_gti(ndev);
 	if (error)

diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index dfa9e59..7384499 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c

@@ -3061,15 +3061,11 @@
 	mdp->ether_link_active_low = pd->ether_link_active_low;
 
 	/* set cpu data */
-	if (id) {
+	if (id)
 		mdp->cd = (struct sh_eth_cpu_data *)id->driver_data;
-	} else	{
-		const struct of_device_id *match;
+	else
+		mdp->cd = (struct sh_eth_cpu_data *)of_device_get_match_data(&pdev->dev);
 
-		match = of_match_device(of_match_ptr(sh_eth_match_table),
-					&pdev->dev);
-		mdp->cd = (struct sh_eth_cpu_data *)match->data;
-	}
 	mdp->reg_offset = sh_eth_get_register_offset(mdp->cd->register_type);
 	if (!mdp->reg_offset) {
 		dev_err(&pdev->dev, "Unknown register type (%d)\n",

diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c
index 0e2fc1a..db7db8a 100644
--- a/drivers/net/ethernet/smsc/smc91x.c
+++ b/drivers/net/ethernet/smsc/smc91x.c

@@ -2342,8 +2342,8 @@
 	}
 
 	ndev->irq = platform_get_irq(pdev, 0);
-	if (ndev->irq <= 0) {
-		ret = -ENODEV;
+	if (ndev->irq < 0) {
+		ret = ndev->irq;
 		goto out_release_io;
 	}
 	/*

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
index 0faf163..efb54f3 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c

@@ -199,21 +199,12 @@
 	struct stmmac_priv *priv = netdev_priv(ndev);
 	struct stmmac_mdio_bus_data *mdio_bus_data = priv->plat->mdio_bus_data;
 	int addr, found;
-	struct device_node *mdio_node = NULL;
-	struct device_node *child_node = NULL;
+	struct device_node *mdio_node = priv->plat->mdio_node;
 
 	if (!mdio_bus_data)
 		return 0;
 
 	if (IS_ENABLED(CONFIG_OF)) {
-		for_each_child_of_node(priv->device->of_node, child_node) {
-			if (of_device_is_compatible(child_node,
-						    "snps,dwmac-mdio")) {
-				mdio_node = child_node;
-				break;
-			}
-		}
-
 		if (mdio_node) {
 			netdev_dbg(ndev, "FOUND MDIO subnode\n");
 		} else {

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index 6a52fa1..4514ba7 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c

@@ -110,6 +110,7 @@
 	struct device_node *np = pdev->dev.of_node;
 	struct plat_stmmacenet_data *plat;
 	struct stmmac_dma_cfg *dma_cfg;
+	struct device_node *child_node = NULL;
 
 	plat = devm_kzalloc(&pdev->dev, sizeof(*plat), GFP_KERNEL);
 	if (!plat)
@@ -140,13 +141,19 @@
 		plat->phy_node = of_node_get(np);
 	}
 
+	for_each_child_of_node(np, child_node)
+		if (of_device_is_compatible(child_node,	"snps,dwmac-mdio")) {
+			plat->mdio_node = child_node;
+			break;
+		}
+
 	/* "snps,phy-addr" is not a standard property. Mark it as deprecated
 	 * and warn of its use. Remove this when phy node support is added.
 	 */
 	if (of_property_read_u32(np, "snps,phy-addr", &plat->phy_addr) == 0)
 		dev_warn(&pdev->dev, "snps,phy-addr property is deprecated\n");
 
-	if ((plat->phy_node && !of_phy_is_fixed_link(np)) || plat->phy_bus_name)
+	if ((plat->phy_node && !of_phy_is_fixed_link(np)) || !plat->mdio_node)
 		plat->mdio_bus_data = NULL;
 	else
 		plat->mdio_bus_data =

diff --git a/drivers/net/ethernet/synopsys/dwc_eth_qos.c b/drivers/net/ethernet/synopsys/dwc_eth_qos.c
index fc8bbff..af11ed1 100644
--- a/drivers/net/ethernet/synopsys/dwc_eth_qos.c
+++ b/drivers/net/ethernet/synopsys/dwc_eth_qos.c

@@ -426,7 +426,7 @@
 #define DWC_MMC_RXOCTETCOUNT_GB          0x0784
 #define DWC_MMC_RXPACKETCOUNT_GB         0x0780
 
-static int debug = 3;
+static int debug = -1;
 module_param(debug, int, 0);
 MODULE_PARM_DESC(debug, "DWC_eth_qos debug level (0=none,...,16=all)");
 
@@ -650,6 +650,11 @@
 	u32 mmc_tx_counters_mask;
 
 	struct dwceqos_flowcontrol flowcontrol;
+
+	/* Tracks the intermediate state of phy started but hardware
+	 * init not finished yet.
+	 */
+	bool phy_defer;
 };
 
 static void dwceqos_read_mmc_counters(struct net_local *lp, u32 rx_mask,
@@ -901,6 +906,9 @@
 	struct phy_device *phydev = lp->phy_dev;
 	int status_change = 0;
 
+	if (lp->phy_defer)
+		return;
+
 	if (phydev->link) {
 		if ((lp->speed != phydev->speed) ||
 		    (lp->duplex != phydev->duplex)) {
@@ -1113,7 +1121,7 @@
 	/* Allocate DMA descriptors */
 	size = DWCEQOS_RX_DCNT * sizeof(struct dwceqos_dma_desc);
 	lp->rx_descs = dma_alloc_coherent(lp->ndev->dev.parent, size,
-			&lp->rx_descs_addr, 0);
+			&lp->rx_descs_addr, GFP_KERNEL);
 	if (!lp->rx_descs)
 		goto err_out;
 	lp->rx_descs_tail_addr = lp->rx_descs_addr +
@@ -1121,7 +1129,7 @@
 
 	size = DWCEQOS_TX_DCNT * sizeof(struct dwceqos_dma_desc);
 	lp->tx_descs = dma_alloc_coherent(lp->ndev->dev.parent, size,
-			&lp->tx_descs_addr, 0);
+			&lp->tx_descs_addr, GFP_KERNEL);
 	if (!lp->tx_descs)
 		goto err_out;
 	lp->tx_descs_tail_addr = lp->tx_descs_addr +
@@ -1635,6 +1643,12 @@
 	regval = dwceqos_read(lp, REG_DWCEQOS_MAC_CFG);
 	dwceqos_write(lp, REG_DWCEQOS_MAC_CFG,
 		      regval | DWCEQOS_MAC_CFG_TE | DWCEQOS_MAC_CFG_RE);
+
+	lp->phy_defer = false;
+	mutex_lock(&lp->phy_dev->lock);
+	phy_read_status(lp->phy_dev);
+	dwceqos_adjust_link(lp->ndev);
+	mutex_unlock(&lp->phy_dev->lock);
 }
 
 static void dwceqos_tx_reclaim(unsigned long data)
@@ -1880,9 +1894,13 @@
 	}
 	netdev_reset_queue(ndev);
 
+	/* The dwceqos reset state machine requires all phy clocks to complete,
+	 * hence the unusual init order with phy_start first.
+	 */
+	lp->phy_defer = true;
+	phy_start(lp->phy_dev);
 	dwceqos_init_hw(lp);
 	napi_enable(&lp->napi);
-	phy_start(lp->phy_dev);
 
 	netif_start_queue(ndev);
 	tasklet_enable(&lp->tx_bdreclaim_tasklet);
@@ -1915,18 +1933,19 @@
 {
 	struct net_local *lp = netdev_priv(ndev);
 
-	phy_stop(lp->phy_dev);
-
 	tasklet_disable(&lp->tx_bdreclaim_tasklet);
-	netif_stop_queue(ndev);
 	napi_disable(&lp->napi);
 
-	dwceqos_drain_dma(lp);
+	/* Stop all tx before we drain the tx dma. */
+	netif_tx_lock_bh(lp->ndev);
+	netif_stop_queue(ndev);
+	netif_tx_unlock_bh(lp->ndev);
 
-	netif_tx_lock(lp->ndev);
+	dwceqos_drain_dma(lp);
 	dwceqos_reset_hw(lp);
+	phy_stop(lp->phy_dev);
+
 	dwceqos_descriptor_free(lp);
-	netif_tx_unlock(lp->ndev);
 
 	return 0;
 }
@@ -2178,12 +2197,10 @@
 		((trans.initial_descriptor + trans.nr_descriptors) %
 		 DWCEQOS_TX_DCNT));
 
-	dwceqos_tx_finalize(skb, lp, &trans);
-
-	netdev_sent_queue(ndev, skb->len);
-
 	spin_lock_bh(&lp->tx_lock);
 	lp->tx_free -= trans.nr_descriptors;
+	dwceqos_tx_finalize(skb, lp, &trans);
+	netdev_sent_queue(ndev, skb->len);
 	spin_unlock_bh(&lp->tx_lock);
 
 	ndev->trans_start = jiffies;

diff --git a/drivers/net/ethernet/ti/cpsw-phy-sel.c b/drivers/net/ethernet/ti/cpsw-phy-sel.c
index e9cc61e..c3e85ac 100644
--- a/drivers/net/ethernet/ti/cpsw-phy-sel.c
+++ b/drivers/net/ethernet/ti/cpsw-phy-sel.c

@@ -63,8 +63,12 @@
 		mode = AM33XX_GMII_SEL_MODE_RGMII;
 		break;
 
-	case PHY_INTERFACE_MODE_MII:
 	default:
+		dev_warn(priv->dev,
+			 "Unsupported PHY mode: \"%s\". Defaulting to MII.\n",
+			phy_modes(phy_mode));
+		/* fallthrough */
+	case PHY_INTERFACE_MODE_MII:
 		mode = AM33XX_GMII_SEL_MODE_MII;
 		break;
 	};
@@ -106,8 +110,12 @@
 		mode = AM33XX_GMII_SEL_MODE_RGMII;
 		break;
 
-	case PHY_INTERFACE_MODE_MII:
 	default:
+		dev_warn(priv->dev,
+			 "Unsupported PHY mode: \"%s\". Defaulting to MII.\n",
+			phy_modes(phy_mode));
+		/* fallthrough */
+	case PHY_INTERFACE_MODE_MII:
 		mode = AM33XX_GMII_SEL_MODE_MII;
 		break;
 	};

diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c
index c61d66d..029841f 100644
--- a/drivers/net/ethernet/ti/netcp_core.c
+++ b/drivers/net/ethernet/ti/netcp_core.c

@@ -117,21 +117,17 @@
 	*ndesc = le32_to_cpu(desc->next_desc);
 }
 
-static void get_pad_info(u32 *pad0, u32 *pad1, u32 *pad2, struct knav_dma_desc *desc)
+static u32 get_sw_data(int index, struct knav_dma_desc *desc)
 {
-	*pad0 = le32_to_cpu(desc->pad[0]);
-	*pad1 = le32_to_cpu(desc->pad[1]);
-	*pad2 = le32_to_cpu(desc->pad[2]);
+	/* No Endian conversion needed as this data is untouched by hw */
+	return desc->sw_data[index];
 }
 
-static void get_pad_ptr(void **padptr, struct knav_dma_desc *desc)
-{
-	u64 pad64;
-
-	pad64 = le32_to_cpu(desc->pad[0]) +
-		((u64)le32_to_cpu(desc->pad[1]) << 32);
-	*padptr = (void *)(uintptr_t)pad64;
-}
+/* use these macros to get sw data */
+#define GET_SW_DATA0(desc) get_sw_data(0, desc)
+#define GET_SW_DATA1(desc) get_sw_data(1, desc)
+#define GET_SW_DATA2(desc) get_sw_data(2, desc)
+#define GET_SW_DATA3(desc) get_sw_data(3, desc)
 
 static void get_org_pkt_info(dma_addr_t *buff, u32 *buff_len,
 			     struct knav_dma_desc *desc)
@@ -163,13 +159,18 @@
 	desc->packet_info = cpu_to_le32(pkt_info);
 }
 
-static void set_pad_info(u32 pad0, u32 pad1, u32 pad2, struct knav_dma_desc *desc)
+static void set_sw_data(int index, u32 data, struct knav_dma_desc *desc)
 {
-	desc->pad[0] = cpu_to_le32(pad0);
-	desc->pad[1] = cpu_to_le32(pad1);
-	desc->pad[2] = cpu_to_le32(pad1);
+	/* No Endian conversion needed as this data is untouched by hw */
+	desc->sw_data[index] = data;
 }
 
+/* use these macros to set sw data */
+#define SET_SW_DATA0(data, desc) set_sw_data(0, data, desc)
+#define SET_SW_DATA1(data, desc) set_sw_data(1, data, desc)
+#define SET_SW_DATA2(data, desc) set_sw_data(2, data, desc)
+#define SET_SW_DATA3(data, desc) set_sw_data(3, data, desc)
+
 static void set_org_pkt_info(dma_addr_t buff, u32 buff_len,
 			     struct knav_dma_desc *desc)
 {
@@ -581,7 +582,6 @@
 	dma_addr_t dma_desc, dma_buf;
 	unsigned int buf_len, dma_sz = sizeof(*ndesc);
 	void *buf_ptr;
-	u32 pad[2];
 	u32 tmp;
 
 	get_words(&dma_desc, 1, &desc->next_desc);
@@ -593,14 +593,20 @@
 			break;
 		}
 		get_pkt_info(&dma_buf, &tmp, &dma_desc, ndesc);
-		get_pad_ptr(&buf_ptr, ndesc);
+		/* warning!!!! We are retrieving the virtual ptr in the sw_data
+		 * field as a 32bit value. Will not work on 64bit machines
+		 */
+		buf_ptr = (void *)GET_SW_DATA0(ndesc);
+		buf_len = (int)GET_SW_DATA1(desc);
 		dma_unmap_page(netcp->dev, dma_buf, PAGE_SIZE, DMA_FROM_DEVICE);
 		__free_page(buf_ptr);
 		knav_pool_desc_put(netcp->rx_pool, desc);
 	}
-
-	get_pad_info(&pad[0], &pad[1], &buf_len, desc);
-	buf_ptr = (void *)(uintptr_t)(pad[0] + ((u64)pad[1] << 32));
+	/* warning!!!! We are retrieving the virtual ptr in the sw_data
+	 * field as a 32bit value. Will not work on 64bit machines
+	 */
+	buf_ptr = (void *)GET_SW_DATA0(desc);
+	buf_len = (int)GET_SW_DATA1(desc);
 
 	if (buf_ptr)
 		netcp_frag_free(buf_len <= PAGE_SIZE, buf_ptr);
@@ -639,7 +645,6 @@
 	dma_addr_t dma_desc, dma_buff;
 	struct netcp_packet p_info;
 	struct sk_buff *skb;
-	u32 pad[2];
 	void *org_buf_ptr;
 
 	dma_desc = knav_queue_pop(netcp->rx_queue, &dma_sz);
@@ -653,8 +658,11 @@
 	}
 
 	get_pkt_info(&dma_buff, &buf_len, &dma_desc, desc);
-	get_pad_info(&pad[0], &pad[1], &org_buf_len, desc);
-	org_buf_ptr = (void *)(uintptr_t)(pad[0] + ((u64)pad[1] << 32));
+	/* warning!!!! We are retrieving the virtual ptr in the sw_data
+	 * field as a 32bit value. Will not work on 64bit machines
+	 */
+	org_buf_ptr = (void *)GET_SW_DATA0(desc);
+	org_buf_len = (int)GET_SW_DATA1(desc);
 
 	if (unlikely(!org_buf_ptr)) {
 		dev_err(netcp->ndev_dev, "NULL bufptr in desc\n");
@@ -679,7 +687,6 @@
 	/* Fill in the page fragment list */
 	while (dma_desc) {
 		struct page *page;
-		void *ptr;
 
 		ndesc = knav_pool_desc_unmap(netcp->rx_pool, dma_desc, dma_sz);
 		if (unlikely(!ndesc)) {
@@ -688,8 +695,10 @@
 		}
 
 		get_pkt_info(&dma_buff, &buf_len, &dma_desc, ndesc);
-		get_pad_ptr(&ptr, ndesc);
-		page = ptr;
+		/* warning!!!! We are retrieving the virtual ptr in the sw_data
+		 * field as a 32bit value. Will not work on 64bit machines
+		 */
+		page = (struct page *)GET_SW_DATA0(desc);
 
 		if (likely(dma_buff && buf_len && page)) {
 			dma_unmap_page(netcp->dev, dma_buff, PAGE_SIZE,
@@ -777,7 +786,10 @@
 		}
 
 		get_org_pkt_info(&dma, &buf_len, desc);
-		get_pad_ptr(&buf_ptr, desc);
+		/* warning!!!! We are retrieving the virtual ptr in the sw_data
+		 * field as a 32bit value. Will not work on 64bit machines
+		 */
+		buf_ptr = (void *)GET_SW_DATA0(desc);
 
 		if (unlikely(!dma)) {
 			dev_err(netcp->ndev_dev, "NULL orig_buff in desc\n");
@@ -829,7 +841,7 @@
 	struct page *page;
 	dma_addr_t dma;
 	void *bufptr;
-	u32 pad[3];
+	u32 sw_data[2];
 
 	/* Allocate descriptor */
 	hwdesc = knav_pool_desc_get(netcp->rx_pool);
@@ -846,7 +858,7 @@
 				SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 
 		bufptr = netdev_alloc_frag(primary_buf_len);
-		pad[2] = primary_buf_len;
+		sw_data[1] = primary_buf_len;
 
 		if (unlikely(!bufptr)) {
 			dev_warn_ratelimited(netcp->ndev_dev,
@@ -858,9 +870,10 @@
 		if (unlikely(dma_mapping_error(netcp->dev, dma)))
 			goto fail;
 
-		pad[0] = lower_32_bits((uintptr_t)bufptr);
-		pad[1] = upper_32_bits((uintptr_t)bufptr);
-
+		/* warning!!!! We are saving the virtual ptr in the sw_data
+		 * field as a 32bit value. Will not work on 64bit machines
+		 */
+		sw_data[0] = (u32)bufptr;
 	} else {
 		/* Allocate a secondary receive queue entry */
 		page = alloc_page(GFP_ATOMIC | GFP_DMA | __GFP_COLD);
@@ -870,9 +883,11 @@
 		}
 		buf_len = PAGE_SIZE;
 		dma = dma_map_page(netcp->dev, page, 0, buf_len, DMA_TO_DEVICE);
-		pad[0] = lower_32_bits(dma);
-		pad[1] = upper_32_bits(dma);
-		pad[2] = 0;
+		/* warning!!!! We are saving the virtual ptr in the sw_data
+		 * field as a 32bit value. Will not work on 64bit machines
+		 */
+		sw_data[0] = (u32)page;
+		sw_data[1] = 0;
 	}
 
 	desc_info =  KNAV_DMA_DESC_PS_INFO_IN_DESC;
@@ -882,7 +897,8 @@
 	pkt_info |= (netcp->rx_queue_id & KNAV_DMA_DESC_RETQ_MASK) <<
 		    KNAV_DMA_DESC_RETQ_SHIFT;
 	set_org_pkt_info(dma, buf_len, hwdesc);
-	set_pad_info(pad[0], pad[1], pad[2], hwdesc);
+	SET_SW_DATA0(sw_data[0], hwdesc);
+	SET_SW_DATA1(sw_data[1], hwdesc);
 	set_desc_info(desc_info, pkt_info, hwdesc);
 
 	/* Push to FDQs */
@@ -971,7 +987,6 @@
 					  unsigned int budget)
 {
 	struct knav_dma_desc *desc;
-	void *ptr;
 	struct sk_buff *skb;
 	unsigned int dma_sz;
 	dma_addr_t dma;
@@ -988,8 +1003,10 @@
 			continue;
 		}
 
-		get_pad_ptr(&ptr, desc);
-		skb = ptr;
+		/* warning!!!! We are retrieving the virtual ptr in the sw_data
+		 * field as a 32bit value. Will not work on 64bit machines
+		 */
+		skb = (struct sk_buff *)GET_SW_DATA0(desc);
 		netcp_free_tx_desc_chain(netcp, desc, dma_sz);
 		if (!skb) {
 			dev_err(netcp->ndev_dev, "No skb in Tx desc\n");
@@ -1194,10 +1211,10 @@
 	}
 
 	set_words(&tmp, 1, &desc->packet_info);
-	tmp = lower_32_bits((uintptr_t)&skb);
-	set_words(&tmp, 1, &desc->pad[0]);
-	tmp = upper_32_bits((uintptr_t)&skb);
-	set_words(&tmp, 1, &desc->pad[1]);
+	/* warning!!!! We are saving the virtual ptr in the sw_data
+	 * field as a 32bit value. Will not work on 64bit machines
+	 */
+	SET_SW_DATA0((u32)skb, desc);
 
 	if (tx_pipe->flags & SWITCH_TO_PORT_IN_TAGINFO) {
 		tmp = tx_pipe->switch_to_port;

diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index 028e387..0bf7edd 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c

@@ -1039,17 +1039,34 @@
 	return geneve_xmit_skb(skb, dev, info);
 }
 
-static int geneve_change_mtu(struct net_device *dev, int new_mtu)
+static int __geneve_change_mtu(struct net_device *dev, int new_mtu, bool strict)
 {
-	/* GENEVE overhead is not fixed, so we can't enforce a more
-	 * precise max MTU.
+	/* The max_mtu calculation does not take account of GENEVE
+	 * options, to avoid excluding potentially valid
+	 * configurations.
 	 */
-	if (new_mtu < 68 || new_mtu > IP_MAX_MTU)
+	int max_mtu = IP_MAX_MTU - GENEVE_BASE_HLEN - sizeof(struct iphdr)
+		- dev->hard_header_len;
+
+	if (new_mtu < 68)
 		return -EINVAL;
+
+	if (new_mtu > max_mtu) {
+		if (strict)
+			return -EINVAL;
+
+		new_mtu = max_mtu;
+	}
+
 	dev->mtu = new_mtu;
 	return 0;
 }
 
+static int geneve_change_mtu(struct net_device *dev, int new_mtu)
+{
+	return __geneve_change_mtu(dev, new_mtu, true);
+}
+
 static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
 {
 	struct ip_tunnel_info *info = skb_tunnel_info(skb);
@@ -1161,6 +1178,7 @@
 	dev->hw_features |= NETIF_F_GSO_SOFTWARE;
 
 	netif_keep_dst(dev);
+	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
 	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE;
 	eth_hw_addr_random(dev);
 }
@@ -1452,14 +1470,15 @@
 		return dev;
 
 	err = geneve_configure(net, dev, &geneve_remote_unspec,
-			       0, 0, 0, htons(dst_port), true, 0);
+			       0, 0, 0, htons(dst_port), true,
+			       GENEVE_F_UDP_ZERO_CSUM6_RX);
 	if (err)
 		goto err;
 
 	/* openvswitch users expect packet sizes to be unrestricted,
 	 * so set the largest MTU we can.
 	 */
-	err = geneve_change_mtu(dev, IP_MAX_MTU);
+	err = __geneve_change_mtu(dev, IP_MAX_MTU, false);
 	if (err)
 		goto err;
 

diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 1d3a665..98e34fe 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c

@@ -1089,6 +1089,9 @@
 	net->ethtool_ops = &ethtool_ops;
 	SET_NETDEV_DEV(net, &dev->device);
 
+	/* We always need headroom for rndis header */
+	net->needed_headroom = RNDIS_AND_PPI_SIZE;
+
 	/* Notify the netvsc driver of the new device */
 	memset(&device_info, 0, sizeof(device_info));
 	device_info.ring_size = ring_size;

diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c
index bf241a3..db507e3 100644
--- a/drivers/net/phy/bcm7xxx.c
+++ b/drivers/net/phy/bcm7xxx.c

@@ -250,10 +250,6 @@
 	phy_write(phydev, MII_BCM7XXX_AUX_MODE, MII_BCM7XX_64CLK_MDIO);
 	phy_read(phydev, MII_BCM7XXX_AUX_MODE);
 
-	/* Workaround only required for 100Mbits/sec capable PHYs */
-	if (phydev->supported & PHY_GBIT_FEATURES)
-		return 0;
-
 	/* set shadow mode 2 */
 	ret = phy_set_clr_bits(phydev, MII_BCM7XXX_TEST,
 			MII_BCM7XXX_SHD_MODE_2, MII_BCM7XXX_SHD_MODE_2);
@@ -270,7 +266,7 @@
 	phy_write(phydev, MII_BCM7XXX_100TX_FALSE_CAR, 0x7555);
 
 	/* reset shadow mode 2 */
-	ret = phy_set_clr_bits(phydev, MII_BCM7XXX_TEST, MII_BCM7XXX_SHD_MODE_2, 0);
+	ret = phy_set_clr_bits(phydev, MII_BCM7XXX_TEST, 0, MII_BCM7XXX_SHD_MODE_2);
 	if (ret < 0)
 		return ret;
 
@@ -307,11 +303,6 @@
 	return 0;
 }
 
-static int bcm7xxx_dummy_config_init(struct phy_device *phydev)
-{
-	return 0;
-}
-
 #define BCM7XXX_28NM_GPHY(_oui, _name)					\
 {									\
 	.phy_id		= (_oui),					\
@@ -337,7 +328,7 @@
 	.phy_id         = PHY_ID_BCM7425,
 	.phy_id_mask    = 0xfffffff0,
 	.name           = "Broadcom BCM7425",
-	.features       = PHY_GBIT_FEATURES |
+	.features       = PHY_BASIC_FEATURES |
 			  SUPPORTED_Pause | SUPPORTED_Asym_Pause,
 	.flags          = PHY_IS_INTERNAL,
 	.config_init    = bcm7xxx_config_init,
@@ -349,7 +340,7 @@
 	.phy_id         = PHY_ID_BCM7429,
 	.phy_id_mask    = 0xfffffff0,
 	.name           = "Broadcom BCM7429",
-	.features       = PHY_GBIT_FEATURES |
+	.features       = PHY_BASIC_FEATURES |
 			  SUPPORTED_Pause | SUPPORTED_Asym_Pause,
 	.flags          = PHY_IS_INTERNAL,
 	.config_init    = bcm7xxx_config_init,
@@ -361,7 +352,7 @@
 	.phy_id         = PHY_ID_BCM7435,
 	.phy_id_mask    = 0xfffffff0,
 	.name           = "Broadcom BCM7435",
-	.features       = PHY_GBIT_FEATURES |
+	.features       = PHY_BASIC_FEATURES |
 			  SUPPORTED_Pause | SUPPORTED_Asym_Pause,
 	.flags          = PHY_IS_INTERNAL,
 	.config_init    = bcm7xxx_config_init,
@@ -369,30 +360,6 @@
 	.read_status    = genphy_read_status,
 	.suspend        = bcm7xxx_suspend,
 	.resume         = bcm7xxx_config_init,
-}, {
-	.phy_id		= PHY_BCM_OUI_4,
-	.phy_id_mask	= 0xffff0000,
-	.name		= "Broadcom BCM7XXX 40nm",
-	.features	= PHY_GBIT_FEATURES |
-			  SUPPORTED_Pause | SUPPORTED_Asym_Pause,
-	.flags		= PHY_IS_INTERNAL,
-	.config_init	= bcm7xxx_config_init,
-	.config_aneg	= genphy_config_aneg,
-	.read_status	= genphy_read_status,
-	.suspend	= bcm7xxx_suspend,
-	.resume		= bcm7xxx_config_init,
-}, {
-	.phy_id		= PHY_BCM_OUI_5,
-	.phy_id_mask	= 0xffffff00,
-	.name		= "Broadcom BCM7XXX 65nm",
-	.features	= PHY_BASIC_FEATURES |
-			  SUPPORTED_Pause | SUPPORTED_Asym_Pause,
-	.flags		= PHY_IS_INTERNAL,
-	.config_init	= bcm7xxx_dummy_config_init,
-	.config_aneg	= genphy_config_aneg,
-	.read_status	= genphy_read_status,
-	.suspend	= bcm7xxx_suspend,
-	.resume		= bcm7xxx_config_init,
 } };
 
 static struct mdio_device_id __maybe_unused bcm7xxx_tbl[] = {
@@ -404,8 +371,6 @@
 	{ PHY_ID_BCM7439, 0xfffffff0, },
 	{ PHY_ID_BCM7435, 0xfffffff0, },
 	{ PHY_ID_BCM7445, 0xfffffff0, },
-	{ PHY_BCM_OUI_4, 0xffff0000 },
-	{ PHY_BCM_OUI_5, 0xffffff00 },
 	{ }
 };
 

diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index e3eb964..ab1d0fc 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c

@@ -446,6 +446,12 @@
 	if (err < 0)
 		return err;
 
+	return 0;
+}
+
+static int marvell_config_init(struct phy_device *phydev)
+{
+	/* Set registers from marvell,reg-init DT property */
 	return marvell_of_reg_init(phydev);
 }
 
@@ -495,7 +501,7 @@
 
 	mdelay(500);
 
-	return 0;
+	return marvell_config_init(phydev);
 }
 
 static int m88e3016_config_init(struct phy_device *phydev)
@@ -514,7 +520,7 @@
 	if (reg < 0)
 		return reg;
 
-	return 0;
+	return marvell_config_init(phydev);
 }
 
 static int m88e1111_config_init(struct phy_device *phydev)
@@ -1078,6 +1084,7 @@
 		.features = PHY_GBIT_FEATURES,
 		.probe = marvell_probe,
 		.flags = PHY_HAS_INTERRUPT,
+		.config_init = &marvell_config_init,
 		.config_aneg = &marvell_config_aneg,
 		.read_status = &genphy_read_status,
 		.ack_interrupt = &marvell_ack_interrupt,
@@ -1149,6 +1156,7 @@
 		.features = PHY_GBIT_FEATURES,
 		.flags = PHY_HAS_INTERRUPT,
 		.probe = marvell_probe,
+		.config_init = &marvell_config_init,
 		.config_aneg = &m88e1121_config_aneg,
 		.read_status = &marvell_read_status,
 		.ack_interrupt = &marvell_ack_interrupt,
@@ -1167,6 +1175,7 @@
 		.features = PHY_GBIT_FEATURES,
 		.flags = PHY_HAS_INTERRUPT,
 		.probe = marvell_probe,
+		.config_init = &marvell_config_init,
 		.config_aneg = &m88e1318_config_aneg,
 		.read_status = &marvell_read_status,
 		.ack_interrupt = &marvell_ack_interrupt,
@@ -1259,6 +1268,7 @@
 		.features = PHY_GBIT_FEATURES,
 		.flags = PHY_HAS_INTERRUPT,
 		.probe = marvell_probe,
+		.config_init = &marvell_config_init,
 		.config_aneg = &m88e1510_config_aneg,
 		.read_status = &marvell_read_status,
 		.ack_interrupt = &marvell_ack_interrupt,
@@ -1277,6 +1287,7 @@
 		.features = PHY_GBIT_FEATURES,
 		.flags = PHY_HAS_INTERRUPT,
 		.probe = marvell_probe,
+		.config_init = &marvell_config_init,
 		.config_aneg = &m88e1510_config_aneg,
 		.read_status = &marvell_read_status,
 		.ack_interrupt = &marvell_ack_interrupt,

diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 03833db..dc85f70 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c

@@ -297,6 +297,17 @@
 	if (priv->led_mode >= 0)
 		kszphy_setup_led(phydev, type->led_mode_reg, priv->led_mode);
 
+	if (phy_interrupt_is_valid(phydev)) {
+		int ctl = phy_read(phydev, MII_BMCR);
+
+		if (ctl < 0)
+			return ctl;
+
+		ret = phy_write(phydev, MII_BMCR, ctl & ~BMCR_ANENABLE);
+		if (ret < 0)
+			return ret;
+	}
+
 	return 0;
 }
 
@@ -635,6 +646,21 @@
 		data[i] = kszphy_get_stat(phydev, i);
 }
 
+static int kszphy_resume(struct phy_device *phydev)
+{
+	int value;
+
+	mutex_lock(&phydev->lock);
+
+	value = phy_read(phydev, MII_BMCR);
+	phy_write(phydev, MII_BMCR, value & ~BMCR_PDOWN);
+
+	kszphy_config_intr(phydev);
+	mutex_unlock(&phydev->lock);
+
+	return 0;
+}
+
 static int kszphy_probe(struct phy_device *phydev)
 {
 	const struct kszphy_type *type = phydev->drv->driver_data;
@@ -844,7 +870,7 @@
 	.get_strings	= kszphy_get_strings,
 	.get_stats	= kszphy_get_stats,
 	.suspend	= genphy_suspend,
-	.resume		= genphy_resume,
+	.resume		= kszphy_resume,
 }, {
 	.phy_id		= PHY_ID_KSZ8061,
 	.name		= "Micrel KSZ8061",

diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index bad3f00..e551f3a 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c

@@ -1410,7 +1410,7 @@
 
 	features = (SUPPORTED_TP | SUPPORTED_MII
 			| SUPPORTED_AUI | SUPPORTED_FIBRE |
-			SUPPORTED_BNC);
+			SUPPORTED_BNC | SUPPORTED_Pause | SUPPORTED_Asym_Pause);
 
 	/* Do we support autonegotiation? */
 	val = phy_read(phydev, MII_BMSR);

diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index fc8ad00..d61da9ec 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c

@@ -443,9 +443,14 @@
 			 * network traffic (demand mode).
 			 */
 			struct ppp *ppp = PF_TO_PPP(pf);
+
+			ppp_recv_lock(ppp);
 			if (ppp->n_channels == 0 &&
-			    (ppp->flags & SC_LOOP_TRAFFIC) == 0)
+			    (ppp->flags & SC_LOOP_TRAFFIC) == 0) {
+				ppp_recv_unlock(ppp);
 				break;
+			}
+			ppp_recv_unlock(ppp);
 		}
 		ret = -EAGAIN;
 		if (file->f_flags & O_NONBLOCK)
@@ -532,9 +537,12 @@
 	else if (pf->kind == INTERFACE) {
 		/* see comment in ppp_read */
 		struct ppp *ppp = PF_TO_PPP(pf);
+
+		ppp_recv_lock(ppp);
 		if (ppp->n_channels == 0 &&
 		    (ppp->flags & SC_LOOP_TRAFFIC) == 0)
 			mask |= POLLIN | POLLRDNORM;
+		ppp_recv_unlock(ppp);
 	}
 
 	return mask;
@@ -2808,6 +2816,7 @@
 
 out2:
 	mutex_unlock(&pn->all_ppp_mutex);
+	rtnl_unlock();
 	free_netdev(dev);
 out1:
 	*retp = ret;

diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
index f3c6302..4ddae81 100644
--- a/drivers/net/ppp/pppoe.c
+++ b/drivers/net/ppp/pppoe.c

@@ -395,6 +395,8 @@
 
 		if (!__pppoe_xmit(sk_pppox(relay_po), skb))
 			goto abort_put;
+
+		sock_put(sk_pppox(relay_po));
 	} else {
 		if (sock_queue_rcv_skb(sk, skb))
 			goto abort_kfree;

diff --git a/drivers/net/usb/Kconfig b/drivers/net/usb/Kconfig
index 7f83504..cdde590 100644
--- a/drivers/net/usb/Kconfig
+++ b/drivers/net/usb/Kconfig

@@ -395,6 +395,10 @@
 	  The protocol specification is incomplete, and is controlled by
 	  (and for) Microsoft; it isn't an "Open" ecosystem or market.
 
+config USB_NET_CDC_SUBSET_ENABLE
+	tristate
+	depends on USB_NET_CDC_SUBSET
+
 config USB_NET_CDC_SUBSET
 	tristate "Simple USB Network Links (CDC Ethernet subset)"
 	depends on USB_USBNET
@@ -413,6 +417,7 @@
 config USB_ALI_M5632
 	bool "ALi M5632 based 'USB 2.0 Data Link' cables"
 	depends on USB_NET_CDC_SUBSET
+	select USB_NET_CDC_SUBSET_ENABLE
 	help
 	  Choose this option if you're using a host-to-host cable
 	  based on this design, which supports USB 2.0 high speed.
@@ -420,6 +425,7 @@
 config USB_AN2720
 	bool "AnchorChips 2720 based cables (Xircom PGUNET, ...)"
 	depends on USB_NET_CDC_SUBSET
+	select USB_NET_CDC_SUBSET_ENABLE
 	help
 	  Choose this option if you're using a host-to-host cable
 	  based on this design.  Note that AnchorChips is now a
@@ -428,6 +434,7 @@
 config USB_BELKIN
 	bool "eTEK based host-to-host cables (Advance, Belkin, ...)"
 	depends on USB_NET_CDC_SUBSET
+	select USB_NET_CDC_SUBSET_ENABLE
 	default y
 	help
 	  Choose this option if you're using a host-to-host cable
@@ -437,6 +444,7 @@
 config USB_ARMLINUX
 	bool "Embedded ARM Linux links (iPaq, ...)"
 	depends on USB_NET_CDC_SUBSET
+	select USB_NET_CDC_SUBSET_ENABLE
 	default y
 	help
 	  Choose this option to support the "usb-eth" networking driver
@@ -454,6 +462,7 @@
 config USB_EPSON2888
 	bool "Epson 2888 based firmware (DEVELOPMENT)"
 	depends on USB_NET_CDC_SUBSET
+	select USB_NET_CDC_SUBSET_ENABLE
 	help
 	  Choose this option to support the usb networking links used
 	  by some sample firmware from Epson.
@@ -461,6 +470,7 @@
 config USB_KC2190
 	bool "KT Technology KC2190 based cables (InstaNet)"
 	depends on USB_NET_CDC_SUBSET
+	select USB_NET_CDC_SUBSET_ENABLE
 	help
 	  Choose this option if you're using a host-to-host cable
 	  with one of these chips.

diff --git a/drivers/net/usb/Makefile b/drivers/net/usb/Makefile
index b5f0406..37fb46ae 100644
--- a/drivers/net/usb/Makefile
+++ b/drivers/net/usb/Makefile

@@ -23,7 +23,7 @@
 obj-$(CONFIG_USB_NET_NET1080)	+= net1080.o
 obj-$(CONFIG_USB_NET_PLUSB)	+= plusb.o
 obj-$(CONFIG_USB_NET_RNDIS_HOST)	+= rndis_host.o
-obj-$(CONFIG_USB_NET_CDC_SUBSET)	+= cdc_subset.o
+obj-$(CONFIG_USB_NET_CDC_SUBSET_ENABLE)	+= cdc_subset.o
 obj-$(CONFIG_USB_NET_ZAURUS)	+= zaurus.o
 obj-$(CONFIG_USB_NET_MCS7830)	+= mcs7830.o
 obj-$(CONFIG_USB_USBNET)	+= usbnet.o

diff --git a/drivers/net/usb/ax88172a.c b/drivers/net/usb/ax88172a.c
index 224e7d8..cf77f2d 100644
--- a/drivers/net/usb/ax88172a.c
+++ b/drivers/net/usb/ax88172a.c

@@ -134,7 +134,6 @@
 
 	netdev_info(dev->net, "deregistering mdio bus %s\n", priv->mdio->id);
 	mdiobus_unregister(priv->mdio);
-	kfree(priv->mdio->irq);
 	mdiobus_free(priv->mdio);
 }
 

diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index dc0212c..86ba30b 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c

@@ -837,7 +837,11 @@
 
 	iface_no = ctx->data->cur_altsetting->desc.bInterfaceNumber;
 
-	/* reset data interface */
+	/* Reset data interface. Some devices will not reset properly
+	 * unless they are configured first.  Toggle the altsetting to
+	 * force a reset
+	 */
+	usb_set_interface(dev->udev, iface_no, data_altsetting);
 	temp = usb_set_interface(dev->udev, iface_no, 0);
 	if (temp) {
 		dev_dbg(&intf->dev, "set interface failed\n");
@@ -984,8 +988,6 @@
 
 static int cdc_ncm_bind(struct usbnet *dev, struct usb_interface *intf)
 {
-	int ret;
-
 	/* MBIM backwards compatible function? */
 	if (cdc_ncm_select_altsetting(intf) != CDC_NCM_COMM_ALTSETTING_NCM)
 		return -ENODEV;
@@ -994,16 +996,7 @@
 	 * Additionally, generic NCM devices are assumed to accept arbitrarily
 	 * placed NDP.
 	 */
-	ret = cdc_ncm_bind_common(dev, intf, CDC_NCM_DATA_ALTSETTING_NCM, 0);
-
-	/*
-	 * We should get an event when network connection is "connected" or
-	 * "disconnected". Set network connection in "disconnected" state
-	 * (carrier is OFF) during attach, so the IP network stack does not
-	 * start IPv6 negotiation and more.
-	 */
-	usbnet_link_change(dev, 0, 0);
-	return ret;
+	return cdc_ncm_bind_common(dev, intf, CDC_NCM_DATA_ALTSETTING_NCM, 0);
 }
 
 static void cdc_ncm_align_tail(struct sk_buff *skb, size_t modulus, size_t remainder, size_t max)
@@ -1586,7 +1579,8 @@
 
 static const struct driver_info cdc_ncm_info = {
 	.description = "CDC NCM",
-	.flags = FLAG_POINTTOPOINT | FLAG_NO_SETINT | FLAG_MULTI_PACKET,
+	.flags = FLAG_POINTTOPOINT | FLAG_NO_SETINT | FLAG_MULTI_PACKET
+			| FLAG_LINK_INTR,
 	.bind = cdc_ncm_bind,
 	.unbind = cdc_ncm_unbind,
 	.manage_power = usbnet_manage_power,
@@ -1599,7 +1593,7 @@
 static const struct driver_info wwan_info = {
 	.description = "Mobile Broadband Network Device",
 	.flags = FLAG_POINTTOPOINT | FLAG_NO_SETINT | FLAG_MULTI_PACKET
-			| FLAG_WWAN,
+			| FLAG_LINK_INTR | FLAG_WWAN,
 	.bind = cdc_ncm_bind,
 	.unbind = cdc_ncm_unbind,
 	.manage_power = usbnet_manage_power,
@@ -1612,7 +1606,7 @@
 static const struct driver_info wwan_noarp_info = {
 	.description = "Mobile Broadband Network Device (NO ARP)",
 	.flags = FLAG_POINTTOPOINT | FLAG_NO_SETINT | FLAG_MULTI_PACKET
-			| FLAG_WWAN | FLAG_NOARP,
+			| FLAG_LINK_INTR | FLAG_WWAN | FLAG_NOARP,
 	.bind = cdc_ncm_bind,
 	.unbind = cdc_ncm_unbind,
 	.manage_power = usbnet_manage_power,

diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 23e9880..a3a4ccf 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c

@@ -637,6 +637,7 @@
 
 	/* 3. Combined interface devices matching on interface number */
 	{QMI_FIXED_INTF(0x0408, 0xea42, 4)},	/* Yota / Megafon M100-1 */
+	{QMI_FIXED_INTF(0x05c6, 0x6001, 3)},	/* 4G LTE usb-modem U901 */
 	{QMI_FIXED_INTF(0x05c6, 0x7000, 0)},
 	{QMI_FIXED_INTF(0x05c6, 0x7001, 1)},
 	{QMI_FIXED_INTF(0x05c6, 0x7002, 1)},
@@ -860,8 +861,10 @@
 	{QMI_FIXED_INTF(0x1199, 0x9056, 8)},	/* Sierra Wireless Modem */
 	{QMI_FIXED_INTF(0x1199, 0x9057, 8)},
 	{QMI_FIXED_INTF(0x1199, 0x9061, 8)},	/* Sierra Wireless Modem */
-	{QMI_FIXED_INTF(0x1199, 0x9071, 8)},	/* Sierra Wireless MC74xx/EM74xx */
-	{QMI_FIXED_INTF(0x1199, 0x9071, 10)},	/* Sierra Wireless MC74xx/EM74xx */
+	{QMI_FIXED_INTF(0x1199, 0x9071, 8)},	/* Sierra Wireless MC74xx */
+	{QMI_FIXED_INTF(0x1199, 0x9071, 10)},	/* Sierra Wireless MC74xx */
+	{QMI_FIXED_INTF(0x1199, 0x9079, 8)},	/* Sierra Wireless EM74xx */
+	{QMI_FIXED_INTF(0x1199, 0x9079, 10)},	/* Sierra Wireless EM74xx */
 	{QMI_FIXED_INTF(0x1bbb, 0x011e, 4)},	/* Telekom Speedstick LTE II (Alcatel One Touch L100V LTE) */
 	{QMI_FIXED_INTF(0x1bbb, 0x0203, 2)},	/* Alcatel L800MA */
 	{QMI_FIXED_INTF(0x2357, 0x0201, 4)},	/* TP-LINK HSUPA Modem MA180 */
@@ -884,6 +887,7 @@
 	{QMI_FIXED_INTF(0x413c, 0x81a8, 8)},	/* Dell Wireless 5808 Gobi(TM) 4G LTE Mobile Broadband Card */
 	{QMI_FIXED_INTF(0x413c, 0x81a9, 8)},	/* Dell Wireless 5808e Gobi(TM) 4G LTE Mobile Broadband Card */
 	{QMI_FIXED_INTF(0x413c, 0x81b1, 8)},	/* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card */
+	{QMI_FIXED_INTF(0x413c, 0x81b3, 8)},	/* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card (rev3) */
 	{QMI_FIXED_INTF(0x03f0, 0x4e1d, 8)},	/* HP lt4111 LTE/EV-DO/HSPA+ Gobi 4G Module */
 	{QMI_FIXED_INTF(0x22de, 0x9061, 3)},	/* WeTelecom WPD-600N */
 	{QMI_FIXED_INTF(0x1e0e, 0x9001, 5)},	/* SIMCom 7230E */

diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index 0b0ba7e..1079812 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c

@@ -1769,6 +1769,13 @@
 	if (info->unbind)
 		info->unbind (dev, udev);
 out1:
+	/* subdrivers must undo all they did in bind() if they
+	 * fail it, but we may fail later and a deferred kevent
+	 * may trigger an error resubmitting itself and, worse,
+	 * schedule a timer. So we kill it all just in case.
+	 */
+	cancel_work_sync(&dev->kevent);
+	del_timer_sync(&dev->delay);
 	free_netdev(net);
 out:
 	return status;

diff --git a/drivers/net/vmxnet3/vmxnet3_defs.h b/drivers/net/vmxnet3/vmxnet3_defs.h
index 221a530..72ba8ae 100644
--- a/drivers/net/vmxnet3/vmxnet3_defs.h
+++ b/drivers/net/vmxnet3/vmxnet3_defs.h

@@ -377,7 +377,7 @@
 #define VMXNET3_TX_RING_MAX_SIZE   4096
 #define VMXNET3_TC_RING_MAX_SIZE   4096
 #define VMXNET3_RX_RING_MAX_SIZE   4096
-#define VMXNET3_RX_RING2_MAX_SIZE  2048
+#define VMXNET3_RX_RING2_MAX_SIZE  4096
 #define VMXNET3_RC_RING_MAX_SIZE   8192
 
 /* a list of reasons for queue stop */

diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index 0cbf520..fc895d0 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c

@@ -814,7 +814,7 @@
 
 
 /*
- *    parse and copy relevant protocol headers:
+ *    parse relevant protocol headers:
  *      For a tso pkt, relevant headers are L2/3/4 including options
  *      For a pkt requesting csum offloading, they are L2/3 and may include L4
  *      if it's a TCP/UDP pkt
@@ -827,15 +827,14 @@
  * Other effects:
  *    1. related *ctx fields are updated.
  *    2. ctx->copy_size is # of bytes copied
- *    3. the portion copied is guaranteed to be in the linear part
+ *    3. the portion to be copied is guaranteed to be in the linear part
  *
  */
 static int
-vmxnet3_parse_and_copy_hdr(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
-			   struct vmxnet3_tx_ctx *ctx,
-			   struct vmxnet3_adapter *adapter)
+vmxnet3_parse_hdr(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
+		  struct vmxnet3_tx_ctx *ctx,
+		  struct vmxnet3_adapter *adapter)
 {
-	struct Vmxnet3_TxDataDesc *tdd;
 	u8 protocol = 0;
 
 	if (ctx->mss) {	/* TSO */
@@ -892,16 +891,34 @@
 		return 0;
 	}
 
+	return 1;
+err:
+	return -1;
+}
+
+/*
+ *    copy relevant protocol headers to the transmit ring:
+ *      For a tso pkt, relevant headers are L2/3/4 including options
+ *      For a pkt requesting csum offloading, they are L2/3 and may include L4
+ *      if it's a TCP/UDP pkt
+ *
+ *
+ *    Note that this requires that vmxnet3_parse_hdr be called first to set the
+ *      appropriate bits in ctx first
+ */
+static void
+vmxnet3_copy_hdr(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
+		 struct vmxnet3_tx_ctx *ctx,
+		 struct vmxnet3_adapter *adapter)
+{
+	struct Vmxnet3_TxDataDesc *tdd;
+
 	tdd = tq->data_ring.base + tq->tx_ring.next2fill;
 
 	memcpy(tdd->data, skb->data, ctx->copy_size);
 	netdev_dbg(adapter->netdev,
 		"copy %u bytes to dataRing[%u]\n",
 		ctx->copy_size, tq->tx_ring.next2fill);
-	return 1;
-
-err:
-	return -1;
 }
 
 
@@ -998,22 +1015,7 @@
 		}
 	}
 
-	spin_lock_irqsave(&tq->tx_lock, flags);
-
-	if (count > vmxnet3_cmd_ring_desc_avail(&tq->tx_ring)) {
-		tq->stats.tx_ring_full++;
-		netdev_dbg(adapter->netdev,
-			"tx queue stopped on %s, next2comp %u"
-			" next2fill %u\n", adapter->netdev->name,
-			tq->tx_ring.next2comp, tq->tx_ring.next2fill);
-
-		vmxnet3_tq_stop(tq, adapter);
-		spin_unlock_irqrestore(&tq->tx_lock, flags);
-		return NETDEV_TX_BUSY;
-	}
-
-
-	ret = vmxnet3_parse_and_copy_hdr(skb, tq, &ctx, adapter);
+	ret = vmxnet3_parse_hdr(skb, tq, &ctx, adapter);
 	if (ret >= 0) {
 		BUG_ON(ret <= 0 && ctx.copy_size != 0);
 		/* hdrs parsed, check against other limits */
@@ -1033,9 +1035,26 @@
 		}
 	} else {
 		tq->stats.drop_hdr_inspect_err++;
-		goto unlock_drop_pkt;
+		goto drop_pkt;
 	}
 
+	spin_lock_irqsave(&tq->tx_lock, flags);
+
+	if (count > vmxnet3_cmd_ring_desc_avail(&tq->tx_ring)) {
+		tq->stats.tx_ring_full++;
+		netdev_dbg(adapter->netdev,
+			"tx queue stopped on %s, next2comp %u"
+			" next2fill %u\n", adapter->netdev->name,
+			tq->tx_ring.next2comp, tq->tx_ring.next2fill);
+
+		vmxnet3_tq_stop(tq, adapter);
+		spin_unlock_irqrestore(&tq->tx_lock, flags);
+		return NETDEV_TX_BUSY;
+	}
+
+
+	vmxnet3_copy_hdr(skb, tq, &ctx, adapter);
+
 	/* fill tx descs related to addr & len */
 	if (vmxnet3_map_pkt(skb, &ctx, tq, adapter->pdev, adapter))
 		goto unlock_drop_pkt;

diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h
index bdb8a6c..729c344 100644
--- a/drivers/net/vmxnet3/vmxnet3_int.h
+++ b/drivers/net/vmxnet3/vmxnet3_int.h

@@ -69,10 +69,10 @@
 /*
  * Version numbers
  */
-#define VMXNET3_DRIVER_VERSION_STRING   "1.4.5.0-k"
+#define VMXNET3_DRIVER_VERSION_STRING   "1.4.6.0-k"
 
 /* a 32-bit int, each byte encode a verion number in VMXNET3_DRIVER_VERSION */
-#define VMXNET3_DRIVER_VERSION_NUM      0x01040500
+#define VMXNET3_DRIVER_VERSION_NUM      0x01040600
 
 #if defined(CONFIG_PCI_MSI)
 	/* RSS only makes sense if MSI-X is supported. */

diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 66addb7..bdcf617 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c

@@ -104,20 +104,23 @@
 #if IS_ENABLED(CONFIG_IPV6)
 static bool check_ipv6_frame(const struct sk_buff *skb)
 {
-	const struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb->data;
-	size_t hlen = sizeof(*ipv6h);
+	const struct ipv6hdr *ipv6h;
+	struct ipv6hdr _ipv6h;
 	bool rc = true;
 
-	if (skb->len < hlen)
+	ipv6h = skb_header_pointer(skb, 0, sizeof(_ipv6h), &_ipv6h);
+	if (!ipv6h)
 		goto out;
 
 	if (ipv6h->nexthdr == NEXTHDR_ICMP) {
 		const struct icmp6hdr *icmph;
+		struct icmp6hdr _icmph;
 
-		if (skb->len < hlen + sizeof(*icmph))
+		icmph = skb_header_pointer(skb, sizeof(_ipv6h),
+					   sizeof(_icmph), &_icmph);
+		if (!icmph)
 			goto out;
 
-		icmph = (struct icmp6hdr *)(skb->data + sizeof(*ipv6h));
 		switch (icmph->icmp6_type) {
 		case NDISC_ROUTER_SOLICITATION:
 		case NDISC_ROUTER_ADVERTISEMENT:

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index a31cd95..1c32bd1 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c

@@ -931,8 +931,10 @@
 						     cb->nlh->nlmsg_seq,
 						     RTM_NEWNEIGH,
 						     NLM_F_MULTI, rd);
-				if (err < 0)
+				if (err < 0) {
+					cb->args[1] = err;
 					goto out;
+				}
 skip:
 				++idx;
 			}
@@ -1306,8 +1308,10 @@
 		gbp = (struct vxlanhdr_gbp *)vxh;
 		md->gbp = ntohs(gbp->policy_id);
 
-		if (tun_dst)
+		if (tun_dst) {
 			tun_dst->u.tun_info.key.tun_flags |= TUNNEL_VXLAN_OPT;
+			tun_dst->u.tun_info.options_len = sizeof(*md);
+		}
 
 		if (gbp->dont_learn)
 			md->gbp |= VXLAN_GBP_DONT_LEARN;
@@ -2171,9 +2175,11 @@
 #endif
 	}
 
-	if (vxlan->flags & VXLAN_F_COLLECT_METADATA &&
-	    info && info->mode & IP_TUNNEL_INFO_TX) {
-		vxlan_xmit_one(skb, dev, NULL, false);
+	if (vxlan->flags & VXLAN_F_COLLECT_METADATA) {
+		if (info && info->mode & IP_TUNNEL_INFO_TX)
+			vxlan_xmit_one(skb, dev, NULL, false);
+		else
+			kfree_skb(skb);
 		return NETDEV_TX_OK;
 	}
 
@@ -2537,6 +2543,7 @@
 	dev->hw_features |= NETIF_F_GSO_SOFTWARE;
 	dev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
 	netif_keep_dst(dev);
+	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
 	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE;
 
 	INIT_LIST_HEAD(&vxlan->next);

diff --git a/drivers/net/wan/dscc4.c b/drivers/net/wan/dscc4.c
index 7a72407..6292259 100644
--- a/drivers/net/wan/dscc4.c
+++ b/drivers/net/wan/dscc4.c

@@ -1626,7 +1626,7 @@
 		if (state & Xpr) {
 			void __iomem *scc_addr;
 			unsigned long ring;
-			int i;
+			unsigned int i;
 
 			/*
 			 * - the busy condition happens (sometimes);

diff --git a/drivers/net/wireless/intel/iwlwifi/Kconfig b/drivers/net/wireless/intel/iwlwifi/Kconfig
index 8660677..7438fbe 100644
--- a/drivers/net/wireless/intel/iwlwifi/Kconfig
+++ b/drivers/net/wireless/intel/iwlwifi/Kconfig

@@ -53,7 +53,6 @@
 
 config IWLDVM
 	tristate "Intel Wireless WiFi DVM Firmware support"
-	depends on m
 	help
 	  This is the driver that supports the DVM firmware. The list
 	  of the devices that use this firmware is available here:

diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-8000.c b/drivers/net/wireless/intel/iwlwifi/iwl-8000.c
index c84a029..bce9b3420 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-8000.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-8000.c

@@ -7,6 +7,7 @@
  *
  * Copyright(c) 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2014 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 Intel Deutschland GmbH
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -70,12 +71,15 @@
 
 /* Highest firmware API version supported */
 #define IWL8000_UCODE_API_MAX	20
+#define IWL8265_UCODE_API_MAX	20
 
 /* Oldest version we won't warn about */
 #define IWL8000_UCODE_API_OK	13
+#define IWL8265_UCODE_API_OK	20
 
 /* Lowest firmware API version supported */
 #define IWL8000_UCODE_API_MIN	13
+#define IWL8265_UCODE_API_MIN	20
 
 /* NVM versions */
 #define IWL8000_NVM_VERSION		0x0a1d
@@ -93,6 +97,10 @@
 #define IWL8000_MODULE_FIRMWARE(api) \
 	IWL8000_FW_PRE "-" __stringify(api) ".ucode"
 
+#define IWL8265_FW_PRE "iwlwifi-8265-"
+#define IWL8265_MODULE_FIRMWARE(api) \
+	IWL8265_FW_PRE __stringify(api) ".ucode"
+
 #define NVM_HW_SECTION_NUM_FAMILY_8000		10
 #define DEFAULT_NVM_FILE_FAMILY_8000B		"nvmData-8000B"
 #define DEFAULT_NVM_FILE_FAMILY_8000C		"nvmData-8000C"
@@ -144,10 +152,7 @@
 	.support_tx_backoff = true,
 };
 
-#define IWL_DEVICE_8000							\
-	.ucode_api_max = IWL8000_UCODE_API_MAX,				\
-	.ucode_api_ok = IWL8000_UCODE_API_OK,				\
-	.ucode_api_min = IWL8000_UCODE_API_MIN,				\
+#define IWL_DEVICE_8000_COMMON						\
 	.device_family = IWL_DEVICE_FAMILY_8000,			\
 	.max_inst_size = IWL60_RTC_INST_SIZE,				\
 	.max_data_size = IWL60_RTC_DATA_SIZE,				\
@@ -167,10 +172,28 @@
 	.thermal_params = &iwl8000_tt_params,				\
 	.apmg_not_supported = true
 
+#define IWL_DEVICE_8000							\
+	IWL_DEVICE_8000_COMMON,						\
+	.ucode_api_max = IWL8000_UCODE_API_MAX,				\
+	.ucode_api_ok = IWL8000_UCODE_API_OK,				\
+	.ucode_api_min = IWL8000_UCODE_API_MIN				\
+
+#define IWL_DEVICE_8260							\
+	IWL_DEVICE_8000_COMMON,						\
+	.ucode_api_max = IWL8000_UCODE_API_MAX,				\
+	.ucode_api_ok = IWL8000_UCODE_API_OK,				\
+	.ucode_api_min = IWL8000_UCODE_API_MIN				\
+
+#define IWL_DEVICE_8265							\
+	IWL_DEVICE_8000_COMMON,						\
+	.ucode_api_max = IWL8265_UCODE_API_MAX,				\
+	.ucode_api_ok = IWL8265_UCODE_API_OK,				\
+	.ucode_api_min = IWL8265_UCODE_API_MIN				\
+
 const struct iwl_cfg iwl8260_2n_cfg = {
 	.name = "Intel(R) Dual Band Wireless N 8260",
 	.fw_name_pre = IWL8000_FW_PRE,
-	IWL_DEVICE_8000,
+	IWL_DEVICE_8260,
 	.ht_params = &iwl8000_ht_params,
 	.nvm_ver = IWL8000_NVM_VERSION,
 	.nvm_calib_ver = IWL8000_TX_POWER_VERSION,
@@ -179,7 +202,7 @@
 const struct iwl_cfg iwl8260_2ac_cfg = {
 	.name = "Intel(R) Dual Band Wireless AC 8260",
 	.fw_name_pre = IWL8000_FW_PRE,
-	IWL_DEVICE_8000,
+	IWL_DEVICE_8260,
 	.ht_params = &iwl8000_ht_params,
 	.nvm_ver = IWL8000_NVM_VERSION,
 	.nvm_calib_ver = IWL8000_TX_POWER_VERSION,
@@ -188,8 +211,8 @@
 
 const struct iwl_cfg iwl8265_2ac_cfg = {
 	.name = "Intel(R) Dual Band Wireless AC 8265",
-	.fw_name_pre = IWL8000_FW_PRE,
-	IWL_DEVICE_8000,
+	.fw_name_pre = IWL8265_FW_PRE,
+	IWL_DEVICE_8265,
 	.ht_params = &iwl8000_ht_params,
 	.nvm_ver = IWL8000_NVM_VERSION,
 	.nvm_calib_ver = IWL8000_TX_POWER_VERSION,
@@ -209,7 +232,7 @@
 const struct iwl_cfg iwl8260_2ac_sdio_cfg = {
 	.name = "Intel(R) Dual Band Wireless-AC 8260",
 	.fw_name_pre = IWL8000_FW_PRE,
-	IWL_DEVICE_8000,
+	IWL_DEVICE_8260,
 	.ht_params = &iwl8000_ht_params,
 	.nvm_ver = IWL8000_NVM_VERSION,
 	.nvm_calib_ver = IWL8000_TX_POWER_VERSION,
@@ -236,3 +259,4 @@
 };
 
 MODULE_FIRMWARE(IWL8000_MODULE_FIRMWARE(IWL8000_UCODE_API_OK));
+MODULE_FIRMWARE(IWL8265_MODULE_FIRMWARE(IWL8265_UCODE_API_OK));

diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
index 7acb490..ab4c2a0 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c

@@ -243,8 +243,10 @@
 	if (drv->trans->cfg->device_family == IWL_DEVICE_FAMILY_8000) {
 		char rev_step = 'A' + CSR_HW_REV_STEP(drv->trans->hw_rev);
 
-		snprintf(drv->firmware_name, sizeof(drv->firmware_name),
-			 "%s%c-%s.ucode", name_pre, rev_step, tag);
+		if (rev_step != 'A')
+			snprintf(drv->firmware_name,
+				 sizeof(drv->firmware_name), "%s%c-%s.ucode",
+				 name_pre, rev_step, tag);
 	}
 
 	IWL_DEBUG_INFO(drv, "attempting to load firmware %s'%s'\n",

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
index 4ed5180..0ccc697 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c

@@ -107,7 +107,7 @@
 				    sizeof(tx_ant_cmd), &tx_ant_cmd);
 }
 
-static void iwl_free_fw_paging(struct iwl_mvm *mvm)
+void iwl_free_fw_paging(struct iwl_mvm *mvm)
 {
 	int i;
 
@@ -127,6 +127,8 @@
 			     get_order(mvm->fw_paging_db[i].fw_paging_size));
 	}
 	kfree(mvm->trans->paging_download_buf);
+	mvm->trans->paging_download_buf = NULL;
+
 	memset(mvm->fw_paging_db, 0, sizeof(mvm->fw_paging_db));
 }
 

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
index 5f3ac8c..ff7c6df 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h

@@ -1225,6 +1225,9 @@
 void iwl_mvm_rx_umac_scan_iter_complete_notif(struct iwl_mvm *mvm,
 					      struct iwl_rx_cmd_buffer *rxb);
 
+/* Paging */
+void iwl_free_fw_paging(struct iwl_mvm *mvm);
+
 /* MVM debugfs */
 #ifdef CONFIG_IWLWIFI_DEBUGFS
 int iwl_mvm_dbgfs_register(struct iwl_mvm *mvm, struct dentry *dbgfs_dir);

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
index 89ea70d..e80be9a 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c

@@ -684,6 +684,8 @@
 	for (i = 0; i < NVM_MAX_NUM_SECTIONS; i++)
 		kfree(mvm->nvm_sections[i].data);
 
+	iwl_free_fw_paging(mvm);
+
 	iwl_mvm_tof_clean(mvm);
 
 	ieee80211_free_hw(mvm->hw);

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
index 9a15642..ea1e177 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c

@@ -1298,6 +1298,10 @@
 		return -EBUSY;
 	}
 
+	/* we don't support "match all" in the firmware */
+	if (!req->n_match_sets)
+		return -EOPNOTSUPP;
+
 	ret = iwl_mvm_check_running_scans(mvm, type);
 	if (ret)
 		return ret;

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
index 0914ec2..a040edc 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c

@@ -423,6 +423,15 @@
 		return -1;
 	}
 
+	/*
+	 * Increase the pending frames counter, so that later when a reply comes
+	 * in and the counter is decreased - we don't start getting negative
+	 * values.
+	 * Note that we don't need to make sure it isn't agg'd, since we're
+	 * TXing non-sta
+	 */
+	atomic_inc(&mvm->pending_frames[sta_id]);
+
 	return 0;
 }
 

diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
index cc3888e..73c9559 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h

@@ -490,6 +490,15 @@
 	iwl_write32(trans, CSR_INT_MASK, trans_pcie->inta_mask);
 }
 
+static inline void iwl_enable_fw_load_int(struct iwl_trans *trans)
+{
+	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+
+	IWL_DEBUG_ISR(trans, "Enabling FW load interrupt\n");
+	trans_pcie->inta_mask = CSR_INT_BIT_FH_TX;
+	iwl_write32(trans, CSR_INT_MASK, trans_pcie->inta_mask);
+}
+
 static inline void iwl_enable_rfkill_int(struct iwl_trans *trans)
 {
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);

diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c
index ccafbd8..152cf9a 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c

@@ -1438,9 +1438,11 @@
 			 inta & ~trans_pcie->inta_mask);
 	}
 
-	/* Re-enable all interrupts */
-	/* only Re-enable if disabled by irq */
-	if (test_bit(STATUS_INT_ENABLED, &trans->status))
+	/* we are loading the firmware, enable FH_TX interrupt only */
+	if (handled & CSR_INT_BIT_FH_TX)
+		iwl_enable_fw_load_int(trans);
+	/* only Re-enable all interrupt if disabled by irq */
+	else if (test_bit(STATUS_INT_ENABLED, &trans->status))
 		iwl_enable_interrupts(trans);
 	/* Re-enable RF_KILL if it occurred */
 	else if (handled & CSR_INT_BIT_RF_KILL)

diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
index d60a467..5a854c6 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c

@@ -1021,82 +1021,6 @@
 					       &first_ucode_section);
 }
 
-static int iwl_trans_pcie_start_fw(struct iwl_trans *trans,
-				   const struct fw_img *fw, bool run_in_rfkill)
-{
-	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
-	bool hw_rfkill;
-	int ret;
-
-	mutex_lock(&trans_pcie->mutex);
-
-	/* Someone called stop_device, don't try to start_fw */
-	if (trans_pcie->is_down) {
-		IWL_WARN(trans,
-			 "Can't start_fw since the HW hasn't been started\n");
-		ret = EIO;
-		goto out;
-	}
-
-	/* This may fail if AMT took ownership of the device */
-	if (iwl_pcie_prepare_card_hw(trans)) {
-		IWL_WARN(trans, "Exit HW not ready\n");
-		ret = -EIO;
-		goto out;
-	}
-
-	iwl_enable_rfkill_int(trans);
-
-	/* If platform's RF_KILL switch is NOT set to KILL */
-	hw_rfkill = iwl_is_rfkill_set(trans);
-	if (hw_rfkill)
-		set_bit(STATUS_RFKILL, &trans->status);
-	else
-		clear_bit(STATUS_RFKILL, &trans->status);
-	iwl_trans_pcie_rf_kill(trans, hw_rfkill);
-	if (hw_rfkill && !run_in_rfkill) {
-		ret = -ERFKILL;
-		goto out;
-	}
-
-	iwl_write32(trans, CSR_INT, 0xFFFFFFFF);
-
-	ret = iwl_pcie_nic_init(trans);
-	if (ret) {
-		IWL_ERR(trans, "Unable to init nic\n");
-		goto out;
-	}
-
-	/* make sure rfkill handshake bits are cleared */
-	iwl_write32(trans, CSR_UCODE_DRV_GP1_CLR, CSR_UCODE_SW_BIT_RFKILL);
-	iwl_write32(trans, CSR_UCODE_DRV_GP1_CLR,
-		    CSR_UCODE_DRV_GP1_BIT_CMD_BLOCKED);
-
-	/* clear (again), then enable host interrupts */
-	iwl_write32(trans, CSR_INT, 0xFFFFFFFF);
-	iwl_enable_interrupts(trans);
-
-	/* really make sure rfkill handshake bits are cleared */
-	iwl_write32(trans, CSR_UCODE_DRV_GP1_CLR, CSR_UCODE_SW_BIT_RFKILL);
-	iwl_write32(trans, CSR_UCODE_DRV_GP1_CLR, CSR_UCODE_SW_BIT_RFKILL);
-
-	/* Load the given image to the HW */
-	if (trans->cfg->device_family == IWL_DEVICE_FAMILY_8000)
-		ret = iwl_pcie_load_given_ucode_8000(trans, fw);
-	else
-		ret = iwl_pcie_load_given_ucode(trans, fw);
-
-out:
-	mutex_unlock(&trans_pcie->mutex);
-	return ret;
-}
-
-static void iwl_trans_pcie_fw_alive(struct iwl_trans *trans, u32 scd_addr)
-{
-	iwl_pcie_reset_ict(trans);
-	iwl_pcie_tx_start(trans, scd_addr);
-}
-
 static void _iwl_trans_pcie_stop_device(struct iwl_trans *trans, bool low_power)
 {
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
@@ -1127,7 +1051,8 @@
 	 * already dead.
 	 */
 	if (test_and_clear_bit(STATUS_DEVICE_ENABLED, &trans->status)) {
-		IWL_DEBUG_INFO(trans, "DEVICE_ENABLED bit was set and is now cleared\n");
+		IWL_DEBUG_INFO(trans,
+			       "DEVICE_ENABLED bit was set and is now cleared\n");
 		iwl_pcie_tx_stop(trans);
 		iwl_pcie_rx_stop(trans);
 
@@ -1161,7 +1086,6 @@
 	iwl_disable_interrupts(trans);
 	spin_unlock(&trans_pcie->irq_lock);
 
-
 	/* clear all status bits */
 	clear_bit(STATUS_SYNC_HCMD_ACTIVE, &trans->status);
 	clear_bit(STATUS_INT_ENABLED, &trans->status);
@@ -1194,10 +1118,116 @@
 	if (hw_rfkill != was_hw_rfkill)
 		iwl_trans_pcie_rf_kill(trans, hw_rfkill);
 
-	/* re-take ownership to prevent other users from stealing the deivce */
+	/* re-take ownership to prevent other users from stealing the device */
 	iwl_pcie_prepare_card_hw(trans);
 }
 
+static int iwl_trans_pcie_start_fw(struct iwl_trans *trans,
+				   const struct fw_img *fw, bool run_in_rfkill)
+{
+	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+	bool hw_rfkill;
+	int ret;
+
+	/* This may fail if AMT took ownership of the device */
+	if (iwl_pcie_prepare_card_hw(trans)) {
+		IWL_WARN(trans, "Exit HW not ready\n");
+		ret = -EIO;
+		goto out;
+	}
+
+	iwl_enable_rfkill_int(trans);
+
+	iwl_write32(trans, CSR_INT, 0xFFFFFFFF);
+
+	/*
+	 * We enabled the RF-Kill interrupt and the handler may very
+	 * well be running. Disable the interrupts to make sure no other
+	 * interrupt can be fired.
+	 */
+	iwl_disable_interrupts(trans);
+
+	/* Make sure it finished running */
+	synchronize_irq(trans_pcie->pci_dev->irq);
+
+	mutex_lock(&trans_pcie->mutex);
+
+	/* If platform's RF_KILL switch is NOT set to KILL */
+	hw_rfkill = iwl_is_rfkill_set(trans);
+	if (hw_rfkill)
+		set_bit(STATUS_RFKILL, &trans->status);
+	else
+		clear_bit(STATUS_RFKILL, &trans->status);
+	iwl_trans_pcie_rf_kill(trans, hw_rfkill);
+	if (hw_rfkill && !run_in_rfkill) {
+		ret = -ERFKILL;
+		goto out;
+	}
+
+	/* Someone called stop_device, don't try to start_fw */
+	if (trans_pcie->is_down) {
+		IWL_WARN(trans,
+			 "Can't start_fw since the HW hasn't been started\n");
+		ret = -EIO;
+		goto out;
+	}
+
+	/* make sure rfkill handshake bits are cleared */
+	iwl_write32(trans, CSR_UCODE_DRV_GP1_CLR, CSR_UCODE_SW_BIT_RFKILL);
+	iwl_write32(trans, CSR_UCODE_DRV_GP1_CLR,
+		    CSR_UCODE_DRV_GP1_BIT_CMD_BLOCKED);
+
+	/* clear (again), then enable host interrupts */
+	iwl_write32(trans, CSR_INT, 0xFFFFFFFF);
+
+	ret = iwl_pcie_nic_init(trans);
+	if (ret) {
+		IWL_ERR(trans, "Unable to init nic\n");
+		goto out;
+	}
+
+	/*
+	 * Now, we load the firmware and don't want to be interrupted, even
+	 * by the RF-Kill interrupt (hence mask all the interrupt besides the
+	 * FH_TX interrupt which is needed to load the firmware). If the
+	 * RF-Kill switch is toggled, we will find out after having loaded
+	 * the firmware and return the proper value to the caller.
+	 */
+	iwl_enable_fw_load_int(trans);
+
+	/* really make sure rfkill handshake bits are cleared */
+	iwl_write32(trans, CSR_UCODE_DRV_GP1_CLR, CSR_UCODE_SW_BIT_RFKILL);
+	iwl_write32(trans, CSR_UCODE_DRV_GP1_CLR, CSR_UCODE_SW_BIT_RFKILL);
+
+	/* Load the given image to the HW */
+	if (trans->cfg->device_family == IWL_DEVICE_FAMILY_8000)
+		ret = iwl_pcie_load_given_ucode_8000(trans, fw);
+	else
+		ret = iwl_pcie_load_given_ucode(trans, fw);
+	iwl_enable_interrupts(trans);
+
+	/* re-check RF-Kill state since we may have missed the interrupt */
+	hw_rfkill = iwl_is_rfkill_set(trans);
+	if (hw_rfkill)
+		set_bit(STATUS_RFKILL, &trans->status);
+	else
+		clear_bit(STATUS_RFKILL, &trans->status);
+
+	iwl_trans_pcie_rf_kill(trans, hw_rfkill);
+	if (hw_rfkill && !run_in_rfkill)
+		ret = -ERFKILL;
+
+out:
+	mutex_unlock(&trans_pcie->mutex);
+	return ret;
+}
+
+static void iwl_trans_pcie_fw_alive(struct iwl_trans *trans, u32 scd_addr)
+{
+	iwl_pcie_reset_ict(trans);
+	iwl_pcie_tx_start(trans, scd_addr);
+}
+
 static void iwl_trans_pcie_stop_device(struct iwl_trans *trans, bool low_power)
 {
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);

diff --git a/drivers/net/wireless/realtek/rtlwifi/rc.c b/drivers/net/wireless/realtek/rtlwifi/rc.c
index 74c14ce..28f7010 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rc.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rc.c

@@ -138,6 +138,11 @@
 		    ((wireless_mode == WIRELESS_MODE_N_5G) ||
 		     (wireless_mode == WIRELESS_MODE_N_24G)))
 			rate->flags |= IEEE80211_TX_RC_MCS;
+		if (sta && sta->vht_cap.vht_supported &&
+		    (wireless_mode == WIRELESS_MODE_AC_5G ||
+		     wireless_mode == WIRELESS_MODE_AC_24G ||
+		     wireless_mode == WIRELESS_MODE_AC_ONLY))
+			rate->flags |= IEEE80211_TX_RC_VHT_MCS;
 	}
 }
 

diff --git a/drivers/net/wireless/ti/wlcore/io.c b/drivers/net/wireless/ti/wlcore/io.c
index 9ac118e..564ca75 100644
--- a/drivers/net/wireless/ti/wlcore/io.c
+++ b/drivers/net/wireless/ti/wlcore/io.c

@@ -175,14 +175,14 @@
 	if (ret < 0)
 		goto out;
 
+	/* We don't need the size of the last partition, as it is
+	 * automatically calculated based on the total memory size and
+	 * the sizes of the previous partitions.
+	 */
 	ret = wlcore_raw_write32(wl, HW_PART3_START_ADDR, p->mem3.start);
 	if (ret < 0)
 		goto out;
 
-	ret = wlcore_raw_write32(wl, HW_PART3_SIZE_ADDR, p->mem3.size);
-	if (ret < 0)
-		goto out;
-
 out:
 	return ret;
 }

diff --git a/drivers/net/wireless/ti/wlcore/io.h b/drivers/net/wireless/ti/wlcore/io.h
index 6c257b5..10cf374 100644
--- a/drivers/net/wireless/ti/wlcore/io.h
+++ b/drivers/net/wireless/ti/wlcore/io.h

@@ -36,8 +36,8 @@
 #define HW_PART1_START_ADDR             (HW_PARTITION_REGISTERS_ADDR + 12)
 #define HW_PART2_SIZE_ADDR              (HW_PARTITION_REGISTERS_ADDR + 16)
 #define HW_PART2_START_ADDR             (HW_PARTITION_REGISTERS_ADDR + 20)
-#define HW_PART3_SIZE_ADDR              (HW_PARTITION_REGISTERS_ADDR + 24)
-#define HW_PART3_START_ADDR             (HW_PARTITION_REGISTERS_ADDR + 28)
+#define HW_PART3_START_ADDR             (HW_PARTITION_REGISTERS_ADDR + 24)
+
 #define HW_ACCESS_REGISTER_SIZE         4
 
 #define HW_ACCESS_PRAM_MAX_RANGE	0x3c000

diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index 7e2c43f..5d28e94 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c

@@ -382,18 +382,18 @@
 	[ND_CMD_ARS_CAP] = {
 		.in_num = 2,
 		.in_sizes = { 8, 8, },
+		.out_num = 4,
+		.out_sizes = { 4, 4, 4, 4, },
+	},
+	[ND_CMD_ARS_START] = {
+		.in_num = 5,
+		.in_sizes = { 8, 8, 2, 1, 5, },
 		.out_num = 2,
 		.out_sizes = { 4, 4, },
 	},
-	[ND_CMD_ARS_START] = {
-		.in_num = 4,
-		.in_sizes = { 8, 8, 2, 6, },
-		.out_num = 1,
-		.out_sizes = { 4, },
-	},
 	[ND_CMD_ARS_STATUS] = {
-		.out_num = 2,
-		.out_sizes = { 4, UINT_MAX, },
+		.out_num = 3,
+		.out_sizes = { 4, 4, UINT_MAX, },
 	},
 };
 
@@ -442,8 +442,8 @@
 		return in_field[1];
 	else if (nvdimm && cmd == ND_CMD_VENDOR && idx == 2)
 		return out_field[1];
-	else if (!nvdimm && cmd == ND_CMD_ARS_STATUS && idx == 1)
-		return ND_CMD_ARS_STATUS_MAX;
+	else if (!nvdimm && cmd == ND_CMD_ARS_STATUS && idx == 2)
+		return out_field[1] - 8;
 
 	return UINT_MAX;
 }

diff --git a/drivers/nvdimm/e820.c b/drivers/nvdimm/e820.c
index b0045a5..95825b3 100644
--- a/drivers/nvdimm/e820.c
+++ b/drivers/nvdimm/e820.c

@@ -55,7 +55,7 @@
 	for (p = iomem_resource.child; p ; p = p->sibling) {
 		struct nd_region_desc ndr_desc;
 
-		if (strncmp(p->name, "Persistent Memory (legacy)", 26) != 0)
+		if (p->desc != IORES_DESC_PERSISTENT_MEMORY_LEGACY)
 			continue;
 
 		memset(&ndr_desc, 0, sizeof(ndr_desc));

diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 7edf316..8d0b546 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c

@@ -41,7 +41,7 @@
 	phys_addr_t		phys_addr;
 	/* when non-zero this device is hosting a 'pfn' instance */
 	phys_addr_t		data_offset;
-	unsigned long		pfn_flags;
+	u64			pfn_flags;
 	void __pmem		*virt_addr;
 	size_t			size;
 	struct badblocks	bb;

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 3cd921e..03c4641 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c

@@ -55,8 +55,9 @@
 	ns->disk->private_data = NULL;
 	spin_unlock(&dev_list_lock);
 
-	nvme_put_ctrl(ns->ctrl);
 	put_disk(ns->disk);
+	ida_simple_remove(&ns->ctrl->ns_ida, ns->instance);
+	nvme_put_ctrl(ns->ctrl);
 	kfree(ns);
 }
 
@@ -183,7 +184,7 @@
 			goto out_unmap;
 		}
 
-		if (meta_buffer) {
+		if (meta_buffer && meta_len) {
 			struct bio_integrity_payload *bip;
 
 			meta = kmalloc(meta_len, GFP_KERNEL);
@@ -373,6 +374,8 @@
 
 	if (copy_from_user(&io, uio, sizeof(io)))
 		return -EFAULT;
+	if (io.flags)
+		return -EINVAL;
 
 	switch (io.opcode) {
 	case nvme_cmd_write:
@@ -424,6 +427,8 @@
 		return -EACCES;
 	if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
 		return -EFAULT;
+	if (cmd.flags)
+		return -EINVAL;
 
 	memset(&c, 0, sizeof(c));
 	c.common.opcode = cmd.opcode;
@@ -556,6 +561,10 @@
 	u16 old_ms;
 	unsigned short bs;
 
+	if (test_bit(NVME_NS_DEAD, &ns->flags)) {
+		set_capacity(disk, 0);
+		return -ENODEV;
+	}
 	if (nvme_identify_ns(ns->ctrl, ns->ns_id, &id)) {
 		dev_warn(ns->ctrl->dev, "%s: Identify failure nvme%dn%d\n",
 				__func__, ns->ctrl->instance, ns->ns_id);
@@ -831,6 +840,23 @@
 	return ret;
 }
 
+static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
+		struct request_queue *q)
+{
+	if (ctrl->max_hw_sectors) {
+		u32 max_segments =
+			(ctrl->max_hw_sectors / (ctrl->page_size >> 9)) + 1;
+
+		blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors);
+		blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX));
+	}
+	if (ctrl->stripe_size)
+		blk_queue_chunk_sectors(q, ctrl->stripe_size >> 9);
+	if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
+		blk_queue_flush(q, REQ_FLUSH | REQ_FUA);
+	blk_queue_virt_boundary(q, ctrl->page_size - 1);
+}
+
 /*
  * Initialize the cached copies of the Identify data and various controller
  * register in our nvme_ctrl structure.  This should be called as soon as
@@ -888,6 +914,8 @@
 		}
 	}
 
+	nvme_set_queue_limits(ctrl, ctrl->admin_q);
+
 	kfree(id);
 	return 0;
 }
@@ -1118,9 +1146,13 @@
 	if (!ns)
 		return;
 
+	ns->instance = ida_simple_get(&ctrl->ns_ida, 1, 0, GFP_KERNEL);
+	if (ns->instance < 0)
+		goto out_free_ns;
+
 	ns->queue = blk_mq_init_queue(ctrl->tagset);
 	if (IS_ERR(ns->queue))
-		goto out_free_ns;
+		goto out_release_instance;
 	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, ns->queue);
 	ns->queue->queuedata = ns;
 	ns->ctrl = ctrl;
@@ -1134,17 +1166,9 @@
 	ns->disk = disk;
 	ns->lba_shift = 9; /* set to a default value for 512 until disk is validated */
 
+
 	blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
-	if (ctrl->max_hw_sectors) {
-		blk_queue_max_hw_sectors(ns->queue, ctrl->max_hw_sectors);
-		blk_queue_max_segments(ns->queue,
-			(ctrl->max_hw_sectors / (ctrl->page_size >> 9)) + 1);
-	}
-	if (ctrl->stripe_size)
-		blk_queue_chunk_sectors(ns->queue, ctrl->stripe_size >> 9);
-	if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
-		blk_queue_flush(ns->queue, REQ_FLUSH | REQ_FUA);
-	blk_queue_virt_boundary(ns->queue, ctrl->page_size - 1);
+	nvme_set_queue_limits(ctrl, ns->queue);
 
 	disk->major = nvme_major;
 	disk->first_minor = 0;
@@ -1153,7 +1177,7 @@
 	disk->queue = ns->queue;
 	disk->driverfs_dev = ctrl->device;
 	disk->flags = GENHD_FL_EXT_DEVT;
-	sprintf(disk->disk_name, "nvme%dn%d", ctrl->instance, nsid);
+	sprintf(disk->disk_name, "nvme%dn%d", ctrl->instance, ns->instance);
 
 	if (nvme_revalidate_disk(ns->disk))
 		goto out_free_disk;
@@ -1173,40 +1197,29 @@
 	kfree(disk);
  out_free_queue:
 	blk_cleanup_queue(ns->queue);
+ out_release_instance:
+	ida_simple_remove(&ctrl->ns_ida, ns->instance);
  out_free_ns:
 	kfree(ns);
 }
 
 static void nvme_ns_remove(struct nvme_ns *ns)
 {
-	bool kill = nvme_io_incapable(ns->ctrl) &&
-			!blk_queue_dying(ns->queue);
+	if (test_and_set_bit(NVME_NS_REMOVING, &ns->flags))
+		return;
 
-	lockdep_assert_held(&ns->ctrl->namespaces_mutex);
-
-	if (kill) {
-		blk_set_queue_dying(ns->queue);
-
-		/*
-		 * The controller was shutdown first if we got here through
-		 * device removal. The shutdown may requeue outstanding
-		 * requests. These need to be aborted immediately so
-		 * del_gendisk doesn't block indefinitely for their completion.
-		 */
-		blk_mq_abort_requeue_list(ns->queue);
-	}
 	if (ns->disk->flags & GENHD_FL_UP) {
 		if (blk_get_integrity(ns->disk))
 			blk_integrity_unregister(ns->disk);
 		sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
 					&nvme_ns_attr_group);
 		del_gendisk(ns->disk);
-	}
-	if (kill || !blk_queue_dying(ns->queue)) {
 		blk_mq_abort_requeue_list(ns->queue);
 		blk_cleanup_queue(ns->queue);
 	}
+	mutex_lock(&ns->ctrl->namespaces_mutex);
 	list_del_init(&ns->list);
+	mutex_unlock(&ns->ctrl->namespaces_mutex);
 	nvme_put_ns(ns);
 }
 
@@ -1300,10 +1313,8 @@
 {
 	struct nvme_ns *ns, *next;
 
-	mutex_lock(&ctrl->namespaces_mutex);
 	list_for_each_entry_safe(ns, next, &ctrl->namespaces, list)
 		nvme_ns_remove(ns);
-	mutex_unlock(&ctrl->namespaces_mutex);
 }
 
 static DEFINE_IDA(nvme_instance_ida);
@@ -1350,6 +1361,7 @@
 
 	put_device(ctrl->device);
 	nvme_release_instance(ctrl);
+	ida_destroy(&ctrl->ns_ida);
 
 	ctrl->ops->free_ctrl(ctrl);
 }
@@ -1390,6 +1402,7 @@
 	}
 	get_device(ctrl->device);
 	dev_set_drvdata(ctrl->device, ctrl);
+	ida_init(&ctrl->ns_ida);
 
 	spin_lock(&dev_list_lock);
 	list_add_tail(&ctrl->node, &nvme_ctrl_list);
@@ -1402,6 +1415,38 @@
 	return ret;
 }
 
+/**
+ * nvme_kill_queues(): Ends all namespace queues
+ * @ctrl: the dead controller that needs to end
+ *
+ * Call this function when the driver determines it is unable to get the
+ * controller in a state capable of servicing IO.
+ */
+void nvme_kill_queues(struct nvme_ctrl *ctrl)
+{
+	struct nvme_ns *ns;
+
+	mutex_lock(&ctrl->namespaces_mutex);
+	list_for_each_entry(ns, &ctrl->namespaces, list) {
+		if (!kref_get_unless_zero(&ns->kref))
+			continue;
+
+		/*
+		 * Revalidating a dead namespace sets capacity to 0. This will
+		 * end buffered writers dirtying pages that can't be synced.
+		 */
+		if (!test_and_set_bit(NVME_NS_DEAD, &ns->flags))
+			revalidate_disk(ns->disk);
+
+		blk_set_queue_dying(ns->queue);
+		blk_mq_abort_requeue_list(ns->queue);
+		blk_mq_start_stopped_hw_queues(ns->queue, true);
+
+		nvme_put_ns(ns);
+	}
+	mutex_unlock(&ctrl->namespaces_mutex);
+}
+
 void nvme_stop_queues(struct nvme_ctrl *ctrl)
 {
 	struct nvme_ns *ns;

diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 9664d07..fb15ba5 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h

@@ -72,6 +72,7 @@
 	struct mutex namespaces_mutex;
 	struct device *device;	/* char device */
 	struct list_head node;
+	struct ida ns_ida;
 
 	char name[12];
 	char serial[20];
@@ -102,6 +103,7 @@
 	struct request_queue *queue;
 	struct gendisk *disk;
 	struct kref kref;
+	int instance;
 
 	u8 eui[8];
 	u8 uuid[16];
@@ -112,6 +114,11 @@
 	bool ext;
 	u8 pi_type;
 	int type;
+	unsigned long flags;
+
+#define NVME_NS_REMOVING 0
+#define NVME_NS_DEAD     1
+
 	u64 mode_select_num_blocks;
 	u32 mode_select_block_len;
 };
@@ -240,6 +247,7 @@
 
 void nvme_stop_queues(struct nvme_ctrl *ctrl);
 void nvme_start_queues(struct nvme_ctrl *ctrl);
+void nvme_kill_queues(struct nvme_ctrl *ctrl);
 
 struct request *nvme_alloc_request(struct request_queue *q,
 		struct nvme_command *cmd, unsigned int flags);

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index a128672..680f578 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c

@@ -86,7 +86,6 @@
 
 static int nvme_reset(struct nvme_dev *dev);
 static void nvme_process_cq(struct nvme_queue *nvmeq);
-static void nvme_remove_dead_ctrl(struct nvme_dev *dev);
 static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown);
 
 /*
@@ -120,6 +119,7 @@
 	unsigned long flags;
 
 #define NVME_CTRL_RESETTING    0
+#define NVME_CTRL_REMOVING     1
 
 	struct nvme_ctrl ctrl;
 	struct completion ioq_wait;
@@ -286,6 +286,17 @@
 	return 0;
 }
 
+static void nvme_queue_scan(struct nvme_dev *dev)
+{
+	/*
+	 * Do not queue new scan work when a controller is reset during
+	 * removal.
+	 */
+	if (test_bit(NVME_CTRL_REMOVING, &dev->flags))
+		return;
+	queue_work(nvme_workq, &dev->scan_work);
+}
+
 static void nvme_complete_async_event(struct nvme_dev *dev,
 		struct nvme_completion *cqe)
 {
@@ -300,7 +311,7 @@
 	switch (result & 0xff07) {
 	case NVME_AER_NOTICE_NS_CHANGED:
 		dev_info(dev->dev, "rescanning\n");
-		queue_work(nvme_workq, &dev->scan_work);
+		nvme_queue_scan(dev);
 	default:
 		dev_warn(dev->dev, "async event result %08x\n", result);
 	}
@@ -679,7 +690,10 @@
 
 	spin_lock_irq(&nvmeq->q_lock);
 	if (unlikely(nvmeq->cq_vector < 0)) {
-		ret = BLK_MQ_RQ_QUEUE_BUSY;
+		if (ns && !test_bit(NVME_NS_DEAD, &ns->flags))
+			ret = BLK_MQ_RQ_QUEUE_BUSY;
+		else
+			ret = BLK_MQ_RQ_QUEUE_ERROR;
 		spin_unlock_irq(&nvmeq->q_lock);
 		goto out;
 	}
@@ -1250,6 +1264,12 @@
 static void nvme_dev_remove_admin(struct nvme_dev *dev)
 {
 	if (dev->ctrl.admin_q && !blk_queue_dying(dev->ctrl.admin_q)) {
+		/*
+		 * If the controller was reset during removal, it's possible
+		 * user requests may be waiting on a stopped queue. Start the
+		 * queue to flush these to completion.
+		 */
+		blk_mq_start_stopped_hw_queues(dev->ctrl.admin_q, true);
 		blk_cleanup_queue(dev->ctrl.admin_q);
 		blk_mq_free_tag_set(&dev->admin_tagset);
 	}
@@ -1690,14 +1710,14 @@
 			return 0;
 		dev->ctrl.tagset = &dev->tagset;
 	}
-	queue_work(nvme_workq, &dev->scan_work);
+	nvme_queue_scan(dev);
 	return 0;
 }
 
-static int nvme_dev_map(struct nvme_dev *dev)
+static int nvme_pci_enable(struct nvme_dev *dev)
 {
 	u64 cap;
-	int bars, result = -ENOMEM;
+	int result = -ENOMEM;
 	struct pci_dev *pdev = to_pci_dev(dev->dev);
 
 	if (pci_enable_device_mem(pdev))
@@ -1705,24 +1725,14 @@
 
 	dev->entry[0].vector = pdev->irq;
 	pci_set_master(pdev);
-	bars = pci_select_bars(pdev, IORESOURCE_MEM);
-	if (!bars)
-		goto disable_pci;
-
-	if (pci_request_selected_regions(pdev, bars, "nvme"))
-		goto disable_pci;
 
 	if (dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(64)) &&
 	    dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(32)))
 		goto disable;
 
-	dev->bar = ioremap(pci_resource_start(pdev, 0), 8192);
-	if (!dev->bar)
-		goto disable;
-
 	if (readl(dev->bar + NVME_REG_CSTS) == -1) {
 		result = -ENODEV;
-		goto unmap;
+		goto disable;
 	}
 
 	/*
@@ -1732,7 +1742,7 @@
 	if (!pdev->irq) {
 		result = pci_enable_msix(pdev, dev->entry, 1);
 		if (result < 0)
-			goto unmap;
+			goto disable;
 	}
 
 	cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
@@ -1759,18 +1769,20 @@
 	pci_save_state(pdev);
 	return 0;
 
- unmap:
-	iounmap(dev->bar);
-	dev->bar = NULL;
  disable:
-	pci_release_regions(pdev);
- disable_pci:
 	pci_disable_device(pdev);
 	return result;
 }
 
 static void nvme_dev_unmap(struct nvme_dev *dev)
 {
+	if (dev->bar)
+		iounmap(dev->bar);
+	pci_release_regions(to_pci_dev(dev->dev));
+}
+
+static void nvme_pci_disable(struct nvme_dev *dev)
+{
 	struct pci_dev *pdev = to_pci_dev(dev->dev);
 
 	if (pdev->msi_enabled)
@@ -1778,12 +1790,6 @@
 	else if (pdev->msix_enabled)
 		pci_disable_msix(pdev);
 
-	if (dev->bar) {
-		iounmap(dev->bar);
-		dev->bar = NULL;
-		pci_release_regions(pdev);
-	}
-
 	if (pci_is_enabled(pdev)) {
 		pci_disable_pcie_error_reporting(pdev);
 		pci_disable_device(pdev);
@@ -1842,7 +1848,7 @@
 	nvme_dev_list_remove(dev);
 
 	mutex_lock(&dev->shutdown_lock);
-	if (dev->bar) {
+	if (pci_is_enabled(to_pci_dev(dev->dev))) {
 		nvme_stop_queues(&dev->ctrl);
 		csts = readl(dev->bar + NVME_REG_CSTS);
 	}
@@ -1855,7 +1861,7 @@
 		nvme_disable_io_queues(dev);
 		nvme_disable_admin_queue(dev, shutdown);
 	}
-	nvme_dev_unmap(dev);
+	nvme_pci_disable(dev);
 
 	for (i = dev->queue_count - 1; i >= 0; i--)
 		nvme_clear_queue(dev->queues[i]);
@@ -1899,10 +1905,20 @@
 	kfree(dev);
 }
 
+static void nvme_remove_dead_ctrl(struct nvme_dev *dev, int status)
+{
+	dev_warn(dev->dev, "Removing after probe failure status: %d\n", status);
+
+	kref_get(&dev->ctrl.kref);
+	nvme_dev_disable(dev, false);
+	if (!schedule_work(&dev->remove_work))
+		nvme_put_ctrl(&dev->ctrl);
+}
+
 static void nvme_reset_work(struct work_struct *work)
 {
 	struct nvme_dev *dev = container_of(work, struct nvme_dev, reset_work);
-	int result;
+	int result = -ENODEV;
 
 	if (WARN_ON(test_bit(NVME_CTRL_RESETTING, &dev->flags)))
 		goto out;
@@ -1911,37 +1927,37 @@
 	 * If we're called to reset a live controller first shut it down before
 	 * moving on.
 	 */
-	if (dev->bar)
+	if (dev->ctrl.ctrl_config & NVME_CC_ENABLE)
 		nvme_dev_disable(dev, false);
 
 	set_bit(NVME_CTRL_RESETTING, &dev->flags);
 
-	result = nvme_dev_map(dev);
+	result = nvme_pci_enable(dev);
 	if (result)
 		goto out;
 
 	result = nvme_configure_admin_queue(dev);
 	if (result)
-		goto unmap;
+		goto out;
 
 	nvme_init_queue(dev->queues[0], 0);
 	result = nvme_alloc_admin_tags(dev);
 	if (result)
-		goto disable;
+		goto out;
 
 	result = nvme_init_identify(&dev->ctrl);
 	if (result)
-		goto free_tags;
+		goto out;
 
 	result = nvme_setup_io_queues(dev);
 	if (result)
-		goto free_tags;
+		goto out;
 
 	dev->ctrl.event_limit = NVME_NR_AEN_COMMANDS;
 
 	result = nvme_dev_list_add(dev);
 	if (result)
-		goto remove;
+		goto out;
 
 	/*
 	 * Keep the controller around but remove all namespaces if we don't have
@@ -1958,19 +1974,8 @@
 	clear_bit(NVME_CTRL_RESETTING, &dev->flags);
 	return;
 
- remove:
-	nvme_dev_list_remove(dev);
- free_tags:
-	nvme_dev_remove_admin(dev);
-	blk_put_queue(dev->ctrl.admin_q);
-	dev->ctrl.admin_q = NULL;
-	dev->queues[0]->tags = NULL;
- disable:
-	nvme_disable_admin_queue(dev, false);
- unmap:
-	nvme_dev_unmap(dev);
  out:
-	nvme_remove_dead_ctrl(dev);
+	nvme_remove_dead_ctrl(dev, result);
 }
 
 static void nvme_remove_dead_ctrl_work(struct work_struct *work)
@@ -1978,19 +1983,12 @@
 	struct nvme_dev *dev = container_of(work, struct nvme_dev, remove_work);
 	struct pci_dev *pdev = to_pci_dev(dev->dev);
 
+	nvme_kill_queues(&dev->ctrl);
 	if (pci_get_drvdata(pdev))
 		pci_stop_and_remove_bus_device_locked(pdev);
 	nvme_put_ctrl(&dev->ctrl);
 }
 
-static void nvme_remove_dead_ctrl(struct nvme_dev *dev)
-{
-	dev_warn(dev->dev, "Removing after probe failure\n");
-	kref_get(&dev->ctrl.kref);
-	if (!schedule_work(&dev->remove_work))
-		nvme_put_ctrl(&dev->ctrl);
-}
-
 static int nvme_reset(struct nvme_dev *dev)
 {
 	if (!dev->ctrl.admin_q || blk_queue_dying(dev->ctrl.admin_q))
@@ -2042,6 +2040,27 @@
 	.free_ctrl		= nvme_pci_free_ctrl,
 };
 
+static int nvme_dev_map(struct nvme_dev *dev)
+{
+	int bars;
+	struct pci_dev *pdev = to_pci_dev(dev->dev);
+
+	bars = pci_select_bars(pdev, IORESOURCE_MEM);
+	if (!bars)
+		return -ENODEV;
+	if (pci_request_selected_regions(pdev, bars, "nvme"))
+		return -ENODEV;
+
+	dev->bar = ioremap(pci_resource_start(pdev, 0), 8192);
+	if (!dev->bar)
+		goto release;
+
+       return 0;
+  release:
+       pci_release_regions(pdev);
+       return -ENODEV;
+}
+
 static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
 	int node, result = -ENOMEM;
@@ -2066,6 +2085,10 @@
 	dev->dev = get_device(&pdev->dev);
 	pci_set_drvdata(pdev, dev);
 
+	result = nvme_dev_map(dev);
+	if (result)
+		goto free;
+
 	INIT_LIST_HEAD(&dev->node);
 	INIT_WORK(&dev->scan_work, nvme_dev_scan);
 	INIT_WORK(&dev->reset_work, nvme_reset_work);
@@ -2089,6 +2112,7 @@
 	nvme_release_prp_pools(dev);
  put_pci:
 	put_device(dev->dev);
+	nvme_dev_unmap(dev);
  free:
 	kfree(dev->queues);
 	kfree(dev->entry);
@@ -2112,10 +2136,16 @@
 	nvme_dev_disable(dev, true);
 }
 
+/*
+ * The driver's remove may be called on a device in a partially initialized
+ * state. This function must not have any dependencies on the device state in
+ * order to proceed.
+ */
 static void nvme_remove(struct pci_dev *pdev)
 {
 	struct nvme_dev *dev = pci_get_drvdata(pdev);
 
+	set_bit(NVME_CTRL_REMOVING, &dev->flags);
 	pci_set_drvdata(pdev, NULL);
 	flush_work(&dev->scan_work);
 	nvme_remove_namespaces(&dev->ctrl);
@@ -2126,6 +2156,7 @@
 	nvme_free_queues(dev, 0);
 	nvme_release_cmb(dev);
 	nvme_release_prp_pools(dev);
+	nvme_dev_unmap(dev);
 	nvme_put_ctrl(&dev->ctrl);
 }
 

diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c
index 39c4be4..365dc7e 100644
--- a/drivers/of/of_mdio.c
+++ b/drivers/of/of_mdio.c

@@ -305,6 +305,7 @@
  * @dev: pointer to net_device claiming the phy
  * @phy_np: Pointer to device tree node for the PHY
  * @hndlr: Link state callback for the network device
+ * @flags: flags to pass to the PHY
  * @iface: PHY data interface type
  *
  * If successful, returns a pointer to the phy_device with the embedded

diff --git a/drivers/parisc/eisa_enumerator.c b/drivers/parisc/eisa_enumerator.c
index a656d9e..21905fe 100644
--- a/drivers/parisc/eisa_enumerator.c
+++ b/drivers/parisc/eisa_enumerator.c

@@ -91,7 +91,7 @@
 	for (i=0;i<HPEE_MEMORY_MAX_ENT;i++) {
 		c = get_8(buf+len);
 		
-		if (NULL != (res = kmalloc(sizeof(struct resource), GFP_KERNEL))) {
+		if (NULL != (res = kzalloc(sizeof(struct resource), GFP_KERNEL))) {
 			int result;
 			
 			res->name = name;
@@ -183,7 +183,7 @@
 	for (i=0;i<HPEE_PORT_MAX_ENT;i++) {
 		c = get_8(buf+len);
 		
-		if (NULL != (res = kmalloc(sizeof(struct resource), GFP_KERNEL))) {
+		if (NULL != (res = kzalloc(sizeof(struct resource), GFP_KERNEL))) {
 			res->name = board;
 			res->start = get_16(buf+len+1);
 			res->end = get_16(buf+len+1)+(c&HPEE_PORT_SIZE_MASK)+1;

diff --git a/drivers/pci/host/Kconfig b/drivers/pci/host/Kconfig
index 75a6054..d1cdd9c 100644
--- a/drivers/pci/host/Kconfig
+++ b/drivers/pci/host/Kconfig

@@ -14,6 +14,7 @@
 config PCI_MVEBU
 	bool "Marvell EBU PCIe controller"
 	depends on ARCH_MVEBU || ARCH_DOVE
+	depends on ARM
 	depends on OF
 
 config PCIE_DW

diff --git a/drivers/pci/host/pci-keystone-dw.c b/drivers/pci/host/pci-keystone-dw.c
index ed34c95..6153853 100644
--- a/drivers/pci/host/pci-keystone-dw.c
+++ b/drivers/pci/host/pci-keystone-dw.c

@@ -58,11 +58,6 @@
 
 #define to_keystone_pcie(x)	container_of(x, struct keystone_pcie, pp)
 
-static inline struct pcie_port *sys_to_pcie(struct pci_sys_data *sys)
-{
-	return sys->private_data;
-}
-
 static inline void update_reg_offset_bit_pos(u32 offset, u32 *reg_offset,
 					     u32 *bit_pos)
 {
@@ -108,7 +103,7 @@
 	struct pcie_port *pp;
 
 	msi = irq_data_get_msi_desc(d);
-	pp = sys_to_pcie(msi_desc_to_pci_sysdata(msi));
+	pp = (struct pcie_port *) msi_desc_to_pci_sysdata(msi);
 	ks_pcie = to_keystone_pcie(pp);
 	offset = d->irq - irq_linear_revmap(pp->irq_domain, 0);
 	update_reg_offset_bit_pos(offset, &reg_offset, &bit_pos);
@@ -146,7 +141,7 @@
 	u32 offset;
 
 	msi = irq_data_get_msi_desc(d);
-	pp = sys_to_pcie(msi_desc_to_pci_sysdata(msi));
+	pp = (struct pcie_port *) msi_desc_to_pci_sysdata(msi);
 	ks_pcie = to_keystone_pcie(pp);
 	offset = d->irq - irq_linear_revmap(pp->irq_domain, 0);
 
@@ -167,7 +162,7 @@
 	u32 offset;
 
 	msi = irq_data_get_msi_desc(d);
-	pp = sys_to_pcie(msi_desc_to_pci_sysdata(msi));
+	pp = (struct pcie_port *) msi_desc_to_pci_sysdata(msi);
 	ks_pcie = to_keystone_pcie(pp);
 	offset = d->irq - irq_linear_revmap(pp->irq_domain, 0);
 

diff --git a/drivers/pci/host/pci-layerscape.c b/drivers/pci/host/pci-layerscape.c
index 3923bed..f39961b 100644
--- a/drivers/pci/host/pci-layerscape.c
+++ b/drivers/pci/host/pci-layerscape.c

@@ -77,6 +77,16 @@
 	iowrite16(PCI_CLASS_BRIDGE_PCI, pcie->dbi + PCI_CLASS_DEVICE);
 }
 
+/* Drop MSG TLP except for Vendor MSG */
+static void ls_pcie_drop_msg_tlp(struct ls_pcie *pcie)
+{
+	u32 val;
+
+	val = ioread32(pcie->dbi + PCIE_STRFMR1);
+	val &= 0xDFFFFFFF;
+	iowrite32(val, pcie->dbi + PCIE_STRFMR1);
+}
+
 static int ls1021_pcie_link_up(struct pcie_port *pp)
 {
 	u32 state;
@@ -97,7 +107,7 @@
 static void ls1021_pcie_host_init(struct pcie_port *pp)
 {
 	struct ls_pcie *pcie = to_ls_pcie(pp);
-	u32 val, index[2];
+	u32 index[2];
 
 	pcie->scfg = syscon_regmap_lookup_by_phandle(pp->dev->of_node,
 						     "fsl,pcie-scfg");
@@ -116,13 +126,7 @@
 
 	dw_pcie_setup_rc(pp);
 
-	/*
-	 * LS1021A Workaround for internal TKT228622
-	 * to fix the INTx hang issue
-	 */
-	val = ioread32(pcie->dbi + PCIE_STRFMR1);
-	val &= 0xffff;
-	iowrite32(val, pcie->dbi + PCIE_STRFMR1);
+	ls_pcie_drop_msg_tlp(pcie);
 }
 
 static int ls_pcie_link_up(struct pcie_port *pp)
@@ -147,6 +151,7 @@
 	iowrite32(1, pcie->dbi + PCIE_DBI_RO_WR_EN);
 	ls_pcie_fix_class(pcie);
 	ls_pcie_clear_multifunction(pcie);
+	ls_pcie_drop_msg_tlp(pcie);
 	iowrite32(0, pcie->dbi + PCIE_DBI_RO_WR_EN);
 }
 

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 602eb42..f89db3a 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c

@@ -4772,8 +4772,10 @@
 void pci_bus_assign_domain_nr(struct pci_bus *bus, struct device *parent)
 {
 	static int use_dt_domains = -1;
-	int domain = of_get_pci_domain_nr(parent->of_node);
+	int domain = -1;
 
+	if (parent)
+		domain = of_get_pci_domain_nr(parent->of_node);
 	/*
 	 * Check DT domain and use_dt_domains values.
 	 *

diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c
index c777b97..5f70fee 100644
--- a/drivers/pci/xen-pcifront.c
+++ b/drivers/pci/xen-pcifront.c

@@ -53,7 +53,7 @@
 };
 
 struct pcifront_sd {
-	int domain;
+	struct pci_sysdata sd;
 	struct pcifront_device *pdev;
 };
 
@@ -67,7 +67,9 @@
 				    unsigned int domain, unsigned int bus,
 				    struct pcifront_device *pdev)
 {
-	sd->domain = domain;
+	/* Because we do not expose that information via XenBus. */
+	sd->sd.node = first_online_node;
+	sd->sd.domain = domain;
 	sd->pdev = pdev;
 }
 
@@ -468,8 +470,8 @@
 	dev_info(&pdev->xdev->dev, "Creating PCI Frontend Bus %04x:%02x\n",
 		 domain, bus);
 
-	bus_entry = kmalloc(sizeof(*bus_entry), GFP_KERNEL);
-	sd = kmalloc(sizeof(*sd), GFP_KERNEL);
+	bus_entry = kzalloc(sizeof(*bus_entry), GFP_KERNEL);
+	sd = kzalloc(sizeof(*sd), GFP_KERNEL);
 	if (!bus_entry || !sd) {
 		err = -ENOMEM;
 		goto err_out;

diff --git a/drivers/power/bq27xxx_battery_i2c.c b/drivers/power/bq27xxx_battery_i2c.c
index 9429e66..8eafc6f 100644
--- a/drivers/power/bq27xxx_battery_i2c.c
+++ b/drivers/power/bq27xxx_battery_i2c.c

@@ -21,6 +21,9 @@
 
 #include <linux/power/bq27xxx_battery.h>
 
+static DEFINE_IDR(battery_id);
+static DEFINE_MUTEX(battery_mutex);
+
 static irqreturn_t bq27xxx_battery_irq_handler_thread(int irq, void *data)
 {
 	struct bq27xxx_device_info *di = data;
@@ -70,19 +73,33 @@
 {
 	struct bq27xxx_device_info *di;
 	int ret;
+	char *name;
+	int num;
+
+	/* Get new ID for the new battery device */
+	mutex_lock(&battery_mutex);
+	num = idr_alloc(&battery_id, client, 0, 0, GFP_KERNEL);
+	mutex_unlock(&battery_mutex);
+	if (num < 0)
+		return num;
+
+	name = devm_kasprintf(&client->dev, GFP_KERNEL, "%s-%d", id->name, num);
+	if (!name)
+		goto err_mem;
 
 	di = devm_kzalloc(&client->dev, sizeof(*di), GFP_KERNEL);
 	if (!di)
-		return -ENOMEM;
+		goto err_mem;
 
+	di->id = num;
 	di->dev = &client->dev;
 	di->chip = id->driver_data;
-	di->name = id->name;
+	di->name = name;
 	di->bus.read = bq27xxx_battery_i2c_read;
 
 	ret = bq27xxx_battery_setup(di);
 	if (ret)
-		return ret;
+		goto err_failed;
 
 	/* Schedule a polling after about 1 min */
 	schedule_delayed_work(&di->work, 60 * HZ);
@@ -103,6 +120,16 @@
 	}
 
 	return 0;
+
+err_mem:
+	ret = -ENOMEM;
+
+err_failed:
+	mutex_lock(&battery_mutex);
+	idr_remove(&battery_id, num);
+	mutex_unlock(&battery_mutex);
+
+	return ret;
 }
 
 static int bq27xxx_battery_i2c_remove(struct i2c_client *client)
@@ -111,6 +138,10 @@
 
 	bq27xxx_battery_teardown(di);
 
+	mutex_lock(&battery_mutex);
+	idr_remove(&battery_id, di->id);
+	mutex_unlock(&battery_mutex);
+
 	return 0;
 }
 

diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c
index da7bae9..579fd65 100644
--- a/drivers/ptp/ptp_chardev.c
+++ b/drivers/ptp/ptp_chardev.c

@@ -22,6 +22,7 @@
 #include <linux/poll.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
+#include <linux/timekeeping.h>
 
 #include "ptp_private.h"
 
@@ -120,11 +121,13 @@
 	struct ptp_clock_caps caps;
 	struct ptp_clock_request req;
 	struct ptp_sys_offset *sysoff = NULL;
+	struct ptp_sys_offset_precise precise_offset;
 	struct ptp_pin_desc pd;
 	struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock);
 	struct ptp_clock_info *ops = ptp->info;
 	struct ptp_clock_time *pct;
 	struct timespec64 ts;
+	struct system_device_crosststamp xtstamp;
 	int enable, err = 0;
 	unsigned int i, pin_index;
 
@@ -138,6 +141,7 @@
 		caps.n_per_out = ptp->info->n_per_out;
 		caps.pps = ptp->info->pps;
 		caps.n_pins = ptp->info->n_pins;
+		caps.cross_timestamping = ptp->info->getcrosststamp != NULL;
 		if (copy_to_user((void __user *)arg, &caps, sizeof(caps)))
 			err = -EFAULT;
 		break;
@@ -180,6 +184,29 @@
 		err = ops->enable(ops, &req, enable);
 		break;
 
+	case PTP_SYS_OFFSET_PRECISE:
+		if (!ptp->info->getcrosststamp) {
+			err = -EOPNOTSUPP;
+			break;
+		}
+		err = ptp->info->getcrosststamp(ptp->info, &xtstamp);
+		if (err)
+			break;
+
+		ts = ktime_to_timespec64(xtstamp.device);
+		precise_offset.device.sec = ts.tv_sec;
+		precise_offset.device.nsec = ts.tv_nsec;
+		ts = ktime_to_timespec64(xtstamp.sys_realtime);
+		precise_offset.sys_realtime.sec = ts.tv_sec;
+		precise_offset.sys_realtime.nsec = ts.tv_nsec;
+		ts = ktime_to_timespec64(xtstamp.sys_monoraw);
+		precise_offset.sys_monoraw.sec = ts.tv_sec;
+		precise_offset.sys_monoraw.nsec = ts.tv_nsec;
+		if (copy_to_user((void __user *)arg, &precise_offset,
+				 sizeof(precise_offset)))
+			err = -EFAULT;
+		break;
+
 	case PTP_SYS_OFFSET:
 		sysoff = kmalloc(sizeof(*sysoff), GFP_KERNEL);
 		if (!sysoff) {

diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c
index d7b87c6..e220edc 100644
--- a/drivers/rapidio/rio.c
+++ b/drivers/rapidio/rio.c

@@ -117,7 +117,7 @@
 	if (mport->ops->open_inb_mbox == NULL)
 		goto out;
 
-	res = kmalloc(sizeof(struct resource), GFP_KERNEL);
+	res = kzalloc(sizeof(struct resource), GFP_KERNEL);
 
 	if (res) {
 		rio_init_mbox_res(res, mbox, mbox);
@@ -185,7 +185,7 @@
 	if (mport->ops->open_outb_mbox == NULL)
 		goto out;
 
-	res = kmalloc(sizeof(struct resource), GFP_KERNEL);
+	res = kzalloc(sizeof(struct resource), GFP_KERNEL);
 
 	if (res) {
 		rio_init_mbox_res(res, mbox, mbox);
@@ -285,7 +285,7 @@
 {
 	int rc = 0;
 
-	struct resource *res = kmalloc(sizeof(struct resource), GFP_KERNEL);
+	struct resource *res = kzalloc(sizeof(struct resource), GFP_KERNEL);
 
 	if (res) {
 		rio_init_dbell_res(res, start, end);
@@ -360,7 +360,7 @@
 struct resource *rio_request_outb_dbell(struct rio_dev *rdev, u16 start,
 					u16 end)
 {
-	struct resource *res = kmalloc(sizeof(struct resource), GFP_KERNEL);
+	struct resource *res = kzalloc(sizeof(struct resource), GFP_KERNEL);
 
 	if (res) {
 		rio_init_dbell_res(res, start, end);

diff --git a/drivers/s390/block/dasd_diag.c b/drivers/s390/block/dasd_diag.c
index cb61f30..277b5c8 100644
--- a/drivers/s390/block/dasd_diag.c
+++ b/drivers/s390/block/dasd_diag.c

@@ -67,7 +67,7 @@
  * and function code cmd.
  * In case of an exception return 3. Otherwise return result of bitwise OR of
  * resulting condition code and DIAG return code. */
-static inline int dia250(void *iob, int cmd)
+static inline int __dia250(void *iob, int cmd)
 {
 	register unsigned long reg2 asm ("2") = (unsigned long) iob;
 	typedef union {
@@ -77,7 +77,6 @@
 	int rc;
 
 	rc = 3;
-	diag_stat_inc(DIAG_STAT_X250);
 	asm volatile(
 		"	diag	2,%2,0x250\n"
 		"0:	ipm	%0\n"
@@ -91,6 +90,12 @@
 	return rc;
 }
 
+static inline int dia250(void *iob, int cmd)
+{
+	diag_stat_inc(DIAG_STAT_X250);
+	return __dia250(iob, cmd);
+}
+
 /* Initialize block I/O to DIAG device using the specified blocksize and
  * block offset. On success, return zero and set end_block to contain the
  * number of blocks on the device minus the specified offset. Return non-zero

diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index 3b3e099..d6a691e 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c

@@ -4002,6 +4002,7 @@
 	struct ipr_sglist *sglist;
 	char fname[100];
 	char *src;
+	char *endline;
 	int result, dnld_size;
 
 	if (!capable(CAP_SYS_ADMIN))
@@ -4009,6 +4010,10 @@
 
 	snprintf(fname, sizeof(fname), "%s", buf);
 
+	endline = strchr(fname, '\n');
+	if (endline)
+		*endline = '\0';
+
 	if (request_firmware(&fw_entry, fname, &ioa_cfg->pdev->dev)) {
 		dev_err(&ioa_cfg->pdev->dev, "Firmware file %s not found\n", fname);
 		return -EIO;

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index fa6b2c4..8c6e318 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c

@@ -1344,6 +1344,7 @@
 
 	switch (ret) {
 	case BLKPREP_KILL:
+	case BLKPREP_INVALID:
 		req->errors = DID_NO_CONNECT << 16;
 		/* release the command and kill it */
 		if (req->special) {

diff --git a/drivers/sh/pm_runtime.c b/drivers/sh/pm_runtime.c
index 91a00301..a9bac3b 100644
--- a/drivers/sh/pm_runtime.c
+++ b/drivers/sh/pm_runtime.c

@@ -34,7 +34,7 @@
 
 static int __init sh_pm_runtime_init(void)
 {
-	if (IS_ENABLED(CONFIG_ARCH_SHMOBILE)) {
+	if (IS_ENABLED(CONFIG_OF) && IS_ENABLED(CONFIG_ARCH_SHMOBILE)) {
 		if (!of_find_compatible_node(NULL, NULL,
 					     "renesas,cpg-mstp-clocks"))
 			return 0;

diff --git a/drivers/sh/superhyway/superhyway.c b/drivers/sh/superhyway/superhyway.c
index 2d9e7f3..bb1fb771 100644
--- a/drivers/sh/superhyway/superhyway.c
+++ b/drivers/sh/superhyway/superhyway.c

@@ -66,7 +66,7 @@
 	superhyway_read_vcr(dev, base, &dev->vcr);
 
 	if (!dev->resource) {
-		dev->resource = kmalloc(sizeof(struct resource), GFP_KERNEL);
+		dev->resource = kzalloc(sizeof(struct resource), GFP_KERNEL);
 		if (!dev->resource) {
 			kfree(dev);
 			return -ENOMEM;

diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c
index 6a4ff27..c688efa 100644
--- a/drivers/spi/spi-imx.c
+++ b/drivers/spi/spi-imx.c

@@ -204,8 +204,8 @@
 {
 	struct spi_imx_data *spi_imx = spi_master_get_devdata(master);
 
-	if (spi_imx->dma_is_inited &&
-	    transfer->len > spi_imx->wml * sizeof(u32))
+	if (spi_imx->dma_is_inited && transfer->len >= spi_imx->wml &&
+	    (transfer->len % spi_imx->wml) == 0)
 		return true;
 	return false;
 }
@@ -919,8 +919,6 @@
 	struct dma_async_tx_descriptor *desc_tx = NULL, *desc_rx = NULL;
 	int ret;
 	unsigned long timeout;
-	u32 dma;
-	int left;
 	struct spi_master *master = spi_imx->bitbang.master;
 	struct sg_table *tx = &transfer->tx_sg, *rx = &transfer->rx_sg;
 
@@ -954,13 +952,6 @@
 	/* Trigger the cspi module. */
 	spi_imx->dma_finished = 0;
 
-	dma = readl(spi_imx->base + MX51_ECSPI_DMA);
-	dma = dma & (~MX51_ECSPI_DMA_RXT_WML_MASK);
-	/* Change RX_DMA_LENGTH trigger dma fetch tail data */
-	left = transfer->len % spi_imx->wml;
-	if (left)
-		writel(dma | (left << MX51_ECSPI_DMA_RXT_WML_OFFSET),
-				spi_imx->base + MX51_ECSPI_DMA);
 	/*
 	 * Set these order to avoid potential RX overflow. The overflow may
 	 * happen if we enable SPI HW before starting RX DMA due to rescheduling
@@ -992,10 +983,6 @@
 			spi_imx->devtype_data->reset(spi_imx);
 			dmaengine_terminate_all(master->dma_rx);
 		}
-		dma &= ~MX51_ECSPI_DMA_RXT_WML_MASK;
-		writel(dma |
-		       spi_imx->wml << MX51_ECSPI_DMA_RXT_WML_OFFSET,
-		       spi_imx->base + MX51_ECSPI_DMA);
 	}
 
 	spi_imx->dma_finished = 1;

diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c
index 79a8bc4..7cb1b2d 100644
--- a/drivers/spi/spi-rockchip.c
+++ b/drivers/spi/spi-rockchip.c

@@ -749,6 +749,7 @@
 	return 0;
 
 err_register_master:
+	pm_runtime_disable(&pdev->dev);
 	if (rs->dma_tx.ch)
 		dma_release_channel(rs->dma_tx.ch);
 	if (rs->dma_rx.ch)
@@ -778,6 +779,8 @@
 	if (rs->dma_rx.ch)
 		dma_release_channel(rs->dma_rx.ch);
 
+	spi_master_put(master);
+
 	return 0;
 }
 

diff --git a/drivers/ssb/Kconfig b/drivers/ssb/Kconfig
index 0c67586..d8e4219 100644
--- a/drivers/ssb/Kconfig
+++ b/drivers/ssb/Kconfig

@@ -83,6 +83,7 @@
 config SSB_HOST_SOC
 	bool "Support for SSB bus on SoC"
 	depends on SSB && BCM47XX_NVRAM
+	select SSB_SPROM
 	help
 	  Host interface for a SSB directly mapped into memory. This is
 	  for some Broadcom SoCs from the BCM47xx and BCM53xx lines.

diff --git a/drivers/staging/media/davinci_vpfe/vpfe_video.c b/drivers/staging/media/davinci_vpfe/vpfe_video.c
index 3ec7e65..db49af9 100644
--- a/drivers/staging/media/davinci_vpfe/vpfe_video.c
+++ b/drivers/staging/media/davinci_vpfe/vpfe_video.c

@@ -147,7 +147,7 @@
 	mutex_lock(&mdev->graph_mutex);
 	ret = media_entity_graph_walk_init(&graph, entity->graph_obj.mdev);
 	if (ret) {
-		mutex_unlock(&video->lock);
+		mutex_unlock(&mdev->graph_mutex);
 		return -ENOMEM;
 	}
 	media_entity_graph_walk_start(&graph, entity);

diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c
index 82a663b..4f229e7 100644
--- a/drivers/target/target_core_tmr.c
+++ b/drivers/target/target_core_tmr.c

@@ -177,7 +177,6 @@
 
 		if (!__target_check_io_state(se_cmd, se_sess, 0)) {
 			spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags);
-			target_put_sess_cmd(se_cmd);
 			goto out;
 		}
 		list_del_init(&se_cmd->se_cmd_list);

diff --git a/drivers/usb/chipidea/ci_hdrc_pci.c b/drivers/usb/chipidea/ci_hdrc_pci.c
index b59195e..b635ab6 100644
--- a/drivers/usb/chipidea/ci_hdrc_pci.c
+++ b/drivers/usb/chipidea/ci_hdrc_pci.c

@@ -85,8 +85,8 @@
 
 	/* register a nop PHY */
 	ci->phy = usb_phy_generic_register();
-	if (!ci->phy)
-		return -ENOMEM;
+	if (IS_ERR(ci->phy))
+		return PTR_ERR(ci->phy);
 
 	memset(res, 0, sizeof(res));
 	res[0].start	= pci_resource_start(pdev, 0);

diff --git a/drivers/usb/chipidea/debug.c b/drivers/usb/chipidea/debug.c
index a4f7db2..df47110 100644
--- a/drivers/usb/chipidea/debug.c
+++ b/drivers/usb/chipidea/debug.c

@@ -100,6 +100,9 @@
 	if (sscanf(buf, "%u", &mode) != 1)
 		return -EINVAL;
 
+	if (mode > 255)
+		return -EBADRQC;
+
 	pm_runtime_get_sync(ci->dev);
 	spin_lock_irqsave(&ci->lock, flags);
 	ret = hw_port_test_set(ci, mode);

diff --git a/drivers/usb/chipidea/otg.c b/drivers/usb/chipidea/otg.c
index 45f86da..03b6743 100644
--- a/drivers/usb/chipidea/otg.c
+++ b/drivers/usb/chipidea/otg.c

@@ -158,7 +158,7 @@
 int ci_hdrc_otg_init(struct ci_hdrc *ci)
 {
 	INIT_WORK(&ci->work, ci_otg_work);
-	ci->wq = create_singlethread_workqueue("ci_otg");
+	ci->wq = create_freezable_workqueue("ci_otg");
 	if (!ci->wq) {
 		dev_err(ci->dev, "can't create workqueue\n");
 		return -ENODEV;

diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 350dcd9..51b43691 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c

@@ -5401,6 +5401,7 @@
 	}
 
 	bos = udev->bos;
+	udev->bos = NULL;
 
 	for (i = 0; i < SET_CONFIG_TRIES; ++i) {
 
@@ -5493,11 +5494,8 @@
 	usb_set_usb2_hardware_lpm(udev, 1);
 	usb_unlocked_enable_lpm(udev);
 	usb_enable_ltm(udev);
-	/* release the new BOS descriptor allocated  by hub_port_init() */
-	if (udev->bos != bos) {
-		usb_release_bos_descriptor(udev);
-		udev->bos = bos;
-	}
+	usb_release_bos_descriptor(udev);
+	udev->bos = bos;
 	return 0;
 
 re_enumerate:

diff --git a/drivers/usb/dwc2/Kconfig b/drivers/usb/dwc2/Kconfig
index fd95ba6..f0decc0 100644
--- a/drivers/usb/dwc2/Kconfig
+++ b/drivers/usb/dwc2/Kconfig

@@ -1,5 +1,6 @@
 config USB_DWC2
 	tristate "DesignWare USB2 DRD Core Support"
+	depends on HAS_DMA
 	depends on USB || USB_GADGET
 	help
 	  Say Y here if your system has a Dual Role Hi-Speed USB

diff --git a/drivers/usb/dwc2/core.c b/drivers/usb/dwc2/core.c
index e991d55..46c4ba7 100644
--- a/drivers/usb/dwc2/core.c
+++ b/drivers/usb/dwc2/core.c

@@ -619,6 +619,12 @@
 			 __func__, hsotg->dr_mode);
 		break;
 	}
+
+	/*
+	 * NOTE: This is required for some rockchip soc based
+	 * platforms.
+	 */
+	msleep(50);
 }
 
 /*

diff --git a/drivers/usb/dwc2/hcd_ddma.c b/drivers/usb/dwc2/hcd_ddma.c
index 36606fc..a41274a 100644
--- a/drivers/usb/dwc2/hcd_ddma.c
+++ b/drivers/usb/dwc2/hcd_ddma.c

@@ -1174,14 +1174,11 @@
 	failed = dwc2_update_non_isoc_urb_state_ddma(hsotg, chan, qtd, dma_desc,
 						     halt_status, n_bytes,
 						     xfer_done);
-	if (*xfer_done && urb->status != -EINPROGRESS)
-		failed = 1;
-
-	if (failed) {
+	if (failed || (*xfer_done && urb->status != -EINPROGRESS)) {
 		dwc2_host_complete(hsotg, qtd, urb->status);
 		dwc2_hcd_qtd_unlink_and_free(hsotg, qtd, qh);
-		dev_vdbg(hsotg->dev, "failed=%1x xfer_done=%1x status=%08x\n",
-			 failed, *xfer_done, urb->status);
+		dev_vdbg(hsotg->dev, "failed=%1x xfer_done=%1x\n",
+			 failed, *xfer_done);
 		return failed;
 	}
 
@@ -1236,21 +1233,23 @@
 
 	list_for_each_safe(qtd_item, qtd_tmp, &qh->qtd_list) {
 		int i;
+		int qtd_desc_count;
 
 		qtd = list_entry(qtd_item, struct dwc2_qtd, qtd_list_entry);
 		xfer_done = 0;
+		qtd_desc_count = qtd->n_desc;
 
-		for (i = 0; i < qtd->n_desc; i++) {
+		for (i = 0; i < qtd_desc_count; i++) {
 			if (dwc2_process_non_isoc_desc(hsotg, chan, chnum, qtd,
 						       desc_num, halt_status,
-						       &xfer_done)) {
-				qtd = NULL;
-				break;
-			}
+						       &xfer_done))
+				goto stop_scan;
+
 			desc_num++;
 		}
 	}
 
+stop_scan:
 	if (qh->ep_type != USB_ENDPOINT_XFER_CONTROL) {
 		/*
 		 * Resetting the data toggle for bulk and interrupt endpoints
@@ -1258,7 +1257,7 @@
 		 */
 		if (halt_status == DWC2_HC_XFER_STALL)
 			qh->data_toggle = DWC2_HC_PID_DATA0;
-		else if (qtd)
+		else
 			dwc2_hcd_save_data_toggle(hsotg, chan, chnum, qtd);
 	}
 

diff --git a/drivers/usb/dwc2/hcd_intr.c b/drivers/usb/dwc2/hcd_intr.c
index f825380..cadba8b 100644
--- a/drivers/usb/dwc2/hcd_intr.c
+++ b/drivers/usb/dwc2/hcd_intr.c

@@ -525,11 +525,19 @@
 	u32 pid = (hctsiz & TSIZ_SC_MC_PID_MASK) >> TSIZ_SC_MC_PID_SHIFT;
 
 	if (chan->ep_type != USB_ENDPOINT_XFER_CONTROL) {
+		if (WARN(!chan || !chan->qh,
+			 "chan->qh must be specified for non-control eps\n"))
+			return;
+
 		if (pid == TSIZ_SC_MC_PID_DATA0)
 			chan->qh->data_toggle = DWC2_HC_PID_DATA0;
 		else
 			chan->qh->data_toggle = DWC2_HC_PID_DATA1;
 	} else {
+		if (WARN(!qtd,
+			 "qtd must be specified for control eps\n"))
+			return;
+
 		if (pid == TSIZ_SC_MC_PID_DATA0)
 			qtd->data_toggle = DWC2_HC_PID_DATA0;
 		else

diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
index 2913068..e4f8b90 100644
--- a/drivers/usb/dwc3/core.h
+++ b/drivers/usb/dwc3/core.h

@@ -856,7 +856,6 @@
 	unsigned		pullups_connected:1;
 	unsigned		resize_fifos:1;
 	unsigned		setup_packet_pending:1;
-	unsigned		start_config_issued:1;
 	unsigned		three_stage_setup:1;
 	unsigned		usb3_lpm_capable:1;
 

diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c
index 3a9354a..8d6b75c 100644
--- a/drivers/usb/dwc3/ep0.c
+++ b/drivers/usb/dwc3/ep0.c

@@ -555,7 +555,6 @@
 	int ret;
 	u32 reg;
 
-	dwc->start_config_issued = false;
 	cfg = le16_to_cpu(ctrl->wValue);
 
 	switch (state) {
@@ -737,10 +736,6 @@
 		dwc3_trace(trace_dwc3_ep0, "USB_REQ_SET_ISOCH_DELAY");
 		ret = dwc3_ep0_set_isoch_delay(dwc, ctrl);
 		break;
-	case USB_REQ_SET_INTERFACE:
-		dwc3_trace(trace_dwc3_ep0, "USB_REQ_SET_INTERFACE");
-		dwc->start_config_issued = false;
-		/* Fall through */
 	default:
 		dwc3_trace(trace_dwc3_ep0, "Forwarding to gadget driver");
 		ret = dwc3_ep0_delegate_req(dwc, ctrl);

diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 7d1dd82..2363bad 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c

@@ -385,24 +385,66 @@
 	dep->trb_pool_dma = 0;
 }
 
+static int dwc3_gadget_set_xfer_resource(struct dwc3 *dwc, struct dwc3_ep *dep);
+
+/**
+ * dwc3_gadget_start_config - Configure EP resources
+ * @dwc: pointer to our controller context structure
+ * @dep: endpoint that is being enabled
+ *
+ * The assignment of transfer resources cannot perfectly follow the
+ * data book due to the fact that the controller driver does not have
+ * all knowledge of the configuration in advance. It is given this
+ * information piecemeal by the composite gadget framework after every
+ * SET_CONFIGURATION and SET_INTERFACE. Trying to follow the databook
+ * programming model in this scenario can cause errors. For two
+ * reasons:
+ *
+ * 1) The databook says to do DEPSTARTCFG for every SET_CONFIGURATION
+ * and SET_INTERFACE (8.1.5). This is incorrect in the scenario of
+ * multiple interfaces.
+ *
+ * 2) The databook does not mention doing more DEPXFERCFG for new
+ * endpoint on alt setting (8.1.6).
+ *
+ * The following simplified method is used instead:
+ *
+ * All hardware endpoints can be assigned a transfer resource and this
+ * setting will stay persistent until either a core reset or
+ * hibernation. So whenever we do a DEPSTARTCFG(0) we can go ahead and
+ * do DEPXFERCFG for every hardware endpoint as well. We are
+ * guaranteed that there are as many transfer resources as endpoints.
+ *
+ * This function is called for each endpoint when it is being enabled
+ * but is triggered only when called for EP0-out, which always happens
+ * first, and which should only happen in one of the above conditions.
+ */
 static int dwc3_gadget_start_config(struct dwc3 *dwc, struct dwc3_ep *dep)
 {
 	struct dwc3_gadget_ep_cmd_params params;
 	u32			cmd;
+	int			i;
+	int			ret;
+
+	if (dep->number)
+		return 0;
 
 	memset(&params, 0x00, sizeof(params));
+	cmd = DWC3_DEPCMD_DEPSTARTCFG;
 
-	if (dep->number != 1) {
-		cmd = DWC3_DEPCMD_DEPSTARTCFG;
-		/* XferRscIdx == 0 for ep0 and 2 for the remaining */
-		if (dep->number > 1) {
-			if (dwc->start_config_issued)
-				return 0;
-			dwc->start_config_issued = true;
-			cmd |= DWC3_DEPCMD_PARAM(2);
-		}
+	ret = dwc3_send_gadget_ep_cmd(dwc, 0, cmd, &params);
+	if (ret)
+		return ret;
 
-		return dwc3_send_gadget_ep_cmd(dwc, 0, cmd, &params);
+	for (i = 0; i < DWC3_ENDPOINTS_NUM; i++) {
+		struct dwc3_ep *dep = dwc->eps[i];
+
+		if (!dep)
+			continue;
+
+		ret = dwc3_gadget_set_xfer_resource(dwc, dep);
+		if (ret)
+			return ret;
 	}
 
 	return 0;
@@ -516,10 +558,6 @@
 		struct dwc3_trb	*trb_st_hw;
 		struct dwc3_trb	*trb_link;
 
-		ret = dwc3_gadget_set_xfer_resource(dwc, dep);
-		if (ret)
-			return ret;
-
 		dep->endpoint.desc = desc;
 		dep->comp_desc = comp_desc;
 		dep->type = usb_endpoint_type(desc);
@@ -1636,8 +1674,6 @@
 	}
 	dwc3_writel(dwc->regs, DWC3_DCFG, reg);
 
-	dwc->start_config_issued = false;
-
 	/* Start with SuperSpeed Default */
 	dwc3_gadget_ep0_desc.wMaxPacketSize = cpu_to_le16(512);
 
@@ -2237,7 +2273,6 @@
 	dwc3_writel(dwc->regs, DWC3_DCTL, reg);
 
 	dwc3_disconnect_gadget(dwc);
-	dwc->start_config_issued = false;
 
 	dwc->gadget.speed = USB_SPEED_UNKNOWN;
 	dwc->setup_packet_pending = false;
@@ -2288,7 +2323,6 @@
 
 	dwc3_stop_active_transfers(dwc);
 	dwc3_clear_stall_all_ep(dwc);
-	dwc->start_config_issued = false;
 
 	/* Reset device address to zero */
 	reg = dwc3_readl(dwc->regs, DWC3_DCFG);

diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c
index 7e179f8..87fb0fd 100644
--- a/drivers/usb/gadget/legacy/inode.c
+++ b/drivers/usb/gadget/legacy/inode.c

@@ -130,7 +130,8 @@
 					setup_can_stall : 1,
 					setup_out_ready : 1,
 					setup_out_error : 1,
-					setup_abort : 1;
+					setup_abort : 1,
+					gadget_registered : 1;
 	unsigned			setup_wLength;
 
 	/* the rest is basically write-once */
@@ -1179,7 +1180,8 @@
 
 	/* closing ep0 === shutdown all */
 
-	usb_gadget_unregister_driver (&gadgetfs_driver);
+	if (dev->gadget_registered)
+		usb_gadget_unregister_driver (&gadgetfs_driver);
 
 	/* at this point "good" hardware has disconnected the
 	 * device from USB; the host won't see it any more.
@@ -1847,6 +1849,7 @@
 		 * kick in after the ep0 descriptor is closed.
 		 */
 		value = len;
+		dev->gadget_registered = true;
 	}
 	return value;
 

diff --git a/drivers/usb/gadget/udc/fsl_qe_udc.c b/drivers/usb/gadget/udc/fsl_qe_udc.c
index 53c0692..93d28cb 100644
--- a/drivers/usb/gadget/udc/fsl_qe_udc.c
+++ b/drivers/usb/gadget/udc/fsl_qe_udc.c

@@ -2340,7 +2340,7 @@
 {
 	struct qe_udc *udc;
 	struct device_node *np = ofdev->dev.of_node;
-	unsigned int tmp_addr = 0;
+	unsigned long tmp_addr = 0;
 	struct usb_device_para __iomem *usbpram;
 	unsigned int i;
 	u64 size;

diff --git a/drivers/usb/gadget/udc/net2280.h b/drivers/usb/gadget/udc/net2280.h
index 4dff60d..0d32052 100644
--- a/drivers/usb/gadget/udc/net2280.h
+++ b/drivers/usb/gadget/udc/net2280.h

@@ -369,9 +369,20 @@
 	static const u32 ep_enhanced[9] = { 0x10, 0x60, 0x30, 0x80,
 					  0x50, 0x20, 0x70, 0x40, 0x90 };
 
-	if (ep->dev->enhanced_mode)
+	if (ep->dev->enhanced_mode) {
 		reg = ep_enhanced[ep->num];
-	else{
+		switch (ep->dev->gadget.speed) {
+		case USB_SPEED_SUPER:
+			reg += 2;
+			break;
+		case USB_SPEED_FULL:
+			reg += 1;
+			break;
+		case USB_SPEED_HIGH:
+		default:
+			break;
+		}
+	} else {
 		reg = (ep->num + 1) * 0x10;
 		if (ep->dev->gadget.speed != USB_SPEED_HIGH)
 			reg += 1;

diff --git a/drivers/usb/gadget/udc/udc-core.c b/drivers/usb/gadget/udc/udc-core.c
index fd73a3e..b86a6f0 100644
--- a/drivers/usb/gadget/udc/udc-core.c
+++ b/drivers/usb/gadget/udc/udc-core.c

@@ -413,9 +413,10 @@
 		if (!driver->udc_name || strcmp(driver->udc_name,
 						dev_name(&udc->dev)) == 0) {
 			ret = udc_bind_to_driver(udc, driver);
+			if (ret != -EPROBE_DEFER)
+				list_del(&driver->pending);
 			if (ret)
 				goto err4;
-			list_del(&driver->pending);
 			break;
 		}
 	}

diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c
index 795a45b..58487a4 100644
--- a/drivers/usb/musb/musb_host.c
+++ b/drivers/usb/musb/musb_host.c

@@ -662,7 +662,7 @@
 		csr &= ~(MUSB_TXCSR_AUTOSET | MUSB_TXCSR_DMAMODE);
 		csr |= MUSB_TXCSR_DMAENAB; /* against programmer's guide */
 	}
-	channel->desired_mode = mode;
+	channel->desired_mode = *mode;
 	musb_writew(epio, MUSB_TXCSR, csr);
 
 	return 0;
@@ -2003,10 +2003,8 @@
 				qh->offset,
 				urb->transfer_buffer_length);
 
-			done = musb_rx_dma_in_inventra_cppi41(c, hw_ep, qh,
-							      urb, xfer_len,
-							      iso_err);
-			if (done)
+			if (musb_rx_dma_in_inventra_cppi41(c, hw_ep, qh, urb,
+							   xfer_len, iso_err))
 				goto finish;
 			else
 				dev_err(musb->controller, "error: rx_dma failed\n");

diff --git a/drivers/usb/phy/phy-msm-usb.c b/drivers/usb/phy/phy-msm-usb.c
index 970a30e..72b387d 100644
--- a/drivers/usb/phy/phy-msm-usb.c
+++ b/drivers/usb/phy/phy-msm-usb.c

@@ -757,14 +757,8 @@
 	otg->host = host;
 	dev_dbg(otg->usb_phy->dev, "host driver registered w/ tranceiver\n");
 
-	/*
-	 * Kick the state machine work, if peripheral is not supported
-	 * or peripheral is already registered with us.
-	 */
-	if (motg->pdata->mode == USB_DR_MODE_HOST || otg->gadget) {
-		pm_runtime_get_sync(otg->usb_phy->dev);
-		schedule_work(&motg->sm_work);
-	}
+	pm_runtime_get_sync(otg->usb_phy->dev);
+	schedule_work(&motg->sm_work);
 
 	return 0;
 }
@@ -827,14 +821,8 @@
 	dev_dbg(otg->usb_phy->dev,
 		"peripheral driver registered w/ tranceiver\n");
 
-	/*
-	 * Kick the state machine work, if host is not supported
-	 * or host is already registered with us.
-	 */
-	if (motg->pdata->mode == USB_DR_MODE_PERIPHERAL || otg->host) {
-		pm_runtime_get_sync(otg->usb_phy->dev);
-		schedule_work(&motg->sm_work);
-	}
+	pm_runtime_get_sync(otg->usb_phy->dev);
+	schedule_work(&motg->sm_work);
 
 	return 0;
 }

diff --git a/drivers/usb/serial/Kconfig b/drivers/usb/serial/Kconfig
index f612dda..56ecb8b 100644
--- a/drivers/usb/serial/Kconfig
+++ b/drivers/usb/serial/Kconfig

@@ -475,22 +475,6 @@
 	  To compile this driver as a module, choose M here: the
 	  module will be called mos7840.  If unsure, choose N.
 
-config USB_SERIAL_MXUPORT11
-	tristate "USB Moxa UPORT 11x0 Serial Driver"
-	---help---
-	  Say Y here if you want to use a MOXA UPort 11x0 Serial hub.
-
-	  This driver supports:
-
-	  - UPort 1110  : 1 port RS-232 USB to Serial Hub.
-	  - UPort 1130  : 1 port RS-422/485 USB to Serial Hub.
-	  - UPort 1130I : 1 port RS-422/485 USB to Serial Hub with Isolation.
-	  - UPort 1150  : 1 port RS-232/422/485 USB to Serial Hub.
-	  - UPort 1150I : 1 port RS-232/422/485 USB to Serial Hub with Isolation.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called mxu11x0.
-
 config USB_SERIAL_MXUPORT
 	tristate "USB Moxa UPORT Serial Driver"
 	---help---

diff --git a/drivers/usb/serial/Makefile b/drivers/usb/serial/Makefile
index f3fa5e5..349d9df 100644
--- a/drivers/usb/serial/Makefile
+++ b/drivers/usb/serial/Makefile

@@ -38,7 +38,6 @@
 obj-$(CONFIG_USB_SERIAL_MOS7720)		+= mos7720.o
 obj-$(CONFIG_USB_SERIAL_MOS7840)		+= mos7840.o
 obj-$(CONFIG_USB_SERIAL_MXUPORT)		+= mxuport.o
-obj-$(CONFIG_USB_SERIAL_MXUPORT11)		+= mxu11x0.o
 obj-$(CONFIG_USB_SERIAL_NAVMAN)			+= navman.o
 obj-$(CONFIG_USB_SERIAL_OMNINET)		+= omninet.o
 obj-$(CONFIG_USB_SERIAL_OPTICON)		+= opticon.o

diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index 987813b..73a366d 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c

@@ -163,6 +163,9 @@
 	{ USB_DEVICE(0x1843, 0x0200) }, /* Vaisala USB Instrument Cable */
 	{ USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */
 	{ USB_DEVICE(0x18EF, 0xE025) }, /* ELV Marble Sound Board 1 */
+	{ USB_DEVICE(0x1901, 0x0190) }, /* GE B850 CP2105 Recorder interface */
+	{ USB_DEVICE(0x1901, 0x0193) }, /* GE B650 CP2104 PMC interface */
+	{ USB_DEVICE(0x19CF, 0x3000) }, /* Parrot NMEA GPS Flight Recorder */
 	{ USB_DEVICE(0x1ADB, 0x0001) }, /* Schweitzer Engineering C662 Cable */
 	{ USB_DEVICE(0x1B1C, 0x1C00) }, /* Corsair USB Dongle */
 	{ USB_DEVICE(0x1BA4, 0x0002) },	/* Silicon Labs 358x factory default */

diff --git a/drivers/usb/serial/mxu11x0.c b/drivers/usb/serial/mxu11x0.c
deleted file mode 100644
index 6196073..0000000
--- a/drivers/usb/serial/mxu11x0.c
+++ /dev/null

@@ -1,1006 +0,0 @@
-/*
- * USB Moxa UPORT 11x0 Serial Driver
- *
- * Copyright (C) 2007 MOXA Technologies Co., Ltd.
- * Copyright (C) 2015 Mathieu Othacehe <m.othacehe@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- *
- * Supports the following Moxa USB to serial converters:
- *  UPort 1110,  1 port RS-232 USB to Serial Hub.
- *  UPort 1130,  1 port RS-422/485 USB to Serial Hub.
- *  UPort 1130I, 1 port RS-422/485 USB to Serial Hub with isolation
- *    protection.
- *  UPort 1150,  1 port RS-232/422/485 USB to Serial Hub.
- *  UPort 1150I, 1 port RS-232/422/485 USB to Serial Hub with isolation
- *  protection.
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/firmware.h>
-#include <linux/jiffies.h>
-#include <linux/serial.h>
-#include <linux/serial_reg.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/mutex.h>
-#include <linux/tty.h>
-#include <linux/tty_driver.h>
-#include <linux/tty_flip.h>
-#include <linux/uaccess.h>
-#include <linux/usb.h>
-#include <linux/usb/serial.h>
-
-/* Vendor and product ids */
-#define MXU1_VENDOR_ID				0x110a
-#define MXU1_1110_PRODUCT_ID			0x1110
-#define MXU1_1130_PRODUCT_ID			0x1130
-#define MXU1_1150_PRODUCT_ID			0x1150
-#define MXU1_1151_PRODUCT_ID			0x1151
-#define MXU1_1131_PRODUCT_ID			0x1131
-
-/* Commands */
-#define MXU1_GET_VERSION			0x01
-#define MXU1_GET_PORT_STATUS			0x02
-#define MXU1_GET_PORT_DEV_INFO			0x03
-#define MXU1_GET_CONFIG				0x04
-#define MXU1_SET_CONFIG				0x05
-#define MXU1_OPEN_PORT				0x06
-#define MXU1_CLOSE_PORT				0x07
-#define MXU1_START_PORT				0x08
-#define MXU1_STOP_PORT				0x09
-#define MXU1_TEST_PORT				0x0A
-#define MXU1_PURGE_PORT				0x0B
-#define MXU1_RESET_EXT_DEVICE			0x0C
-#define MXU1_GET_OUTQUEUE			0x0D
-#define MXU1_WRITE_DATA				0x80
-#define MXU1_READ_DATA				0x81
-#define MXU1_REQ_TYPE_CLASS			0x82
-
-/* Module identifiers */
-#define MXU1_I2C_PORT				0x01
-#define MXU1_IEEE1284_PORT			0x02
-#define MXU1_UART1_PORT				0x03
-#define MXU1_UART2_PORT				0x04
-#define MXU1_RAM_PORT				0x05
-
-/* Modem status */
-#define MXU1_MSR_DELTA_CTS			0x01
-#define MXU1_MSR_DELTA_DSR			0x02
-#define MXU1_MSR_DELTA_RI			0x04
-#define MXU1_MSR_DELTA_CD			0x08
-#define MXU1_MSR_CTS				0x10
-#define MXU1_MSR_DSR				0x20
-#define MXU1_MSR_RI				0x40
-#define MXU1_MSR_CD				0x80
-#define MXU1_MSR_DELTA_MASK			0x0F
-#define MXU1_MSR_MASK				0xF0
-
-/* Line status */
-#define MXU1_LSR_OVERRUN_ERROR			0x01
-#define MXU1_LSR_PARITY_ERROR			0x02
-#define MXU1_LSR_FRAMING_ERROR			0x04
-#define MXU1_LSR_BREAK				0x08
-#define MXU1_LSR_ERROR				0x0F
-#define MXU1_LSR_RX_FULL			0x10
-#define MXU1_LSR_TX_EMPTY			0x20
-
-/* Modem control */
-#define MXU1_MCR_LOOP				0x04
-#define MXU1_MCR_DTR				0x10
-#define MXU1_MCR_RTS				0x20
-
-/* Mask settings */
-#define MXU1_UART_ENABLE_RTS_IN			0x0001
-#define MXU1_UART_DISABLE_RTS			0x0002
-#define MXU1_UART_ENABLE_PARITY_CHECKING	0x0008
-#define MXU1_UART_ENABLE_DSR_OUT		0x0010
-#define MXU1_UART_ENABLE_CTS_OUT		0x0020
-#define MXU1_UART_ENABLE_X_OUT			0x0040
-#define MXU1_UART_ENABLE_XA_OUT			0x0080
-#define MXU1_UART_ENABLE_X_IN			0x0100
-#define MXU1_UART_ENABLE_DTR_IN			0x0800
-#define MXU1_UART_DISABLE_DTR			0x1000
-#define MXU1_UART_ENABLE_MS_INTS		0x2000
-#define MXU1_UART_ENABLE_AUTO_START_DMA		0x4000
-#define MXU1_UART_SEND_BREAK_SIGNAL		0x8000
-
-/* Parity */
-#define MXU1_UART_NO_PARITY			0x00
-#define MXU1_UART_ODD_PARITY			0x01
-#define MXU1_UART_EVEN_PARITY			0x02
-#define MXU1_UART_MARK_PARITY			0x03
-#define MXU1_UART_SPACE_PARITY			0x04
-
-/* Stop bits */
-#define MXU1_UART_1_STOP_BITS			0x00
-#define MXU1_UART_1_5_STOP_BITS			0x01
-#define MXU1_UART_2_STOP_BITS			0x02
-
-/* Bits per character */
-#define MXU1_UART_5_DATA_BITS			0x00
-#define MXU1_UART_6_DATA_BITS			0x01
-#define MXU1_UART_7_DATA_BITS			0x02
-#define MXU1_UART_8_DATA_BITS			0x03
-
-/* Operation modes */
-#define MXU1_UART_232				0x00
-#define MXU1_UART_485_RECEIVER_DISABLED		0x01
-#define MXU1_UART_485_RECEIVER_ENABLED		0x02
-
-/* Pipe transfer mode and timeout */
-#define MXU1_PIPE_MODE_CONTINUOUS		0x01
-#define MXU1_PIPE_MODE_MASK			0x03
-#define MXU1_PIPE_TIMEOUT_MASK			0x7C
-#define MXU1_PIPE_TIMEOUT_ENABLE		0x80
-
-/* Config struct */
-struct mxu1_uart_config {
-	__be16	wBaudRate;
-	__be16	wFlags;
-	u8	bDataBits;
-	u8	bParity;
-	u8	bStopBits;
-	char	cXon;
-	char	cXoff;
-	u8	bUartMode;
-} __packed;
-
-/* Purge modes */
-#define MXU1_PURGE_OUTPUT			0x00
-#define MXU1_PURGE_INPUT			0x80
-
-/* Read/Write data */
-#define MXU1_RW_DATA_ADDR_SFR			0x10
-#define MXU1_RW_DATA_ADDR_IDATA			0x20
-#define MXU1_RW_DATA_ADDR_XDATA			0x30
-#define MXU1_RW_DATA_ADDR_CODE			0x40
-#define MXU1_RW_DATA_ADDR_GPIO			0x50
-#define MXU1_RW_DATA_ADDR_I2C			0x60
-#define MXU1_RW_DATA_ADDR_FLASH			0x70
-#define MXU1_RW_DATA_ADDR_DSP			0x80
-
-#define MXU1_RW_DATA_UNSPECIFIED		0x00
-#define MXU1_RW_DATA_BYTE			0x01
-#define MXU1_RW_DATA_WORD			0x02
-#define MXU1_RW_DATA_DOUBLE_WORD		0x04
-
-struct mxu1_write_data_bytes {
-	u8	bAddrType;
-	u8	bDataType;
-	u8	bDataCounter;
-	__be16	wBaseAddrHi;
-	__be16	wBaseAddrLo;
-	u8	bData[0];
-} __packed;
-
-/* Interrupt codes */
-#define MXU1_CODE_HARDWARE_ERROR		0xFF
-#define MXU1_CODE_DATA_ERROR			0x03
-#define MXU1_CODE_MODEM_STATUS			0x04
-
-static inline int mxu1_get_func_from_code(unsigned char code)
-{
-	return code & 0x0f;
-}
-
-/* Download firmware max packet size */
-#define MXU1_DOWNLOAD_MAX_PACKET_SIZE		64
-
-/* Firmware image header */
-struct mxu1_firmware_header {
-	__le16 wLength;
-	u8 bCheckSum;
-} __packed;
-
-#define MXU1_UART_BASE_ADDR	    0xFFA0
-#define MXU1_UART_OFFSET_MCR	    0x0004
-
-#define MXU1_BAUD_BASE              923077
-
-#define MXU1_TRANSFER_TIMEOUT	    2
-#define MXU1_DOWNLOAD_TIMEOUT       1000
-#define MXU1_DEFAULT_CLOSING_WAIT   4000 /* in .01 secs */
-
-struct mxu1_port {
-	u8 msr;
-	u8 mcr;
-	u8 uart_mode;
-	spinlock_t spinlock; /* Protects msr */
-	struct mutex mutex; /* Protects mcr */
-	bool send_break;
-};
-
-struct mxu1_device {
-	u16 mxd_model;
-};
-
-static const struct usb_device_id mxu1_idtable[] = {
-	{ USB_DEVICE(MXU1_VENDOR_ID, MXU1_1110_PRODUCT_ID) },
-	{ USB_DEVICE(MXU1_VENDOR_ID, MXU1_1130_PRODUCT_ID) },
-	{ USB_DEVICE(MXU1_VENDOR_ID, MXU1_1150_PRODUCT_ID) },
-	{ USB_DEVICE(MXU1_VENDOR_ID, MXU1_1151_PRODUCT_ID) },
-	{ USB_DEVICE(MXU1_VENDOR_ID, MXU1_1131_PRODUCT_ID) },
-	{ }
-};
-
-MODULE_DEVICE_TABLE(usb, mxu1_idtable);
-
-/* Write the given buffer out to the control pipe.  */
-static int mxu1_send_ctrl_data_urb(struct usb_serial *serial,
-				   u8 request,
-				   u16 value, u16 index,
-				   void *data, size_t size)
-{
-	int status;
-
-	status = usb_control_msg(serial->dev,
-				 usb_sndctrlpipe(serial->dev, 0),
-				 request,
-				 (USB_DIR_OUT | USB_TYPE_VENDOR |
-				  USB_RECIP_DEVICE), value, index,
-				 data, size,
-				 USB_CTRL_SET_TIMEOUT);
-	if (status < 0) {
-		dev_err(&serial->interface->dev,
-			"%s - usb_control_msg failed: %d\n",
-			__func__, status);
-		return status;
-	}
-
-	if (status != size) {
-		dev_err(&serial->interface->dev,
-			"%s - short write (%d / %zd)\n",
-			__func__, status, size);
-		return -EIO;
-	}
-
-	return 0;
-}
-
-/* Send a vendor request without any data */
-static int mxu1_send_ctrl_urb(struct usb_serial *serial,
-			      u8 request, u16 value, u16 index)
-{
-	return mxu1_send_ctrl_data_urb(serial, request, value, index,
-				       NULL, 0);
-}
-
-static int mxu1_download_firmware(struct usb_serial *serial,
-				  const struct firmware *fw_p)
-{
-	int status = 0;
-	int buffer_size;
-	int pos;
-	int len;
-	int done;
-	u8 cs = 0;
-	u8 *buffer;
-	struct usb_device *dev = serial->dev;
-	struct mxu1_firmware_header *header;
-	unsigned int pipe;
-
-	pipe = usb_sndbulkpipe(dev, serial->port[0]->bulk_out_endpointAddress);
-
-	buffer_size = fw_p->size + sizeof(*header);
-	buffer = kmalloc(buffer_size, GFP_KERNEL);
-	if (!buffer)
-		return -ENOMEM;
-
-	memcpy(buffer, fw_p->data, fw_p->size);
-	memset(buffer + fw_p->size, 0xff, buffer_size - fw_p->size);
-
-	for (pos = sizeof(*header); pos < buffer_size; pos++)
-		cs = (u8)(cs + buffer[pos]);
-
-	header = (struct mxu1_firmware_header *)buffer;
-	header->wLength = cpu_to_le16(buffer_size - sizeof(*header));
-	header->bCheckSum = cs;
-
-	dev_dbg(&dev->dev, "%s - downloading firmware\n", __func__);
-
-	for (pos = 0; pos < buffer_size; pos += done) {
-		len = min(buffer_size - pos, MXU1_DOWNLOAD_MAX_PACKET_SIZE);
-
-		status = usb_bulk_msg(dev, pipe, buffer + pos, len, &done,
-				MXU1_DOWNLOAD_TIMEOUT);
-		if (status)
-			break;
-	}
-
-	kfree(buffer);
-
-	if (status) {
-		dev_err(&dev->dev, "failed to download firmware: %d\n", status);
-		return status;
-	}
-
-	msleep_interruptible(100);
-	usb_reset_device(dev);
-
-	dev_dbg(&dev->dev, "%s - download successful\n", __func__);
-
-	return 0;
-}
-
-static int mxu1_port_probe(struct usb_serial_port *port)
-{
-	struct mxu1_port *mxport;
-	struct mxu1_device *mxdev;
-
-	if (!port->interrupt_in_urb) {
-		dev_err(&port->dev, "no interrupt urb\n");
-		return -ENODEV;
-	}
-
-	mxport = kzalloc(sizeof(struct mxu1_port), GFP_KERNEL);
-	if (!mxport)
-		return -ENOMEM;
-
-	spin_lock_init(&mxport->spinlock);
-	mutex_init(&mxport->mutex);
-
-	mxdev = usb_get_serial_data(port->serial);
-
-	switch (mxdev->mxd_model) {
-	case MXU1_1110_PRODUCT_ID:
-	case MXU1_1150_PRODUCT_ID:
-	case MXU1_1151_PRODUCT_ID:
-		mxport->uart_mode = MXU1_UART_232;
-		break;
-	case MXU1_1130_PRODUCT_ID:
-	case MXU1_1131_PRODUCT_ID:
-		mxport->uart_mode = MXU1_UART_485_RECEIVER_DISABLED;
-		break;
-	}
-
-	usb_set_serial_port_data(port, mxport);
-
-	port->port.closing_wait =
-			msecs_to_jiffies(MXU1_DEFAULT_CLOSING_WAIT * 10);
-	port->port.drain_delay = 1;
-
-	return 0;
-}
-
-static int mxu1_port_remove(struct usb_serial_port *port)
-{
-	struct mxu1_port *mxport;
-
-	mxport = usb_get_serial_port_data(port);
-	kfree(mxport);
-
-	return 0;
-}
-
-static int mxu1_startup(struct usb_serial *serial)
-{
-	struct mxu1_device *mxdev;
-	struct usb_device *dev = serial->dev;
-	struct usb_host_interface *cur_altsetting;
-	char fw_name[32];
-	const struct firmware *fw_p = NULL;
-	int err;
-
-	dev_dbg(&serial->interface->dev, "%s - product 0x%04X, num configurations %d, configuration value %d\n",
-		__func__, le16_to_cpu(dev->descriptor.idProduct),
-		dev->descriptor.bNumConfigurations,
-		dev->actconfig->desc.bConfigurationValue);
-
-	/* create device structure */
-	mxdev = kzalloc(sizeof(struct mxu1_device), GFP_KERNEL);
-	if (!mxdev)
-		return -ENOMEM;
-
-	usb_set_serial_data(serial, mxdev);
-
-	mxdev->mxd_model = le16_to_cpu(dev->descriptor.idProduct);
-
-	cur_altsetting = serial->interface->cur_altsetting;
-
-	/* if we have only 1 configuration, download firmware */
-	if (cur_altsetting->desc.bNumEndpoints == 1) {
-
-		snprintf(fw_name,
-			 sizeof(fw_name),
-			 "moxa/moxa-%04x.fw",
-			 mxdev->mxd_model);
-
-		err = request_firmware(&fw_p, fw_name, &serial->interface->dev);
-		if (err) {
-			dev_err(&serial->interface->dev, "failed to request firmware: %d\n",
-				err);
-			goto err_free_mxdev;
-		}
-
-		err = mxu1_download_firmware(serial, fw_p);
-		if (err)
-			goto err_release_firmware;
-
-		/* device is being reset */
-		err = -ENODEV;
-		goto err_release_firmware;
-	}
-
-	return 0;
-
-err_release_firmware:
-	release_firmware(fw_p);
-err_free_mxdev:
-	kfree(mxdev);
-
-	return err;
-}
-
-static void mxu1_release(struct usb_serial *serial)
-{
-	struct mxu1_device *mxdev;
-
-	mxdev = usb_get_serial_data(serial);
-	kfree(mxdev);
-}
-
-static int mxu1_write_byte(struct usb_serial_port *port, u32 addr,
-			   u8 mask, u8 byte)
-{
-	int status;
-	size_t size;
-	struct mxu1_write_data_bytes *data;
-
-	dev_dbg(&port->dev, "%s - addr 0x%08X, mask 0x%02X, byte 0x%02X\n",
-		__func__, addr, mask, byte);
-
-	size = sizeof(struct mxu1_write_data_bytes) + 2;
-	data = kzalloc(size, GFP_KERNEL);
-	if (!data)
-		return -ENOMEM;
-
-	data->bAddrType = MXU1_RW_DATA_ADDR_XDATA;
-	data->bDataType = MXU1_RW_DATA_BYTE;
-	data->bDataCounter = 1;
-	data->wBaseAddrHi = cpu_to_be16(addr >> 16);
-	data->wBaseAddrLo = cpu_to_be16(addr);
-	data->bData[0] = mask;
-	data->bData[1] = byte;
-
-	status = mxu1_send_ctrl_data_urb(port->serial, MXU1_WRITE_DATA, 0,
-					 MXU1_RAM_PORT, data, size);
-	if (status < 0)
-		dev_err(&port->dev, "%s - failed: %d\n", __func__, status);
-
-	kfree(data);
-
-	return status;
-}
-
-static int mxu1_set_mcr(struct usb_serial_port *port, unsigned int mcr)
-{
-	int status;
-
-	status = mxu1_write_byte(port,
-				 MXU1_UART_BASE_ADDR + MXU1_UART_OFFSET_MCR,
-				 MXU1_MCR_RTS | MXU1_MCR_DTR | MXU1_MCR_LOOP,
-				 mcr);
-	return status;
-}
-
-static void mxu1_set_termios(struct tty_struct *tty,
-			     struct usb_serial_port *port,
-			     struct ktermios *old_termios)
-{
-	struct mxu1_port *mxport = usb_get_serial_port_data(port);
-	struct mxu1_uart_config *config;
-	tcflag_t cflag, iflag;
-	speed_t baud;
-	int status;
-	unsigned int mcr;
-
-	cflag = tty->termios.c_cflag;
-	iflag = tty->termios.c_iflag;
-
-	if (old_termios &&
-	    !tty_termios_hw_change(&tty->termios, old_termios) &&
-	    tty->termios.c_iflag == old_termios->c_iflag) {
-		dev_dbg(&port->dev, "%s - nothing to change\n", __func__);
-		return;
-	}
-
-	dev_dbg(&port->dev,
-		"%s - cflag 0x%08x, iflag 0x%08x\n", __func__, cflag, iflag);
-
-	if (old_termios) {
-		dev_dbg(&port->dev, "%s - old cflag 0x%08x, old iflag 0x%08x\n",
-			__func__,
-			old_termios->c_cflag,
-			old_termios->c_iflag);
-	}
-
-	config = kzalloc(sizeof(*config), GFP_KERNEL);
-	if (!config)
-		return;
-
-	/* these flags must be set */
-	config->wFlags |= MXU1_UART_ENABLE_MS_INTS;
-	config->wFlags |= MXU1_UART_ENABLE_AUTO_START_DMA;
-	if (mxport->send_break)
-		config->wFlags |= MXU1_UART_SEND_BREAK_SIGNAL;
-	config->bUartMode = mxport->uart_mode;
-
-	switch (C_CSIZE(tty)) {
-	case CS5:
-		config->bDataBits = MXU1_UART_5_DATA_BITS;
-		break;
-	case CS6:
-		config->bDataBits = MXU1_UART_6_DATA_BITS;
-		break;
-	case CS7:
-		config->bDataBits = MXU1_UART_7_DATA_BITS;
-		break;
-	default:
-	case CS8:
-		config->bDataBits = MXU1_UART_8_DATA_BITS;
-		break;
-	}
-
-	if (C_PARENB(tty)) {
-		config->wFlags |= MXU1_UART_ENABLE_PARITY_CHECKING;
-		if (C_CMSPAR(tty)) {
-			if (C_PARODD(tty))
-				config->bParity = MXU1_UART_MARK_PARITY;
-			else
-				config->bParity = MXU1_UART_SPACE_PARITY;
-		} else {
-			if (C_PARODD(tty))
-				config->bParity = MXU1_UART_ODD_PARITY;
-			else
-				config->bParity = MXU1_UART_EVEN_PARITY;
-		}
-	} else {
-		config->bParity = MXU1_UART_NO_PARITY;
-	}
-
-	if (C_CSTOPB(tty))
-		config->bStopBits = MXU1_UART_2_STOP_BITS;
-	else
-		config->bStopBits = MXU1_UART_1_STOP_BITS;
-
-	if (C_CRTSCTS(tty)) {
-		/* RTS flow control must be off to drop RTS for baud rate B0 */
-		if (C_BAUD(tty) != B0)
-			config->wFlags |= MXU1_UART_ENABLE_RTS_IN;
-		config->wFlags |= MXU1_UART_ENABLE_CTS_OUT;
-	}
-
-	if (I_IXOFF(tty) || I_IXON(tty)) {
-		config->cXon  = START_CHAR(tty);
-		config->cXoff = STOP_CHAR(tty);
-
-		if (I_IXOFF(tty))
-			config->wFlags |= MXU1_UART_ENABLE_X_IN;
-
-		if (I_IXON(tty))
-			config->wFlags |= MXU1_UART_ENABLE_X_OUT;
-	}
-
-	baud = tty_get_baud_rate(tty);
-	if (!baud)
-		baud = 9600;
-	config->wBaudRate = MXU1_BAUD_BASE / baud;
-
-	dev_dbg(&port->dev, "%s - BaudRate=%d, wBaudRate=%d, wFlags=0x%04X, bDataBits=%d, bParity=%d, bStopBits=%d, cXon=%d, cXoff=%d, bUartMode=%d\n",
-		__func__, baud, config->wBaudRate, config->wFlags,
-		config->bDataBits, config->bParity, config->bStopBits,
-		config->cXon, config->cXoff, config->bUartMode);
-
-	cpu_to_be16s(&config->wBaudRate);
-	cpu_to_be16s(&config->wFlags);
-
-	status = mxu1_send_ctrl_data_urb(port->serial, MXU1_SET_CONFIG, 0,
-					 MXU1_UART1_PORT, config,
-					 sizeof(*config));
-	if (status)
-		dev_err(&port->dev, "cannot set config: %d\n", status);
-
-	mutex_lock(&mxport->mutex);
-	mcr = mxport->mcr;
-
-	if (C_BAUD(tty) == B0)
-		mcr &= ~(MXU1_MCR_DTR | MXU1_MCR_RTS);
-	else if (old_termios && (old_termios->c_cflag & CBAUD) == B0)
-		mcr |= MXU1_MCR_DTR | MXU1_MCR_RTS;
-
-	status = mxu1_set_mcr(port, mcr);
-	if (status)
-		dev_err(&port->dev, "cannot set modem control: %d\n", status);
-	else
-		mxport->mcr = mcr;
-
-	mutex_unlock(&mxport->mutex);
-
-	kfree(config);
-}
-
-static int mxu1_get_serial_info(struct usb_serial_port *port,
-				struct serial_struct __user *ret_arg)
-{
-	struct serial_struct ret_serial;
-	unsigned cwait;
-
-	if (!ret_arg)
-		return -EFAULT;
-
-	cwait = port->port.closing_wait;
-	if (cwait != ASYNC_CLOSING_WAIT_NONE)
-		cwait = jiffies_to_msecs(cwait) / 10;
-
-	memset(&ret_serial, 0, sizeof(ret_serial));
-
-	ret_serial.type = PORT_16550A;
-	ret_serial.line = port->minor;
-	ret_serial.port = 0;
-	ret_serial.xmit_fifo_size = port->bulk_out_size;
-	ret_serial.baud_base = MXU1_BAUD_BASE;
-	ret_serial.close_delay = 5*HZ;
-	ret_serial.closing_wait = cwait;
-
-	if (copy_to_user(ret_arg, &ret_serial, sizeof(*ret_arg)))
-		return -EFAULT;
-
-	return 0;
-}
-
-
-static int mxu1_set_serial_info(struct usb_serial_port *port,
-				struct serial_struct __user *new_arg)
-{
-	struct serial_struct new_serial;
-	unsigned cwait;
-
-	if (copy_from_user(&new_serial, new_arg, sizeof(new_serial)))
-		return -EFAULT;
-
-	cwait = new_serial.closing_wait;
-	if (cwait != ASYNC_CLOSING_WAIT_NONE)
-		cwait = msecs_to_jiffies(10 * new_serial.closing_wait);
-
-	port->port.closing_wait = cwait;
-
-	return 0;
-}
-
-static int mxu1_ioctl(struct tty_struct *tty,
-		      unsigned int cmd, unsigned long arg)
-{
-	struct usb_serial_port *port = tty->driver_data;
-
-	switch (cmd) {
-	case TIOCGSERIAL:
-		return mxu1_get_serial_info(port,
-					    (struct serial_struct __user *)arg);
-	case TIOCSSERIAL:
-		return mxu1_set_serial_info(port,
-					    (struct serial_struct __user *)arg);
-	}
-
-	return -ENOIOCTLCMD;
-}
-
-static int mxu1_tiocmget(struct tty_struct *tty)
-{
-	struct usb_serial_port *port = tty->driver_data;
-	struct mxu1_port *mxport = usb_get_serial_port_data(port);
-	unsigned int result;
-	unsigned int msr;
-	unsigned int mcr;
-	unsigned long flags;
-
-	mutex_lock(&mxport->mutex);
-	spin_lock_irqsave(&mxport->spinlock, flags);
-
-	msr = mxport->msr;
-	mcr = mxport->mcr;
-
-	spin_unlock_irqrestore(&mxport->spinlock, flags);
-	mutex_unlock(&mxport->mutex);
-
-	result = ((mcr & MXU1_MCR_DTR)	? TIOCM_DTR	: 0) |
-		 ((mcr & MXU1_MCR_RTS)	? TIOCM_RTS	: 0) |
-		 ((mcr & MXU1_MCR_LOOP) ? TIOCM_LOOP	: 0) |
-		 ((msr & MXU1_MSR_CTS)	? TIOCM_CTS	: 0) |
-		 ((msr & MXU1_MSR_CD)	? TIOCM_CAR	: 0) |
-		 ((msr & MXU1_MSR_RI)	? TIOCM_RI	: 0) |
-		 ((msr & MXU1_MSR_DSR)	? TIOCM_DSR	: 0);
-
-	dev_dbg(&port->dev, "%s - 0x%04X\n", __func__, result);
-
-	return result;
-}
-
-static int mxu1_tiocmset(struct tty_struct *tty,
-			 unsigned int set, unsigned int clear)
-{
-	struct usb_serial_port *port = tty->driver_data;
-	struct mxu1_port *mxport = usb_get_serial_port_data(port);
-	int err;
-	unsigned int mcr;
-
-	mutex_lock(&mxport->mutex);
-	mcr = mxport->mcr;
-
-	if (set & TIOCM_RTS)
-		mcr |= MXU1_MCR_RTS;
-	if (set & TIOCM_DTR)
-		mcr |= MXU1_MCR_DTR;
-	if (set & TIOCM_LOOP)
-		mcr |= MXU1_MCR_LOOP;
-
-	if (clear & TIOCM_RTS)
-		mcr &= ~MXU1_MCR_RTS;
-	if (clear & TIOCM_DTR)
-		mcr &= ~MXU1_MCR_DTR;
-	if (clear & TIOCM_LOOP)
-		mcr &= ~MXU1_MCR_LOOP;
-
-	err = mxu1_set_mcr(port, mcr);
-	if (!err)
-		mxport->mcr = mcr;
-
-	mutex_unlock(&mxport->mutex);
-
-	return err;
-}
-
-static void mxu1_break(struct tty_struct *tty, int break_state)
-{
-	struct usb_serial_port *port = tty->driver_data;
-	struct mxu1_port *mxport = usb_get_serial_port_data(port);
-
-	if (break_state == -1)
-		mxport->send_break = true;
-	else
-		mxport->send_break = false;
-
-	mxu1_set_termios(tty, port, NULL);
-}
-
-static int mxu1_open(struct tty_struct *tty, struct usb_serial_port *port)
-{
-	struct mxu1_port *mxport = usb_get_serial_port_data(port);
-	struct usb_serial *serial = port->serial;
-	int status;
-	u16 open_settings;
-
-	open_settings = (MXU1_PIPE_MODE_CONTINUOUS |
-			 MXU1_PIPE_TIMEOUT_ENABLE |
-			 (MXU1_TRANSFER_TIMEOUT << 2));
-
-	mxport->msr = 0;
-
-	status = usb_submit_urb(port->interrupt_in_urb, GFP_KERNEL);
-	if (status) {
-		dev_err(&port->dev, "failed to submit interrupt urb: %d\n",
-			status);
-		return status;
-	}
-
-	if (tty)
-		mxu1_set_termios(tty, port, NULL);
-
-	status = mxu1_send_ctrl_urb(serial, MXU1_OPEN_PORT,
-				    open_settings, MXU1_UART1_PORT);
-	if (status) {
-		dev_err(&port->dev, "cannot send open command: %d\n", status);
-		goto unlink_int_urb;
-	}
-
-	status = mxu1_send_ctrl_urb(serial, MXU1_START_PORT,
-				    0, MXU1_UART1_PORT);
-	if (status) {
-		dev_err(&port->dev, "cannot send start command: %d\n", status);
-		goto unlink_int_urb;
-	}
-
-	status = mxu1_send_ctrl_urb(serial, MXU1_PURGE_PORT,
-				    MXU1_PURGE_INPUT, MXU1_UART1_PORT);
-	if (status) {
-		dev_err(&port->dev, "cannot clear input buffers: %d\n",
-			status);
-
-		goto unlink_int_urb;
-	}
-
-	status = mxu1_send_ctrl_urb(serial, MXU1_PURGE_PORT,
-				    MXU1_PURGE_OUTPUT, MXU1_UART1_PORT);
-	if (status) {
-		dev_err(&port->dev, "cannot clear output buffers: %d\n",
-			status);
-
-		goto unlink_int_urb;
-	}
-
-	/*
-	 * reset the data toggle on the bulk endpoints to work around bug in
-	 * host controllers where things get out of sync some times
-	 */
-	usb_clear_halt(serial->dev, port->write_urb->pipe);
-	usb_clear_halt(serial->dev, port->read_urb->pipe);
-
-	if (tty)
-		mxu1_set_termios(tty, port, NULL);
-
-	status = mxu1_send_ctrl_urb(serial, MXU1_OPEN_PORT,
-				    open_settings, MXU1_UART1_PORT);
-	if (status) {
-		dev_err(&port->dev, "cannot send open command: %d\n", status);
-		goto unlink_int_urb;
-	}
-
-	status = mxu1_send_ctrl_urb(serial, MXU1_START_PORT,
-				    0, MXU1_UART1_PORT);
-	if (status) {
-		dev_err(&port->dev, "cannot send start command: %d\n", status);
-		goto unlink_int_urb;
-	}
-
-	status = usb_serial_generic_open(tty, port);
-	if (status)
-		goto unlink_int_urb;
-
-	return 0;
-
-unlink_int_urb:
-	usb_kill_urb(port->interrupt_in_urb);
-
-	return status;
-}
-
-static void mxu1_close(struct usb_serial_port *port)
-{
-	int status;
-
-	usb_serial_generic_close(port);
-	usb_kill_urb(port->interrupt_in_urb);
-
-	status = mxu1_send_ctrl_urb(port->serial, MXU1_CLOSE_PORT,
-				    0, MXU1_UART1_PORT);
-	if (status) {
-		dev_err(&port->dev, "failed to send close port command: %d\n",
-			status);
-	}
-}
-
-static void mxu1_handle_new_msr(struct usb_serial_port *port, u8 msr)
-{
-	struct mxu1_port *mxport = usb_get_serial_port_data(port);
-	struct async_icount *icount;
-	unsigned long flags;
-
-	dev_dbg(&port->dev, "%s - msr 0x%02X\n", __func__, msr);
-
-	spin_lock_irqsave(&mxport->spinlock, flags);
-	mxport->msr = msr & MXU1_MSR_MASK;
-	spin_unlock_irqrestore(&mxport->spinlock, flags);
-
-	if (msr & MXU1_MSR_DELTA_MASK) {
-		icount = &port->icount;
-		if (msr & MXU1_MSR_DELTA_CTS)
-			icount->cts++;
-		if (msr & MXU1_MSR_DELTA_DSR)
-			icount->dsr++;
-		if (msr & MXU1_MSR_DELTA_CD)
-			icount->dcd++;
-		if (msr & MXU1_MSR_DELTA_RI)
-			icount->rng++;
-
-		wake_up_interruptible(&port->port.delta_msr_wait);
-	}
-}
-
-static void mxu1_interrupt_callback(struct urb *urb)
-{
-	struct usb_serial_port *port = urb->context;
-	unsigned char *data = urb->transfer_buffer;
-	int length = urb->actual_length;
-	int function;
-	int status;
-	u8 msr;
-
-	switch (urb->status) {
-	case 0:
-		break;
-	case -ECONNRESET:
-	case -ENOENT:
-	case -ESHUTDOWN:
-		dev_dbg(&port->dev, "%s - urb shutting down: %d\n",
-			__func__, urb->status);
-		return;
-	default:
-		dev_dbg(&port->dev, "%s - nonzero urb status: %d\n",
-			__func__, urb->status);
-		goto exit;
-	}
-
-	if (length != 2) {
-		dev_dbg(&port->dev, "%s - bad packet size: %d\n",
-			__func__, length);
-		goto exit;
-	}
-
-	if (data[0] == MXU1_CODE_HARDWARE_ERROR) {
-		dev_err(&port->dev, "hardware error: %d\n", data[1]);
-		goto exit;
-	}
-
-	function = mxu1_get_func_from_code(data[0]);
-
-	dev_dbg(&port->dev, "%s - function %d, data 0x%02X\n",
-		 __func__, function, data[1]);
-
-	switch (function) {
-	case MXU1_CODE_DATA_ERROR:
-		dev_dbg(&port->dev, "%s - DATA ERROR, data 0x%02X\n",
-			 __func__, data[1]);
-		break;
-
-	case MXU1_CODE_MODEM_STATUS:
-		msr = data[1];
-		mxu1_handle_new_msr(port, msr);
-		break;
-
-	default:
-		dev_err(&port->dev, "unknown interrupt code: 0x%02X\n",
-			data[1]);
-		break;
-	}
-
-exit:
-	status = usb_submit_urb(urb, GFP_ATOMIC);
-	if (status) {
-		dev_err(&port->dev, "resubmit interrupt urb failed: %d\n",
-			status);
-	}
-}
-
-static struct usb_serial_driver mxu11x0_device = {
-	.driver = {
-		.owner		= THIS_MODULE,
-		.name		= "mxu11x0",
-	},
-	.description		= "MOXA UPort 11x0",
-	.id_table		= mxu1_idtable,
-	.num_ports		= 1,
-	.port_probe             = mxu1_port_probe,
-	.port_remove            = mxu1_port_remove,
-	.attach			= mxu1_startup,
-	.release                = mxu1_release,
-	.open			= mxu1_open,
-	.close			= mxu1_close,
-	.ioctl			= mxu1_ioctl,
-	.set_termios		= mxu1_set_termios,
-	.tiocmget		= mxu1_tiocmget,
-	.tiocmset		= mxu1_tiocmset,
-	.tiocmiwait		= usb_serial_generic_tiocmiwait,
-	.get_icount		= usb_serial_generic_get_icount,
-	.break_ctl		= mxu1_break,
-	.read_int_callback	= mxu1_interrupt_callback,
-};
-
-static struct usb_serial_driver *const serial_drivers[] = {
-	&mxu11x0_device, NULL
-};
-
-module_usb_serial_driver(serial_drivers, mxu1_idtable);
-
-MODULE_AUTHOR("Mathieu Othacehe <m.othacehe@gmail.com>");
-MODULE_DESCRIPTION("MOXA UPort 11x0 USB to Serial Hub Driver");
-MODULE_LICENSE("GPL");
-MODULE_FIRMWARE("moxa/moxa-1110.fw");
-MODULE_FIRMWARE("moxa/moxa-1130.fw");
-MODULE_FIRMWARE("moxa/moxa-1131.fw");
-MODULE_FIRMWARE("moxa/moxa-1150.fw");
-MODULE_FIRMWARE("moxa/moxa-1151.fw");

diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index db86e51..348e198 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c

@@ -270,6 +270,7 @@
 #define TELIT_PRODUCT_UE910_V2			0x1012
 #define TELIT_PRODUCT_LE922_USBCFG0		0x1042
 #define TELIT_PRODUCT_LE922_USBCFG3		0x1043
+#define TELIT_PRODUCT_LE922_USBCFG5		0x1045
 #define TELIT_PRODUCT_LE920			0x1200
 #define TELIT_PRODUCT_LE910			0x1201
 
@@ -315,6 +316,7 @@
 #define TOSHIBA_PRODUCT_G450			0x0d45
 
 #define ALINK_VENDOR_ID				0x1e0e
+#define SIMCOM_PRODUCT_SIM7100E			0x9001 /* Yes, ALINK_VENDOR_ID */
 #define ALINK_PRODUCT_PH300			0x9100
 #define ALINK_PRODUCT_3GU			0x9200
 
@@ -607,6 +609,10 @@
 	.reserved = BIT(3) | BIT(4),
 };
 
+static const struct option_blacklist_info simcom_sim7100e_blacklist = {
+	.reserved = BIT(5) | BIT(6),
+};
+
 static const struct option_blacklist_info telit_le910_blacklist = {
 	.sendsetup = BIT(0),
 	.reserved = BIT(1) | BIT(2),
@@ -1122,9 +1128,13 @@
 	{ USB_DEVICE(KYOCERA_VENDOR_ID, KYOCERA_PRODUCT_KPC650) },
 	{ USB_DEVICE(KYOCERA_VENDOR_ID, KYOCERA_PRODUCT_KPC680) },
 	{ USB_DEVICE(QUALCOMM_VENDOR_ID, 0x6000)}, /* ZTE AC8700 */
+	{ USB_DEVICE_AND_INTERFACE_INFO(QUALCOMM_VENDOR_ID, 0x6001, 0xff, 0xff, 0xff), /* 4G LTE usb-modem U901 */
+	  .driver_info = (kernel_ulong_t)&net_intf3_blacklist },
 	{ USB_DEVICE(QUALCOMM_VENDOR_ID, 0x6613)}, /* Onda H600/ZTE MF330 */
 	{ USB_DEVICE(QUALCOMM_VENDOR_ID, 0x0023)}, /* ONYX 3G device */
 	{ USB_DEVICE(QUALCOMM_VENDOR_ID, 0x9000)}, /* SIMCom SIM5218 */
+	{ USB_DEVICE(QUALCOMM_VENDOR_ID, 0x9003), /* Quectel UC20 */
+	  .driver_info = (kernel_ulong_t)&net_intf4_blacklist },
 	{ USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6001) },
 	{ USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CMU_300) },
 	{ USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6003),
@@ -1176,6 +1186,8 @@
 		.driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg0 },
 	{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG3),
 		.driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg3 },
+	{ USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG5, 0xff),
+		.driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg0 },
 	{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910),
 		.driver_info = (kernel_ulong_t)&telit_le910_blacklist },
 	{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920),
@@ -1645,6 +1657,8 @@
 	{ USB_DEVICE(ALINK_VENDOR_ID, 0x9000) },
 	{ USB_DEVICE(ALINK_VENDOR_ID, ALINK_PRODUCT_PH300) },
 	{ USB_DEVICE_AND_INTERFACE_INFO(ALINK_VENDOR_ID, ALINK_PRODUCT_3GU, 0xff, 0xff, 0xff) },
+	{ USB_DEVICE(ALINK_VENDOR_ID, SIMCOM_PRODUCT_SIM7100E),
+	  .driver_info = (kernel_ulong_t)&simcom_sim7100e_blacklist },
 	{ USB_DEVICE(ALCATEL_VENDOR_ID, ALCATEL_PRODUCT_X060S_X200),
 	  .driver_info = (kernel_ulong_t)&alcatel_x200_blacklist
 	},

diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c
index 9919d2a..1bc6089 100644
--- a/drivers/usb/serial/qcserial.c
+++ b/drivers/usb/serial/qcserial.c

@@ -157,14 +157,17 @@
 	{DEVICE_SWI(0x1199, 0x9056)},	/* Sierra Wireless Modem */
 	{DEVICE_SWI(0x1199, 0x9060)},	/* Sierra Wireless Modem */
 	{DEVICE_SWI(0x1199, 0x9061)},	/* Sierra Wireless Modem */
-	{DEVICE_SWI(0x1199, 0x9070)},	/* Sierra Wireless MC74xx/EM74xx */
-	{DEVICE_SWI(0x1199, 0x9071)},	/* Sierra Wireless MC74xx/EM74xx */
+	{DEVICE_SWI(0x1199, 0x9070)},	/* Sierra Wireless MC74xx */
+	{DEVICE_SWI(0x1199, 0x9071)},	/* Sierra Wireless MC74xx */
+	{DEVICE_SWI(0x1199, 0x9078)},	/* Sierra Wireless EM74xx */
+	{DEVICE_SWI(0x1199, 0x9079)},	/* Sierra Wireless EM74xx */
 	{DEVICE_SWI(0x413c, 0x81a2)},	/* Dell Wireless 5806 Gobi(TM) 4G LTE Mobile Broadband Card */
 	{DEVICE_SWI(0x413c, 0x81a3)},	/* Dell Wireless 5570 HSPA+ (42Mbps) Mobile Broadband Card */
 	{DEVICE_SWI(0x413c, 0x81a4)},	/* Dell Wireless 5570e HSPA+ (42Mbps) Mobile Broadband Card */
 	{DEVICE_SWI(0x413c, 0x81a8)},	/* Dell Wireless 5808 Gobi(TM) 4G LTE Mobile Broadband Card */
 	{DEVICE_SWI(0x413c, 0x81a9)},	/* Dell Wireless 5808e Gobi(TM) 4G LTE Mobile Broadband Card */
 	{DEVICE_SWI(0x413c, 0x81b1)},	/* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card */
+	{DEVICE_SWI(0x413c, 0x81b3)},	/* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card (rev3) */
 
 	/* Huawei devices */
 	{DEVICE_HWI(0x03f0, 0x581d)},	/* HP lt4112 LTE/HSPA+ Gobi 4G Modem (Huawei me906e) */

diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 2760a7b..8c80a48 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c

@@ -446,7 +446,8 @@
 		info.num_regions = VFIO_PCI_NUM_REGIONS;
 		info.num_irqs = VFIO_PCI_NUM_IRQS;
 
-		return copy_to_user((void __user *)arg, &info, minsz);
+		return copy_to_user((void __user *)arg, &info, minsz) ?
+			-EFAULT : 0;
 
 	} else if (cmd == VFIO_DEVICE_GET_REGION_INFO) {
 		struct pci_dev *pdev = vdev->pdev;
@@ -520,7 +521,8 @@
 			return -EINVAL;
 		}
 
-		return copy_to_user((void __user *)arg, &info, minsz);
+		return copy_to_user((void __user *)arg, &info, minsz) ?
+			-EFAULT : 0;
 
 	} else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) {
 		struct vfio_irq_info info;
@@ -555,7 +557,8 @@
 		else
 			info.flags |= VFIO_IRQ_INFO_NORESIZE;
 
-		return copy_to_user((void __user *)arg, &info, minsz);
+		return copy_to_user((void __user *)arg, &info, minsz) ?
+			-EFAULT : 0;
 
 	} else if (cmd == VFIO_DEVICE_SET_IRQS) {
 		struct vfio_irq_set hdr;

diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c
index 418cdd9..e65b142 100644
--- a/drivers/vfio/platform/vfio_platform_common.c
+++ b/drivers/vfio/platform/vfio_platform_common.c

@@ -219,7 +219,8 @@
 		info.num_regions = vdev->num_regions;
 		info.num_irqs = vdev->num_irqs;
 
-		return copy_to_user((void __user *)arg, &info, minsz);
+		return copy_to_user((void __user *)arg, &info, minsz) ?
+			-EFAULT : 0;
 
 	} else if (cmd == VFIO_DEVICE_GET_REGION_INFO) {
 		struct vfio_region_info info;
@@ -240,7 +241,8 @@
 		info.size = vdev->regions[info.index].size;
 		info.flags = vdev->regions[info.index].flags;
 
-		return copy_to_user((void __user *)arg, &info, minsz);
+		return copy_to_user((void __user *)arg, &info, minsz) ?
+			-EFAULT : 0;
 
 	} else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) {
 		struct vfio_irq_info info;
@@ -259,7 +261,8 @@
 		info.flags = vdev->irqs[info.index].flags;
 		info.count = vdev->irqs[info.index].count;
 
-		return copy_to_user((void __user *)arg, &info, minsz);
+		return copy_to_user((void __user *)arg, &info, minsz) ?
+			-EFAULT : 0;
 
 	} else if (cmd == VFIO_DEVICE_SET_IRQS) {
 		struct vfio_irq_set hdr;

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 6f1ea3d..75b24e9 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c

@@ -999,7 +999,8 @@
 
 		info.iova_pgsizes = vfio_pgsize_bitmap(iommu);
 
-		return copy_to_user((void __user *)arg, &info, minsz);
+		return copy_to_user((void __user *)arg, &info, minsz) ?
+			-EFAULT : 0;
 
 	} else if (cmd == VFIO_IOMMU_MAP_DMA) {
 		struct vfio_iommu_type1_dma_map map;
@@ -1032,7 +1033,8 @@
 		if (ret)
 			return ret;
 
-		return copy_to_user((void __user *)arg, &unmap, minsz);
+		return copy_to_user((void __user *)arg, &unmap, minsz) ?
+			-EFAULT : 0;
 	}
 
 	return -ENOTTY;

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index ad2146a..236553e 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c

@@ -1156,6 +1156,8 @@
 {
 	__virtio16 last_used_idx;
 	int r;
+	bool is_le = vq->is_le;
+
 	if (!vq->private_data) {
 		vq->is_le = virtio_legacy_is_little_endian();
 		return 0;
@@ -1165,15 +1167,20 @@
 
 	r = vhost_update_used_flags(vq);
 	if (r)
-		return r;
+		goto err;
 	vq->signalled_used_valid = false;
-	if (!access_ok(VERIFY_READ, &vq->used->idx, sizeof vq->used->idx))
-		return -EFAULT;
+	if (!access_ok(VERIFY_READ, &vq->used->idx, sizeof vq->used->idx)) {
+		r = -EFAULT;
+		goto err;
+	}
 	r = __get_user(last_used_idx, &vq->used->idx);
 	if (r)
-		return r;
+		goto err;
 	vq->last_used_idx = vhost16_to_cpu(vq, last_used_idx);
 	return 0;
+err:
+	vq->is_le = is_le;
+	return r;
 }
 EXPORT_SYMBOL_GPL(vhost_init_used);
 

diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c
index 92f3949..6e92917 100644
--- a/drivers/video/console/fbcon.c
+++ b/drivers/video/console/fbcon.c

@@ -709,6 +709,7 @@
 	}
 
 	if (!err) {
+		ops->cur_blink_jiffies = HZ / 5;
 		info->fbcon_par = ops;
 
 		if (vc)
@@ -956,6 +957,7 @@
 	ops->currcon = -1;
 	ops->graphics = 1;
 	ops->cur_rotate = -1;
+	ops->cur_blink_jiffies = HZ / 5;
 	info->fbcon_par = ops;
 	p->con_rotate = initial_rotation;
 	set_blitting_type(vc, info);

diff --git a/drivers/video/fbdev/acornfb.c b/drivers/video/fbdev/acornfb.c
index a305cae..fb75b7e 100644
--- a/drivers/video/fbdev/acornfb.c
+++ b/drivers/video/fbdev/acornfb.c

@@ -1040,8 +1040,8 @@
 		 * for the framebuffer if we are not using
 		 * VRAM.
 		 */
-		base = dma_alloc_writecombine(current_par.dev, size, &handle,
-					      GFP_KERNEL);
+		base = dma_alloc_wc(current_par.dev, size, &handle,
+				    GFP_KERNEL);
 		if (base == NULL) {
 			printk(KERN_ERR "acornfb: unable to allocate screen "
 			       "memory\n");

diff --git a/drivers/video/fbdev/amba-clcd-versatile.c b/drivers/video/fbdev/amba-clcd-versatile.c
index 7a8afcd..a8a22da 100644
--- a/drivers/video/fbdev/amba-clcd-versatile.c
+++ b/drivers/video/fbdev/amba-clcd-versatile.c

@@ -154,8 +154,8 @@
 {
 	dma_addr_t dma;
 
-	fb->fb.screen_base = dma_alloc_writecombine(&fb->dev->dev, framesize,
-						    &dma, GFP_KERNEL);
+	fb->fb.screen_base = dma_alloc_wc(&fb->dev->dev, framesize, &dma,
+					  GFP_KERNEL);
 	if (!fb->fb.screen_base) {
 		pr_err("CLCD: unable to map framebuffer\n");
 		return -ENOMEM;
@@ -169,14 +169,12 @@
 
 int versatile_clcd_mmap_dma(struct clcd_fb *fb, struct vm_area_struct *vma)
 {
-	return dma_mmap_writecombine(&fb->dev->dev, vma,
-				     fb->fb.screen_base,
-				     fb->fb.fix.smem_start,
-				     fb->fb.fix.smem_len);
+	return dma_mmap_wc(&fb->dev->dev, vma, fb->fb.screen_base,
+			   fb->fb.fix.smem_start, fb->fb.fix.smem_len);
 }
 
 void versatile_clcd_remove_dma(struct clcd_fb *fb)
 {
-	dma_free_writecombine(&fb->dev->dev, fb->fb.fix.smem_len,
-			      fb->fb.screen_base, fb->fb.fix.smem_start);
+	dma_free_wc(&fb->dev->dev, fb->fb.fix.smem_len, fb->fb.screen_base,
+		    fb->fb.fix.smem_start);
 }

diff --git a/drivers/video/fbdev/amba-clcd.c b/drivers/video/fbdev/amba-clcd.c
index 9362424..fe274b5 100644
--- a/drivers/video/fbdev/amba-clcd.c
+++ b/drivers/video/fbdev/amba-clcd.c

@@ -774,8 +774,8 @@
 
 static int clcdfb_of_dma_mmap(struct clcd_fb *fb, struct vm_area_struct *vma)
 {
-	return dma_mmap_writecombine(&fb->dev->dev, vma, fb->fb.screen_base,
-			fb->fb.fix.smem_start, fb->fb.fix.smem_len);
+	return dma_mmap_wc(&fb->dev->dev, vma, fb->fb.screen_base,
+			   fb->fb.fix.smem_start, fb->fb.fix.smem_len);
 }
 
 static void clcdfb_of_dma_remove(struct clcd_fb *fb)

diff --git a/drivers/video/fbdev/atmel_lcdfb.c b/drivers/video/fbdev/atmel_lcdfb.c
index 19eb42b..56c60e6 100644
--- a/drivers/video/fbdev/atmel_lcdfb.c
+++ b/drivers/video/fbdev/atmel_lcdfb.c

@@ -414,8 +414,8 @@
 {
 	struct fb_info *info = sinfo->info;
 
-	dma_free_writecombine(info->device, info->fix.smem_len,
-				info->screen_base, info->fix.smem_start);
+	dma_free_wc(info->device, info->fix.smem_len, info->screen_base,
+		    info->fix.smem_start);
 }
 
 /**
@@ -435,8 +435,9 @@
 		    * ((var->bits_per_pixel + 7) / 8));
 	info->fix.smem_len = max(smem_len, sinfo->smem_len);
 
-	info->screen_base = dma_alloc_writecombine(info->device, info->fix.smem_len,
-					(dma_addr_t *)&info->fix.smem_start, GFP_KERNEL);
+	info->screen_base = dma_alloc_wc(info->device, info->fix.smem_len,
+					 (dma_addr_t *)&info->fix.smem_start,
+					 GFP_KERNEL);
 
 	if (!info->screen_base) {
 		return -ENOMEM;

diff --git a/drivers/video/fbdev/ep93xx-fb.c b/drivers/video/fbdev/ep93xx-fb.c
index 5b10810..75f0db2 100644
--- a/drivers/video/fbdev/ep93xx-fb.c
+++ b/drivers/video/fbdev/ep93xx-fb.c

@@ -316,9 +316,8 @@
 	unsigned int offset = vma->vm_pgoff << PAGE_SHIFT;
 
 	if (offset < info->fix.smem_len) {
-		return dma_mmap_writecombine(info->dev, vma, info->screen_base,
-					     info->fix.smem_start,
-					     info->fix.smem_len);
+		return dma_mmap_wc(info->dev, vma, info->screen_base,
+				   info->fix.smem_start, info->fix.smem_len);
 	}
 
 	return -EINVAL;
@@ -428,8 +427,7 @@
 	/* Maximum 16bpp -> used memory is maximum x*y*2 bytes */
 	fb_size = EP93XXFB_MAX_XRES * EP93XXFB_MAX_YRES * 2;
 
-	virt_addr = dma_alloc_writecombine(info->dev, fb_size,
-					   &phys_addr, GFP_KERNEL);
+	virt_addr = dma_alloc_wc(info->dev, fb_size, &phys_addr, GFP_KERNEL);
 	if (!virt_addr)
 		return -ENOMEM;
 

diff --git a/drivers/video/fbdev/gbefb.c b/drivers/video/fbdev/gbefb.c
index b63d55f..1a242b1 100644
--- a/drivers/video/fbdev/gbefb.c
+++ b/drivers/video/fbdev/gbefb.c

@@ -1185,8 +1185,8 @@
 	} else {
 		/* try to allocate memory with the classical allocator
 		 * this has high chance to fail on low memory machines */
-		gbe_mem = dma_alloc_writecombine(NULL, gbe_mem_size,
-						 &gbe_dma_addr, GFP_KERNEL);
+		gbe_mem = dma_alloc_wc(NULL, gbe_mem_size, &gbe_dma_addr,
+				       GFP_KERNEL);
 		if (!gbe_mem) {
 			printk(KERN_ERR "gbefb: couldn't allocate framebuffer memory\n");
 			ret = -ENOMEM;
@@ -1238,7 +1238,7 @@
 out_gbe_unmap:
 	arch_phys_wc_del(par->wc_cookie);
 	if (gbe_dma_addr)
-		dma_free_writecombine(NULL, gbe_mem_size, gbe_mem, gbe_mem_phys);
+		dma_free_wc(NULL, gbe_mem_size, gbe_mem, gbe_mem_phys);
 out_tiles_free:
 	dma_free_coherent(NULL, GBE_TLB_SIZE * sizeof(uint16_t),
 			  (void *)gbe_tiles.cpu, gbe_tiles.dma);
@@ -1259,7 +1259,7 @@
 	gbe_turn_off();
 	arch_phys_wc_del(par->wc_cookie);
 	if (gbe_dma_addr)
-		dma_free_writecombine(NULL, gbe_mem_size, gbe_mem, gbe_mem_phys);
+		dma_free_wc(NULL, gbe_mem_size, gbe_mem, gbe_mem_phys);
 	dma_free_coherent(NULL, GBE_TLB_SIZE * sizeof(uint16_t),
 			  (void *)gbe_tiles.cpu, gbe_tiles.dma);
 	release_mem_region(GBE_BASE, sizeof(struct sgi_gbe));

diff --git a/drivers/video/fbdev/imxfb.c b/drivers/video/fbdev/imxfb.c
index bb2f1e8..76b6a77 100644
--- a/drivers/video/fbdev/imxfb.c
+++ b/drivers/video/fbdev/imxfb.c

@@ -937,8 +937,8 @@
 	}
 
 	fbi->map_size = PAGE_ALIGN(info->fix.smem_len);
-	info->screen_base = dma_alloc_writecombine(&pdev->dev, fbi->map_size,
-						   &fbi->map_dma, GFP_KERNEL);
+	info->screen_base = dma_alloc_wc(&pdev->dev, fbi->map_size,
+					 &fbi->map_dma, GFP_KERNEL);
 
 	if (!info->screen_base) {
 		dev_err(&pdev->dev, "Failed to allocate video RAM: %d\n", ret);
@@ -1005,8 +1005,8 @@
 	if (pdata && pdata->exit)
 		pdata->exit(fbi->pdev);
 failed_platform_init:
-	dma_free_writecombine(&pdev->dev, fbi->map_size, info->screen_base,
-			      fbi->map_dma);
+	dma_free_wc(&pdev->dev, fbi->map_size, info->screen_base,
+		    fbi->map_dma);
 failed_map:
 	iounmap(fbi->regs);
 failed_ioremap:
@@ -1041,8 +1041,8 @@
 	kfree(info->pseudo_palette);
 	framebuffer_release(info);
 
-	dma_free_writecombine(&pdev->dev, fbi->map_size, info->screen_base,
-			      fbi->map_dma);
+	dma_free_wc(&pdev->dev, fbi->map_size, info->screen_base,
+		    fbi->map_dma);
 
 	iounmap(fbi->regs);
 	release_mem_region(res->start, resource_size(res));

diff --git a/drivers/video/fbdev/mx3fb.c b/drivers/video/fbdev/mx3fb.c
index 7947634..f91b1db 100644
--- a/drivers/video/fbdev/mx3fb.c
+++ b/drivers/video/fbdev/mx3fb.c

@@ -1336,9 +1336,8 @@
 	int retval = 0;
 	dma_addr_t addr;
 
-	fbi->screen_base = dma_alloc_writecombine(fbi->device,
-						  mem_len,
-						  &addr, GFP_DMA | GFP_KERNEL);
+	fbi->screen_base = dma_alloc_wc(fbi->device, mem_len, &addr,
+					GFP_DMA | GFP_KERNEL);
 
 	if (!fbi->screen_base) {
 		dev_err(fbi->device, "Cannot allocate %u bytes framebuffer memory\n",
@@ -1378,8 +1377,8 @@
  */
 static int mx3fb_unmap_video_memory(struct fb_info *fbi)
 {
-	dma_free_writecombine(fbi->device, fbi->fix.smem_len,
-			      fbi->screen_base, fbi->fix.smem_start);
+	dma_free_wc(fbi->device, fbi->fix.smem_len, fbi->screen_base,
+		    fbi->fix.smem_start);
 
 	fbi->screen_base = NULL;
 	mutex_lock(&fbi->mm_lock);

diff --git a/drivers/video/fbdev/nuc900fb.c b/drivers/video/fbdev/nuc900fb.c
index 389fa2c..6680eda 100644
--- a/drivers/video/fbdev/nuc900fb.c
+++ b/drivers/video/fbdev/nuc900fb.c

@@ -396,8 +396,8 @@
 	dev_dbg(fbi->dev, "nuc900fb_map_video_memory(fbi=%p) map_size %lu\n",
 		fbi, map_size);
 
-	info->screen_base = dma_alloc_writecombine(fbi->dev, map_size,
-							&map_dma, GFP_KERNEL);
+	info->screen_base = dma_alloc_wc(fbi->dev, map_size, &map_dma,
+					 GFP_KERNEL);
 
 	if (!info->screen_base)
 		return -ENOMEM;
@@ -411,8 +411,8 @@
 static inline void nuc900fb_unmap_video_memory(struct fb_info *info)
 {
 	struct nuc900fb_info *fbi = info->par;
-	dma_free_writecombine(fbi->dev, PAGE_ALIGN(info->fix.smem_len),
-			      info->screen_base, info->fix.smem_start);
+	dma_free_wc(fbi->dev, PAGE_ALIGN(info->fix.smem_len),
+		    info->screen_base, info->fix.smem_start);
 }
 
 static irqreturn_t nuc900fb_irqhandler(int irq, void *dev_id)

diff --git a/drivers/video/fbdev/omap/lcdc.c b/drivers/video/fbdev/omap/lcdc.c
index 6efa259..e3d9b9e 100644
--- a/drivers/video/fbdev/omap/lcdc.c
+++ b/drivers/video/fbdev/omap/lcdc.c

@@ -612,8 +612,8 @@
 
 static int alloc_palette_ram(void)
 {
-	lcdc.palette_virt = dma_alloc_writecombine(lcdc.fbdev->dev,
-		MAX_PALETTE_SIZE, &lcdc.palette_phys, GFP_KERNEL);
+	lcdc.palette_virt = dma_alloc_wc(lcdc.fbdev->dev, MAX_PALETTE_SIZE,
+					 &lcdc.palette_phys, GFP_KERNEL);
 	if (lcdc.palette_virt == NULL) {
 		dev_err(lcdc.fbdev->dev, "failed to alloc palette memory\n");
 		return -ENOMEM;
@@ -625,8 +625,8 @@
 
 static void free_palette_ram(void)
 {
-	dma_free_writecombine(lcdc.fbdev->dev, MAX_PALETTE_SIZE,
-			lcdc.palette_virt, lcdc.palette_phys);
+	dma_free_wc(lcdc.fbdev->dev, MAX_PALETTE_SIZE, lcdc.palette_virt,
+		    lcdc.palette_phys);
 }
 
 static int alloc_fbmem(struct omapfb_mem_region *region)
@@ -642,8 +642,8 @@
 	if (region->size > frame_size)
 		frame_size = region->size;
 	lcdc.vram_size = frame_size;
-	lcdc.vram_virt = dma_alloc_writecombine(lcdc.fbdev->dev,
-			lcdc.vram_size, &lcdc.vram_phys, GFP_KERNEL);
+	lcdc.vram_virt = dma_alloc_wc(lcdc.fbdev->dev, lcdc.vram_size,
+				      &lcdc.vram_phys, GFP_KERNEL);
 	if (lcdc.vram_virt == NULL) {
 		dev_err(lcdc.fbdev->dev, "unable to allocate FB DMA memory\n");
 		return -ENOMEM;
@@ -660,8 +660,8 @@
 
 static void free_fbmem(void)
 {
-	dma_free_writecombine(lcdc.fbdev->dev, lcdc.vram_size,
-			      lcdc.vram_virt, lcdc.vram_phys);
+	dma_free_wc(lcdc.fbdev->dev, lcdc.vram_size, lcdc.vram_virt,
+		    lcdc.vram_phys);
 }
 
 static int setup_fbmem(struct omapfb_mem_desc *req_md)

diff --git a/drivers/video/fbdev/pxa168fb.c b/drivers/video/fbdev/pxa168fb.c
index efb57c0..def3a50 100644
--- a/drivers/video/fbdev/pxa168fb.c
+++ b/drivers/video/fbdev/pxa168fb.c

@@ -680,8 +680,8 @@
 	 */
 	info->fix.smem_len = PAGE_ALIGN(DEFAULT_FB_SIZE);
 
-	info->screen_base = dma_alloc_writecombine(fbi->dev, info->fix.smem_len,
-						&fbi->fb_start_dma, GFP_KERNEL);
+	info->screen_base = dma_alloc_wc(fbi->dev, info->fix.smem_len,
+					 &fbi->fb_start_dma, GFP_KERNEL);
 	if (info->screen_base == NULL) {
 		ret = -ENOMEM;
 		goto failed_free_info;
@@ -804,8 +804,8 @@
 
 	irq = platform_get_irq(pdev, 0);
 
-	dma_free_writecombine(fbi->dev, PAGE_ALIGN(info->fix.smem_len),
-				info->screen_base, info->fix.smem_start);
+	dma_free_wc(fbi->dev, PAGE_ALIGN(info->fix.smem_len),
+		    info->screen_base, info->fix.smem_start);
 
 	clk_disable(fbi->clk);
 

diff --git a/drivers/video/fbdev/pxafb.c b/drivers/video/fbdev/pxafb.c
index 33b2bb3..2c0487f 100644
--- a/drivers/video/fbdev/pxafb.c
+++ b/drivers/video/fbdev/pxafb.c

@@ -2446,8 +2446,8 @@
 
 	free_pages_exact(fbi->video_mem, fbi->video_mem_size);
 
-	dma_free_writecombine(&dev->dev, fbi->dma_buff_size,
-			fbi->dma_buff, fbi->dma_buff_phys);
+	dma_free_wc(&dev->dev, fbi->dma_buff_size, fbi->dma_buff,
+		    fbi->dma_buff_phys);
 
 	iounmap(fbi->mmio_base);
 

diff --git a/drivers/video/fbdev/s3c-fb.c b/drivers/video/fbdev/s3c-fb.c
index f72dd12..5f4f696 100644
--- a/drivers/video/fbdev/s3c-fb.c
+++ b/drivers/video/fbdev/s3c-fb.c

@@ -1105,8 +1105,7 @@
 
 	dev_dbg(sfb->dev, "want %u bytes for window\n", size);
 
-	fbi->screen_base = dma_alloc_writecombine(sfb->dev, size,
-						  &map_dma, GFP_KERNEL);
+	fbi->screen_base = dma_alloc_wc(sfb->dev, size, &map_dma, GFP_KERNEL);
 	if (!fbi->screen_base)
 		return -ENOMEM;
 
@@ -1131,8 +1130,8 @@
 	struct fb_info *fbi = win->fbinfo;
 
 	if (fbi->screen_base)
-		dma_free_writecombine(sfb->dev, PAGE_ALIGN(fbi->fix.smem_len),
-			      fbi->screen_base, fbi->fix.smem_start);
+		dma_free_wc(sfb->dev, PAGE_ALIGN(fbi->fix.smem_len),
+		            fbi->screen_base, fbi->fix.smem_start);
 }
 
 /**

diff --git a/drivers/video/fbdev/s3c2410fb.c b/drivers/video/fbdev/s3c2410fb.c
index d6704ad..0dd86be 100644
--- a/drivers/video/fbdev/s3c2410fb.c
+++ b/drivers/video/fbdev/s3c2410fb.c

@@ -645,8 +645,8 @@
 
 	dprintk("map_video_memory(fbi=%p) map_size %u\n", fbi, map_size);
 
-	info->screen_base = dma_alloc_writecombine(fbi->dev, map_size,
-						   &map_dma, GFP_KERNEL);
+	info->screen_base = dma_alloc_wc(fbi->dev, map_size, &map_dma,
+					 GFP_KERNEL);
 
 	if (info->screen_base) {
 		/* prevent initial garbage on screen */
@@ -667,8 +667,8 @@
 {
 	struct s3c2410fb_info *fbi = info->par;
 
-	dma_free_writecombine(fbi->dev, PAGE_ALIGN(info->fix.smem_len),
-			      info->screen_base, info->fix.smem_start);
+	dma_free_wc(fbi->dev, PAGE_ALIGN(info->fix.smem_len),
+		    info->screen_base, info->fix.smem_start);
 }
 
 static inline void modify_gpio(void __iomem *reg,

diff --git a/drivers/video/fbdev/sa1100fb.c b/drivers/video/fbdev/sa1100fb.c
index dcf774c..fc2aaa5 100644
--- a/drivers/video/fbdev/sa1100fb.c
+++ b/drivers/video/fbdev/sa1100fb.c

@@ -567,8 +567,8 @@
 
 	if (off < info->fix.smem_len) {
 		vma->vm_pgoff += 1; /* skip over the palette */
-		return dma_mmap_writecombine(fbi->dev, vma, fbi->map_cpu,
-					     fbi->map_dma, fbi->map_size);
+		return dma_mmap_wc(fbi->dev, vma, fbi->map_cpu, fbi->map_dma,
+				   fbi->map_size);
 	}
 
 	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
@@ -1099,8 +1099,8 @@
 	 * of the framebuffer.
 	 */
 	fbi->map_size = PAGE_ALIGN(fbi->fb.fix.smem_len + PAGE_SIZE);
-	fbi->map_cpu = dma_alloc_writecombine(fbi->dev, fbi->map_size,
-					      &fbi->map_dma, GFP_KERNEL);
+	fbi->map_cpu = dma_alloc_wc(fbi->dev, fbi->map_size, &fbi->map_dma,
+				    GFP_KERNEL);
 
 	if (fbi->map_cpu) {
 		fbi->fb.screen_base = fbi->map_cpu + PAGE_SIZE;

diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c
index c0c11fa..7760fc1 100644
--- a/drivers/virtio/virtio_pci_modern.c
+++ b/drivers/virtio/virtio_pci_modern.c

@@ -679,7 +679,7 @@
 
 	pci_read_config_dword(pci_dev,
 			      notify + offsetof(struct virtio_pci_notify_cap,
-						cap.length),
+						cap.offset),
 			      &notify_offset);
 
 	/* We don't know how many VQs we'll map, ahead of the time.

diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index 0f6d851..80825a7 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig

@@ -1569,6 +1569,17 @@
 	  machines.  The watchdog timeout period is normally one minute but
 	  can be changed with a boot-time parameter.
 
+config WATCHDOG_SUN4V
+	tristate "Sun4v Watchdog support"
+	select WATCHDOG_CORE
+	depends on SPARC64
+	help
+	  Say Y here to support the hypervisor watchdog capability embedded
+	  in the SPARC sun4v architecture.
+
+	  To compile this driver as a module, choose M here. The module will
+	  be called sun4v_wdt.
+
 # XTENSA Architecture
 
 # Xen Architecture

diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile
index f566753..f6a6a38 100644
--- a/drivers/watchdog/Makefile
+++ b/drivers/watchdog/Makefile

@@ -179,6 +179,7 @@
 
 obj-$(CONFIG_WATCHDOG_RIO)		+= riowd.o
 obj-$(CONFIG_WATCHDOG_CP1XXX)		+= cpwd.o
+obj-$(CONFIG_WATCHDOG_SUN4V)		+= sun4v_wdt.o
 
 # XTENSA Architecture
 

diff --git a/drivers/watchdog/sun4v_wdt.c b/drivers/watchdog/sun4v_wdt.c
new file mode 100644
index 0000000..1467fe5
--- /dev/null
+++ b/drivers/watchdog/sun4v_wdt.c

@@ -0,0 +1,191 @@
+/*
+ *	sun4v watchdog timer
+ *	(c) Copyright 2016 Oracle Corporation
+ *
+ *	Implement a simple watchdog driver using the built-in sun4v hypervisor
+ *	watchdog support. If time expires, the hypervisor stops or bounces
+ *	the guest domain.
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/watchdog.h>
+#include <asm/hypervisor.h>
+#include <asm/mdesc.h>
+
+#define WDT_TIMEOUT			60
+#define WDT_MAX_TIMEOUT			31536000
+#define WDT_MIN_TIMEOUT			1
+#define WDT_DEFAULT_RESOLUTION_MS	1000	/* 1 second */
+
+static unsigned int timeout;
+module_param(timeout, uint, 0);
+MODULE_PARM_DESC(timeout, "Watchdog timeout in seconds (default="
+	__MODULE_STRING(WDT_TIMEOUT) ")");
+
+static bool nowayout = WATCHDOG_NOWAYOUT;
+module_param(nowayout, bool, S_IRUGO);
+MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default="
+	__MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
+
+static int sun4v_wdt_stop(struct watchdog_device *wdd)
+{
+	sun4v_mach_set_watchdog(0, NULL);
+
+	return 0;
+}
+
+static int sun4v_wdt_ping(struct watchdog_device *wdd)
+{
+	int hverr;
+
+	/*
+	 * HV watchdog timer will round up the timeout
+	 * passed in to the nearest multiple of the
+	 * watchdog resolution in milliseconds.
+	 */
+	hverr = sun4v_mach_set_watchdog(wdd->timeout * 1000, NULL);
+	if (hverr == HV_EINVAL)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int sun4v_wdt_set_timeout(struct watchdog_device *wdd,
+				 unsigned int timeout)
+{
+	wdd->timeout = timeout;
+
+	return 0;
+}
+
+static const struct watchdog_info sun4v_wdt_ident = {
+	.options =	WDIOF_SETTIMEOUT |
+			WDIOF_MAGICCLOSE |
+			WDIOF_KEEPALIVEPING,
+	.identity =	"sun4v hypervisor watchdog",
+	.firmware_version = 0,
+};
+
+static struct watchdog_ops sun4v_wdt_ops = {
+	.owner =	THIS_MODULE,
+	.start =	sun4v_wdt_ping,
+	.stop =		sun4v_wdt_stop,
+	.ping =		sun4v_wdt_ping,
+	.set_timeout =	sun4v_wdt_set_timeout,
+};
+
+static struct watchdog_device wdd = {
+	.info = &sun4v_wdt_ident,
+	.ops = &sun4v_wdt_ops,
+	.min_timeout = WDT_MIN_TIMEOUT,
+	.max_timeout = WDT_MAX_TIMEOUT,
+	.timeout = WDT_TIMEOUT,
+};
+
+static int __init sun4v_wdt_init(void)
+{
+	struct mdesc_handle *handle;
+	u64 node;
+	const u64 *value;
+	int err = 0;
+	unsigned long major = 1, minor = 1;
+
+	/*
+	 * There are 2 properties that can be set from the control
+	 * domain for the watchdog.
+	 * watchdog-resolution
+	 * watchdog-max-timeout
+	 *
+	 * We can expect a handle to be returned otherwise something
+	 * serious is wrong. Correct to return -ENODEV here.
+	 */
+
+	handle = mdesc_grab();
+	if (!handle)
+		return -ENODEV;
+
+	node = mdesc_node_by_name(handle, MDESC_NODE_NULL, "platform");
+	err = -ENODEV;
+	if (node == MDESC_NODE_NULL)
+		goto out_release;
+
+	/*
+	 * This is a safe way to validate if we are on the right
+	 * platform.
+	 */
+	if (sun4v_hvapi_register(HV_GRP_CORE, major, &minor))
+		goto out_hv_unreg;
+
+	/* Allow value of watchdog-resolution up to 1s (default) */
+	value = mdesc_get_property(handle, node, "watchdog-resolution", NULL);
+	err = -EINVAL;
+	if (value) {
+		if (*value == 0 ||
+		    *value > WDT_DEFAULT_RESOLUTION_MS)
+			goto out_hv_unreg;
+	}
+
+	value = mdesc_get_property(handle, node, "watchdog-max-timeout", NULL);
+	if (value) {
+		/*
+		 * If the property value (in ms) is smaller than
+		 * min_timeout, return -EINVAL.
+		 */
+		if (*value < wdd.min_timeout * 1000)
+			goto out_hv_unreg;
+
+		/*
+		 * If the property value is smaller than
+		 * default max_timeout  then set watchdog max_timeout to
+		 * the value of the property in seconds.
+		 */
+		if (*value < wdd.max_timeout * 1000)
+			wdd.max_timeout = *value  / 1000;
+	}
+
+	watchdog_init_timeout(&wdd, timeout, NULL);
+
+	watchdog_set_nowayout(&wdd, nowayout);
+
+	err = watchdog_register_device(&wdd);
+	if (err)
+		goto out_hv_unreg;
+
+	pr_info("initialized (timeout=%ds, nowayout=%d)\n",
+		 wdd.timeout, nowayout);
+
+	mdesc_release(handle);
+
+	return 0;
+
+out_hv_unreg:
+	sun4v_hvapi_unregister(HV_GRP_CORE);
+
+out_release:
+	mdesc_release(handle);
+	return err;
+}
+
+static void __exit sun4v_wdt_exit(void)
+{
+	sun4v_hvapi_unregister(HV_GRP_CORE);
+	watchdog_unregister_device(&wdd);
+}
+
+module_init(sun4v_wdt_init);
+module_exit(sun4v_wdt_exit);
+
+MODULE_AUTHOR("Wim Coekaerts <wim.coekaerts@oracle.com>");
+MODULE_DESCRIPTION("sun4v watchdog driver");
+MODULE_LICENSE("GPL");

diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 12eab50..dc4305b 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c

@@ -257,7 +257,7 @@
 		return NULL;
 
 	res->name = "System RAM";
-	res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+	res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
 
 	ret = allocate_resource(&iomem_resource, res,
 				size, 0, -1,

diff --git a/drivers/xen/xen-pciback/pciback_ops.c b/drivers/xen/xen-pciback/pciback_ops.c
index 73dafdc..fb02214 100644
--- a/drivers/xen/xen-pciback/pciback_ops.c
+++ b/drivers/xen/xen-pciback/pciback_ops.c

@@ -227,8 +227,9 @@
 	/*
 	 * PCI_COMMAND_MEMORY must be enabled, otherwise we may not be able
 	 * to access the BARs where the MSI-X entries reside.
+	 * But VF devices are unique in which the PF needs to be checked.
 	 */
-	pci_read_config_word(dev, PCI_COMMAND, &cmd);
+	pci_read_config_word(pci_physfn(dev), PCI_COMMAND, &cmd);
 	if (dev->msi_enabled || !(cmd & PCI_COMMAND_MEMORY))
 		return -ENXIO;
 
@@ -332,6 +333,9 @@
 	struct xen_pcibk_dev_data *dev_data = NULL;
 	struct xen_pci_op *op = &pdev->op;
 	int test_intx = 0;
+#ifdef CONFIG_PCI_MSI
+	unsigned int nr = 0;
+#endif
 
 	*op = pdev->sh_info->op;
 	barrier();
@@ -360,6 +364,7 @@
 			op->err = xen_pcibk_disable_msi(pdev, dev, op);
 			break;
 		case XEN_PCI_OP_enable_msix:
+			nr = op->value;
 			op->err = xen_pcibk_enable_msix(pdev, dev, op);
 			break;
 		case XEN_PCI_OP_disable_msix:
@@ -382,7 +387,7 @@
 	if (op->cmd == XEN_PCI_OP_enable_msix && op->err == 0) {
 		unsigned int i;
 
-		for (i = 0; i < op->value; i++)
+		for (i = 0; i < nr; i++)
 			pdev->sh_info->op.msix_entries[i].vector =
 				op->msix_entries[i].vector;
 	}

diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c
index ad4eb10..c46ee18 100644
--- a/drivers/xen/xen-scsiback.c
+++ b/drivers/xen/xen-scsiback.c

@@ -849,15 +849,31 @@
 }
 
 /*
+  Check for a translation entry being present
+*/
+static struct v2p_entry *scsiback_chk_translation_entry(
+	struct vscsibk_info *info, struct ids_tuple *v)
+{
+	struct list_head *head = &(info->v2p_entry_lists);
+	struct v2p_entry *entry;
+
+	list_for_each_entry(entry, head, l)
+		if ((entry->v.chn == v->chn) &&
+		    (entry->v.tgt == v->tgt) &&
+		    (entry->v.lun == v->lun))
+			return entry;
+
+	return NULL;
+}
+
+/*
   Add a new translation entry
 */
 static int scsiback_add_translation_entry(struct vscsibk_info *info,
 					  char *phy, struct ids_tuple *v)
 {
 	int err = 0;
-	struct v2p_entry *entry;
 	struct v2p_entry *new;
-	struct list_head *head = &(info->v2p_entry_lists);
 	unsigned long flags;
 	char *lunp;
 	unsigned long long unpacked_lun;
@@ -917,15 +933,10 @@
 	spin_lock_irqsave(&info->v2p_lock, flags);
 
 	/* Check double assignment to identical virtual ID */
-	list_for_each_entry(entry, head, l) {
-		if ((entry->v.chn == v->chn) &&
-		    (entry->v.tgt == v->tgt) &&
-		    (entry->v.lun == v->lun)) {
-			pr_warn("Virtual ID is already used. Assignment was not performed.\n");
-			err = -EEXIST;
-			goto out;
-		}
-
+	if (scsiback_chk_translation_entry(info, v)) {
+		pr_warn("Virtual ID is already used. Assignment was not performed.\n");
+		err = -EEXIST;
+		goto out;
 	}
 
 	/* Create a new translation entry and add to the list */
@@ -933,18 +944,18 @@
 	new->v = *v;
 	new->tpg = tpg;
 	new->lun = unpacked_lun;
-	list_add_tail(&new->l, head);
+	list_add_tail(&new->l, &info->v2p_entry_lists);
 
 out:
 	spin_unlock_irqrestore(&info->v2p_lock, flags);
 
 out_free:
-	mutex_lock(&tpg->tv_tpg_mutex);
-	tpg->tv_tpg_fe_count--;
-	mutex_unlock(&tpg->tv_tpg_mutex);
-
-	if (err)
+	if (err) {
+		mutex_lock(&tpg->tv_tpg_mutex);
+		tpg->tv_tpg_fe_count--;
+		mutex_unlock(&tpg->tv_tpg_mutex);
 		kfree(new);
+	}
 
 	return err;
 }
@@ -956,39 +967,40 @@
 }
 
 /*
-  Delete the translation entry specfied
+  Delete the translation entry specified
 */
 static int scsiback_del_translation_entry(struct vscsibk_info *info,
 					  struct ids_tuple *v)
 {
 	struct v2p_entry *entry;
-	struct list_head *head = &(info->v2p_entry_lists);
 	unsigned long flags;
+	int ret = 0;
 
 	spin_lock_irqsave(&info->v2p_lock, flags);
 	/* Find out the translation entry specified */
-	list_for_each_entry(entry, head, l) {
-		if ((entry->v.chn == v->chn) &&
-		    (entry->v.tgt == v->tgt) &&
-		    (entry->v.lun == v->lun)) {
-			goto found;
-		}
-	}
+	entry = scsiback_chk_translation_entry(info, v);
+	if (entry)
+		__scsiback_del_translation_entry(entry);
+	else
+		ret = -ENOENT;
 
 	spin_unlock_irqrestore(&info->v2p_lock, flags);
-	return 1;
-
-found:
-	/* Delete the translation entry specfied */
-	__scsiback_del_translation_entry(entry);
-
-	spin_unlock_irqrestore(&info->v2p_lock, flags);
-	return 0;
+	return ret;
 }
 
 static void scsiback_do_add_lun(struct vscsibk_info *info, const char *state,
 				char *phy, struct ids_tuple *vir, int try)
 {
+	struct v2p_entry *entry;
+	unsigned long flags;
+
+	if (try) {
+		spin_lock_irqsave(&info->v2p_lock, flags);
+		entry = scsiback_chk_translation_entry(info, vir);
+		spin_unlock_irqrestore(&info->v2p_lock, flags);
+		if (entry)
+			return;
+	}
 	if (!scsiback_add_translation_entry(info, phy, vir)) {
 		if (xenbus_printf(XBT_NIL, info->dev->nodename, state,
 				  "%d", XenbusStateInitialised)) {

diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c
index 9433e46..912b64e 100644
--- a/drivers/xen/xenbus/xenbus_dev_frontend.c
+++ b/drivers/xen/xenbus/xenbus_dev_frontend.c

@@ -188,6 +188,8 @@
 
 	if (len == 0)
 		return 0;
+	if (len > XENSTORE_PAYLOAD_MAX)
+		return -EINVAL;
 
 	rb = kmalloc(sizeof(*rb) + len, GFP_KERNEL);
 	if (rb == NULL)

diff --git a/fs/affs/file.c b/fs/affs/file.c
index 0548c53..22fc7c8 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c

@@ -511,8 +511,6 @@
 	pr_debug("%s(%lu, %ld, 0, %d)\n", __func__, inode->i_ino,
 		 page->index, to);
 	BUG_ON(to > PAGE_CACHE_SIZE);
-	kmap(page);
-	data = page_address(page);
 	bsize = AFFS_SB(sb)->s_data_blksize;
 	tmp = page->index << PAGE_CACHE_SHIFT;
 	bidx = tmp / bsize;
@@ -524,14 +522,15 @@
 			return PTR_ERR(bh);
 		tmp = min(bsize - boff, to - pos);
 		BUG_ON(pos + tmp > to || tmp > bsize);
+		data = kmap_atomic(page);
 		memcpy(data + pos, AFFS_DATA(bh) + boff, tmp);
+		kunmap_atomic(data);
 		affs_brelse(bh);
 		bidx++;
 		pos += tmp;
 		boff = 0;
 	}
 	flush_dcache_page(page);
-	kunmap(page);
 	return 0;
 }
 

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 051ea48..7d914c6 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c

@@ -653,7 +653,7 @@
 
 	if ((current->flags & PF_RANDOMIZE) &&
 		!(current->personality & ADDR_NO_RANDOMIZE)) {
-		random_variable = (unsigned long) get_random_int();
+		random_variable = get_random_long();
 		random_variable &= STACK_RND_MASK;
 		random_variable <<= PAGE_SHIFT;
 	}

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 39b3a17..826b164 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c

@@ -1201,7 +1201,11 @@
 		bdev->bd_disk = disk;
 		bdev->bd_queue = disk->queue;
 		bdev->bd_contains = bdev;
-		bdev->bd_inode->i_flags = disk->fops->direct_access ? S_DAX : 0;
+		if (IS_ENABLED(CONFIG_BLK_DEV_DAX) && disk->fops->direct_access)
+			bdev->bd_inode->i_flags = S_DAX;
+		else
+			bdev->bd_inode->i_flags = 0;
+
 		if (!partno) {
 			ret = -ENXIO;
 			bdev->bd_part = disk_get_part(disk, partno);
@@ -1693,13 +1697,24 @@
 	return try_to_free_buffers(page);
 }
 
+static int blkdev_writepages(struct address_space *mapping,
+			     struct writeback_control *wbc)
+{
+	if (dax_mapping(mapping)) {
+		struct block_device *bdev = I_BDEV(mapping->host);
+
+		return dax_writeback_mapping_range(mapping, bdev, wbc);
+	}
+	return generic_writepages(mapping, wbc);
+}
+
 static const struct address_space_operations def_blk_aops = {
 	.readpage	= blkdev_readpage,
 	.readpages	= blkdev_readpages,
 	.writepage	= blkdev_writepage,
 	.write_begin	= blkdev_write_begin,
 	.write_end	= blkdev_write_end,
-	.writepages	= generic_writepages,
+	.writepages	= blkdev_writepages,
 	.releasepage	= blkdev_releasepage,
 	.direct_IO	= blkdev_direct_IO,
 	.is_dirty_writeback = buffer_check_dirty_writeback,

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 4545e2e..5699bbc 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c

@@ -931,7 +931,7 @@
 	if (bio_flags & EXTENT_BIO_TREE_LOG)
 		return 0;
 #ifdef CONFIG_X86
-	if (static_cpu_has_safe(X86_FEATURE_XMM4_2))
+	if (static_cpu_has(X86_FEATURE_XMM4_2))
 		return 0;
 #endif
 	return 1;

diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 7cf8509..2c849b0 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c

@@ -310,8 +310,16 @@
 		set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state);
 
 		err = btrfs_insert_fs_root(root->fs_info, root);
+		/*
+		 * The root might have been inserted already, as before we look
+		 * for orphan roots, log replay might have happened, which
+		 * triggers a transaction commit and qgroup accounting, which
+		 * in turn reads and inserts fs roots while doing backref
+		 * walking.
+		 */
+		if (err == -EEXIST)
+			err = 0;
 		if (err) {
-			BUG_ON(err == -EEXIST);
 			btrfs_free_fs_root(root);
 			break;
 		}

diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index c2221378..19adeb0 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c

@@ -1756,6 +1756,10 @@
 	u32 pool;
 	int ret, flags;
 
+	/* does not support pool namespace yet */
+	if (ci->i_pool_ns_len)
+		return -EIO;
+
 	if (ceph_test_mount_opt(ceph_inode_to_client(&ci->vfs_inode),
 				NOPOOLPERM))
 		return 0;

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index cdbf8cf..6fe0ad2 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c

@@ -2753,7 +2753,8 @@
 			     void *inline_data, int inline_len,
 			     struct ceph_buffer *xattr_buf,
 			     struct ceph_mds_session *session,
-			     struct ceph_cap *cap, int issued)
+			     struct ceph_cap *cap, int issued,
+			     u32 pool_ns_len)
 	__releases(ci->i_ceph_lock)
 	__releases(mdsc->snap_rwsem)
 {
@@ -2873,6 +2874,8 @@
 	if (newcaps & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR)) {
 		/* file layout may have changed */
 		ci->i_layout = grant->layout;
+		ci->i_pool_ns_len = pool_ns_len;
+
 		/* size/truncate_seq? */
 		queue_trunc = ceph_fill_file_size(inode, issued,
 					le32_to_cpu(grant->truncate_seq),
@@ -3411,6 +3414,7 @@
 	u32  inline_len = 0;
 	void *snaptrace;
 	size_t snaptrace_len;
+	u32 pool_ns_len = 0;
 	void *p, *end;
 
 	dout("handle_caps from mds%d\n", mds);
@@ -3463,6 +3467,21 @@
 		p += inline_len;
 	}
 
+	if (le16_to_cpu(msg->hdr.version) >= 8) {
+		u64 flush_tid;
+		u32 caller_uid, caller_gid;
+		u32 osd_epoch_barrier;
+		/* version >= 5 */
+		ceph_decode_32_safe(&p, end, osd_epoch_barrier, bad);
+		/* version >= 6 */
+		ceph_decode_64_safe(&p, end, flush_tid, bad);
+		/* version >= 7 */
+		ceph_decode_32_safe(&p, end, caller_uid, bad);
+		ceph_decode_32_safe(&p, end, caller_gid, bad);
+		/* version >= 8 */
+		ceph_decode_32_safe(&p, end, pool_ns_len, bad);
+	}
+
 	/* lookup ino */
 	inode = ceph_find_inode(sb, vino);
 	ci = ceph_inode(inode);
@@ -3518,7 +3537,8 @@
 				  &cap, &issued);
 		handle_cap_grant(mdsc, inode, h,
 				 inline_version, inline_data, inline_len,
-				 msg->middle, session, cap, issued);
+				 msg->middle, session, cap, issued,
+				 pool_ns_len);
 		if (realm)
 			ceph_put_snap_realm(mdsc, realm);
 		goto done_unlocked;
@@ -3542,7 +3562,8 @@
 		issued |= __ceph_caps_dirty(ci);
 		handle_cap_grant(mdsc, inode, h,
 				 inline_version, inline_data, inline_len,
-				 msg->middle, session, cap, issued);
+				 msg->middle, session, cap, issued,
+				 pool_ns_len);
 		goto done_unlocked;
 
 	case CEPH_CAP_OP_FLUSH_ACK:

diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index fb4ba2e..5849b88 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c

@@ -396,6 +396,7 @@
 	ci->i_symlink = NULL;
 
 	memset(&ci->i_dir_layout, 0, sizeof(ci->i_dir_layout));
+	ci->i_pool_ns_len = 0;
 
 	ci->i_fragtree = RB_ROOT;
 	mutex_init(&ci->i_fragtree_mutex);
@@ -756,6 +757,7 @@
 		if (ci->i_layout.fl_pg_pool != info->layout.fl_pg_pool)
 			ci->i_ceph_flags &= ~CEPH_I_POOL_PERM;
 		ci->i_layout = info->layout;
+		ci->i_pool_ns_len = iinfo->pool_ns_len;
 
 		queue_trunc = ceph_fill_file_size(inode, issued,
 					le32_to_cpu(info->truncate_seq),

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index e7b130a..911d64d 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c

@@ -100,6 +100,14 @@
 	} else
 		info->inline_version = CEPH_INLINE_NONE;
 
+	if (features & CEPH_FEATURE_FS_FILE_LAYOUT_V2) {
+		ceph_decode_32_safe(p, end, info->pool_ns_len, bad);
+		ceph_decode_need(p, end, info->pool_ns_len, bad);
+		*p += info->pool_ns_len;
+	} else {
+		info->pool_ns_len = 0;
+	}
+
 	return 0;
 bad:
 	return err;
@@ -2298,6 +2306,14 @@
 		ceph_get_cap_refs(ceph_inode(req->r_old_dentry_dir),
 				  CEPH_CAP_PIN);
 
+	/* deny access to directories with pool_ns layouts */
+	if (req->r_inode && S_ISDIR(req->r_inode->i_mode) &&
+	    ceph_inode(req->r_inode)->i_pool_ns_len)
+		return -EIO;
+	if (req->r_locked_dir &&
+	    ceph_inode(req->r_locked_dir)->i_pool_ns_len)
+		return -EIO;
+
 	/* issue */
 	mutex_lock(&mdsc->mutex);
 	__register_request(mdsc, req, dir);

diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index ccf11ef..37712cc 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h

@@ -44,6 +44,7 @@
 	u64 inline_version;
 	u32 inline_len;
 	char *inline_data;
+	u32 pool_ns_len;
 };
 
 /*

diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 75b7d12..9c458eb 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h

@@ -287,6 +287,7 @@
 
 	struct ceph_dir_layout i_dir_layout;
 	struct ceph_file_layout i_layout;
+	size_t i_pool_ns_len;
 	char *i_symlink;
 
 	/* for dirs */

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index c48ca13..2eea403 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c

@@ -1013,7 +1013,6 @@
 	.llseek = cifs_llseek,
 	.unlocked_ioctl	= cifs_ioctl,
 	.clone_file_range = cifs_clone_file_range,
-	.clone_file_range = cifs_clone_file_range,
 	.setlease = cifs_setlease,
 	.fallocate = cifs_fallocate,
 };

diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 68c4547..83aac8b 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h

@@ -31,19 +31,15 @@
  * so that it will fit. We use hash_64 to convert the value to 31 bits, and
  * then add 1, to ensure that we don't end up with a 0 as the value.
  */
-#if BITS_PER_LONG == 64
 static inline ino_t
 cifs_uniqueid_to_ino_t(u64 fileid)
 {
+	if ((sizeof(ino_t)) < (sizeof(u64)))
+		return (ino_t)hash_64(fileid, (sizeof(ino_t) * 8) - 1) + 1;
+
 	return (ino_t)fileid;
+
 }
-#else
-static inline ino_t
-cifs_uniqueid_to_ino_t(u64 fileid)
-{
-	return (ino_t)hash_64(fileid, (sizeof(ino_t) * 8) - 1) + 1;
-}
-#endif
 
 extern struct file_system_type cifs_fs_type;
 extern const struct address_space_operations cifs_addr_ops;

diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 90b4f9f..76fcb50 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c

@@ -1396,11 +1396,10 @@
  * current bigbuf.
  */
 static int
-cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid)
+discard_remaining_data(struct TCP_Server_Info *server)
 {
 	unsigned int rfclen = get_rfc1002_length(server->smallbuf);
 	int remaining = rfclen + 4 - server->total_read;
-	struct cifs_readdata *rdata = mid->callback_data;
 
 	while (remaining > 0) {
 		int length;
@@ -1414,10 +1413,20 @@
 		remaining -= length;
 	}
 
-	dequeue_mid(mid, rdata->result);
 	return 0;
 }
 
+static int
+cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid)
+{
+	int length;
+	struct cifs_readdata *rdata = mid->callback_data;
+
+	length = discard_remaining_data(server);
+	dequeue_mid(mid, rdata->result);
+	return length;
+}
+
 int
 cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
 {
@@ -1446,6 +1455,12 @@
 		return length;
 	server->total_read += length;
 
+	if (server->ops->is_status_pending &&
+	    server->ops->is_status_pending(buf, server, 0)) {
+		discard_remaining_data(server);
+		return -1;
+	}
+
 	/* Was the SMB read successful? */
 	rdata->result = server->ops->map_error(buf, false);
 	if (rdata->result != 0) {

diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 10f8d5c..42e1f44 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c

@@ -1106,21 +1106,25 @@
 {
 	char *data_offset;
 	struct create_context *cc;
-	unsigned int next = 0;
+	unsigned int next;
+	unsigned int remaining;
 	char *name;
 
 	data_offset = (char *)rsp + 4 + le32_to_cpu(rsp->CreateContextsOffset);
+	remaining = le32_to_cpu(rsp->CreateContextsLength);
 	cc = (struct create_context *)data_offset;
-	do {
-		cc = (struct create_context *)((char *)cc + next);
+	while (remaining >= sizeof(struct create_context)) {
 		name = le16_to_cpu(cc->NameOffset) + (char *)cc;
-		if (le16_to_cpu(cc->NameLength) != 4 ||
-		    strncmp(name, "RqLs", 4)) {
-			next = le32_to_cpu(cc->Next);
-			continue;
-		}
-		return server->ops->parse_lease_buf(cc, epoch);
-	} while (next != 0);
+		if (le16_to_cpu(cc->NameLength) == 4 &&
+		    strncmp(name, "RqLs", 4) == 0)
+			return server->ops->parse_lease_buf(cc, epoch);
+
+		next = le32_to_cpu(cc->Next);
+		if (!next)
+			break;
+		remaining -= next;
+		cc = (struct create_context *)((char *)cc + next);
+	}
 
 	return 0;
 }

diff --git a/fs/dax.c b/fs/dax.c
index fc2e314..bbb2ad7 100644
--- a/fs/dax.c
+++ b/fs/dax.c

@@ -79,15 +79,14 @@
 }
 
 /*
- * dax_clear_blocks() is called from within transaction context from XFS,
+ * dax_clear_sectors() is called from within transaction context from XFS,
  * and hence this means the stack from this point must follow GFP_NOFS
  * semantics for all operations.
  */
-int dax_clear_blocks(struct inode *inode, sector_t block, long _size)
+int dax_clear_sectors(struct block_device *bdev, sector_t _sector, long _size)
 {
-	struct block_device *bdev = inode->i_sb->s_bdev;
 	struct blk_dax_ctl dax = {
-		.sector = block << (inode->i_blkbits - 9),
+		.sector = _sector,
 		.size = _size,
 	};
 
@@ -109,7 +108,7 @@
 	wmb_pmem();
 	return 0;
 }
-EXPORT_SYMBOL_GPL(dax_clear_blocks);
+EXPORT_SYMBOL_GPL(dax_clear_sectors);
 
 /* the clear_pmem() calls are ordered by a wmb_pmem() in the caller */
 static void dax_new_buf(void __pmem *addr, unsigned size, unsigned first,
@@ -485,11 +484,10 @@
  * end]. This is required by data integrity operations to ensure file data is
  * on persistent storage prior to completion of the operation.
  */
-int dax_writeback_mapping_range(struct address_space *mapping, loff_t start,
-		loff_t end)
+int dax_writeback_mapping_range(struct address_space *mapping,
+		struct block_device *bdev, struct writeback_control *wbc)
 {
 	struct inode *inode = mapping->host;
-	struct block_device *bdev = inode->i_sb->s_bdev;
 	pgoff_t start_index, end_index, pmd_index;
 	pgoff_t indices[PAGEVEC_SIZE];
 	struct pagevec pvec;
@@ -500,8 +498,11 @@
 	if (WARN_ON_ONCE(inode->i_blkbits != PAGE_SHIFT))
 		return -EIO;
 
-	start_index = start >> PAGE_CACHE_SHIFT;
-	end_index = end >> PAGE_CACHE_SHIFT;
+	if (!mapping->nrexceptional || wbc->sync_mode != WB_SYNC_ALL)
+		return 0;
+
+	start_index = wbc->range_start >> PAGE_CACHE_SHIFT;
+	end_index = wbc->range_end >> PAGE_CACHE_SHIFT;
 	pmd_index = DAX_PMD_INDEX(start_index);
 
 	rcu_read_lock();
@@ -1055,6 +1056,7 @@
 int dax_pfn_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	struct file *file = vma->vm_file;
+	int error;
 
 	/*
 	 * We pass NO_SECTOR to dax_radix_entry() because we expect that a
@@ -1064,7 +1066,13 @@
 	 * saves us from having to make a call to get_block() here to look
 	 * up the sector.
 	 */
-	dax_radix_entry(file->f_mapping, vmf->pgoff, NO_SECTOR, false, true);
+	error = dax_radix_entry(file->f_mapping, vmf->pgoff, NO_SECTOR, false,
+			true);
+
+	if (error == -ENOMEM)
+		return VM_FAULT_OOM;
+	if (error)
+		return VM_FAULT_SIGBUS;
 	return VM_FAULT_NOPAGE;
 }
 EXPORT_SYMBOL_GPL(dax_pfn_mkwrite);

diff --git a/fs/dcache.c b/fs/dcache.c
index 92d5140..2398f9f9 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c

@@ -269,9 +269,6 @@
 	return dentry->d_name.name != dentry->d_iname;
 }
 
-/*
- * Make sure other CPUs see the inode attached before the type is set.
- */
 static inline void __d_set_inode_and_type(struct dentry *dentry,
 					  struct inode *inode,
 					  unsigned type_flags)
@@ -279,28 +276,18 @@
 	unsigned flags;
 
 	dentry->d_inode = inode;
-	smp_wmb();
 	flags = READ_ONCE(dentry->d_flags);
 	flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU);
 	flags |= type_flags;
 	WRITE_ONCE(dentry->d_flags, flags);
 }
 
-/*
- * Ideally, we want to make sure that other CPUs see the flags cleared before
- * the inode is detached, but this is really a violation of RCU principles
- * since the ordering suggests we should always set inode before flags.
- *
- * We should instead replace or discard the entire dentry - but that sucks
- * performancewise on mass deletion/rename.
- */
 static inline void __d_clear_type_and_inode(struct dentry *dentry)
 {
 	unsigned flags = READ_ONCE(dentry->d_flags);
 
 	flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU);
 	WRITE_ONCE(dentry->d_flags, flags);
-	smp_wmb();
 	dentry->d_inode = NULL;
 }
 
@@ -370,9 +357,11 @@
 	__releases(dentry->d_inode->i_lock)
 {
 	struct inode *inode = dentry->d_inode;
+
+	raw_write_seqcount_begin(&dentry->d_seq);
 	__d_clear_type_and_inode(dentry);
 	hlist_del_init(&dentry->d_u.d_alias);
-	dentry_rcuwalk_invalidate(dentry);
+	raw_write_seqcount_end(&dentry->d_seq);
 	spin_unlock(&dentry->d_lock);
 	spin_unlock(&inode->i_lock);
 	if (!inode->i_nlink)
@@ -1758,8 +1747,9 @@
 	spin_lock(&dentry->d_lock);
 	if (inode)
 		hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry);
+	raw_write_seqcount_begin(&dentry->d_seq);
 	__d_set_inode_and_type(dentry, inode, add_flags);
-	dentry_rcuwalk_invalidate(dentry);
+	raw_write_seqcount_end(&dentry->d_seq);
 	spin_unlock(&dentry->d_lock);
 	fsnotify_d_instantiate(dentry, inode);
 }

diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 2c88d68..c1400b1 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c

@@ -80,23 +80,6 @@
 	return ret;
 }
 
-static int ext2_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
-	struct inode *inode = file_inode(vma->vm_file);
-	struct ext2_inode_info *ei = EXT2_I(inode);
-	int ret;
-
-	sb_start_pagefault(inode->i_sb);
-	file_update_time(vma->vm_file);
-	down_read(&ei->dax_sem);
-
-	ret = __dax_mkwrite(vma, vmf, ext2_get_block, NULL);
-
-	up_read(&ei->dax_sem);
-	sb_end_pagefault(inode->i_sb);
-	return ret;
-}
-
 static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma,
 		struct vm_fault *vmf)
 {
@@ -124,7 +107,7 @@
 static const struct vm_operations_struct ext2_dax_vm_ops = {
 	.fault		= ext2_dax_fault,
 	.pmd_fault	= ext2_dax_pmd_fault,
-	.page_mkwrite	= ext2_dax_mkwrite,
+	.page_mkwrite	= ext2_dax_fault,
 	.pfn_mkwrite	= ext2_dax_pfn_mkwrite,
 };
 

diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 338eefd..6bd58e6 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c

@@ -737,8 +737,10 @@
 		 * so that it's not found by another thread before it's
 		 * initialised
 		 */
-		err = dax_clear_blocks(inode, le32_to_cpu(chain[depth-1].key),
-						1 << inode->i_blkbits);
+		err = dax_clear_sectors(inode->i_sb->s_bdev,
+				le32_to_cpu(chain[depth-1].key) <<
+				(inode->i_blkbits - 9),
+				1 << inode->i_blkbits);
 		if (err) {
 			mutex_unlock(&ei->truncate_mutex);
 			goto cleanup;
@@ -874,6 +876,14 @@
 static int
 ext2_writepages(struct address_space *mapping, struct writeback_control *wbc)
 {
+#ifdef CONFIG_FS_DAX
+	if (dax_mapping(mapping)) {
+		return dax_writeback_mapping_range(mapping,
+						   mapping->host->i_sb->s_bdev,
+						   wbc);
+	}
+#endif
+
 	return mpage_writepages(mapping, wbc, ext2_get_block);
 }
 
@@ -1296,7 +1306,7 @@
 		inode->i_flags |= S_NOATIME;
 	if (flags & EXT2_DIRSYNC_FL)
 		inode->i_flags |= S_DIRSYNC;
-	if (test_opt(inode->i_sb, DAX))
+	if (test_opt(inode->i_sb, DAX) && S_ISREG(inode->i_mode))
 		inode->i_flags |= S_DAX;
 }
 

diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 474f1a4..4cd318f 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c

@@ -262,23 +262,8 @@
 	return result;
 }
 
-static int ext4_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
-	int err;
-	struct inode *inode = file_inode(vma->vm_file);
-
-	sb_start_pagefault(inode->i_sb);
-	file_update_time(vma->vm_file);
-	down_read(&EXT4_I(inode)->i_mmap_sem);
-	err = __dax_mkwrite(vma, vmf, ext4_dax_mmap_get_block, NULL);
-	up_read(&EXT4_I(inode)->i_mmap_sem);
-	sb_end_pagefault(inode->i_sb);
-
-	return err;
-}
-
 /*
- * Handle write fault for VM_MIXEDMAP mappings. Similarly to ext4_dax_mkwrite()
+ * Handle write fault for VM_MIXEDMAP mappings. Similarly to ext4_dax_fault()
  * handler we check for races agaist truncate. Note that since we cycle through
  * i_mmap_sem, we are sure that also any hole punching that began before we
  * were called is finished by now and so if it included part of the file we
@@ -311,7 +296,7 @@
 static const struct vm_operations_struct ext4_dax_vm_ops = {
 	.fault		= ext4_dax_fault,
 	.pmd_fault	= ext4_dax_pmd_fault,
-	.page_mkwrite	= ext4_dax_mkwrite,
+	.page_mkwrite	= ext4_dax_fault,
 	.pfn_mkwrite	= ext4_dax_pfn_mkwrite,
 };
 #else

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 9cc57c3..aee960b 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c

@@ -2478,6 +2478,10 @@
 
 	trace_ext4_writepages(inode, wbc);
 
+	if (dax_mapping(mapping))
+		return dax_writeback_mapping_range(mapping, inode->i_sb->s_bdev,
+						   wbc);
+
 	/*
 	 * No pages to write? This is mainly a kludge to avoid starting
 	 * a transaction for special inodes like journal inode on last iput()
@@ -4155,7 +4159,7 @@
 		new_fl |= S_NOATIME;
 	if (flags & EXT4_DIRSYNC_FL)
 		new_fl |= S_DIRSYNC;
-	if (test_opt(inode->i_sb, DAX))
+	if (test_opt(inode->i_sb, DAX) && S_ISREG(inode->i_mode))
 		new_fl |= S_DAX;
 	inode_set_flags(inode, new_fl,
 			S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX);

diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index a99b010..eae5917 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c

@@ -583,6 +583,11 @@
 				 "Online defrag not supported with bigalloc");
 			err = -EOPNOTSUPP;
 			goto mext_out;
+		} else if (IS_DAX(inode)) {
+			ext4_msg(sb, KERN_ERR,
+				 "Online defrag not supported with DAX");
+			err = -EOPNOTSUPP;
+			goto mext_out;
 		}
 
 		err = mnt_want_write_file(filp);

diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index e032a04..4098acc 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c

@@ -390,6 +390,7 @@
 		*err = ext4_get_block(orig_inode, orig_blk_offset + i, bh, 0);
 		if (*err < 0)
 			break;
+		bh = bh->b_this_page;
 	}
 	if (!*err)
 		*err = block_commit_write(pagep[0], from, from + replaced_size);

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 1f76d89..5c46ed9 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c

@@ -223,6 +223,9 @@
 #define WB_FRN_HIST_MAX_SLOTS	(WB_FRN_HIST_THR_SLOTS / 2 + 1)
 					/* one round can affect upto 5 slots */
 
+static atomic_t isw_nr_in_flight = ATOMIC_INIT(0);
+static struct workqueue_struct *isw_wq;
+
 void __inode_attach_wb(struct inode *inode, struct page *page)
 {
 	struct backing_dev_info *bdi = inode_to_bdi(inode);
@@ -317,7 +320,6 @@
 	struct inode_switch_wbs_context *isw =
 		container_of(work, struct inode_switch_wbs_context, work);
 	struct inode *inode = isw->inode;
-	struct super_block *sb = inode->i_sb;
 	struct address_space *mapping = inode->i_mapping;
 	struct bdi_writeback *old_wb = inode->i_wb;
 	struct bdi_writeback *new_wb = isw->new_wb;
@@ -424,8 +426,9 @@
 	wb_put(new_wb);
 
 	iput(inode);
-	deactivate_super(sb);
 	kfree(isw);
+
+	atomic_dec(&isw_nr_in_flight);
 }
 
 static void inode_switch_wbs_rcu_fn(struct rcu_head *rcu_head)
@@ -435,7 +438,7 @@
 
 	/* needs to grab bh-unsafe locks, bounce to work item */
 	INIT_WORK(&isw->work, inode_switch_wbs_work_fn);
-	schedule_work(&isw->work);
+	queue_work(isw_wq, &isw->work);
 }
 
 /**
@@ -471,20 +474,20 @@
 
 	/* while holding I_WB_SWITCH, no one else can update the association */
 	spin_lock(&inode->i_lock);
-
-	if (inode->i_state & (I_WB_SWITCH | I_FREEING) ||
-	    inode_to_wb(inode) == isw->new_wb)
-		goto out_unlock;
-
-	if (!atomic_inc_not_zero(&inode->i_sb->s_active))
-		goto out_unlock;
-
+	if (!(inode->i_sb->s_flags & MS_ACTIVE) ||
+	    inode->i_state & (I_WB_SWITCH | I_FREEING) ||
+	    inode_to_wb(inode) == isw->new_wb) {
+		spin_unlock(&inode->i_lock);
+		goto out_free;
+	}
 	inode->i_state |= I_WB_SWITCH;
 	spin_unlock(&inode->i_lock);
 
 	ihold(inode);
 	isw->inode = inode;
 
+	atomic_inc(&isw_nr_in_flight);
+
 	/*
 	 * In addition to synchronizing among switchers, I_WB_SWITCH tells
 	 * the RCU protected stat update paths to grab the mapping's
@@ -494,8 +497,6 @@
 	call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn);
 	return;
 
-out_unlock:
-	spin_unlock(&inode->i_lock);
 out_free:
 	if (isw->new_wb)
 		wb_put(isw->new_wb);
@@ -847,6 +848,33 @@
 		wb_put(last_wb);
 }
 
+/**
+ * cgroup_writeback_umount - flush inode wb switches for umount
+ *
+ * This function is called when a super_block is about to be destroyed and
+ * flushes in-flight inode wb switches.  An inode wb switch goes through
+ * RCU and then workqueue, so the two need to be flushed in order to ensure
+ * that all previously scheduled switches are finished.  As wb switches are
+ * rare occurrences and synchronize_rcu() can take a while, perform
+ * flushing iff wb switches are in flight.
+ */
+void cgroup_writeback_umount(void)
+{
+	if (atomic_read(&isw_nr_in_flight)) {
+		synchronize_rcu();
+		flush_workqueue(isw_wq);
+	}
+}
+
+static int __init cgroup_writeback_init(void)
+{
+	isw_wq = alloc_workqueue("inode_switch_wbs", 0, 0);
+	if (!isw_wq)
+		return -ENOMEM;
+	return 0;
+}
+fs_initcall(cgroup_writeback_init);
+
 #else	/* CONFIG_CGROUP_WRITEBACK */
 
 static struct bdi_writeback *

diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index 506765a..bb8d67e 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c

@@ -376,12 +376,11 @@
 	struct inode *inode = d_inode(dentry);
 	dnode_secno dno;
 	int r;
-	int rep = 0;
 	int err;
 
 	hpfs_lock(dir->i_sb);
 	hpfs_adjust_length(name, &len);
-again:
+
 	err = -ENOENT;
 	de = map_dirent(dir, hpfs_i(dir)->i_dno, name, len, &dno, &qbh);
 	if (!de)
@@ -401,33 +400,9 @@
 		hpfs_error(dir->i_sb, "there was error when removing dirent");
 		err = -EFSERROR;
 		break;
-	case 2:		/* no space for deleting, try to truncate file */
-
+	case 2:		/* no space for deleting */
 		err = -ENOSPC;
-		if (rep++)
-			break;
-
-		dentry_unhash(dentry);
-		if (!d_unhashed(dentry)) {
-			hpfs_unlock(dir->i_sb);
-			return -ENOSPC;
-		}
-		if (generic_permission(inode, MAY_WRITE) ||
-		    !S_ISREG(inode->i_mode) ||
-		    get_write_access(inode)) {
-			d_rehash(dentry);
-		} else {
-			struct iattr newattrs;
-			/*pr_info("truncating file before delete.\n");*/
-			newattrs.ia_size = 0;
-			newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
-			err = notify_change(dentry, &newattrs, NULL);
-			put_write_access(inode);
-			if (!err)
-				goto again;
-		}
-		hpfs_unlock(dir->i_sb);
-		return -ENOSPC;
+		break;
 	default:
 		drop_nlink(inode);
 		err = 0;

diff --git a/fs/jffs2/README.Locking b/fs/jffs2/README.Locking
index 3ea3655..8918ac9 100644
--- a/fs/jffs2/README.Locking
+++ b/fs/jffs2/README.Locking

@@ -2,10 +2,6 @@
 	JFFS2 LOCKING DOCUMENTATION
 	---------------------------
 
-At least theoretically, JFFS2 does not require the Big Kernel Lock
-(BKL), which was always helpfully obtained for it by Linux 2.4 VFS
-code. It has its own locking, as described below.
-
 This document attempts to describe the existing locking rules for
 JFFS2. It is not expected to remain perfectly up to date, but ought to
 be fairly close.
@@ -69,6 +65,7 @@
 	   any f->sem held.
 	2. Never attempt to lock two file mutexes in one thread.
 	   No ordering rules have been made for doing so.
+	3. Never lock a page cache page with f->sem held.
 
 
 	erase_completion_lock spinlock

diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c
index 0ae91ad..b288c8a 100644
--- a/fs/jffs2/build.c
+++ b/fs/jffs2/build.c

@@ -50,7 +50,8 @@
 
 
 static void jffs2_build_inode_pass1(struct jffs2_sb_info *c,
-				    struct jffs2_inode_cache *ic)
+				    struct jffs2_inode_cache *ic,
+				    int *dir_hardlinks)
 {
 	struct jffs2_full_dirent *fd;
 
@@ -69,19 +70,21 @@
 			dbg_fsbuild("child \"%s\" (ino #%u) of dir ino #%u doesn't exist!\n",
 				  fd->name, fd->ino, ic->ino);
 			jffs2_mark_node_obsolete(c, fd->raw);
+			/* Clear the ic/raw union so it doesn't cause problems later. */
+			fd->ic = NULL;
 			continue;
 		}
 
+		/* From this point, fd->raw is no longer used so we can set fd->ic */
+		fd->ic = child_ic;
+		child_ic->pino_nlink++;
+		/* If we appear (at this stage) to have hard-linked directories,
+		 * set a flag to trigger a scan later */
 		if (fd->type == DT_DIR) {
-			if (child_ic->pino_nlink) {
-				JFFS2_ERROR("child dir \"%s\" (ino #%u) of dir ino #%u appears to be a hard link\n",
-					    fd->name, fd->ino, ic->ino);
-				/* TODO: What do we do about it? */
-			} else {
-				child_ic->pino_nlink = ic->ino;
-			}
-		} else
-			child_ic->pino_nlink++;
+			child_ic->flags |= INO_FLAGS_IS_DIR;
+			if (child_ic->pino_nlink > 1)
+				*dir_hardlinks = 1;
+		}
 
 		dbg_fsbuild("increased nlink for child \"%s\" (ino #%u)\n", fd->name, fd->ino);
 		/* Can't free scan_dents so far. We might need them in pass 2 */
@@ -95,8 +98,7 @@
 */
 static int jffs2_build_filesystem(struct jffs2_sb_info *c)
 {
-	int ret;
-	int i;
+	int ret, i, dir_hardlinks = 0;
 	struct jffs2_inode_cache *ic;
 	struct jffs2_full_dirent *fd;
 	struct jffs2_full_dirent *dead_fds = NULL;
@@ -120,7 +122,7 @@
 	/* Now scan the directory tree, increasing nlink according to every dirent found. */
 	for_each_inode(i, c, ic) {
 		if (ic->scan_dents) {
-			jffs2_build_inode_pass1(c, ic);
+			jffs2_build_inode_pass1(c, ic, &dir_hardlinks);
 			cond_resched();
 		}
 	}
@@ -156,6 +158,20 @@
 	}
 
 	dbg_fsbuild("pass 2a complete\n");
+
+	if (dir_hardlinks) {
+		/* If we detected directory hardlinks earlier, *hopefully*
+		 * they are gone now because some of the links were from
+		 * dead directories which still had some old dirents lying
+		 * around and not yet garbage-collected, but which have
+		 * been discarded above. So clear the pino_nlink field
+		 * in each directory, so that the final scan below can
+		 * print appropriate warnings. */
+		for_each_inode(i, c, ic) {
+			if (ic->flags & INO_FLAGS_IS_DIR)
+				ic->pino_nlink = 0;
+		}
+	}
 	dbg_fsbuild("freeing temporary data structures\n");
 
 	/* Finally, we can scan again and free the dirent structs */
@@ -163,6 +179,33 @@
 		while(ic->scan_dents) {
 			fd = ic->scan_dents;
 			ic->scan_dents = fd->next;
+			/* We do use the pino_nlink field to count nlink of
+			 * directories during fs build, so set it to the
+			 * parent ino# now. Now that there's hopefully only
+			 * one. */
+			if (fd->type == DT_DIR) {
+				if (!fd->ic) {
+					/* We'll have complained about it and marked the coresponding
+					   raw node obsolete already. Just skip it. */
+					continue;
+				}
+
+				/* We *have* to have set this in jffs2_build_inode_pass1() */
+				BUG_ON(!(fd->ic->flags & INO_FLAGS_IS_DIR));
+
+				/* We clear ic->pino_nlink ∀ directories' ic *only* if dir_hardlinks
+				 * is set. Otherwise, we know this should never trigger anyway, so
+				 * we don't do the check. And ic->pino_nlink still contains the nlink
+				 * value (which is 1). */
+				if (dir_hardlinks && fd->ic->pino_nlink) {
+					JFFS2_ERROR("child dir \"%s\" (ino #%u) of dir ino #%u is also hard linked from dir ino #%u\n",
+						    fd->name, fd->ino, ic->ino, fd->ic->pino_nlink);
+					/* Should we unlink it from its previous parent? */
+				}
+
+				/* For directories, ic->pino_nlink holds that parent inode # */
+				fd->ic->pino_nlink = ic->ino;
+			}
 			jffs2_free_full_dirent(fd);
 		}
 		ic->scan_dents = NULL;
@@ -241,11 +284,7 @@
 
 			/* Reduce nlink of the child. If it's now zero, stick it on the
 			   dead_fds list to be cleaned up later. Else just free the fd */
-
-			if (fd->type == DT_DIR)
-				child_ic->pino_nlink = 0;
-			else
-				child_ic->pino_nlink--;
+			child_ic->pino_nlink--;
 
 			if (!child_ic->pino_nlink) {
 				dbg_fsbuild("inode #%u (\"%s\") now has no links; adding to dead_fds list.\n",

diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index d211b8e..30c4c9e 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c

@@ -843,9 +843,14 @@
 
 		pr_notice("%s(): Link succeeded, unlink failed (err %d). You now have a hard link\n",
 			  __func__, ret);
-		/* Might as well let the VFS know */
-		d_instantiate(new_dentry, d_inode(old_dentry));
-		ihold(d_inode(old_dentry));
+		/*
+		 * We can't keep the target in dcache after that.
+		 * For one thing, we can't afford dentry aliases for directories.
+		 * For another, if there was a victim, we _can't_ set new inode
+		 * for that sucker and we have to trigger mount eviction - the
+		 * caller won't do it on its own since we are returning an error.
+		 */
+		d_invalidate(new_dentry);
 		new_dir_i->i_mtime = new_dir_i->i_ctime = ITIME(now);
 		return ret;
 	}

diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index c5ac594..cad86ba 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c

@@ -137,39 +137,33 @@
 	struct page *pg;
 	struct inode *inode = mapping->host;
 	struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
-	struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
-	struct jffs2_raw_inode ri;
-	uint32_t alloc_len = 0;
 	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
 	uint32_t pageofs = index << PAGE_CACHE_SHIFT;
 	int ret = 0;
 
+	pg = grab_cache_page_write_begin(mapping, index, flags);
+	if (!pg)
+		return -ENOMEM;
+	*pagep = pg;
+
 	jffs2_dbg(1, "%s()\n", __func__);
 
 	if (pageofs > inode->i_size) {
-		ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len,
-					  ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
-		if (ret)
-			return ret;
-	}
-
-	mutex_lock(&f->sem);
-	pg = grab_cache_page_write_begin(mapping, index, flags);
-	if (!pg) {
-		if (alloc_len)
-			jffs2_complete_reservation(c);
-		mutex_unlock(&f->sem);
-		return -ENOMEM;
-	}
-	*pagep = pg;
-
-	if (alloc_len) {
 		/* Make new hole frag from old EOF to new page */
+		struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
+		struct jffs2_raw_inode ri;
 		struct jffs2_full_dnode *fn;
+		uint32_t alloc_len;
 
 		jffs2_dbg(1, "Writing new hole frag 0x%x-0x%x between current EOF and new page\n",
 			  (unsigned int)inode->i_size, pageofs);
 
+		ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len,
+					  ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
+		if (ret)
+			goto out_page;
+
+		mutex_lock(&f->sem);
 		memset(&ri, 0, sizeof(ri));
 
 		ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
@@ -196,6 +190,7 @@
 		if (IS_ERR(fn)) {
 			ret = PTR_ERR(fn);
 			jffs2_complete_reservation(c);
+			mutex_unlock(&f->sem);
 			goto out_page;
 		}
 		ret = jffs2_add_full_dnode_to_inode(c, f, fn);
@@ -210,10 +205,12 @@
 			jffs2_mark_node_obsolete(c, fn->raw);
 			jffs2_free_full_dnode(fn);
 			jffs2_complete_reservation(c);
+			mutex_unlock(&f->sem);
 			goto out_page;
 		}
 		jffs2_complete_reservation(c);
 		inode->i_size = pageofs;
+		mutex_unlock(&f->sem);
 	}
 
 	/*
@@ -222,18 +219,18 @@
 	 * case of a short-copy.
 	 */
 	if (!PageUptodate(pg)) {
+		mutex_lock(&f->sem);
 		ret = jffs2_do_readpage_nolock(inode, pg);
+		mutex_unlock(&f->sem);
 		if (ret)
 			goto out_page;
 	}
-	mutex_unlock(&f->sem);
 	jffs2_dbg(1, "end write_begin(). pg->flags %lx\n", pg->flags);
 	return ret;
 
 out_page:
 	unlock_page(pg);
 	page_cache_release(pg);
-	mutex_unlock(&f->sem);
 	return ret;
 }
 

diff --git a/fs/jffs2/gc.c b/fs/jffs2/gc.c
index 5a2dec2..95d5880 100644
--- a/fs/jffs2/gc.c
+++ b/fs/jffs2/gc.c

@@ -1296,14 +1296,17 @@
 		BUG_ON(start > orig_start);
 	}
 
-	/* First, use readpage() to read the appropriate page into the page cache */
-	/* Q: What happens if we actually try to GC the _same_ page for which commit_write()
-	 *    triggered garbage collection in the first place?
-	 * A: I _think_ it's OK. read_cache_page shouldn't deadlock, we'll write out the
-	 *    page OK. We'll actually write it out again in commit_write, which is a little
-	 *    suboptimal, but at least we're correct.
-	 */
+	/* The rules state that we must obtain the page lock *before* f->sem, so
+	 * drop f->sem temporarily. Since we also hold c->alloc_sem, nothing's
+	 * actually going to *change* so we're safe; we only allow reading.
+	 *
+	 * It is important to note that jffs2_write_begin() will ensure that its
+	 * page is marked Uptodate before allocating space. That means that if we
+	 * end up here trying to GC the *same* page that jffs2_write_begin() is
+	 * trying to write out, read_cache_page() will not deadlock. */
+	mutex_unlock(&f->sem);
 	pg_ptr = jffs2_gc_fetch_page(c, f, start, &pg);
+	mutex_lock(&f->sem);
 
 	if (IS_ERR(pg_ptr)) {
 		pr_warn("read_cache_page() returned error: %ld\n",

diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h
index fa35ff7..0637271 100644
--- a/fs/jffs2/nodelist.h
+++ b/fs/jffs2/nodelist.h

@@ -194,6 +194,7 @@
 #define INO_STATE_CLEARING	6	/* In clear_inode() */
 
 #define INO_FLAGS_XATTR_CHECKED	0x01	/* has no duplicate xattr_ref */
+#define INO_FLAGS_IS_DIR	0x02	/* is a directory */
 
 #define RAWNODE_CLASS_INODE_CACHE	0
 #define RAWNODE_CLASS_XATTR_DATUM	1
@@ -249,7 +250,10 @@
 
 struct jffs2_full_dirent
 {
-	struct jffs2_raw_node_ref *raw;
+	union {
+		struct jffs2_raw_node_ref *raw;
+		struct jffs2_inode_cache *ic; /* Just during part of build */
+	};
 	struct jffs2_full_dirent *next;
 	uint32_t version;
 	uint32_t ino; /* == zero for unlink */

diff --git a/fs/namei.c b/fs/namei.c
index f624d13..9c590e0 100644
--- a/fs/namei.c
+++ b/fs/namei.c

@@ -1712,6 +1712,11 @@
 		return 0;
 	if (!follow)
 		return 0;
+	/* make sure that d_is_symlink above matches inode */
+	if (nd->flags & LOOKUP_RCU) {
+		if (read_seqcount_retry(&link->dentry->d_seq, seq))
+			return -ECHILD;
+	}
 	return pick_link(nd, link, inode, seq);
 }
 
@@ -1743,11 +1748,11 @@
 		if (err < 0)
 			return err;
 
-		inode = d_backing_inode(path.dentry);
 		seq = 0;	/* we are already out of RCU mode */
 		err = -ENOENT;
 		if (d_is_negative(path.dentry))
 			goto out_path_put;
+		inode = d_backing_inode(path.dentry);
 	}
 
 	if (flags & WALK_PUT)
@@ -3192,12 +3197,12 @@
 		return error;
 
 	BUG_ON(nd->flags & LOOKUP_RCU);
-	inode = d_backing_inode(path.dentry);
 	seq = 0;	/* out of RCU mode, so the value doesn't matter */
 	if (unlikely(d_is_negative(path.dentry))) {
 		path_to_nameidata(&path, nd);
 		return -ENOENT;
 	}
+	inode = d_backing_inode(path.dentry);
 finish_lookup:
 	if (nd->depth)
 		put_link(nd);
@@ -3206,11 +3211,6 @@
 	if (unlikely(error))
 		return error;
 
-	if (unlikely(d_is_symlink(path.dentry)) && !(open_flag & O_PATH)) {
-		path_to_nameidata(&path, nd);
-		return -ELOOP;
-	}
-
 	if ((nd->flags & LOOKUP_RCU) || nd->path.mnt != path.mnt) {
 		path_to_nameidata(&path, nd);
 	} else {
@@ -3229,6 +3229,10 @@
 		return error;
 	}
 	audit_inode(nd->name, nd->path.dentry, 0);
+	if (unlikely(d_is_symlink(nd->path.dentry)) && !(open_flag & O_PATH)) {
+		error = -ELOOP;
+		goto out;
+	}
 	error = -EISDIR;
 	if ((open_flag & O_CREAT) && d_is_dir(nd->path.dentry))
 		goto out;
@@ -3273,6 +3277,10 @@
 			goto exit_fput;
 	}
 out:
+	if (unlikely(error > 0)) {
+		WARN_ON(1);
+		error = -EINVAL;
+	}
 	if (got_write)
 		mnt_drop_write(nd->path.mnt);
 	path_put(&save_parent);

diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index 26c2de2..b7f8eae 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c

@@ -633,7 +633,7 @@
 				d_rehash(newdent);
 		} else {
 			spin_lock(&dentry->d_lock);
-			NCP_FINFO(inode)->flags &= ~NCPI_DIR_CACHE;
+			NCP_FINFO(dir)->flags &= ~NCPI_DIR_CACHE;
 			spin_unlock(&dentry->d_lock);
 		}
 	} else {

diff --git a/fs/nfs/blocklayout/extent_tree.c b/fs/nfs/blocklayout/extent_tree.c
index c59a59c..35ab51c 100644
--- a/fs/nfs/blocklayout/extent_tree.c
+++ b/fs/nfs/blocklayout/extent_tree.c

@@ -476,6 +476,7 @@
 
 		for (i = 0; i < nr_pages; i++)
 			put_page(arg->layoutupdate_pages[i]);
+		vfree(arg->start_p);
 		kfree(arg->layoutupdate_pages);
 	} else {
 		put_page(arg->layoutupdate_page);
@@ -559,10 +560,15 @@
 
 	if (unlikely(arg->layoutupdate_pages != &arg->layoutupdate_page)) {
 		void *p = start_p, *end = p + arg->layoutupdate_len;
+		struct page *page = NULL;
 		int i = 0;
 
-		for ( ; p < end; p += PAGE_SIZE)
-			arg->layoutupdate_pages[i++] = vmalloc_to_page(p);
+		arg->start_p = start_p;
+		for ( ; p < end; p += PAGE_SIZE) {
+			page = vmalloc_to_page(p);
+			arg->layoutupdate_pages[i++] = page;
+			get_page(page);
+		}
 	}
 
 	dprintk("%s found %zu ranges\n", __func__, count);

diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index bd25dc7..dff8346 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c

@@ -16,29 +16,8 @@
 
 #define NFSDBG_FACILITY NFSDBG_PROC
 
-static int nfs42_set_rw_stateid(nfs4_stateid *dst, struct file *file,
-				fmode_t fmode)
-{
-	struct nfs_open_context *open;
-	struct nfs_lock_context *lock;
-	int ret;
-
-	open = get_nfs_open_context(nfs_file_open_context(file));
-	lock = nfs_get_lock_context(open);
-	if (IS_ERR(lock)) {
-		put_nfs_open_context(open);
-		return PTR_ERR(lock);
-	}
-
-	ret = nfs4_set_rw_stateid(dst, open, lock, fmode);
-
-	nfs_put_lock_context(lock);
-	put_nfs_open_context(open);
-	return ret;
-}
-
 static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
-				 loff_t offset, loff_t len)
+		struct nfs_lock_context *lock, loff_t offset, loff_t len)
 {
 	struct inode *inode = file_inode(filep);
 	struct nfs_server *server = NFS_SERVER(inode);
@@ -56,7 +35,8 @@
 	msg->rpc_argp = &args;
 	msg->rpc_resp = &res;
 
-	status = nfs42_set_rw_stateid(&args.falloc_stateid, filep, FMODE_WRITE);
+	status = nfs4_set_rw_stateid(&args.falloc_stateid, lock->open_context,
+			lock, FMODE_WRITE);
 	if (status)
 		return status;
 
@@ -78,15 +58,26 @@
 {
 	struct nfs_server *server = NFS_SERVER(file_inode(filep));
 	struct nfs4_exception exception = { };
+	struct nfs_lock_context *lock;
 	int err;
 
+	lock = nfs_get_lock_context(nfs_file_open_context(filep));
+	if (IS_ERR(lock))
+		return PTR_ERR(lock);
+
+	exception.inode = file_inode(filep);
+	exception.state = lock->open_context->state;
+
 	do {
-		err = _nfs42_proc_fallocate(msg, filep, offset, len);
-		if (err == -ENOTSUPP)
-			return -EOPNOTSUPP;
+		err = _nfs42_proc_fallocate(msg, filep, lock, offset, len);
+		if (err == -ENOTSUPP) {
+			err = -EOPNOTSUPP;
+			break;
+		}
 		err = nfs4_handle_exception(server, err, &exception);
 	} while (exception.retry);
 
+	nfs_put_lock_context(lock);
 	return err;
 }
 
@@ -135,7 +126,8 @@
 	return err;
 }
 
-static loff_t _nfs42_proc_llseek(struct file *filep, loff_t offset, int whence)
+static loff_t _nfs42_proc_llseek(struct file *filep,
+		struct nfs_lock_context *lock, loff_t offset, int whence)
 {
 	struct inode *inode = file_inode(filep);
 	struct nfs42_seek_args args = {
@@ -156,7 +148,8 @@
 	if (!nfs_server_capable(inode, NFS_CAP_SEEK))
 		return -ENOTSUPP;
 
-	status = nfs42_set_rw_stateid(&args.sa_stateid, filep, FMODE_READ);
+	status = nfs4_set_rw_stateid(&args.sa_stateid, lock->open_context,
+			lock, FMODE_READ);
 	if (status)
 		return status;
 
@@ -175,17 +168,28 @@
 {
 	struct nfs_server *server = NFS_SERVER(file_inode(filep));
 	struct nfs4_exception exception = { };
+	struct nfs_lock_context *lock;
 	loff_t err;
 
+	lock = nfs_get_lock_context(nfs_file_open_context(filep));
+	if (IS_ERR(lock))
+		return PTR_ERR(lock);
+
+	exception.inode = file_inode(filep);
+	exception.state = lock->open_context->state;
+
 	do {
-		err = _nfs42_proc_llseek(filep, offset, whence);
+		err = _nfs42_proc_llseek(filep, lock, offset, whence);
 		if (err >= 0)
 			break;
-		if (err == -ENOTSUPP)
-			return -EOPNOTSUPP;
+		if (err == -ENOTSUPP) {
+			err = -EOPNOTSUPP;
+			break;
+		}
 		err = nfs4_handle_exception(server, err, &exception);
 	} while (exception.retry);
 
+	nfs_put_lock_context(lock);
 	return err;
 }
 
@@ -298,8 +302,9 @@
 }
 
 static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f,
-			     struct file *dst_f, loff_t src_offset,
-			     loff_t dst_offset, loff_t count)
+		struct file *dst_f, struct nfs_lock_context *src_lock,
+		struct nfs_lock_context *dst_lock, loff_t src_offset,
+		loff_t dst_offset, loff_t count)
 {
 	struct inode *src_inode = file_inode(src_f);
 	struct inode *dst_inode = file_inode(dst_f);
@@ -320,11 +325,13 @@
 	msg->rpc_argp = &args;
 	msg->rpc_resp = &res;
 
-	status = nfs42_set_rw_stateid(&args.src_stateid, src_f, FMODE_READ);
+	status = nfs4_set_rw_stateid(&args.src_stateid, src_lock->open_context,
+			src_lock, FMODE_READ);
 	if (status)
 		return status;
 
-	status = nfs42_set_rw_stateid(&args.dst_stateid, dst_f, FMODE_WRITE);
+	status = nfs4_set_rw_stateid(&args.dst_stateid, dst_lock->open_context,
+			dst_lock, FMODE_WRITE);
 	if (status)
 		return status;
 
@@ -349,22 +356,48 @@
 	};
 	struct inode *inode = file_inode(src_f);
 	struct nfs_server *server = NFS_SERVER(file_inode(src_f));
-	struct nfs4_exception exception = { };
-	int err;
+	struct nfs_lock_context *src_lock;
+	struct nfs_lock_context *dst_lock;
+	struct nfs4_exception src_exception = { };
+	struct nfs4_exception dst_exception = { };
+	int err, err2;
 
 	if (!nfs_server_capable(inode, NFS_CAP_CLONE))
 		return -EOPNOTSUPP;
 
+	src_lock = nfs_get_lock_context(nfs_file_open_context(src_f));
+	if (IS_ERR(src_lock))
+		return PTR_ERR(src_lock);
+
+	src_exception.inode = file_inode(src_f);
+	src_exception.state = src_lock->open_context->state;
+
+	dst_lock = nfs_get_lock_context(nfs_file_open_context(dst_f));
+	if (IS_ERR(dst_lock)) {
+		err = PTR_ERR(dst_lock);
+		goto out_put_src_lock;
+	}
+
+	dst_exception.inode = file_inode(dst_f);
+	dst_exception.state = dst_lock->open_context->state;
+
 	do {
-		err = _nfs42_proc_clone(&msg, src_f, dst_f, src_offset,
-					dst_offset, count);
+		err = _nfs42_proc_clone(&msg, src_f, dst_f, src_lock, dst_lock,
+					src_offset, dst_offset, count);
 		if (err == -ENOTSUPP || err == -EOPNOTSUPP) {
 			NFS_SERVER(inode)->caps &= ~NFS_CAP_CLONE;
-			return -EOPNOTSUPP;
+			err = -EOPNOTSUPP;
+			break;
 		}
-		err = nfs4_handle_exception(server, err, &exception);
-	} while (exception.retry);
 
+		err2 = nfs4_handle_exception(server, err, &src_exception);
+		err = nfs4_handle_exception(server, err, &dst_exception);
+		if (!err)
+			err = err2;
+	} while (src_exception.retry || dst_exception.retry);
+
+	nfs_put_lock_context(dst_lock);
+out_put_src_lock:
+	nfs_put_lock_context(src_lock);
 	return err;
-
 }

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 4bfc33a..1488159 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c

@@ -2466,9 +2466,9 @@
 		dentry = d_add_unique(dentry, igrab(state->inode));
 		if (dentry == NULL) {
 			dentry = opendata->dentry;
-		} else if (dentry != ctx->dentry) {
+		} else {
 			dput(ctx->dentry);
-			ctx->dentry = dget(dentry);
+			ctx->dentry = dentry;
 		}
 		nfs_set_verifier(dentry,
 				nfs_save_change_attribute(d_inode(opendata->dir)));

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 482b6e9..2fa483e 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c

@@ -252,6 +252,27 @@
 	}
 }
 
+/*
+ * Mark a pnfs_layout_hdr and all associated layout segments as invalid
+ *
+ * In order to continue using the pnfs_layout_hdr, a full recovery
+ * is required.
+ * Note that caller must hold inode->i_lock.
+ */
+static int
+pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo,
+		struct list_head *lseg_list)
+{
+	struct pnfs_layout_range range = {
+		.iomode = IOMODE_ANY,
+		.offset = 0,
+		.length = NFS4_MAX_UINT64,
+	};
+
+	set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
+	return pnfs_mark_matching_lsegs_invalid(lo, lseg_list, &range);
+}
+
 static int
 pnfs_iomode_to_fail_bit(u32 iomode)
 {
@@ -554,9 +575,8 @@
 	spin_lock(&nfsi->vfs_inode.i_lock);
 	lo = nfsi->layout;
 	if (lo) {
-		lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */
-		pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL);
 		pnfs_get_layout_hdr(lo);
+		pnfs_mark_layout_stateid_invalid(lo, &tmp_list);
 		pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RO_FAILED);
 		pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RW_FAILED);
 		spin_unlock(&nfsi->vfs_inode.i_lock);
@@ -617,11 +637,6 @@
 {
 	struct pnfs_layout_hdr *lo;
 	struct inode *inode;
-	struct pnfs_layout_range range = {
-		.iomode = IOMODE_ANY,
-		.offset = 0,
-		.length = NFS4_MAX_UINT64,
-	};
 	LIST_HEAD(lseg_list);
 	int ret = 0;
 
@@ -636,11 +651,11 @@
 
 		spin_lock(&inode->i_lock);
 		list_del_init(&lo->plh_bulk_destroy);
-		lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */
-		if (is_bulk_recall)
-			set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
-		if (pnfs_mark_matching_lsegs_invalid(lo, &lseg_list, &range))
+		if (pnfs_mark_layout_stateid_invalid(lo, &lseg_list)) {
+			if (is_bulk_recall)
+				set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
 			ret = -EAGAIN;
+		}
 		spin_unlock(&inode->i_lock);
 		pnfs_free_lseg_list(&lseg_list);
 		/* Free all lsegs that are attached to commit buckets */
@@ -1738,8 +1753,19 @@
 	if (lo->plh_return_iomode != 0)
 		iomode = IOMODE_ANY;
 	lo->plh_return_iomode = iomode;
+	set_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags);
 }
 
+/**
+ * pnfs_mark_matching_lsegs_return - Free or return matching layout segments
+ * @lo: pointer to layout header
+ * @tmp_list: list header to be used with pnfs_free_lseg_list()
+ * @return_range: describe layout segment ranges to be returned
+ *
+ * This function is mainly intended for use by layoutrecall. It attempts
+ * to free the layout segment immediately, or else to mark it for return
+ * as soon as its reference count drops to zero.
+ */
 int
 pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
 				struct list_head *tmp_list,
@@ -1762,12 +1788,11 @@
 				lseg, lseg->pls_range.iomode,
 				lseg->pls_range.offset,
 				lseg->pls_range.length);
+			if (mark_lseg_invalid(lseg, tmp_list))
+				continue;
+			remaining++;
 			set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags);
 			pnfs_set_plh_return_iomode(lo, return_range->iomode);
-			if (!mark_lseg_invalid(lseg, tmp_list))
-				remaining++;
-			set_bit(NFS_LAYOUT_RETURN_REQUESTED,
-					&lo->plh_flags);
 		}
 	return remaining;
 }

diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 794fd15..cda0361 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c

@@ -956,6 +956,7 @@
 		tmp_ret = ocfs2_del_inode_from_orphan(osb, inode, di_bh,
 				update_isize, end);
 		if (tmp_ret < 0) {
+			ocfs2_inode_unlock(inode, 1);
 			ret = tmp_ret;
 			mlog_errno(ret);
 			brelse(di_bh);

diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
index 9581d190..77ebc2b 100644
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c

@@ -147,6 +147,10 @@
 	ret = ocfs2_inode_lock(inode, &di_bh, 1);
 	if (ret < 0) {
 		mlog_errno(ret);
+		if (ret == -ENOMEM)
+			ret = VM_FAULT_OOM;
+		else
+			ret = VM_FAULT_SIGBUS;
 		goto out;
 	}
 

diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index ed95272..52f6de5 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c

@@ -618,7 +618,8 @@
 	 * sole user of this dentry.  Too tricky...  Just unhash for
 	 * now.
 	 */
-	d_drop(dentry);
+	if (!err)
+		d_drop(dentry);
 	inode_unlock(dir);
 
 	return err;
@@ -903,6 +904,13 @@
 	if (!overwrite && new_is_dir && !old_opaque && new_opaque)
 		ovl_remove_opaque(newdentry);
 
+	/*
+	 * Old dentry now lives in different location. Dentries in
+	 * lowerstack are stale. We cannot drop them here because
+	 * access to them is lockless. This could be only pure upper
+	 * or opaque directory - numlower is zero. Or upper non-dir
+	 * entry - its pureness is tracked by flag opaque.
+	 */
 	if (old_opaque != new_opaque) {
 		ovl_dentry_set_opaque(old, new_opaque);
 		if (!overwrite)

diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 49e2045..a4ff5d0 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c

@@ -65,6 +65,8 @@
 
 		inode_lock(upperdentry->d_inode);
 		err = notify_change(upperdentry, attr, NULL);
+		if (!err)
+			ovl_copyattr(upperdentry->d_inode, dentry->d_inode);
 		inode_unlock(upperdentry->d_inode);
 	}
 	ovl_drop_write(dentry);

diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 8d826bd..619ad4b 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c

@@ -76,12 +76,14 @@
 	if (oe->__upperdentry) {
 		type = __OVL_PATH_UPPER;
 
-		if (oe->numlower) {
-			if (S_ISDIR(dentry->d_inode->i_mode))
-				type |= __OVL_PATH_MERGE;
-		} else if (!oe->opaque) {
+		/*
+		 * Non-dir dentry can hold lower dentry from previous
+		 * location. Its purity depends only on opaque flag.
+		 */
+		if (oe->numlower && S_ISDIR(dentry->d_inode->i_mode))
+			type |= __OVL_PATH_MERGE;
+		else if (!oe->opaque)
 			type |= __OVL_PATH_PURE;
-		}
 	} else {
 		if (oe->numlower > 1)
 			type |= __OVL_PATH_MERGE;
@@ -341,6 +343,7 @@
 
 static const struct dentry_operations ovl_reval_dentry_operations = {
 	.d_release = ovl_dentry_release,
+	.d_select_inode = ovl_d_select_inode,
 	.d_revalidate = ovl_dentry_revalidate,
 	.d_weak_revalidate = ovl_dentry_weak_revalidate,
 };

diff --git a/fs/pnode.c b/fs/pnode.c
index 6367e1e..c524fdd 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c

@@ -202,6 +202,11 @@
 static struct mountpoint *mp;
 static struct hlist_head *list;
 
+static inline bool peers(struct mount *m1, struct mount *m2)
+{
+	return m1->mnt_group_id == m2->mnt_group_id && m1->mnt_group_id;
+}
+
 static int propagate_one(struct mount *m)
 {
 	struct mount *child;
@@ -212,7 +217,7 @@
 	/* skip if mountpoint isn't covered by it */
 	if (!is_subdir(mp->m_dentry, m->mnt.mnt_root))
 		return 0;
-	if (m->mnt_group_id == last_dest->mnt_group_id) {
+	if (peers(m, last_dest)) {
 		type = CL_MAKE_SHARED;
 	} else {
 		struct mount *n, *p;
@@ -223,7 +228,7 @@
 					last_source = last_source->mnt_master;
 					last_dest = last_source->mnt_parent;
 				}
-				if (n->mnt_group_id != last_dest->mnt_group_id) {
+				if (!peers(n, last_dest)) {
 					last_source = last_source->mnt_master;
 					last_dest = last_source->mnt_parent;
 				}

diff --git a/fs/read_write.c b/fs/read_write.c
index 324ec27..dadf24e 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c

@@ -17,6 +17,7 @@
 #include <linux/splice.h>
 #include <linux/compat.h>
 #include <linux/mount.h>
+#include <linux/fs.h>
 #include "internal.h"
 
 #include <asm/uaccess.h>
@@ -183,7 +184,7 @@
 	switch (whence) {
 	case SEEK_SET: case SEEK_CUR:
 		return generic_file_llseek_size(file, offset, whence,
-						~0ULL, 0);
+						OFFSET_MAX, 0);
 	default:
 		return -EINVAL;
 	}
@@ -1532,10 +1533,12 @@
 
 	if (!(file_in->f_mode & FMODE_READ) ||
 	    !(file_out->f_mode & FMODE_WRITE) ||
-	    (file_out->f_flags & O_APPEND) ||
-	    !file_in->f_op->clone_file_range)
+	    (file_out->f_flags & O_APPEND))
 		return -EBADF;
 
+	if (!file_in->f_op->clone_file_range)
+		return -EOPNOTSUPP;
+
 	ret = clone_verify_area(file_in, pos_in, len, false);
 	if (ret)
 		return ret;

diff --git a/fs/super.c b/fs/super.c
index 1182af8..74914b1 100644
--- a/fs/super.c
+++ b/fs/super.c

@@ -415,6 +415,7 @@
 		sb->s_flags &= ~MS_ACTIVE;
 
 		fsnotify_unmount_inodes(sb);
+		cgroup_writeback_umount();
 
 		evict_inodes(sb);
 

diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 5031170..66cdb44 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c

@@ -287,6 +287,12 @@
 		goto out;
 
 	/*
+	 * We don't do userfault handling for the final child pid update.
+	 */
+	if (current->flags & PF_EXITING)
+		goto out;
+
+	/*
 	 * Check that we can return VM_FAULT_RETRY.
 	 *
 	 * NOTE: it should become possible to return VM_FAULT_RETRY

diff --git a/fs/xattr.c b/fs/xattr.c
index 07d0e47..4861322 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c

@@ -940,7 +940,7 @@
 	bool trusted = capable(CAP_SYS_ADMIN);
 	struct simple_xattr *xattr;
 	ssize_t remaining_size = size;
-	int err;
+	int err = 0;
 
 #ifdef CONFIG_FS_POSIX_ACL
 	if (inode->i_acl) {
@@ -965,11 +965,11 @@
 
 		err = xattr_list_one(&buffer, &remaining_size, xattr->name);
 		if (err)
-			return err;
+			break;
 	}
 	spin_unlock(&xattrs->lock);
 
-	return size - remaining_size;
+	return err ? err : size - remaining_size;
 }
 
 /*

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 379c089..a9ebabfe 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c

@@ -55,7 +55,7 @@
 	} while ((bh = bh->b_this_page) != head);
 }
 
-STATIC struct block_device *
+struct block_device *
 xfs_find_bdev_for_inode(
 	struct inode		*inode)
 {
@@ -1208,6 +1208,10 @@
 	struct writeback_control *wbc)
 {
 	xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
+	if (dax_mapping(mapping))
+		return dax_writeback_mapping_range(mapping,
+				xfs_find_bdev_for_inode(mapping->host), wbc);
+
 	return generic_writepages(mapping, wbc);
 }
 

diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
index f6ffc9a..a4343c6 100644
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h

@@ -62,5 +62,6 @@
 			         struct buffer_head *map_bh, int create);
 
 extern void xfs_count_page_state(struct page *, int *, int *);
+extern struct block_device *xfs_find_bdev_for_inode(struct inode *);
 
 #endif /* __XFS_AOPS_H__ */

diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 45ec9e4..6c87601 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c

@@ -75,7 +75,8 @@
 	ssize_t		size = XFS_FSB_TO_B(mp, count_fsb);
 
 	if (IS_DAX(VFS_I(ip)))
-		return dax_clear_blocks(VFS_I(ip), block, size);
+		return dax_clear_sectors(xfs_find_bdev_for_inode(VFS_I(ip)),
+				sector, size);
 
 	/*
 	 * let the block layer decide on the fastest method of

diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 594f7e6..be55688 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c

@@ -1109,27 +1109,10 @@
 	bool			tmp_wrapped;
 
 	/*
-	 * Search backwards through the log looking for the log record header
-	 * block. This wraps all the way back around to the head so something is
-	 * seriously wrong if we can't find it.
-	 */
-	found = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, bp, rhead_blk,
-				      rhead, wrapped);
-	if (found < 0)
-		return found;
-	if (!found) {
-		xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__);
-		return -EIO;
-	}
-
-	*tail_blk = BLOCK_LSN(be64_to_cpu((*rhead)->h_tail_lsn));
-
-	/*
-	 * Now that we have a tail block, check the head of the log for torn
-	 * writes. Search again until we hit the tail or the maximum number of
-	 * log record I/Os that could have been in flight at one time. Use a
-	 * temporary buffer so we don't trash the rhead/bp pointer from the
-	 * call above.
+	 * Check the head of the log for torn writes. Search backwards from the
+	 * head until we hit the tail or the maximum number of log record I/Os
+	 * that could have been in flight at one time. Use a temporary buffer so
+	 * we don't trash the rhead/bp pointers from the caller.
 	 */
 	tmp_bp = xlog_get_bp(log, 1);
 	if (!tmp_bp)
@@ -1216,6 +1199,115 @@
 }
 
 /*
+ * Check whether the head of the log points to an unmount record. In other
+ * words, determine whether the log is clean. If so, update the in-core state
+ * appropriately.
+ */
+static int
+xlog_check_unmount_rec(
+	struct xlog		*log,
+	xfs_daddr_t		*head_blk,
+	xfs_daddr_t		*tail_blk,
+	struct xlog_rec_header	*rhead,
+	xfs_daddr_t		rhead_blk,
+	struct xfs_buf		*bp,
+	bool			*clean)
+{
+	struct xlog_op_header	*op_head;
+	xfs_daddr_t		umount_data_blk;
+	xfs_daddr_t		after_umount_blk;
+	int			hblks;
+	int			error;
+	char			*offset;
+
+	*clean = false;
+
+	/*
+	 * Look for unmount record. If we find it, then we know there was a
+	 * clean unmount. Since 'i' could be the last block in the physical
+	 * log, we convert to a log block before comparing to the head_blk.
+	 *
+	 * Save the current tail lsn to use to pass to xlog_clear_stale_blocks()
+	 * below. We won't want to clear the unmount record if there is one, so
+	 * we pass the lsn of the unmount record rather than the block after it.
+	 */
+	if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
+		int	h_size = be32_to_cpu(rhead->h_size);
+		int	h_version = be32_to_cpu(rhead->h_version);
+
+		if ((h_version & XLOG_VERSION_2) &&
+		    (h_size > XLOG_HEADER_CYCLE_SIZE)) {
+			hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
+			if (h_size % XLOG_HEADER_CYCLE_SIZE)
+				hblks++;
+		} else {
+			hblks = 1;
+		}
+	} else {
+		hblks = 1;
+	}
+	after_umount_blk = rhead_blk + hblks + BTOBB(be32_to_cpu(rhead->h_len));
+	after_umount_blk = do_mod(after_umount_blk, log->l_logBBsize);
+	if (*head_blk == after_umount_blk &&
+	    be32_to_cpu(rhead->h_num_logops) == 1) {
+		umount_data_blk = rhead_blk + hblks;
+		umount_data_blk = do_mod(umount_data_blk, log->l_logBBsize);
+		error = xlog_bread(log, umount_data_blk, 1, bp, &offset);
+		if (error)
+			return error;
+
+		op_head = (struct xlog_op_header *)offset;
+		if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) {
+			/*
+			 * Set tail and last sync so that newly written log
+			 * records will point recovery to after the current
+			 * unmount record.
+			 */
+			xlog_assign_atomic_lsn(&log->l_tail_lsn,
+					log->l_curr_cycle, after_umount_blk);
+			xlog_assign_atomic_lsn(&log->l_last_sync_lsn,
+					log->l_curr_cycle, after_umount_blk);
+			*tail_blk = after_umount_blk;
+
+			*clean = true;
+		}
+	}
+
+	return 0;
+}
+
+static void
+xlog_set_state(
+	struct xlog		*log,
+	xfs_daddr_t		head_blk,
+	struct xlog_rec_header	*rhead,
+	xfs_daddr_t		rhead_blk,
+	bool			bump_cycle)
+{
+	/*
+	 * Reset log values according to the state of the log when we
+	 * crashed.  In the case where head_blk == 0, we bump curr_cycle
+	 * one because the next write starts a new cycle rather than
+	 * continuing the cycle of the last good log record.  At this
+	 * point we have guaranteed that all partial log records have been
+	 * accounted for.  Therefore, we know that the last good log record
+	 * written was complete and ended exactly on the end boundary
+	 * of the physical log.
+	 */
+	log->l_prev_block = rhead_blk;
+	log->l_curr_block = (int)head_blk;
+	log->l_curr_cycle = be32_to_cpu(rhead->h_cycle);
+	if (bump_cycle)
+		log->l_curr_cycle++;
+	atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn));
+	atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn));
+	xlog_assign_grant_head(&log->l_reserve_head.grant, log->l_curr_cycle,
+					BBTOB(log->l_curr_block));
+	xlog_assign_grant_head(&log->l_write_head.grant, log->l_curr_cycle,
+					BBTOB(log->l_curr_block));
+}
+
+/*
  * Find the sync block number or the tail of the log.
  *
  * This will be the block number of the last record to have its
@@ -1238,22 +1330,20 @@
 	xfs_daddr_t		*tail_blk)
 {
 	xlog_rec_header_t	*rhead;
-	xlog_op_header_t	*op_head;
 	char			*offset = NULL;
 	xfs_buf_t		*bp;
 	int			error;
-	xfs_daddr_t		umount_data_blk;
-	xfs_daddr_t		after_umount_blk;
 	xfs_daddr_t		rhead_blk;
 	xfs_lsn_t		tail_lsn;
-	int			hblks;
 	bool			wrapped = false;
+	bool			clean = false;
 
 	/*
 	 * Find previous log record
 	 */
 	if ((error = xlog_find_head(log, head_blk)))
 		return error;
+	ASSERT(*head_blk < INT_MAX);
 
 	bp = xlog_get_bp(log, 1);
 	if (!bp)
@@ -1271,100 +1361,75 @@
 	}
 
 	/*
-	 * Trim the head block back to skip over torn records. We can have
-	 * multiple log I/Os in flight at any time, so we assume CRC failures
-	 * back through the previous several records are torn writes and skip
-	 * them.
+	 * Search backwards through the log looking for the log record header
+	 * block. This wraps all the way back around to the head so something is
+	 * seriously wrong if we can't find it.
 	 */
-	ASSERT(*head_blk < INT_MAX);
-	error = xlog_verify_head(log, head_blk, tail_blk, bp, &rhead_blk,
-				 &rhead, &wrapped);
+	error = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, bp,
+				      &rhead_blk, &rhead, &wrapped);
+	if (error < 0)
+		return error;
+	if (!error) {
+		xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__);
+		return -EIO;
+	}
+	*tail_blk = BLOCK_LSN(be64_to_cpu(rhead->h_tail_lsn));
+
+	/*
+	 * Set the log state based on the current head record.
+	 */
+	xlog_set_state(log, *head_blk, rhead, rhead_blk, wrapped);
+	tail_lsn = atomic64_read(&log->l_tail_lsn);
+
+	/*
+	 * Look for an unmount record at the head of the log. This sets the log
+	 * state to determine whether recovery is necessary.
+	 */
+	error = xlog_check_unmount_rec(log, head_blk, tail_blk, rhead,
+				       rhead_blk, bp, &clean);
 	if (error)
 		goto done;
 
 	/*
-	 * Reset log values according to the state of the log when we
-	 * crashed.  In the case where head_blk == 0, we bump curr_cycle
-	 * one because the next write starts a new cycle rather than
-	 * continuing the cycle of the last good log record.  At this
-	 * point we have guaranteed that all partial log records have been
-	 * accounted for.  Therefore, we know that the last good log record
-	 * written was complete and ended exactly on the end boundary
-	 * of the physical log.
-	 */
-	log->l_prev_block = rhead_blk;
-	log->l_curr_block = (int)*head_blk;
-	log->l_curr_cycle = be32_to_cpu(rhead->h_cycle);
-	if (wrapped)
-		log->l_curr_cycle++;
-	atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn));
-	atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn));
-	xlog_assign_grant_head(&log->l_reserve_head.grant, log->l_curr_cycle,
-					BBTOB(log->l_curr_block));
-	xlog_assign_grant_head(&log->l_write_head.grant, log->l_curr_cycle,
-					BBTOB(log->l_curr_block));
-
-	/*
-	 * Look for unmount record.  If we find it, then we know there
-	 * was a clean unmount.  Since 'i' could be the last block in
-	 * the physical log, we convert to a log block before comparing
-	 * to the head_blk.
+	 * Verify the log head if the log is not clean (e.g., we have anything
+	 * but an unmount record at the head). This uses CRC verification to
+	 * detect and trim torn writes. If discovered, CRC failures are
+	 * considered torn writes and the log head is trimmed accordingly.
 	 *
-	 * Save the current tail lsn to use to pass to
-	 * xlog_clear_stale_blocks() below.  We won't want to clear the
-	 * unmount record if there is one, so we pass the lsn of the
-	 * unmount record rather than the block after it.
+	 * Note that we can only run CRC verification when the log is dirty
+	 * because there's no guarantee that the log data behind an unmount
+	 * record is compatible with the current architecture.
 	 */
-	if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
-		int	h_size = be32_to_cpu(rhead->h_size);
-		int	h_version = be32_to_cpu(rhead->h_version);
+	if (!clean) {
+		xfs_daddr_t	orig_head = *head_blk;
 
-		if ((h_version & XLOG_VERSION_2) &&
-		    (h_size > XLOG_HEADER_CYCLE_SIZE)) {
-			hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
-			if (h_size % XLOG_HEADER_CYCLE_SIZE)
-				hblks++;
-		} else {
-			hblks = 1;
-		}
-	} else {
-		hblks = 1;
-	}
-	after_umount_blk = rhead_blk + hblks + BTOBB(be32_to_cpu(rhead->h_len));
-	after_umount_blk = do_mod(after_umount_blk, log->l_logBBsize);
-	tail_lsn = atomic64_read(&log->l_tail_lsn);
-	if (*head_blk == after_umount_blk &&
-	    be32_to_cpu(rhead->h_num_logops) == 1) {
-		umount_data_blk = rhead_blk + hblks;
-		umount_data_blk = do_mod(umount_data_blk, log->l_logBBsize);
-		error = xlog_bread(log, umount_data_blk, 1, bp, &offset);
+		error = xlog_verify_head(log, head_blk, tail_blk, bp,
+					 &rhead_blk, &rhead, &wrapped);
 		if (error)
 			goto done;
 
-		op_head = (xlog_op_header_t *)offset;
-		if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) {
-			/*
-			 * Set tail and last sync so that newly written
-			 * log records will point recovery to after the
-			 * current unmount record.
-			 */
-			xlog_assign_atomic_lsn(&log->l_tail_lsn,
-					log->l_curr_cycle, after_umount_blk);
-			xlog_assign_atomic_lsn(&log->l_last_sync_lsn,
-					log->l_curr_cycle, after_umount_blk);
-			*tail_blk = after_umount_blk;
-
-			/*
-			 * Note that the unmount was clean. If the unmount
-			 * was not clean, we need to know this to rebuild the
-			 * superblock counters from the perag headers if we
-			 * have a filesystem using non-persistent counters.
-			 */
-			log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN;
+		/* update in-core state again if the head changed */
+		if (*head_blk != orig_head) {
+			xlog_set_state(log, *head_blk, rhead, rhead_blk,
+				       wrapped);
+			tail_lsn = atomic64_read(&log->l_tail_lsn);
+			error = xlog_check_unmount_rec(log, head_blk, tail_blk,
+						       rhead, rhead_blk, bp,
+						       &clean);
+			if (error)
+				goto done;
 		}
 	}
 
 	/*
+	 * Note that the unmount was clean. If the unmount was not clean, we
+	 * need to know this to rebuild the superblock counters from the perag
+	 * headers if we have a filesystem using non-persistent counters.
+	 */
+	if (clean)
+		log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN;
+
+	/*
 	 * Make sure that there are no blocks in front of the head
 	 * with the same cycle number as the head.  This can happen
 	 * because we allow multiple outstanding log writes concurrently,

diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
index 39e1cb2..35a52a8 100644
--- a/include/asm-generic/qspinlock.h
+++ b/include/asm-generic/qspinlock.h

@@ -120,11 +120,6 @@
 #endif
 
 /*
- * Initializier
- */
-#define	__ARCH_SPIN_LOCK_UNLOCKED	{ ATOMIC_INIT(0) }
-
-/*
  * Remapping spinlock architecture specific functions to the corresponding
  * queued spinlock functions.
  */

diff --git a/include/asm-generic/qspinlock_types.h b/include/asm-generic/qspinlock_types.h
index 85f888e..034acd0 100644
--- a/include/asm-generic/qspinlock_types.h
+++ b/include/asm-generic/qspinlock_types.h

@@ -33,6 +33,11 @@
 } arch_spinlock_t;
 
 /*
+ * Initializier
+ */
+#define	__ARCH_SPIN_LOCK_UNLOCKED	{ ATOMIC_INIT(0) }
+
+/*
  * Bitfields in the atomic value:
  *
  * When NR_CPUS < 16K

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index c4bd0e2..772c784 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h

@@ -256,6 +256,7 @@
 	.rodata           : AT(ADDR(.rodata) - LOAD_OFFSET) {		\
 		VMLINUX_SYMBOL(__start_rodata) = .;			\
 		*(.rodata) *(.rodata.*)					\
+		*(.data..ro_after_init)	/* Read only after init */	\
 		*(__vermagic)		/* Kernel version magic */	\
 		. = ALIGN(8);						\
 		VMLINUX_SYMBOL(__start___tracepoints_ptrs) = .;		\

diff --git a/include/linux/ata.h b/include/linux/ata.h
index d2992bf..c1a2f34 100644
--- a/include/linux/ata.h
+++ b/include/linux/ata.h

@@ -487,8 +487,8 @@
 };
 
 enum ata_ioctls {
-	ATA_IOC_GET_IO32	= 0x309,
-	ATA_IOC_SET_IO32	= 0x324,
+	ATA_IOC_GET_IO32	= 0x309, /* HDIO_GET_32BIT */
+	ATA_IOC_SET_IO32	= 0x324, /* HDIO_SET_32BIT */
 };
 
 /* core structures */

diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 301de78..6c502cb 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h

@@ -548,6 +548,27 @@
 }
 #endif
 
+/**
+ * fetch_or - perform *ptr |= mask and return old value of *ptr
+ * @ptr: pointer to value
+ * @mask: mask to OR on the value
+ *
+ * cmpxchg based fetch_or, macro so it works for different integer types
+ */
+#ifndef fetch_or
+#define fetch_or(ptr, mask)						\
+({	typeof(*(ptr)) __old, __val = *(ptr);				\
+	for (;;) {							\
+		__old = cmpxchg((ptr), __val, __val | (mask));		\
+		if (__old == __val)					\
+			break;						\
+		__val = __old;						\
+	}								\
+	__old;								\
+})
+#endif
+
+
 #ifdef CONFIG_GENERIC_ATOMIC64
 #include <asm-generic/atomic64.h>
 #endif

diff --git a/include/linux/bio.h b/include/linux/bio.h
index 5349e68..88bc64f 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h

@@ -310,6 +310,38 @@
 	bio->bi_flags &= ~(1U << bit);
 }
 
+static inline void bio_get_first_bvec(struct bio *bio, struct bio_vec *bv)
+{
+	*bv = bio_iovec(bio);
+}
+
+static inline void bio_get_last_bvec(struct bio *bio, struct bio_vec *bv)
+{
+	struct bvec_iter iter = bio->bi_iter;
+	int idx;
+
+	if (unlikely(!bio_multiple_segments(bio))) {
+		*bv = bio_iovec(bio);
+		return;
+	}
+
+	bio_advance_iter(bio, &iter, iter.bi_size);
+
+	if (!iter.bi_bvec_done)
+		idx = iter.bi_idx - 1;
+	else	/* in the middle of bvec */
+		idx = iter.bi_idx;
+
+	*bv = bio->bi_io_vec[idx];
+
+	/*
+	 * iter.bi_bvec_done records actual length of the last bvec
+	 * if this bio ends in the middle of one io vector
+	 */
+	if (iter.bi_bvec_done)
+		bv->bv_len = iter.bi_bvec_done;
+}
+
 enum bip_flags {
 	BIP_BLOCK_INTEGRITY	= 1 << 0, /* block layer owns integrity data */
 	BIP_MAPPED_INTEGRITY	= 1 << 1, /* ref tag has been remapped */

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 4571ef1..413c84f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h

@@ -895,7 +895,7 @@
 {
 	struct request_queue *q = rq->q;
 
-	if (unlikely(rq->cmd_type == REQ_TYPE_BLOCK_PC))
+	if (unlikely(rq->cmd_type != REQ_TYPE_FS))
 		return q->limits.max_hw_sectors;
 
 	if (!q->limits.chunk_sectors || (rq->cmd_flags & REQ_DISCARD))
@@ -1372,6 +1372,13 @@
 	page_cache_release(p.v);
 }
 
+static inline bool __bvec_gap_to_prev(struct request_queue *q,
+				struct bio_vec *bprv, unsigned int offset)
+{
+	return offset ||
+		((bprv->bv_offset + bprv->bv_len) & queue_virt_boundary(q));
+}
+
 /*
  * Check if adding a bio_vec after bprv with offset would create a gap in
  * the SG list. Most drivers don't care about this, but some do.
@@ -1381,18 +1388,22 @@
 {
 	if (!queue_virt_boundary(q))
 		return false;
-	return offset ||
-		((bprv->bv_offset + bprv->bv_len) & queue_virt_boundary(q));
+	return __bvec_gap_to_prev(q, bprv, offset);
 }
 
 static inline bool bio_will_gap(struct request_queue *q, struct bio *prev,
 			 struct bio *next)
 {
-	if (!bio_has_data(prev))
-		return false;
+	if (bio_has_data(prev) && queue_virt_boundary(q)) {
+		struct bio_vec pb, nb;
 
-	return bvec_gap_to_prev(q, &prev->bi_io_vec[prev->bi_vcnt - 1],
-				next->bi_io_vec[0].bv_offset);
+		bio_get_last_bvec(prev, &pb);
+		bio_get_first_bvec(next, &nb);
+
+		return __bvec_gap_to_prev(q, &pb, nb.bv_offset);
+	}
+
+	return false;
 }
 
 static inline bool req_gap_back_merge(struct request *req, struct bio *bio)

diff --git a/include/linux/cache.h b/include/linux/cache.h
index 17e7e82..1be04f8 100644
--- a/include/linux/cache.h
+++ b/include/linux/cache.h

@@ -12,10 +12,24 @@
 #define SMP_CACHE_BYTES L1_CACHE_BYTES
 #endif
 
+/*
+ * __read_mostly is used to keep rarely changing variables out of frequently
+ * updated cachelines. If an architecture doesn't support it, ignore the
+ * hint.
+ */
 #ifndef __read_mostly
 #define __read_mostly
 #endif
 
+/*
+ * __ro_after_init is used to mark things that are read-only after init (i.e.
+ * after mark_rodata_ro() has been called). These are effectively read-only,
+ * but may get written to during init, so can't live in .rodata (via "const").
+ */
+#ifndef __ro_after_init
+#define __ro_after_init __attribute__((__section__(".data..ro_after_init")))
+#endif
+
 #ifndef ____cacheline_aligned
 #define ____cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES)))
 #endif

diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h
index c1ef6f1..15151f3 100644
--- a/include/linux/ceph/ceph_features.h
+++ b/include/linux/ceph/ceph_features.h

@@ -75,6 +75,7 @@
 #define CEPH_FEATURE_CRUSH_TUNABLES5	(1ULL<<58) /* chooseleaf stable mode */
 // duplicated since it was introduced at the same time as CEPH_FEATURE_CRUSH_TUNABLES5
 #define CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING   (1ULL<<58) /* New, v7 encoding */
+#define CEPH_FEATURE_FS_FILE_LAYOUT_V2       (1ULL<<58) /* file_layout_t */
 
 /*
  * The introduction of CEPH_FEATURE_OSD_SNAPMAPPER caused the feature

diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index bdcf358..0d442e3 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h

@@ -190,9 +190,9 @@
 extern int clockevents_update_freq(struct clock_event_device *ce, u32 freq);
 
 static inline void
-clockevents_calc_mult_shift(struct clock_event_device *ce, u32 freq, u32 minsec)
+clockevents_calc_mult_shift(struct clock_event_device *ce, u32 freq, u32 maxsec)
 {
-	return clocks_calc_mult_shift(&ce->mult, &ce->shift, NSEC_PER_SEC, freq, minsec);
+	return clocks_calc_mult_shift(&ce->mult, &ce->shift, NSEC_PER_SEC, freq, maxsec);
 }
 
 extern void clockevents_suspend(void);

diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 6013021..a307bf6 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h

@@ -118,6 +118,23 @@
 /* simplify initialization of mask field */
 #define CLOCKSOURCE_MASK(bits) (cycle_t)((bits) < 64 ? ((1ULL<<(bits))-1) : -1)
 
+static inline u32 clocksource_freq2mult(u32 freq, u32 shift_constant, u64 from)
+{
+	/*  freq = cyc/from
+	 *  mult/2^shift  = ns/cyc
+	 *  mult = ns/cyc * 2^shift
+	 *  mult = from/freq * 2^shift
+	 *  mult = from * 2^shift / freq
+	 *  mult = (from<<shift) / freq
+	 */
+	u64 tmp = ((u64)from) << shift_constant;
+
+	tmp += freq/2; /* round for do_div */
+	do_div(tmp, freq);
+
+	return (u32)tmp;
+}
+
 /**
  * clocksource_khz2mult - calculates mult from khz and shift
  * @khz:		Clocksource frequency in KHz
@@ -128,19 +145,7 @@
  */
 static inline u32 clocksource_khz2mult(u32 khz, u32 shift_constant)
 {
-	/*  khz = cyc/(Million ns)
-	 *  mult/2^shift  = ns/cyc
-	 *  mult = ns/cyc * 2^shift
-	 *  mult = 1Million/khz * 2^shift
-	 *  mult = 1000000 * 2^shift / khz
-	 *  mult = (1000000<<shift) / khz
-	 */
-	u64 tmp = ((u64)1000000) << shift_constant;
-
-	tmp += khz/2; /* round for do_div */
-	do_div(tmp, khz);
-
-	return (u32)tmp;
+	return clocksource_freq2mult(khz, shift_constant, NSEC_PER_MSEC);
 }
 
 /**
@@ -154,19 +159,7 @@
  */
 static inline u32 clocksource_hz2mult(u32 hz, u32 shift_constant)
 {
-	/*  hz = cyc/(Billion ns)
-	 *  mult/2^shift  = ns/cyc
-	 *  mult = ns/cyc * 2^shift
-	 *  mult = 1Billion/hz * 2^shift
-	 *  mult = 1000000000 * 2^shift / hz
-	 *  mult = (1000000000<<shift) / hz
-	 */
-	u64 tmp = ((u64)1000000000) << shift_constant;
-
-	tmp += hz/2; /* round for do_div */
-	do_div(tmp, hz);
-
-	return (u32)tmp;
+	return clocksource_freq2mult(hz, shift_constant, NSEC_PER_SEC);
 }
 
 /**

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 48f5aab..b5ff988 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h

@@ -20,12 +20,14 @@
 # define __pmem		__attribute__((noderef, address_space(5)))
 #ifdef CONFIG_SPARSE_RCU_POINTER
 # define __rcu		__attribute__((noderef, address_space(4)))
-#else
+#else /* CONFIG_SPARSE_RCU_POINTER */
 # define __rcu
-#endif
+#endif /* CONFIG_SPARSE_RCU_POINTER */
+# define __private	__attribute__((noderef))
 extern void __chk_user_ptr(const volatile void __user *);
 extern void __chk_io_ptr(const volatile void __iomem *);
-#else
+# define ACCESS_PRIVATE(p, member) (*((typeof((p)->member) __force *) &(p)->member))
+#else /* __CHECKER__ */
 # define __user
 # define __kernel
 # define __safe
@@ -44,7 +46,9 @@
 # define __percpu
 # define __rcu
 # define __pmem
-#endif
+# define __private
+# define ACCESS_PRIVATE(p, member) ((p)->member)
+#endif /* __CHECKER__ */
 
 /* Indirect macros required for expanded argument pasting, eg. __LINE__. */
 #define ___PASTE(a,b) a##b
@@ -263,8 +267,9 @@
  * In contrast to ACCESS_ONCE these two macros will also work on aggregate
  * data types like structs or unions. If the size of the accessed data
  * type exceeds the word size of the machine (e.g., 32 bits or 64 bits)
- * READ_ONCE() and WRITE_ONCE()  will fall back to memcpy and print a
- * compile-time warning.
+ * READ_ONCE() and WRITE_ONCE() will fall back to memcpy(). There's at
+ * least two memcpy()s: one for the __builtin_memcpy() and then one for
+ * the macro doing the copy of variable - '__u' allocated on the stack.
  *
  * Their two major use cases are: (1) Mediating communication between
  * process-level code and irq/NMI handlers, all running on the same CPU,

diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index d2ca8c3..f9b1fab 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h

@@ -16,6 +16,7 @@
 #include <linux/node.h>
 #include <linux/compiler.h>
 #include <linux/cpumask.h>
+#include <linux/cpuhotplug.h>
 
 struct device;
 struct device_node;
@@ -27,6 +28,9 @@
 	struct device dev;
 };
 
+extern void boot_cpu_init(void);
+extern void boot_cpu_state_init(void);
+
 extern int register_cpu(struct cpu *cpu, int num);
 extern struct device *get_cpu_device(unsigned cpu);
 extern bool cpu_is_hotpluggable(unsigned cpu);
@@ -74,7 +78,7 @@
 	/* migration should happen before other stuff but after perf */
 	CPU_PRI_PERF		= 20,
 	CPU_PRI_MIGRATION	= 10,
-	CPU_PRI_SMPBOOT		= 9,
+
 	/* bring up workqueues before normal notifiers and down after */
 	CPU_PRI_WORKQUEUE_UP	= 5,
 	CPU_PRI_WORKQUEUE_DOWN	= -5,
@@ -97,9 +101,7 @@
 					* Called on the new cpu, just before
 					* enabling interrupts. Must not sleep,
 					* must not fail */
-#define CPU_DYING_IDLE		0x000B /* CPU (unsigned)v dying, reached
-					* idle loop. */
-#define CPU_BROKEN		0x000C /* CPU (unsigned)v did not die properly,
+#define CPU_BROKEN		0x000B /* CPU (unsigned)v did not die properly,
 					* perhaps due to preemption. */
 
 /* Used for CPU hotplug events occurring while tasks are frozen due to a suspend
@@ -118,6 +120,7 @@
 
 
 #ifdef CONFIG_SMP
+extern bool cpuhp_tasks_frozen;
 /* Need to know about CPUs going up/down? */
 #if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE)
 #define cpu_notifier(fn, pri) {					\
@@ -167,7 +170,6 @@
 }
 #endif
 
-void smpboot_thread_init(void);
 int cpu_up(unsigned int cpu);
 void notify_cpu_starting(unsigned int cpu);
 extern void cpu_maps_update_begin(void);
@@ -177,6 +179,7 @@
 #define cpu_notifier_register_done	cpu_maps_update_done
 
 #else	/* CONFIG_SMP */
+#define cpuhp_tasks_frozen	0
 
 #define cpu_notifier(fn, pri)	do { (void)(fn); } while (0)
 #define __cpu_notifier(fn, pri)	do { (void)(fn); } while (0)
@@ -215,10 +218,6 @@
 {
 }
 
-static inline void smpboot_thread_init(void)
-{
-}
-
 #endif /* CONFIG_SMP */
 extern struct bus_type cpu_subsys;
 
@@ -265,11 +264,6 @@
 static inline void enable_nonboot_cpus(void) {}
 #endif /* !CONFIG_PM_SLEEP_SMP */
 
-enum cpuhp_state {
-	CPUHP_OFFLINE,
-	CPUHP_ONLINE,
-};
-
 void cpu_startup_entry(enum cpuhp_state state);
 
 void cpu_idle_poll_ctrl(bool enable);
@@ -280,14 +274,15 @@
 void arch_cpu_idle_exit(void);
 void arch_cpu_idle_dead(void);
 
-DECLARE_PER_CPU(bool, cpu_dead_idle);
-
 int cpu_report_state(int cpu);
 int cpu_check_up_prepare(int cpu);
 void cpu_set_state_online(int cpu);
 #ifdef CONFIG_HOTPLUG_CPU
 bool cpu_wait_death(unsigned int cpu, int seconds);
 bool cpu_report_death(void);
+void cpuhp_report_idle_dead(void);
+#else
+static inline void cpuhp_report_idle_dead(void) { }
 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
 
 #endif /* _LINUX_CPU_H_ */

diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
new file mode 100644
index 0000000..5d68e15
--- /dev/null
+++ b/include/linux/cpuhotplug.h

@@ -0,0 +1,93 @@
+#ifndef __CPUHOTPLUG_H
+#define __CPUHOTPLUG_H
+
+enum cpuhp_state {
+	CPUHP_OFFLINE,
+	CPUHP_CREATE_THREADS,
+	CPUHP_NOTIFY_PREPARE,
+	CPUHP_BRINGUP_CPU,
+	CPUHP_AP_IDLE_DEAD,
+	CPUHP_AP_OFFLINE,
+	CPUHP_AP_NOTIFY_STARTING,
+	CPUHP_AP_ONLINE,
+	CPUHP_TEARDOWN_CPU,
+	CPUHP_AP_ONLINE_IDLE,
+	CPUHP_AP_SMPBOOT_THREADS,
+	CPUHP_AP_NOTIFY_ONLINE,
+	CPUHP_AP_ONLINE_DYN,
+	CPUHP_AP_ONLINE_DYN_END		= CPUHP_AP_ONLINE_DYN + 30,
+	CPUHP_ONLINE,
+};
+
+int __cpuhp_setup_state(enum cpuhp_state state,	const char *name, bool invoke,
+			int (*startup)(unsigned int cpu),
+			int (*teardown)(unsigned int cpu));
+
+/**
+ * cpuhp_setup_state - Setup hotplug state callbacks with calling the callbacks
+ * @state:	The state for which the calls are installed
+ * @name:	Name of the callback (will be used in debug output)
+ * @startup:	startup callback function
+ * @teardown:	teardown callback function
+ *
+ * Installs the callback functions and invokes the startup callback on
+ * the present cpus which have already reached the @state.
+ */
+static inline int cpuhp_setup_state(enum cpuhp_state state,
+				    const char *name,
+				    int (*startup)(unsigned int cpu),
+				    int (*teardown)(unsigned int cpu))
+{
+	return __cpuhp_setup_state(state, name, true, startup, teardown);
+}
+
+/**
+ * cpuhp_setup_state_nocalls - Setup hotplug state callbacks without calling the
+ *			       callbacks
+ * @state:	The state for which the calls are installed
+ * @name:	Name of the callback.
+ * @startup:	startup callback function
+ * @teardown:	teardown callback function
+ *
+ * Same as @cpuhp_setup_state except that no calls are executed are invoked
+ * during installation of this callback. NOP if SMP=n or HOTPLUG_CPU=n.
+ */
+static inline int cpuhp_setup_state_nocalls(enum cpuhp_state state,
+					    const char *name,
+					    int (*startup)(unsigned int cpu),
+					    int (*teardown)(unsigned int cpu))
+{
+	return __cpuhp_setup_state(state, name, false, startup, teardown);
+}
+
+void __cpuhp_remove_state(enum cpuhp_state state, bool invoke);
+
+/**
+ * cpuhp_remove_state - Remove hotplug state callbacks and invoke the teardown
+ * @state:	The state for which the calls are removed
+ *
+ * Removes the callback functions and invokes the teardown callback on
+ * the present cpus which have already reached the @state.
+ */
+static inline void cpuhp_remove_state(enum cpuhp_state state)
+{
+	__cpuhp_remove_state(state, true);
+}
+
+/**
+ * cpuhp_remove_state_nocalls - Remove hotplug state callbacks without invoking
+ *				teardown
+ * @state:	The state for which the calls are removed
+ */
+static inline void cpuhp_remove_state_nocalls(enum cpuhp_state state)
+{
+	__cpuhp_remove_state(state, false);
+}
+
+#ifdef CONFIG_SMP
+void cpuhp_online_idle(enum cpuhp_state state);
+#else
+static inline void cpuhp_online_idle(enum cpuhp_state state) { }
+#endif
+
+#endif

diff --git a/include/linux/dax.h b/include/linux/dax.h
index 818e450..636dd59 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h

@@ -7,7 +7,7 @@
 
 ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *, loff_t,
 		  get_block_t, dio_iodone_t, int flags);
-int dax_clear_blocks(struct inode *, sector_t block, long size);
+int dax_clear_sectors(struct block_device *bdev, sector_t _sector, long _size);
 int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t);
 int dax_truncate_page(struct inode *, loff_t from, get_block_t);
 int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t,
@@ -52,6 +52,8 @@
 {
 	return mapping->host && IS_DAX(mapping->host);
 }
-int dax_writeback_mapping_range(struct address_space *mapping, loff_t start,
-		loff_t end);
+
+struct writeback_control;
+int dax_writeback_mapping_range(struct address_space *mapping,
+		struct block_device *bdev, struct writeback_control *wbc);
 #endif

diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 7781ce11..c4b5f4b 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h

@@ -409,9 +409,7 @@
  */
 static inline unsigned __d_entry_type(const struct dentry *dentry)
 {
-	unsigned type = READ_ONCE(dentry->d_flags);
-	smp_rmb();
-	return type & DCACHE_ENTRY_TYPE;
+	return dentry->d_flags & DCACHE_ENTRY_TYPE;
 }
 
 static inline bool d_is_miss(const struct dentry *dentry)

diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 75857cd..5e45cf9 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h

@@ -386,7 +386,7 @@
 	if (dma_release_from_coherent(dev, get_order(size), cpu_addr))
 		return;
 
-	if (!ops->free)
+	if (!ops->free || !cpu_addr)
 		return;
 
 	debug_dma_free_coherent(dev, size, cpu_addr, dma_handle);
@@ -641,31 +641,40 @@
 }
 #endif /* CONFIG_HAVE_GENERIC_DMA_COHERENT */
 
-static inline void *dma_alloc_writecombine(struct device *dev, size_t size,
-					   dma_addr_t *dma_addr, gfp_t gfp)
+static inline void *dma_alloc_wc(struct device *dev, size_t size,
+				 dma_addr_t *dma_addr, gfp_t gfp)
 {
 	DEFINE_DMA_ATTRS(attrs);
 	dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
 	return dma_alloc_attrs(dev, size, dma_addr, gfp, &attrs);
 }
+#ifndef dma_alloc_writecombine
+#define dma_alloc_writecombine dma_alloc_wc
+#endif
 
-static inline void dma_free_writecombine(struct device *dev, size_t size,
-					 void *cpu_addr, dma_addr_t dma_addr)
+static inline void dma_free_wc(struct device *dev, size_t size,
+			       void *cpu_addr, dma_addr_t dma_addr)
 {
 	DEFINE_DMA_ATTRS(attrs);
 	dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
 	return dma_free_attrs(dev, size, cpu_addr, dma_addr, &attrs);
 }
+#ifndef dma_free_writecombine
+#define dma_free_writecombine dma_free_wc
+#endif
 
-static inline int dma_mmap_writecombine(struct device *dev,
-					struct vm_area_struct *vma,
-					void *cpu_addr, dma_addr_t dma_addr,
-					size_t size)
+static inline int dma_mmap_wc(struct device *dev,
+			      struct vm_area_struct *vma,
+			      void *cpu_addr, dma_addr_t dma_addr,
+			      size_t size)
 {
 	DEFINE_DMA_ATTRS(attrs);
 	dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
 	return dma_mmap_attrs(dev, vma, cpu_addr, dma_addr, size, &attrs);
 }
+#ifndef dma_mmap_writecombine
+#define dma_mmap_writecombine dma_mmap_wc
+#endif
 
 #ifdef CONFIG_NEED_DMA_MAP_STATE
 #define DEFINE_DMA_UNMAP_ADDR(ADDR_NAME)        dma_addr_t ADDR_NAME

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index c2b340e..6d9df3f 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h

@@ -713,6 +713,18 @@
 #define CALLER_ADDR5 ((unsigned long)ftrace_return_address(5))
 #define CALLER_ADDR6 ((unsigned long)ftrace_return_address(6))
 
+static inline unsigned long get_lock_parent_ip(void)
+{
+	unsigned long addr = CALLER_ADDR0;
+
+	if (!in_lock_functions(addr))
+		return addr;
+	addr = CALLER_ADDR1;
+	if (!in_lock_functions(addr))
+		return addr;
+	return CALLER_ADDR2;
+}
+
 #ifdef CONFIG_IRQSOFF_TRACER
   extern void time_hardirqs_on(unsigned long a0, unsigned long a1);
   extern void time_hardirqs_off(unsigned long a0, unsigned long a1);

diff --git a/include/linux/init.h b/include/linux/init.h
index b449f37..aedb254 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h

@@ -142,6 +142,10 @@
 void __init load_default_modules(void);
 int __init init_rootfs(void);
 
+#ifdef CONFIG_DEBUG_RODATA
+void mark_rodata_ro(void);
+#endif
+
 extern void (*late_time_init)(void);
 
 extern bool initcall_debug;

diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 24bea08..afb4559 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h

@@ -20,6 +20,7 @@
 	resource_size_t end;
 	const char *name;
 	unsigned long flags;
+	unsigned long desc;
 	struct resource *parent, *sibling, *child;
 };
 
@@ -49,12 +50,19 @@
 #define IORESOURCE_WINDOW	0x00200000	/* forwarded by bridge */
 #define IORESOURCE_MUXED	0x00400000	/* Resource is software muxed */
 
+#define IORESOURCE_EXT_TYPE_BITS 0x01000000	/* Resource extended types */
+#define IORESOURCE_SYSRAM	0x01000000	/* System RAM (modifier) */
+
 #define IORESOURCE_EXCLUSIVE	0x08000000	/* Userland may not map this resource */
+
 #define IORESOURCE_DISABLED	0x10000000
 #define IORESOURCE_UNSET	0x20000000	/* No address assigned yet */
 #define IORESOURCE_AUTO		0x40000000
 #define IORESOURCE_BUSY		0x80000000	/* Driver has marked this resource busy */
 
+/* I/O resource extended types */
+#define IORESOURCE_SYSTEM_RAM		(IORESOURCE_MEM|IORESOURCE_SYSRAM)
+
 /* PnP IRQ specific bits (IORESOURCE_BITS) */
 #define IORESOURCE_IRQ_HIGHEDGE		(1<<0)
 #define IORESOURCE_IRQ_LOWEDGE		(1<<1)
@@ -105,6 +113,22 @@
 /* PCI control bits.  Shares IORESOURCE_BITS with above PCI ROM.  */
 #define IORESOURCE_PCI_FIXED		(1<<4)	/* Do not move resource */
 
+/*
+ * I/O Resource Descriptors
+ *
+ * Descriptors are used by walk_iomem_res_desc() and region_intersects()
+ * for searching a specific resource range in the iomem table.  Assign
+ * a new descriptor when a resource range supports the search interfaces.
+ * Otherwise, resource.desc must be set to IORES_DESC_NONE (0).
+ */
+enum {
+	IORES_DESC_NONE				= 0,
+	IORES_DESC_CRASH_KERNEL			= 1,
+	IORES_DESC_ACPI_TABLES			= 2,
+	IORES_DESC_ACPI_NV_STORAGE		= 3,
+	IORES_DESC_PERSISTENT_MEMORY		= 4,
+	IORES_DESC_PERSISTENT_MEMORY_LEGACY	= 5,
+};
 
 /* helpers to define resources */
 #define DEFINE_RES_NAMED(_start, _size, _name, _flags)			\
@@ -113,6 +137,7 @@
 		.end = (_start) + (_size) - 1,				\
 		.name = (_name),					\
 		.flags = (_flags),					\
+		.desc = IORES_DESC_NONE,				\
 	}
 
 #define DEFINE_RES_IO_NAMED(_start, _size, _name)			\
@@ -170,6 +195,10 @@
 {
 	return res->flags & IORESOURCE_TYPE_BITS;
 }
+static inline unsigned long resource_ext_type(const struct resource *res)
+{
+	return res->flags & IORESOURCE_EXT_TYPE_BITS;
+}
 /* True iff r1 completely contains r2 */
 static inline bool resource_contains(struct resource *r1, struct resource *r2)
 {
@@ -239,8 +268,8 @@
 walk_system_ram_res(u64 start, u64 end, void *arg,
 		    int (*func)(u64, u64, void *));
 extern int
-walk_iomem_res(char *name, unsigned long flags, u64 start, u64 end, void *arg,
-	       int (*func)(u64, u64, void *));
+walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start, u64 end,
+		    void *arg, int (*func)(u64, u64, void *));
 
 /* True if any part of r1 overlaps r2 */
 static inline bool resource_overlaps(struct resource *r1, struct resource *r2)

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 3c1c967..c4de623 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h

@@ -133,17 +133,23 @@
  *			Use accessor functions to deal with it
  * @node:		node index useful for balancing
  * @handler_data:	per-IRQ data for the irq_chip methods
- * @affinity:		IRQ affinity on SMP
+ * @affinity:		IRQ affinity on SMP. If this is an IPI
+ *			related irq, then this is the mask of the
+ *			CPUs to which an IPI can be sent.
  * @msi_desc:		MSI descriptor
+ * @ipi_offset:		Offset of first IPI target cpu in @affinity. Optional.
  */
 struct irq_common_data {
-	unsigned int		state_use_accessors;
+	unsigned int		__private state_use_accessors;
 #ifdef CONFIG_NUMA
 	unsigned int		node;
 #endif
 	void			*handler_data;
 	struct msi_desc		*msi_desc;
 	cpumask_var_t		affinity;
+#ifdef CONFIG_GENERIC_IRQ_IPI
+	unsigned int		ipi_offset;
+#endif
 };
 
 /**
@@ -208,7 +214,7 @@
 	IRQD_FORWARDED_TO_VCPU		= (1 << 20),
 };
 
-#define __irqd_to_state(d)		((d)->common->state_use_accessors)
+#define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors)
 
 static inline bool irqd_is_setaffinity_pending(struct irq_data *d)
 {
@@ -299,6 +305,8 @@
 	__irqd_to_state(d) &= ~IRQD_FORWARDED_TO_VCPU;
 }
 
+#undef __irqd_to_state
+
 static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d)
 {
 	return d->hwirq;
@@ -341,6 +349,8 @@
  * @irq_get_irqchip_state:	return the internal state of an interrupt
  * @irq_set_irqchip_state:	set the internal state of a interrupt
  * @irq_set_vcpu_affinity:	optional to target a vCPU in a virtual machine
+ * @ipi_send_single:	send a single IPI to destination cpus
+ * @ipi_send_mask:	send an IPI to destination cpus in cpumask
  * @flags:		chip specific flags
  */
 struct irq_chip {
@@ -385,6 +395,9 @@
 
 	int		(*irq_set_vcpu_affinity)(struct irq_data *data, void *vcpu_info);
 
+	void		(*ipi_send_single)(struct irq_data *data, unsigned int cpu);
+	void		(*ipi_send_mask)(struct irq_data *data, const struct cpumask *dest);
+
 	unsigned long	flags;
 };
 
@@ -934,4 +947,12 @@
 		return readl(gc->reg_base + reg_offset);
 }
 
+/* Contrary to Linux irqs, for hardware irqs the irq number 0 is valid */
+#define INVALID_HWIRQ	(~0UL)
+irq_hw_number_t ipi_get_hwirq(unsigned int irq, unsigned int cpu);
+int __ipi_send_single(struct irq_desc *desc, unsigned int cpu);
+int __ipi_send_mask(struct irq_desc *desc, const struct cpumask *dest);
+int ipi_send_single(unsigned int virq, unsigned int cpu);
+int ipi_send_mask(unsigned int virq, const struct cpumask *dest);
+
 #endif /* _LINUX_IRQ_H */

diff --git a/include/linux/irqchip/mips-gic.h b/include/linux/irqchip/mips-gic.h
index ce824db..80f89e4 100644
--- a/include/linux/irqchip/mips-gic.h
+++ b/include/linux/irqchip/mips-gic.h

@@ -261,9 +261,6 @@
 extern void gic_write_cpu_compare(cycle_t cnt, int cpu);
 extern void gic_start_count(void);
 extern void gic_stop_count(void);
-extern void gic_send_ipi(unsigned int intr);
-extern unsigned int plat_ipi_call_int_xlate(unsigned int);
-extern unsigned int plat_ipi_resched_int_xlate(unsigned int);
 extern int gic_get_c0_compare_int(void);
 extern int gic_get_c0_perfcount_int(void);
 extern int gic_get_c0_fdc_int(void);

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 04579d9..ed48594 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h

@@ -74,6 +74,7 @@
 	DOMAIN_BUS_PCI_MSI,
 	DOMAIN_BUS_PLATFORM_MSI,
 	DOMAIN_BUS_NEXUS,
+	DOMAIN_BUS_IPI,
 };
 
 /**
@@ -172,6 +173,12 @@
 	/* Core calls alloc/free recursive through the domain hierarchy. */
 	IRQ_DOMAIN_FLAG_AUTO_RECURSIVE	= (1 << 1),
 
+	/* Irq domain is an IPI domain with virq per cpu */
+	IRQ_DOMAIN_FLAG_IPI_PER_CPU	= (1 << 2),
+
+	/* Irq domain is an IPI domain with single virq */
+	IRQ_DOMAIN_FLAG_IPI_SINGLE	= (1 << 3),
+
 	/*
 	 * Flags starting from IRQ_DOMAIN_FLAG_NONCORE are reserved
 	 * for implementation specific purposes and ignored by the
@@ -206,6 +213,8 @@
 extern struct irq_domain *irq_find_matching_fwnode(struct fwnode_handle *fwnode,
 						   enum irq_domain_bus_token bus_token);
 extern void irq_set_default_host(struct irq_domain *host);
+extern int irq_domain_alloc_descs(int virq, unsigned int nr_irqs,
+				  irq_hw_number_t hwirq, int node);
 
 static inline struct fwnode_handle *of_node_to_fwnode(struct device_node *node)
 {
@@ -335,6 +344,11 @@
 			const u32 *intspec, unsigned int intsize,
 			irq_hw_number_t *out_hwirq, unsigned int *out_type);
 
+/* IPI functions */
+unsigned int irq_reserve_ipi(struct irq_domain *domain,
+			     const struct cpumask *dest);
+void irq_destroy_ipi(unsigned int irq);
+
 /* V2 interfaces to support hierarchy IRQ domains. */
 extern struct irq_data *irq_domain_get_irq_data(struct irq_domain *domain,
 						unsigned int virq);
@@ -400,6 +414,22 @@
 {
 	return domain->flags & IRQ_DOMAIN_FLAG_HIERARCHY;
 }
+
+static inline bool irq_domain_is_ipi(struct irq_domain *domain)
+{
+	return domain->flags &
+		(IRQ_DOMAIN_FLAG_IPI_PER_CPU | IRQ_DOMAIN_FLAG_IPI_SINGLE);
+}
+
+static inline bool irq_domain_is_ipi_per_cpu(struct irq_domain *domain)
+{
+	return domain->flags & IRQ_DOMAIN_FLAG_IPI_PER_CPU;
+}
+
+static inline bool irq_domain_is_ipi_single(struct irq_domain *domain)
+{
+	return domain->flags & IRQ_DOMAIN_FLAG_IPI_SINGLE;
+}
 #else	/* CONFIG_IRQ_DOMAIN_HIERARCHY */
 static inline void irq_domain_activate_irq(struct irq_data *data) { }
 static inline void irq_domain_deactivate_irq(struct irq_data *data) { }
@@ -413,6 +443,21 @@
 {
 	return false;
 }
+
+static inline bool irq_domain_is_ipi(struct irq_domain *domain)
+{
+	return false;
+}
+
+static inline bool irq_domain_is_ipi_per_cpu(struct irq_domain *domain)
+{
+	return false;
+}
+
+static inline bool irq_domain_is_ipi_single(struct irq_domain *domain)
+{
+	return false;
+}
 #endif	/* CONFIG_IRQ_DOMAIN_HIERARCHY */
 
 #else /* CONFIG_IRQ_DOMAIN */

diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index 4b9f85c..0fdc798 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h

@@ -1,6 +1,7 @@
 #ifndef _LINUX_KASAN_H
 #define _LINUX_KASAN_H
 
+#include <linux/sched.h>
 #include <linux/types.h>
 
 struct kmem_cache;
@@ -13,7 +14,6 @@
 
 #include <asm/kasan.h>
 #include <asm/pgtable.h>
-#include <linux/sched.h>
 
 extern unsigned char kasan_zero_page[PAGE_SIZE];
 extern pte_t kasan_zero_pte[PTRS_PER_PTE];
@@ -43,6 +43,8 @@
 
 void kasan_unpoison_shadow(const void *address, size_t size);
 
+void kasan_unpoison_task_stack(struct task_struct *task);
+
 void kasan_alloc_pages(struct page *page, unsigned int order);
 void kasan_free_pages(struct page *page, unsigned int order);
 
@@ -66,6 +68,8 @@
 
 static inline void kasan_unpoison_shadow(const void *address, size_t size) {}
 
+static inline void kasan_unpoison_task_stack(struct task_struct *task) {}
+
 static inline void kasan_enable_current(void) {}
 static inline void kasan_disable_current(void) {}
 

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 861f690..5276fe0 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h

@@ -25,6 +25,7 @@
 #include <linux/irqflags.h>
 #include <linux/context_tracking.h>
 #include <linux/irqbypass.h>
+#include <linux/swait.h>
 #include <asm/signal.h>
 
 #include <linux/kvm.h>
@@ -218,7 +219,7 @@
 	int fpu_active;
 	int guest_fpu_loaded, guest_xcr0_loaded;
 	unsigned char fpu_counter;
-	wait_queue_head_t wq;
+	struct swait_queue_head wq;
 	struct pid *pid;
 	int sigset_active;
 	sigset_t sigset;
@@ -782,7 +783,7 @@
 }
 #endif
 
-static inline wait_queue_head_t *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu)
+static inline struct swait_queue_head *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu)
 {
 #ifdef __KVM_HAVE_ARCH_WQP
 	return vcpu->arch.wqp;

diff --git a/include/linux/latencytop.h b/include/linux/latencytop.h
index e23121f..59ccab2 100644
--- a/include/linux/latencytop.h
+++ b/include/linux/latencytop.h

@@ -37,6 +37,9 @@
 
 void clear_all_latency_tracing(struct task_struct *p);
 
+extern int sysctl_latencytop(struct ctl_table *table, int write,
+			void __user *buffer, size_t *lenp, loff_t *ppos);
+
 #else
 
 static inline void

diff --git a/include/linux/libata.h b/include/linux/libata.h
index bec2abb..2c4ebef 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h

@@ -720,7 +720,7 @@
 	union {
 		u16		id[ATA_ID_WORDS]; /* IDENTIFY xxx DEVICE data */
 		u32		gscr[SATA_PMP_GSCR_DWORDS]; /* PMP GSCR block */
-	};
+	} ____cacheline_aligned;
 
 	/* DEVSLP Timing Variables from Identify Device Data Log */
 	u8			devslp_timing[ATA_LOG_DEVSLP_SIZE];

diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index bed40df..141ffdd 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h

@@ -26,9 +26,8 @@
 
 	/* need to set a limit somewhere, but yes, this is likely overkill */
 	ND_IOCTL_MAX_BUFLEN = SZ_4M,
-	ND_CMD_MAX_ELEM = 4,
+	ND_CMD_MAX_ELEM = 5,
 	ND_CMD_MAX_ENVELOPE = 16,
-	ND_CMD_ARS_STATUS_MAX = SZ_4K,
 	ND_MAX_MAPPINGS = 32,
 
 	/* region flag indicating to direct-map persistent memory by default */

diff --git a/include/linux/list.h b/include/linux/list.h
index 30cf420..5356f4d 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h

@@ -113,17 +113,6 @@
 extern void list_del(struct list_head *entry);
 #endif
 
-#ifdef CONFIG_DEBUG_LIST
-/*
- * See devm_memremap_pages() which wants DEBUG_LIST=y to assert if one
- * of the pages it allocates is ever passed to list_add()
- */
-extern void list_force_poison(struct list_head *entry);
-#else
-/* fallback to the less strict LIST_POISON* definitions */
-#define list_force_poison list_del
-#endif
-
 /**
  * list_replace - replace old entry by new one
  * @old : the element to be replaced

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 4dca42f..d026b19 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h

@@ -261,7 +261,6 @@
 /*
  * Initialization, self-test and debugging-output methods:
  */
-extern void lockdep_init(void);
 extern void lockdep_info(void);
 extern void lockdep_reset(void);
 extern void lockdep_reset_lock(struct lockdep_map *lock);
@@ -392,7 +391,6 @@
 # define lockdep_set_current_reclaim_state(g)	do { } while (0)
 # define lockdep_clear_current_reclaim_state()	do { } while (0)
 # define lockdep_trace_alloc(g)			do { } while (0)
-# define lockdep_init()				do { } while (0)
 # define lockdep_info()				do { } while (0)
 # define lockdep_init_map(lock, name, key, sub) \
 		do { (void)(name); (void)(key); } while (0)

diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 430a929..a0e8cc8 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h

@@ -44,6 +44,8 @@
 
 #include <linux/timecounter.h>
 
+#define DEFAULT_UAR_PAGE_SHIFT  12
+
 #define MAX_MSIX_P_PORT		17
 #define MAX_MSIX		64
 #define MIN_MSIX_P_PORT		5
@@ -856,6 +858,7 @@
 	u64			regid_promisc_array[MLX4_MAX_PORTS + 1];
 	u64			regid_allmulti_array[MLX4_MAX_PORTS + 1];
 	struct mlx4_vf_dev     *dev_vfs;
+	u8  uar_page_shift;
 };
 
 struct mlx4_clock_params {
@@ -1528,4 +1531,14 @@
 int mlx4_get_internal_clock_params(struct mlx4_dev *dev,
 				   struct mlx4_clock_params *params);
 
+static inline int mlx4_to_hw_uar_index(struct mlx4_dev *dev, int index)
+{
+	return (index << (PAGE_SHIFT - dev->uar_page_shift));
+}
+
+static inline int mlx4_get_num_reserved_uar(struct mlx4_dev *dev)
+{
+	/* The first 128 UARs are used for EQ doorbells */
+	return (128 >> (PAGE_SHIFT - dev->uar_page_shift));
+}
 #endif /* MLX4_DEVICE_H */

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 231ab6b..58eef02 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h

@@ -207,15 +207,15 @@
 	u8         outer_dmac[0x1];
 	u8         outer_smac[0x1];
 	u8         outer_ether_type[0x1];
-	u8         reserved_0[0x1];
+	u8         reserved_at_3[0x1];
 	u8         outer_first_prio[0x1];
 	u8         outer_first_cfi[0x1];
 	u8         outer_first_vid[0x1];
-	u8         reserved_1[0x1];
+	u8         reserved_at_7[0x1];
 	u8         outer_second_prio[0x1];
 	u8         outer_second_cfi[0x1];
 	u8         outer_second_vid[0x1];
-	u8         reserved_2[0x1];
+	u8         reserved_at_b[0x1];
 	u8         outer_sip[0x1];
 	u8         outer_dip[0x1];
 	u8         outer_frag[0x1];
@@ -230,21 +230,21 @@
 	u8         outer_gre_protocol[0x1];
 	u8         outer_gre_key[0x1];
 	u8         outer_vxlan_vni[0x1];
-	u8         reserved_3[0x5];
+	u8         reserved_at_1a[0x5];
 	u8         source_eswitch_port[0x1];
 
 	u8         inner_dmac[0x1];
 	u8         inner_smac[0x1];
 	u8         inner_ether_type[0x1];
-	u8         reserved_4[0x1];
+	u8         reserved_at_23[0x1];
 	u8         inner_first_prio[0x1];
 	u8         inner_first_cfi[0x1];
 	u8         inner_first_vid[0x1];
-	u8         reserved_5[0x1];
+	u8         reserved_at_27[0x1];
 	u8         inner_second_prio[0x1];
 	u8         inner_second_cfi[0x1];
 	u8         inner_second_vid[0x1];
-	u8         reserved_6[0x1];
+	u8         reserved_at_2b[0x1];
 	u8         inner_sip[0x1];
 	u8         inner_dip[0x1];
 	u8         inner_frag[0x1];
@@ -256,37 +256,37 @@
 	u8         inner_tcp_sport[0x1];
 	u8         inner_tcp_dport[0x1];
 	u8         inner_tcp_flags[0x1];
-	u8         reserved_7[0x9];
+	u8         reserved_at_37[0x9];
 
-	u8         reserved_8[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_flow_table_prop_layout_bits {
 	u8         ft_support[0x1];
-	u8         reserved_0[0x2];
+	u8         reserved_at_1[0x2];
 	u8	   flow_modify_en[0x1];
 	u8         modify_root[0x1];
 	u8         identified_miss_table_mode[0x1];
 	u8         flow_table_modify[0x1];
-	u8         reserved_1[0x19];
+	u8         reserved_at_7[0x19];
 
-	u8         reserved_2[0x2];
+	u8         reserved_at_20[0x2];
 	u8         log_max_ft_size[0x6];
-	u8         reserved_3[0x10];
+	u8         reserved_at_28[0x10];
 	u8         max_ft_level[0x8];
 
-	u8         reserved_4[0x20];
+	u8         reserved_at_40[0x20];
 
-	u8         reserved_5[0x18];
+	u8         reserved_at_60[0x18];
 	u8         log_max_ft_num[0x8];
 
-	u8         reserved_6[0x18];
+	u8         reserved_at_80[0x18];
 	u8         log_max_destination[0x8];
 
-	u8         reserved_7[0x18];
+	u8         reserved_at_a0[0x18];
 	u8         log_max_flow[0x8];
 
-	u8         reserved_8[0x40];
+	u8         reserved_at_c0[0x40];
 
 	struct mlx5_ifc_flow_table_fields_supported_bits ft_field_support;
 
@@ -298,13 +298,13 @@
 	u8         receive[0x1];
 	u8         write[0x1];
 	u8         read[0x1];
-	u8         reserved_0[0x1];
+	u8         reserved_at_4[0x1];
 	u8         srq_receive[0x1];
-	u8         reserved_1[0x1a];
+	u8         reserved_at_6[0x1a];
 };
 
 struct mlx5_ifc_ipv4_layout_bits {
-	u8         reserved_0[0x60];
+	u8         reserved_at_0[0x60];
 
 	u8         ipv4[0x20];
 };
@@ -316,7 +316,7 @@
 union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits {
 	struct mlx5_ifc_ipv6_layout_bits ipv6_layout;
 	struct mlx5_ifc_ipv4_layout_bits ipv4_layout;
-	u8         reserved_0[0x80];
+	u8         reserved_at_0[0x80];
 };
 
 struct mlx5_ifc_fte_match_set_lyr_2_4_bits {
@@ -336,15 +336,15 @@
 	u8         ip_dscp[0x6];
 	u8         ip_ecn[0x2];
 	u8         vlan_tag[0x1];
-	u8         reserved_0[0x1];
+	u8         reserved_at_91[0x1];
 	u8         frag[0x1];
-	u8         reserved_1[0x4];
+	u8         reserved_at_93[0x4];
 	u8         tcp_flags[0x9];
 
 	u8         tcp_sport[0x10];
 	u8         tcp_dport[0x10];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_c0[0x20];
 
 	u8         udp_sport[0x10];
 	u8         udp_dport[0x10];
@@ -355,9 +355,9 @@
 };
 
 struct mlx5_ifc_fte_match_set_misc_bits {
-	u8         reserved_0[0x20];
+	u8         reserved_at_0[0x20];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         source_port[0x10];
 
 	u8         outer_second_prio[0x3];
@@ -369,31 +369,31 @@
 
 	u8         outer_second_vlan_tag[0x1];
 	u8         inner_second_vlan_tag[0x1];
-	u8         reserved_2[0xe];
+	u8         reserved_at_62[0xe];
 	u8         gre_protocol[0x10];
 
 	u8         gre_key_h[0x18];
 	u8         gre_key_l[0x8];
 
 	u8         vxlan_vni[0x18];
-	u8         reserved_3[0x8];
+	u8         reserved_at_b8[0x8];
 
-	u8         reserved_4[0x20];
+	u8         reserved_at_c0[0x20];
 
-	u8         reserved_5[0xc];
+	u8         reserved_at_e0[0xc];
 	u8         outer_ipv6_flow_label[0x14];
 
-	u8         reserved_6[0xc];
+	u8         reserved_at_100[0xc];
 	u8         inner_ipv6_flow_label[0x14];
 
-	u8         reserved_7[0xe0];
+	u8         reserved_at_120[0xe0];
 };
 
 struct mlx5_ifc_cmd_pas_bits {
 	u8         pa_h[0x20];
 
 	u8         pa_l[0x14];
-	u8         reserved_0[0xc];
+	u8         reserved_at_34[0xc];
 };
 
 struct mlx5_ifc_uint64_bits {
@@ -418,31 +418,31 @@
 struct mlx5_ifc_ads_bits {
 	u8         fl[0x1];
 	u8         free_ar[0x1];
-	u8         reserved_0[0xe];
+	u8         reserved_at_2[0xe];
 	u8         pkey_index[0x10];
 
-	u8         reserved_1[0x8];
+	u8         reserved_at_20[0x8];
 	u8         grh[0x1];
 	u8         mlid[0x7];
 	u8         rlid[0x10];
 
 	u8         ack_timeout[0x5];
-	u8         reserved_2[0x3];
+	u8         reserved_at_45[0x3];
 	u8         src_addr_index[0x8];
-	u8         reserved_3[0x4];
+	u8         reserved_at_50[0x4];
 	u8         stat_rate[0x4];
 	u8         hop_limit[0x8];
 
-	u8         reserved_4[0x4];
+	u8         reserved_at_60[0x4];
 	u8         tclass[0x8];
 	u8         flow_label[0x14];
 
 	u8         rgid_rip[16][0x8];
 
-	u8         reserved_5[0x4];
+	u8         reserved_at_100[0x4];
 	u8         f_dscp[0x1];
 	u8         f_ecn[0x1];
-	u8         reserved_6[0x1];
+	u8         reserved_at_106[0x1];
 	u8         f_eth_prio[0x1];
 	u8         ecn[0x2];
 	u8         dscp[0x6];
@@ -458,25 +458,25 @@
 };
 
 struct mlx5_ifc_flow_table_nic_cap_bits {
-	u8         reserved_0[0x200];
+	u8         reserved_at_0[0x200];
 
 	struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_receive;
 
-	u8         reserved_1[0x200];
+	u8         reserved_at_400[0x200];
 
 	struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_receive_sniffer;
 
 	struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_transmit;
 
-	u8         reserved_2[0x200];
+	u8         reserved_at_a00[0x200];
 
 	struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_transmit_sniffer;
 
-	u8         reserved_3[0x7200];
+	u8         reserved_at_e00[0x7200];
 };
 
 struct mlx5_ifc_flow_table_eswitch_cap_bits {
-	u8     reserved_0[0x200];
+	u8     reserved_at_0[0x200];
 
 	struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_esw_fdb;
 
@@ -484,7 +484,7 @@
 
 	struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_esw_acl_egress;
 
-	u8      reserved_1[0x7800];
+	u8      reserved_at_800[0x7800];
 };
 
 struct mlx5_ifc_e_switch_cap_bits {
@@ -493,9 +493,9 @@
 	u8         vport_svlan_insert[0x1];
 	u8         vport_cvlan_insert_if_not_exist[0x1];
 	u8         vport_cvlan_insert_overwrite[0x1];
-	u8         reserved_0[0x1b];
+	u8         reserved_at_5[0x1b];
 
-	u8         reserved_1[0x7e0];
+	u8         reserved_at_20[0x7e0];
 };
 
 struct mlx5_ifc_per_protocol_networking_offload_caps_bits {
@@ -504,51 +504,51 @@
 	u8         lro_cap[0x1];
 	u8         lro_psh_flag[0x1];
 	u8         lro_time_stamp[0x1];
-	u8         reserved_0[0x3];
+	u8         reserved_at_5[0x3];
 	u8         self_lb_en_modifiable[0x1];
-	u8         reserved_1[0x2];
+	u8         reserved_at_9[0x2];
 	u8         max_lso_cap[0x5];
-	u8         reserved_2[0x4];
+	u8         reserved_at_10[0x4];
 	u8         rss_ind_tbl_cap[0x4];
-	u8         reserved_3[0x3];
+	u8         reserved_at_18[0x3];
 	u8         tunnel_lso_const_out_ip_id[0x1];
-	u8         reserved_4[0x2];
+	u8         reserved_at_1c[0x2];
 	u8         tunnel_statless_gre[0x1];
 	u8         tunnel_stateless_vxlan[0x1];
 
-	u8         reserved_5[0x20];
+	u8         reserved_at_20[0x20];
 
-	u8         reserved_6[0x10];
+	u8         reserved_at_40[0x10];
 	u8         lro_min_mss_size[0x10];
 
-	u8         reserved_7[0x120];
+	u8         reserved_at_60[0x120];
 
 	u8         lro_timer_supported_periods[4][0x20];
 
-	u8         reserved_8[0x600];
+	u8         reserved_at_200[0x600];
 };
 
 struct mlx5_ifc_roce_cap_bits {
 	u8         roce_apm[0x1];
-	u8         reserved_0[0x1f];
+	u8         reserved_at_1[0x1f];
 
-	u8         reserved_1[0x60];
+	u8         reserved_at_20[0x60];
 
-	u8         reserved_2[0xc];
+	u8         reserved_at_80[0xc];
 	u8         l3_type[0x4];
-	u8         reserved_3[0x8];
+	u8         reserved_at_90[0x8];
 	u8         roce_version[0x8];
 
-	u8         reserved_4[0x10];
+	u8         reserved_at_a0[0x10];
 	u8         r_roce_dest_udp_port[0x10];
 
 	u8         r_roce_max_src_udp_port[0x10];
 	u8         r_roce_min_src_udp_port[0x10];
 
-	u8         reserved_5[0x10];
+	u8         reserved_at_e0[0x10];
 	u8         roce_address_table_size[0x10];
 
-	u8         reserved_6[0x700];
+	u8         reserved_at_100[0x700];
 };
 
 enum {
@@ -576,35 +576,35 @@
 };
 
 struct mlx5_ifc_atomic_caps_bits {
-	u8         reserved_0[0x40];
+	u8         reserved_at_0[0x40];
 
 	u8         atomic_req_8B_endianess_mode[0x2];
-	u8         reserved_1[0x4];
+	u8         reserved_at_42[0x4];
 	u8         supported_atomic_req_8B_endianess_mode_1[0x1];
 
-	u8         reserved_2[0x19];
+	u8         reserved_at_47[0x19];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 
-	u8         reserved_4[0x10];
+	u8         reserved_at_80[0x10];
 	u8         atomic_operations[0x10];
 
-	u8         reserved_5[0x10];
+	u8         reserved_at_a0[0x10];
 	u8         atomic_size_qp[0x10];
 
-	u8         reserved_6[0x10];
+	u8         reserved_at_c0[0x10];
 	u8         atomic_size_dc[0x10];
 
-	u8         reserved_7[0x720];
+	u8         reserved_at_e0[0x720];
 };
 
 struct mlx5_ifc_odp_cap_bits {
-	u8         reserved_0[0x40];
+	u8         reserved_at_0[0x40];
 
 	u8         sig[0x1];
-	u8         reserved_1[0x1f];
+	u8         reserved_at_41[0x1f];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_60[0x20];
 
 	struct mlx5_ifc_odp_per_transport_service_cap_bits rc_odp_caps;
 
@@ -612,7 +612,7 @@
 
 	struct mlx5_ifc_odp_per_transport_service_cap_bits ud_odp_caps;
 
-	u8         reserved_3[0x720];
+	u8         reserved_at_e0[0x720];
 };
 
 enum {
@@ -660,55 +660,55 @@
 };
 
 struct mlx5_ifc_cmd_hca_cap_bits {
-	u8         reserved_0[0x80];
+	u8         reserved_at_0[0x80];
 
 	u8         log_max_srq_sz[0x8];
 	u8         log_max_qp_sz[0x8];
-	u8         reserved_1[0xb];
+	u8         reserved_at_90[0xb];
 	u8         log_max_qp[0x5];
 
-	u8         reserved_2[0xb];
+	u8         reserved_at_a0[0xb];
 	u8         log_max_srq[0x5];
-	u8         reserved_3[0x10];
+	u8         reserved_at_b0[0x10];
 
-	u8         reserved_4[0x8];
+	u8         reserved_at_c0[0x8];
 	u8         log_max_cq_sz[0x8];
-	u8         reserved_5[0xb];
+	u8         reserved_at_d0[0xb];
 	u8         log_max_cq[0x5];
 
 	u8         log_max_eq_sz[0x8];
-	u8         reserved_6[0x2];
+	u8         reserved_at_e8[0x2];
 	u8         log_max_mkey[0x6];
-	u8         reserved_7[0xc];
+	u8         reserved_at_f0[0xc];
 	u8         log_max_eq[0x4];
 
 	u8         max_indirection[0x8];
-	u8         reserved_8[0x1];
+	u8         reserved_at_108[0x1];
 	u8         log_max_mrw_sz[0x7];
-	u8         reserved_9[0x2];
+	u8         reserved_at_110[0x2];
 	u8         log_max_bsf_list_size[0x6];
-	u8         reserved_10[0x2];
+	u8         reserved_at_118[0x2];
 	u8         log_max_klm_list_size[0x6];
 
-	u8         reserved_11[0xa];
+	u8         reserved_at_120[0xa];
 	u8         log_max_ra_req_dc[0x6];
-	u8         reserved_12[0xa];
+	u8         reserved_at_130[0xa];
 	u8         log_max_ra_res_dc[0x6];
 
-	u8         reserved_13[0xa];
+	u8         reserved_at_140[0xa];
 	u8         log_max_ra_req_qp[0x6];
-	u8         reserved_14[0xa];
+	u8         reserved_at_150[0xa];
 	u8         log_max_ra_res_qp[0x6];
 
 	u8         pad_cap[0x1];
 	u8         cc_query_allowed[0x1];
 	u8         cc_modify_allowed[0x1];
-	u8         reserved_15[0xd];
+	u8         reserved_at_163[0xd];
 	u8         gid_table_size[0x10];
 
 	u8         out_of_seq_cnt[0x1];
 	u8         vport_counters[0x1];
-	u8         reserved_16[0x4];
+	u8         reserved_at_182[0x4];
 	u8         max_qp_cnt[0xa];
 	u8         pkey_table_size[0x10];
 
@@ -716,158 +716,158 @@
 	u8         vhca_group_manager[0x1];
 	u8         ib_virt[0x1];
 	u8         eth_virt[0x1];
-	u8         reserved_17[0x1];
+	u8         reserved_at_1a4[0x1];
 	u8         ets[0x1];
 	u8         nic_flow_table[0x1];
 	u8         eswitch_flow_table[0x1];
 	u8	   early_vf_enable;
-	u8         reserved_18[0x2];
+	u8         reserved_at_1a8[0x2];
 	u8         local_ca_ack_delay[0x5];
-	u8         reserved_19[0x6];
+	u8         reserved_at_1af[0x6];
 	u8         port_type[0x2];
 	u8         num_ports[0x8];
 
-	u8         reserved_20[0x3];
+	u8         reserved_at_1bf[0x3];
 	u8         log_max_msg[0x5];
-	u8         reserved_21[0x18];
+	u8         reserved_at_1c7[0x18];
 
 	u8         stat_rate_support[0x10];
-	u8         reserved_22[0xc];
+	u8         reserved_at_1ef[0xc];
 	u8         cqe_version[0x4];
 
 	u8         compact_address_vector[0x1];
-	u8         reserved_23[0xe];
+	u8         reserved_at_200[0xe];
 	u8         drain_sigerr[0x1];
 	u8         cmdif_checksum[0x2];
 	u8         sigerr_cqe[0x1];
-	u8         reserved_24[0x1];
+	u8         reserved_at_212[0x1];
 	u8         wq_signature[0x1];
 	u8         sctr_data_cqe[0x1];
-	u8         reserved_25[0x1];
+	u8         reserved_at_215[0x1];
 	u8         sho[0x1];
 	u8         tph[0x1];
 	u8         rf[0x1];
 	u8         dct[0x1];
-	u8         reserved_26[0x1];
+	u8         reserved_at_21a[0x1];
 	u8         eth_net_offloads[0x1];
 	u8         roce[0x1];
 	u8         atomic[0x1];
-	u8         reserved_27[0x1];
+	u8         reserved_at_21e[0x1];
 
 	u8         cq_oi[0x1];
 	u8         cq_resize[0x1];
 	u8         cq_moderation[0x1];
-	u8         reserved_28[0x3];
+	u8         reserved_at_222[0x3];
 	u8         cq_eq_remap[0x1];
 	u8         pg[0x1];
 	u8         block_lb_mc[0x1];
-	u8         reserved_29[0x1];
+	u8         reserved_at_228[0x1];
 	u8         scqe_break_moderation[0x1];
-	u8         reserved_30[0x1];
+	u8         reserved_at_22a[0x1];
 	u8         cd[0x1];
-	u8         reserved_31[0x1];
+	u8         reserved_at_22c[0x1];
 	u8         apm[0x1];
-	u8         reserved_32[0x7];
+	u8         reserved_at_22e[0x7];
 	u8         qkv[0x1];
 	u8         pkv[0x1];
-	u8         reserved_33[0x4];
+	u8         reserved_at_237[0x4];
 	u8         xrc[0x1];
 	u8         ud[0x1];
 	u8         uc[0x1];
 	u8         rc[0x1];
 
-	u8         reserved_34[0xa];
+	u8         reserved_at_23f[0xa];
 	u8         uar_sz[0x6];
-	u8         reserved_35[0x8];
+	u8         reserved_at_24f[0x8];
 	u8         log_pg_sz[0x8];
 
 	u8         bf[0x1];
-	u8         reserved_36[0x1];
+	u8         reserved_at_260[0x1];
 	u8         pad_tx_eth_packet[0x1];
-	u8         reserved_37[0x8];
+	u8         reserved_at_262[0x8];
 	u8         log_bf_reg_size[0x5];
-	u8         reserved_38[0x10];
+	u8         reserved_at_26f[0x10];
 
-	u8         reserved_39[0x10];
+	u8         reserved_at_27f[0x10];
 	u8         max_wqe_sz_sq[0x10];
 
-	u8         reserved_40[0x10];
+	u8         reserved_at_29f[0x10];
 	u8         max_wqe_sz_rq[0x10];
 
-	u8         reserved_41[0x10];
+	u8         reserved_at_2bf[0x10];
 	u8         max_wqe_sz_sq_dc[0x10];
 
-	u8         reserved_42[0x7];
+	u8         reserved_at_2df[0x7];
 	u8         max_qp_mcg[0x19];
 
-	u8         reserved_43[0x18];
+	u8         reserved_at_2ff[0x18];
 	u8         log_max_mcg[0x8];
 
-	u8         reserved_44[0x3];
+	u8         reserved_at_31f[0x3];
 	u8         log_max_transport_domain[0x5];
-	u8         reserved_45[0x3];
+	u8         reserved_at_327[0x3];
 	u8         log_max_pd[0x5];
-	u8         reserved_46[0xb];
+	u8         reserved_at_32f[0xb];
 	u8         log_max_xrcd[0x5];
 
-	u8         reserved_47[0x20];
+	u8         reserved_at_33f[0x20];
 
-	u8         reserved_48[0x3];
+	u8         reserved_at_35f[0x3];
 	u8         log_max_rq[0x5];
-	u8         reserved_49[0x3];
+	u8         reserved_at_367[0x3];
 	u8         log_max_sq[0x5];
-	u8         reserved_50[0x3];
+	u8         reserved_at_36f[0x3];
 	u8         log_max_tir[0x5];
-	u8         reserved_51[0x3];
+	u8         reserved_at_377[0x3];
 	u8         log_max_tis[0x5];
 
 	u8         basic_cyclic_rcv_wqe[0x1];
-	u8         reserved_52[0x2];
+	u8         reserved_at_380[0x2];
 	u8         log_max_rmp[0x5];
-	u8         reserved_53[0x3];
+	u8         reserved_at_387[0x3];
 	u8         log_max_rqt[0x5];
-	u8         reserved_54[0x3];
+	u8         reserved_at_38f[0x3];
 	u8         log_max_rqt_size[0x5];
-	u8         reserved_55[0x3];
+	u8         reserved_at_397[0x3];
 	u8         log_max_tis_per_sq[0x5];
 
-	u8         reserved_56[0x3];
+	u8         reserved_at_39f[0x3];
 	u8         log_max_stride_sz_rq[0x5];
-	u8         reserved_57[0x3];
+	u8         reserved_at_3a7[0x3];
 	u8         log_min_stride_sz_rq[0x5];
-	u8         reserved_58[0x3];
+	u8         reserved_at_3af[0x3];
 	u8         log_max_stride_sz_sq[0x5];
-	u8         reserved_59[0x3];
+	u8         reserved_at_3b7[0x3];
 	u8         log_min_stride_sz_sq[0x5];
 
-	u8         reserved_60[0x1b];
+	u8         reserved_at_3bf[0x1b];
 	u8         log_max_wq_sz[0x5];
 
 	u8         nic_vport_change_event[0x1];
-	u8         reserved_61[0xa];
+	u8         reserved_at_3e0[0xa];
 	u8         log_max_vlan_list[0x5];
-	u8         reserved_62[0x3];
+	u8         reserved_at_3ef[0x3];
 	u8         log_max_current_mc_list[0x5];
-	u8         reserved_63[0x3];
+	u8         reserved_at_3f7[0x3];
 	u8         log_max_current_uc_list[0x5];
 
-	u8         reserved_64[0x80];
+	u8         reserved_at_3ff[0x80];
 
-	u8         reserved_65[0x3];
+	u8         reserved_at_47f[0x3];
 	u8         log_max_l2_table[0x5];
-	u8         reserved_66[0x8];
+	u8         reserved_at_487[0x8];
 	u8         log_uar_page_sz[0x10];
 
-	u8         reserved_67[0x20];
+	u8         reserved_at_49f[0x20];
 	u8         device_frequency_mhz[0x20];
 	u8         device_frequency_khz[0x20];
-	u8         reserved_68[0x5f];
+	u8         reserved_at_4ff[0x5f];
 	u8         cqe_zip[0x1];
 
 	u8         cqe_zip_timeout[0x10];
 	u8         cqe_zip_max_num[0x10];
 
-	u8         reserved_69[0x220];
+	u8         reserved_at_57f[0x220];
 };
 
 enum mlx5_flow_destination_type {
@@ -880,7 +880,7 @@
 	u8         destination_type[0x8];
 	u8         destination_id[0x18];
 
-	u8         reserved_0[0x20];
+	u8         reserved_at_20[0x20];
 };
 
 struct mlx5_ifc_fte_match_param_bits {
@@ -890,7 +890,7 @@
 
 	struct mlx5_ifc_fte_match_set_lyr_2_4_bits inner_headers;
 
-	u8         reserved_0[0xa00];
+	u8         reserved_at_600[0xa00];
 };
 
 enum {
@@ -922,18 +922,18 @@
 	u8         wq_signature[0x1];
 	u8         end_padding_mode[0x2];
 	u8         cd_slave[0x1];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         hds_skip_first_sge[0x1];
 	u8         log2_hds_buf_size[0x3];
-	u8         reserved_1[0x7];
+	u8         reserved_at_24[0x7];
 	u8         page_offset[0x5];
 	u8         lwm[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         pd[0x18];
 
-	u8         reserved_3[0x8];
+	u8         reserved_at_60[0x8];
 	u8         uar_page[0x18];
 
 	u8         dbr_addr[0x40];
@@ -942,60 +942,60 @@
 
 	u8         sw_counter[0x20];
 
-	u8         reserved_4[0xc];
+	u8         reserved_at_100[0xc];
 	u8         log_wq_stride[0x4];
-	u8         reserved_5[0x3];
+	u8         reserved_at_110[0x3];
 	u8         log_wq_pg_sz[0x5];
-	u8         reserved_6[0x3];
+	u8         reserved_at_118[0x3];
 	u8         log_wq_sz[0x5];
 
-	u8         reserved_7[0x4e0];
+	u8         reserved_at_120[0x4e0];
 
 	struct mlx5_ifc_cmd_pas_bits pas[0];
 };
 
 struct mlx5_ifc_rq_num_bits {
-	u8         reserved_0[0x8];
+	u8         reserved_at_0[0x8];
 	u8         rq_num[0x18];
 };
 
 struct mlx5_ifc_mac_address_layout_bits {
-	u8         reserved_0[0x10];
+	u8         reserved_at_0[0x10];
 	u8         mac_addr_47_32[0x10];
 
 	u8         mac_addr_31_0[0x20];
 };
 
 struct mlx5_ifc_vlan_layout_bits {
-	u8         reserved_0[0x14];
+	u8         reserved_at_0[0x14];
 	u8         vlan[0x0c];
 
-	u8         reserved_1[0x20];
+	u8         reserved_at_20[0x20];
 };
 
 struct mlx5_ifc_cong_control_r_roce_ecn_np_bits {
-	u8         reserved_0[0xa0];
+	u8         reserved_at_0[0xa0];
 
 	u8         min_time_between_cnps[0x20];
 
-	u8         reserved_1[0x12];
+	u8         reserved_at_c0[0x12];
 	u8         cnp_dscp[0x6];
-	u8         reserved_2[0x5];
+	u8         reserved_at_d8[0x5];
 	u8         cnp_802p_prio[0x3];
 
-	u8         reserved_3[0x720];
+	u8         reserved_at_e0[0x720];
 };
 
 struct mlx5_ifc_cong_control_r_roce_ecn_rp_bits {
-	u8         reserved_0[0x60];
+	u8         reserved_at_0[0x60];
 
-	u8         reserved_1[0x4];
+	u8         reserved_at_60[0x4];
 	u8         clamp_tgt_rate[0x1];
-	u8         reserved_2[0x3];
+	u8         reserved_at_65[0x3];
 	u8         clamp_tgt_rate_after_time_inc[0x1];
-	u8         reserved_3[0x17];
+	u8         reserved_at_69[0x17];
 
-	u8         reserved_4[0x20];
+	u8         reserved_at_80[0x20];
 
 	u8         rpg_time_reset[0x20];
 
@@ -1015,7 +1015,7 @@
 
 	u8         rpg_min_rate[0x20];
 
-	u8         reserved_5[0xe0];
+	u8         reserved_at_1c0[0xe0];
 
 	u8         rate_to_set_on_first_cnp[0x20];
 
@@ -1025,15 +1025,15 @@
 
 	u8         rate_reduce_monitor_period[0x20];
 
-	u8         reserved_6[0x20];
+	u8         reserved_at_320[0x20];
 
 	u8         initial_alpha_value[0x20];
 
-	u8         reserved_7[0x4a0];
+	u8         reserved_at_360[0x4a0];
 };
 
 struct mlx5_ifc_cong_control_802_1qau_rp_bits {
-	u8         reserved_0[0x80];
+	u8         reserved_at_0[0x80];
 
 	u8         rppp_max_rps[0x20];
 
@@ -1055,7 +1055,7 @@
 
 	u8         rpg_min_rate[0x20];
 
-	u8         reserved_1[0x640];
+	u8         reserved_at_1c0[0x640];
 };
 
 enum {
@@ -1205,7 +1205,7 @@
 
 	u8         successful_recovery_events[0x20];
 
-	u8         reserved_0[0x180];
+	u8         reserved_at_640[0x180];
 };
 
 struct mlx5_ifc_eth_per_traffic_grp_data_layout_bits {
@@ -1213,7 +1213,7 @@
 
 	u8         transmit_queue_low[0x20];
 
-	u8         reserved_0[0x780];
+	u8         reserved_at_40[0x780];
 };
 
 struct mlx5_ifc_eth_per_prio_grp_data_layout_bits {
@@ -1221,7 +1221,7 @@
 
 	u8         rx_octets_low[0x20];
 
-	u8         reserved_0[0xc0];
+	u8         reserved_at_40[0xc0];
 
 	u8         rx_frames_high[0x20];
 
@@ -1231,7 +1231,7 @@
 
 	u8         tx_octets_low[0x20];
 
-	u8         reserved_1[0xc0];
+	u8         reserved_at_180[0xc0];
 
 	u8         tx_frames_high[0x20];
 
@@ -1257,7 +1257,7 @@
 
 	u8         rx_pause_transition_low[0x20];
 
-	u8         reserved_2[0x400];
+	u8         reserved_at_3c0[0x400];
 };
 
 struct mlx5_ifc_eth_extended_cntrs_grp_data_layout_bits {
@@ -1265,7 +1265,7 @@
 
 	u8         port_transmit_wait_low[0x20];
 
-	u8         reserved_0[0x780];
+	u8         reserved_at_40[0x780];
 };
 
 struct mlx5_ifc_eth_3635_cntrs_grp_data_layout_bits {
@@ -1333,7 +1333,7 @@
 
 	u8         dot3out_pause_frames_low[0x20];
 
-	u8         reserved_0[0x3c0];
+	u8         reserved_at_400[0x3c0];
 };
 
 struct mlx5_ifc_eth_2819_cntrs_grp_data_layout_bits {
@@ -1421,7 +1421,7 @@
 
 	u8         ether_stats_pkts8192to10239octets_low[0x20];
 
-	u8         reserved_0[0x280];
+	u8         reserved_at_540[0x280];
 };
 
 struct mlx5_ifc_eth_2863_cntrs_grp_data_layout_bits {
@@ -1477,7 +1477,7 @@
 
 	u8         if_out_broadcast_pkts_low[0x20];
 
-	u8         reserved_0[0x480];
+	u8         reserved_at_340[0x480];
 };
 
 struct mlx5_ifc_eth_802_3_cntrs_grp_data_layout_bits {
@@ -1557,54 +1557,54 @@
 
 	u8         a_pause_mac_ctrl_frames_transmitted_low[0x20];
 
-	u8         reserved_0[0x300];
+	u8         reserved_at_4c0[0x300];
 };
 
 struct mlx5_ifc_cmd_inter_comp_event_bits {
 	u8         command_completion_vector[0x20];
 
-	u8         reserved_0[0xc0];
+	u8         reserved_at_20[0xc0];
 };
 
 struct mlx5_ifc_stall_vl_event_bits {
-	u8         reserved_0[0x18];
+	u8         reserved_at_0[0x18];
 	u8         port_num[0x1];
-	u8         reserved_1[0x3];
+	u8         reserved_at_19[0x3];
 	u8         vl[0x4];
 
-	u8         reserved_2[0xa0];
+	u8         reserved_at_20[0xa0];
 };
 
 struct mlx5_ifc_db_bf_congestion_event_bits {
 	u8         event_subtype[0x8];
-	u8         reserved_0[0x8];
+	u8         reserved_at_8[0x8];
 	u8         congestion_level[0x8];
-	u8         reserved_1[0x8];
+	u8         reserved_at_18[0x8];
 
-	u8         reserved_2[0xa0];
+	u8         reserved_at_20[0xa0];
 };
 
 struct mlx5_ifc_gpio_event_bits {
-	u8         reserved_0[0x60];
+	u8         reserved_at_0[0x60];
 
 	u8         gpio_event_hi[0x20];
 
 	u8         gpio_event_lo[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_a0[0x40];
 };
 
 struct mlx5_ifc_port_state_change_event_bits {
-	u8         reserved_0[0x40];
+	u8         reserved_at_0[0x40];
 
 	u8         port_num[0x4];
-	u8         reserved_1[0x1c];
+	u8         reserved_at_44[0x1c];
 
-	u8         reserved_2[0x80];
+	u8         reserved_at_60[0x80];
 };
 
 struct mlx5_ifc_dropped_packet_logged_bits {
-	u8         reserved_0[0xe0];
+	u8         reserved_at_0[0xe0];
 };
 
 enum {
@@ -1613,15 +1613,15 @@
 };
 
 struct mlx5_ifc_cq_error_bits {
-	u8         reserved_0[0x8];
+	u8         reserved_at_0[0x8];
 	u8         cqn[0x18];
 
-	u8         reserved_1[0x20];
+	u8         reserved_at_20[0x20];
 
-	u8         reserved_2[0x18];
+	u8         reserved_at_40[0x18];
 	u8         syndrome[0x8];
 
-	u8         reserved_3[0x80];
+	u8         reserved_at_60[0x80];
 };
 
 struct mlx5_ifc_rdma_page_fault_event_bits {
@@ -1629,14 +1629,14 @@
 
 	u8         r_key[0x20];
 
-	u8         reserved_0[0x10];
+	u8         reserved_at_40[0x10];
 	u8         packet_len[0x10];
 
 	u8         rdma_op_len[0x20];
 
 	u8         rdma_va[0x40];
 
-	u8         reserved_1[0x5];
+	u8         reserved_at_c0[0x5];
 	u8         rdma[0x1];
 	u8         write[0x1];
 	u8         requestor[0x1];
@@ -1646,15 +1646,15 @@
 struct mlx5_ifc_wqe_associated_page_fault_event_bits {
 	u8         bytes_committed[0x20];
 
-	u8         reserved_0[0x10];
+	u8         reserved_at_20[0x10];
 	u8         wqe_index[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_40[0x10];
 	u8         len[0x10];
 
-	u8         reserved_2[0x60];
+	u8         reserved_at_60[0x60];
 
-	u8         reserved_3[0x5];
+	u8         reserved_at_c0[0x5];
 	u8         rdma[0x1];
 	u8         write_read[0x1];
 	u8         requestor[0x1];
@@ -1662,26 +1662,26 @@
 };
 
 struct mlx5_ifc_qp_events_bits {
-	u8         reserved_0[0xa0];
+	u8         reserved_at_0[0xa0];
 
 	u8         type[0x8];
-	u8         reserved_1[0x18];
+	u8         reserved_at_a8[0x18];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_c0[0x8];
 	u8         qpn_rqn_sqn[0x18];
 };
 
 struct mlx5_ifc_dct_events_bits {
-	u8         reserved_0[0xc0];
+	u8         reserved_at_0[0xc0];
 
-	u8         reserved_1[0x8];
+	u8         reserved_at_c0[0x8];
 	u8         dct_number[0x18];
 };
 
 struct mlx5_ifc_comp_event_bits {
-	u8         reserved_0[0xc0];
+	u8         reserved_at_0[0xc0];
 
-	u8         reserved_1[0x8];
+	u8         reserved_at_c0[0x8];
 	u8         cq_number[0x18];
 };
 
@@ -1754,41 +1754,41 @@
 
 struct mlx5_ifc_qpc_bits {
 	u8         state[0x4];
-	u8         reserved_0[0x4];
+	u8         reserved_at_4[0x4];
 	u8         st[0x8];
-	u8         reserved_1[0x3];
+	u8         reserved_at_10[0x3];
 	u8         pm_state[0x2];
-	u8         reserved_2[0x7];
+	u8         reserved_at_15[0x7];
 	u8         end_padding_mode[0x2];
-	u8         reserved_3[0x2];
+	u8         reserved_at_1e[0x2];
 
 	u8         wq_signature[0x1];
 	u8         block_lb_mc[0x1];
 	u8         atomic_like_write_en[0x1];
 	u8         latency_sensitive[0x1];
-	u8         reserved_4[0x1];
+	u8         reserved_at_24[0x1];
 	u8         drain_sigerr[0x1];
-	u8         reserved_5[0x2];
+	u8         reserved_at_26[0x2];
 	u8         pd[0x18];
 
 	u8         mtu[0x3];
 	u8         log_msg_max[0x5];
-	u8         reserved_6[0x1];
+	u8         reserved_at_48[0x1];
 	u8         log_rq_size[0x4];
 	u8         log_rq_stride[0x3];
 	u8         no_sq[0x1];
 	u8         log_sq_size[0x4];
-	u8         reserved_7[0x6];
+	u8         reserved_at_55[0x6];
 	u8         rlky[0x1];
-	u8         reserved_8[0x4];
+	u8         reserved_at_5c[0x4];
 
 	u8         counter_set_id[0x8];
 	u8         uar_page[0x18];
 
-	u8         reserved_9[0x8];
+	u8         reserved_at_80[0x8];
 	u8         user_index[0x18];
 
-	u8         reserved_10[0x3];
+	u8         reserved_at_a0[0x3];
 	u8         log_page_size[0x5];
 	u8         remote_qpn[0x18];
 
@@ -1797,66 +1797,66 @@
 	struct mlx5_ifc_ads_bits secondary_address_path;
 
 	u8         log_ack_req_freq[0x4];
-	u8         reserved_11[0x4];
+	u8         reserved_at_384[0x4];
 	u8         log_sra_max[0x3];
-	u8         reserved_12[0x2];
+	u8         reserved_at_38b[0x2];
 	u8         retry_count[0x3];
 	u8         rnr_retry[0x3];
-	u8         reserved_13[0x1];
+	u8         reserved_at_393[0x1];
 	u8         fre[0x1];
 	u8         cur_rnr_retry[0x3];
 	u8         cur_retry_count[0x3];
-	u8         reserved_14[0x5];
+	u8         reserved_at_39b[0x5];
 
-	u8         reserved_15[0x20];
+	u8         reserved_at_3a0[0x20];
 
-	u8         reserved_16[0x8];
+	u8         reserved_at_3c0[0x8];
 	u8         next_send_psn[0x18];
 
-	u8         reserved_17[0x8];
+	u8         reserved_at_3e0[0x8];
 	u8         cqn_snd[0x18];
 
-	u8         reserved_18[0x40];
+	u8         reserved_at_400[0x40];
 
-	u8         reserved_19[0x8];
+	u8         reserved_at_440[0x8];
 	u8         last_acked_psn[0x18];
 
-	u8         reserved_20[0x8];
+	u8         reserved_at_460[0x8];
 	u8         ssn[0x18];
 
-	u8         reserved_21[0x8];
+	u8         reserved_at_480[0x8];
 	u8         log_rra_max[0x3];
-	u8         reserved_22[0x1];
+	u8         reserved_at_48b[0x1];
 	u8         atomic_mode[0x4];
 	u8         rre[0x1];
 	u8         rwe[0x1];
 	u8         rae[0x1];
-	u8         reserved_23[0x1];
+	u8         reserved_at_493[0x1];
 	u8         page_offset[0x6];
-	u8         reserved_24[0x3];
+	u8         reserved_at_49a[0x3];
 	u8         cd_slave_receive[0x1];
 	u8         cd_slave_send[0x1];
 	u8         cd_master[0x1];
 
-	u8         reserved_25[0x3];
+	u8         reserved_at_4a0[0x3];
 	u8         min_rnr_nak[0x5];
 	u8         next_rcv_psn[0x18];
 
-	u8         reserved_26[0x8];
+	u8         reserved_at_4c0[0x8];
 	u8         xrcd[0x18];
 
-	u8         reserved_27[0x8];
+	u8         reserved_at_4e0[0x8];
 	u8         cqn_rcv[0x18];
 
 	u8         dbr_addr[0x40];
 
 	u8         q_key[0x20];
 
-	u8         reserved_28[0x5];
+	u8         reserved_at_560[0x5];
 	u8         rq_type[0x3];
 	u8         srqn_rmpn[0x18];
 
-	u8         reserved_29[0x8];
+	u8         reserved_at_580[0x8];
 	u8         rmsn[0x18];
 
 	u8         hw_sq_wqebb_counter[0x10];
@@ -1866,33 +1866,33 @@
 
 	u8         sw_rq_counter[0x20];
 
-	u8         reserved_30[0x20];
+	u8         reserved_at_600[0x20];
 
-	u8         reserved_31[0xf];
+	u8         reserved_at_620[0xf];
 	u8         cgs[0x1];
 	u8         cs_req[0x8];
 	u8         cs_res[0x8];
 
 	u8         dc_access_key[0x40];
 
-	u8         reserved_32[0xc0];
+	u8         reserved_at_680[0xc0];
 };
 
 struct mlx5_ifc_roce_addr_layout_bits {
 	u8         source_l3_address[16][0x8];
 
-	u8         reserved_0[0x3];
+	u8         reserved_at_80[0x3];
 	u8         vlan_valid[0x1];
 	u8         vlan_id[0xc];
 	u8         source_mac_47_32[0x10];
 
 	u8         source_mac_31_0[0x20];
 
-	u8         reserved_1[0x14];
+	u8         reserved_at_c0[0x14];
 	u8         roce_l3_type[0x4];
 	u8         roce_version[0x8];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_e0[0x20];
 };
 
 union mlx5_ifc_hca_cap_union_bits {
@@ -1904,7 +1904,7 @@
 	struct mlx5_ifc_flow_table_nic_cap_bits flow_table_nic_cap;
 	struct mlx5_ifc_flow_table_eswitch_cap_bits flow_table_eswitch_cap;
 	struct mlx5_ifc_e_switch_cap_bits e_switch_cap;
-	u8         reserved_0[0x8000];
+	u8         reserved_at_0[0x8000];
 };
 
 enum {
@@ -1914,24 +1914,24 @@
 };
 
 struct mlx5_ifc_flow_context_bits {
-	u8         reserved_0[0x20];
+	u8         reserved_at_0[0x20];
 
 	u8         group_id[0x20];
 
-	u8         reserved_1[0x8];
+	u8         reserved_at_40[0x8];
 	u8         flow_tag[0x18];
 
-	u8         reserved_2[0x10];
+	u8         reserved_at_60[0x10];
 	u8         action[0x10];
 
-	u8         reserved_3[0x8];
+	u8         reserved_at_80[0x8];
 	u8         destination_list_size[0x18];
 
-	u8         reserved_4[0x160];
+	u8         reserved_at_a0[0x160];
 
 	struct mlx5_ifc_fte_match_param_bits match_value;
 
-	u8         reserved_5[0x600];
+	u8         reserved_at_1200[0x600];
 
 	struct mlx5_ifc_dest_format_struct_bits destination[0];
 };
@@ -1944,43 +1944,43 @@
 struct mlx5_ifc_xrc_srqc_bits {
 	u8         state[0x4];
 	u8         log_xrc_srq_size[0x4];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         wq_signature[0x1];
 	u8         cont_srq[0x1];
-	u8         reserved_1[0x1];
+	u8         reserved_at_22[0x1];
 	u8         rlky[0x1];
 	u8         basic_cyclic_rcv_wqe[0x1];
 	u8         log_rq_stride[0x3];
 	u8         xrcd[0x18];
 
 	u8         page_offset[0x6];
-	u8         reserved_2[0x2];
+	u8         reserved_at_46[0x2];
 	u8         cqn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 
 	u8         user_index_equal_xrc_srqn[0x1];
-	u8         reserved_4[0x1];
+	u8         reserved_at_81[0x1];
 	u8         log_page_size[0x6];
 	u8         user_index[0x18];
 
-	u8         reserved_5[0x20];
+	u8         reserved_at_a0[0x20];
 
-	u8         reserved_6[0x8];
+	u8         reserved_at_c0[0x8];
 	u8         pd[0x18];
 
 	u8         lwm[0x10];
 	u8         wqe_cnt[0x10];
 
-	u8         reserved_7[0x40];
+	u8         reserved_at_100[0x40];
 
 	u8         db_record_addr_h[0x20];
 
 	u8         db_record_addr_l[0x1e];
-	u8         reserved_8[0x2];
+	u8         reserved_at_17e[0x2];
 
-	u8         reserved_9[0x80];
+	u8         reserved_at_180[0x80];
 };
 
 struct mlx5_ifc_traffic_counter_bits {
@@ -1990,16 +1990,16 @@
 };
 
 struct mlx5_ifc_tisc_bits {
-	u8         reserved_0[0xc];
+	u8         reserved_at_0[0xc];
 	u8         prio[0x4];
-	u8         reserved_1[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_2[0x100];
+	u8         reserved_at_20[0x100];
 
-	u8         reserved_3[0x8];
+	u8         reserved_at_120[0x8];
 	u8         transport_domain[0x18];
 
-	u8         reserved_4[0x3c0];
+	u8         reserved_at_140[0x3c0];
 };
 
 enum {
@@ -2024,31 +2024,31 @@
 };
 
 struct mlx5_ifc_tirc_bits {
-	u8         reserved_0[0x20];
+	u8         reserved_at_0[0x20];
 
 	u8         disp_type[0x4];
-	u8         reserved_1[0x1c];
+	u8         reserved_at_24[0x1c];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 
-	u8         reserved_3[0x4];
+	u8         reserved_at_80[0x4];
 	u8         lro_timeout_period_usecs[0x10];
 	u8         lro_enable_mask[0x4];
 	u8         lro_max_ip_payload_size[0x8];
 
-	u8         reserved_4[0x40];
+	u8         reserved_at_a0[0x40];
 
-	u8         reserved_5[0x8];
+	u8         reserved_at_e0[0x8];
 	u8         inline_rqn[0x18];
 
 	u8         rx_hash_symmetric[0x1];
-	u8         reserved_6[0x1];
+	u8         reserved_at_101[0x1];
 	u8         tunneled_offload_en[0x1];
-	u8         reserved_7[0x5];
+	u8         reserved_at_103[0x5];
 	u8         indirect_table[0x18];
 
 	u8         rx_hash_fn[0x4];
-	u8         reserved_8[0x2];
+	u8         reserved_at_124[0x2];
 	u8         self_lb_block[0x2];
 	u8         transport_domain[0x18];
 
@@ -2058,7 +2058,7 @@
 
 	struct mlx5_ifc_rx_hash_field_select_bits rx_hash_field_selector_inner;
 
-	u8         reserved_9[0x4c0];
+	u8         reserved_at_2c0[0x4c0];
 };
 
 enum {
@@ -2069,39 +2069,39 @@
 struct mlx5_ifc_srqc_bits {
 	u8         state[0x4];
 	u8         log_srq_size[0x4];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         wq_signature[0x1];
 	u8         cont_srq[0x1];
-	u8         reserved_1[0x1];
+	u8         reserved_at_22[0x1];
 	u8         rlky[0x1];
-	u8         reserved_2[0x1];
+	u8         reserved_at_24[0x1];
 	u8         log_rq_stride[0x3];
 	u8         xrcd[0x18];
 
 	u8         page_offset[0x6];
-	u8         reserved_3[0x2];
+	u8         reserved_at_46[0x2];
 	u8         cqn[0x18];
 
-	u8         reserved_4[0x20];
+	u8         reserved_at_60[0x20];
 
-	u8         reserved_5[0x2];
+	u8         reserved_at_80[0x2];
 	u8         log_page_size[0x6];
-	u8         reserved_6[0x18];
+	u8         reserved_at_88[0x18];
 
-	u8         reserved_7[0x20];
+	u8         reserved_at_a0[0x20];
 
-	u8         reserved_8[0x8];
+	u8         reserved_at_c0[0x8];
 	u8         pd[0x18];
 
 	u8         lwm[0x10];
 	u8         wqe_cnt[0x10];
 
-	u8         reserved_9[0x40];
+	u8         reserved_at_100[0x40];
 
 	u8         dbr_addr[0x40];
 
-	u8         reserved_10[0x80];
+	u8         reserved_at_180[0x80];
 };
 
 enum {
@@ -2115,39 +2115,39 @@
 	u8         cd_master[0x1];
 	u8         fre[0x1];
 	u8         flush_in_error_en[0x1];
-	u8         reserved_0[0x4];
+	u8         reserved_at_4[0x4];
 	u8         state[0x4];
-	u8         reserved_1[0x14];
+	u8         reserved_at_c[0x14];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_20[0x8];
 	u8         user_index[0x18];
 
-	u8         reserved_3[0x8];
+	u8         reserved_at_40[0x8];
 	u8         cqn[0x18];
 
-	u8         reserved_4[0xa0];
+	u8         reserved_at_60[0xa0];
 
 	u8         tis_lst_sz[0x10];
-	u8         reserved_5[0x10];
+	u8         reserved_at_110[0x10];
 
-	u8         reserved_6[0x40];
+	u8         reserved_at_120[0x40];
 
-	u8         reserved_7[0x8];
+	u8         reserved_at_160[0x8];
 	u8         tis_num_0[0x18];
 
 	struct mlx5_ifc_wq_bits wq;
 };
 
 struct mlx5_ifc_rqtc_bits {
-	u8         reserved_0[0xa0];
+	u8         reserved_at_0[0xa0];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_a0[0x10];
 	u8         rqt_max_size[0x10];
 
-	u8         reserved_2[0x10];
+	u8         reserved_at_c0[0x10];
 	u8         rqt_actual_size[0x10];
 
-	u8         reserved_3[0x6a0];
+	u8         reserved_at_e0[0x6a0];
 
 	struct mlx5_ifc_rq_num_bits rq_num[0];
 };
@@ -2165,27 +2165,27 @@
 
 struct mlx5_ifc_rqc_bits {
 	u8         rlky[0x1];
-	u8         reserved_0[0x2];
+	u8         reserved_at_1[0x2];
 	u8         vsd[0x1];
 	u8         mem_rq_type[0x4];
 	u8         state[0x4];
-	u8         reserved_1[0x1];
+	u8         reserved_at_c[0x1];
 	u8         flush_in_error_en[0x1];
-	u8         reserved_2[0x12];
+	u8         reserved_at_e[0x12];
 
-	u8         reserved_3[0x8];
+	u8         reserved_at_20[0x8];
 	u8         user_index[0x18];
 
-	u8         reserved_4[0x8];
+	u8         reserved_at_40[0x8];
 	u8         cqn[0x18];
 
 	u8         counter_set_id[0x8];
-	u8         reserved_5[0x18];
+	u8         reserved_at_68[0x18];
 
-	u8         reserved_6[0x8];
+	u8         reserved_at_80[0x8];
 	u8         rmpn[0x18];
 
-	u8         reserved_7[0xe0];
+	u8         reserved_at_a0[0xe0];
 
 	struct mlx5_ifc_wq_bits wq;
 };
@@ -2196,31 +2196,31 @@
 };
 
 struct mlx5_ifc_rmpc_bits {
-	u8         reserved_0[0x8];
+	u8         reserved_at_0[0x8];
 	u8         state[0x4];
-	u8         reserved_1[0x14];
+	u8         reserved_at_c[0x14];
 
 	u8         basic_cyclic_rcv_wqe[0x1];
-	u8         reserved_2[0x1f];
+	u8         reserved_at_21[0x1f];
 
-	u8         reserved_3[0x140];
+	u8         reserved_at_40[0x140];
 
 	struct mlx5_ifc_wq_bits wq;
 };
 
 struct mlx5_ifc_nic_vport_context_bits {
-	u8         reserved_0[0x1f];
+	u8         reserved_at_0[0x1f];
 	u8         roce_en[0x1];
 
 	u8         arm_change_event[0x1];
-	u8         reserved_1[0x1a];
+	u8         reserved_at_21[0x1a];
 	u8         event_on_mtu[0x1];
 	u8         event_on_promisc_change[0x1];
 	u8         event_on_vlan_change[0x1];
 	u8         event_on_mc_address_change[0x1];
 	u8         event_on_uc_address_change[0x1];
 
-	u8         reserved_2[0xf0];
+	u8         reserved_at_40[0xf0];
 
 	u8         mtu[0x10];
 
@@ -2228,21 +2228,21 @@
 	u8         port_guid[0x40];
 	u8         node_guid[0x40];
 
-	u8         reserved_3[0x140];
+	u8         reserved_at_200[0x140];
 	u8         qkey_violation_counter[0x10];
-	u8         reserved_4[0x430];
+	u8         reserved_at_350[0x430];
 
 	u8         promisc_uc[0x1];
 	u8         promisc_mc[0x1];
 	u8         promisc_all[0x1];
-	u8         reserved_5[0x2];
+	u8         reserved_at_783[0x2];
 	u8         allowed_list_type[0x3];
-	u8         reserved_6[0xc];
+	u8         reserved_at_788[0xc];
 	u8         allowed_list_size[0xc];
 
 	struct mlx5_ifc_mac_address_layout_bits permanent_address;
 
-	u8         reserved_7[0x20];
+	u8         reserved_at_7e0[0x20];
 
 	u8         current_uc_mac_address[0][0x40];
 };
@@ -2254,9 +2254,9 @@
 };
 
 struct mlx5_ifc_mkc_bits {
-	u8         reserved_0[0x1];
+	u8         reserved_at_0[0x1];
 	u8         free[0x1];
-	u8         reserved_1[0xd];
+	u8         reserved_at_2[0xd];
 	u8         small_fence_on_rdma_read_response[0x1];
 	u8         umr_en[0x1];
 	u8         a[0x1];
@@ -2265,19 +2265,19 @@
 	u8         lw[0x1];
 	u8         lr[0x1];
 	u8         access_mode[0x2];
-	u8         reserved_2[0x8];
+	u8         reserved_at_18[0x8];
 
 	u8         qpn[0x18];
 	u8         mkey_7_0[0x8];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_40[0x20];
 
 	u8         length64[0x1];
 	u8         bsf_en[0x1];
 	u8         sync_umr[0x1];
-	u8         reserved_4[0x2];
+	u8         reserved_at_63[0x2];
 	u8         expected_sigerr_count[0x1];
-	u8         reserved_5[0x1];
+	u8         reserved_at_66[0x1];
 	u8         en_rinval[0x1];
 	u8         pd[0x18];
 
@@ -2287,18 +2287,18 @@
 
 	u8         bsf_octword_size[0x20];
 
-	u8         reserved_6[0x80];
+	u8         reserved_at_120[0x80];
 
 	u8         translations_octword_size[0x20];
 
-	u8         reserved_7[0x1b];
+	u8         reserved_at_1c0[0x1b];
 	u8         log_page_size[0x5];
 
-	u8         reserved_8[0x20];
+	u8         reserved_at_1e0[0x20];
 };
 
 struct mlx5_ifc_pkey_bits {
-	u8         reserved_0[0x10];
+	u8         reserved_at_0[0x10];
 	u8         pkey[0x10];
 };
 
@@ -2309,19 +2309,19 @@
 struct mlx5_ifc_hca_vport_context_bits {
 	u8         field_select[0x20];
 
-	u8         reserved_0[0xe0];
+	u8         reserved_at_20[0xe0];
 
 	u8         sm_virt_aware[0x1];
 	u8         has_smi[0x1];
 	u8         has_raw[0x1];
 	u8         grh_required[0x1];
-	u8         reserved_1[0xc];
+	u8         reserved_at_104[0xc];
 	u8         port_physical_state[0x4];
 	u8         vport_state_policy[0x4];
 	u8         port_state[0x4];
 	u8         vport_state[0x4];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_120[0x20];
 
 	u8         system_image_guid[0x40];
 
@@ -2337,33 +2337,33 @@
 
 	u8         cap_mask2_field_select[0x20];
 
-	u8         reserved_3[0x80];
+	u8         reserved_at_280[0x80];
 
 	u8         lid[0x10];
-	u8         reserved_4[0x4];
+	u8         reserved_at_310[0x4];
 	u8         init_type_reply[0x4];
 	u8         lmc[0x3];
 	u8         subnet_timeout[0x5];
 
 	u8         sm_lid[0x10];
 	u8         sm_sl[0x4];
-	u8         reserved_5[0xc];
+	u8         reserved_at_334[0xc];
 
 	u8         qkey_violation_counter[0x10];
 	u8         pkey_violation_counter[0x10];
 
-	u8         reserved_6[0xca0];
+	u8         reserved_at_360[0xca0];
 };
 
 struct mlx5_ifc_esw_vport_context_bits {
-	u8         reserved_0[0x3];
+	u8         reserved_at_0[0x3];
 	u8         vport_svlan_strip[0x1];
 	u8         vport_cvlan_strip[0x1];
 	u8         vport_svlan_insert[0x1];
 	u8         vport_cvlan_insert[0x2];
-	u8         reserved_1[0x18];
+	u8         reserved_at_8[0x18];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_20[0x20];
 
 	u8         svlan_cfi[0x1];
 	u8         svlan_pcp[0x3];
@@ -2372,7 +2372,7 @@
 	u8         cvlan_pcp[0x3];
 	u8         cvlan_id[0xc];
 
-	u8         reserved_3[0x7a0];
+	u8         reserved_at_60[0x7a0];
 };
 
 enum {
@@ -2387,41 +2387,41 @@
 
 struct mlx5_ifc_eqc_bits {
 	u8         status[0x4];
-	u8         reserved_0[0x9];
+	u8         reserved_at_4[0x9];
 	u8         ec[0x1];
 	u8         oi[0x1];
-	u8         reserved_1[0x5];
+	u8         reserved_at_f[0x5];
 	u8         st[0x4];
-	u8         reserved_2[0x8];
+	u8         reserved_at_18[0x8];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_20[0x20];
 
-	u8         reserved_4[0x14];
+	u8         reserved_at_40[0x14];
 	u8         page_offset[0x6];
-	u8         reserved_5[0x6];
+	u8         reserved_at_5a[0x6];
 
-	u8         reserved_6[0x3];
+	u8         reserved_at_60[0x3];
 	u8         log_eq_size[0x5];
 	u8         uar_page[0x18];
 
-	u8         reserved_7[0x20];
+	u8         reserved_at_80[0x20];
 
-	u8         reserved_8[0x18];
+	u8         reserved_at_a0[0x18];
 	u8         intr[0x8];
 
-	u8         reserved_9[0x3];
+	u8         reserved_at_c0[0x3];
 	u8         log_page_size[0x5];
-	u8         reserved_10[0x18];
+	u8         reserved_at_c8[0x18];
 
-	u8         reserved_11[0x60];
+	u8         reserved_at_e0[0x60];
 
-	u8         reserved_12[0x8];
+	u8         reserved_at_140[0x8];
 	u8         consumer_counter[0x18];
 
-	u8         reserved_13[0x8];
+	u8         reserved_at_160[0x8];
 	u8         producer_counter[0x18];
 
-	u8         reserved_14[0x80];
+	u8         reserved_at_180[0x80];
 };
 
 enum {
@@ -2445,14 +2445,14 @@
 };
 
 struct mlx5_ifc_dctc_bits {
-	u8         reserved_0[0x4];
+	u8         reserved_at_0[0x4];
 	u8         state[0x4];
-	u8         reserved_1[0x18];
+	u8         reserved_at_8[0x18];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_20[0x8];
 	u8         user_index[0x18];
 
-	u8         reserved_3[0x8];
+	u8         reserved_at_40[0x8];
 	u8         cqn[0x18];
 
 	u8         counter_set_id[0x8];
@@ -2464,45 +2464,45 @@
 	u8         latency_sensitive[0x1];
 	u8         rlky[0x1];
 	u8         free_ar[0x1];
-	u8         reserved_4[0xd];
+	u8         reserved_at_73[0xd];
 
-	u8         reserved_5[0x8];
+	u8         reserved_at_80[0x8];
 	u8         cs_res[0x8];
-	u8         reserved_6[0x3];
+	u8         reserved_at_90[0x3];
 	u8         min_rnr_nak[0x5];
-	u8         reserved_7[0x8];
+	u8         reserved_at_98[0x8];
 
-	u8         reserved_8[0x8];
+	u8         reserved_at_a0[0x8];
 	u8         srqn[0x18];
 
-	u8         reserved_9[0x8];
+	u8         reserved_at_c0[0x8];
 	u8         pd[0x18];
 
 	u8         tclass[0x8];
-	u8         reserved_10[0x4];
+	u8         reserved_at_e8[0x4];
 	u8         flow_label[0x14];
 
 	u8         dc_access_key[0x40];
 
-	u8         reserved_11[0x5];
+	u8         reserved_at_140[0x5];
 	u8         mtu[0x3];
 	u8         port[0x8];
 	u8         pkey_index[0x10];
 
-	u8         reserved_12[0x8];
+	u8         reserved_at_160[0x8];
 	u8         my_addr_index[0x8];
-	u8         reserved_13[0x8];
+	u8         reserved_at_170[0x8];
 	u8         hop_limit[0x8];
 
 	u8         dc_access_key_violation_count[0x20];
 
-	u8         reserved_14[0x14];
+	u8         reserved_at_1a0[0x14];
 	u8         dei_cfi[0x1];
 	u8         eth_prio[0x3];
 	u8         ecn[0x2];
 	u8         dscp[0x6];
 
-	u8         reserved_15[0x40];
+	u8         reserved_at_1c0[0x40];
 };
 
 enum {
@@ -2524,54 +2524,54 @@
 
 struct mlx5_ifc_cqc_bits {
 	u8         status[0x4];
-	u8         reserved_0[0x4];
+	u8         reserved_at_4[0x4];
 	u8         cqe_sz[0x3];
 	u8         cc[0x1];
-	u8         reserved_1[0x1];
+	u8         reserved_at_c[0x1];
 	u8         scqe_break_moderation_en[0x1];
 	u8         oi[0x1];
-	u8         reserved_2[0x2];
+	u8         reserved_at_f[0x2];
 	u8         cqe_zip_en[0x1];
 	u8         mini_cqe_res_format[0x2];
 	u8         st[0x4];
-	u8         reserved_3[0x8];
+	u8         reserved_at_18[0x8];
 
-	u8         reserved_4[0x20];
+	u8         reserved_at_20[0x20];
 
-	u8         reserved_5[0x14];
+	u8         reserved_at_40[0x14];
 	u8         page_offset[0x6];
-	u8         reserved_6[0x6];
+	u8         reserved_at_5a[0x6];
 
-	u8         reserved_7[0x3];
+	u8         reserved_at_60[0x3];
 	u8         log_cq_size[0x5];
 	u8         uar_page[0x18];
 
-	u8         reserved_8[0x4];
+	u8         reserved_at_80[0x4];
 	u8         cq_period[0xc];
 	u8         cq_max_count[0x10];
 
-	u8         reserved_9[0x18];
+	u8         reserved_at_a0[0x18];
 	u8         c_eqn[0x8];
 
-	u8         reserved_10[0x3];
+	u8         reserved_at_c0[0x3];
 	u8         log_page_size[0x5];
-	u8         reserved_11[0x18];
+	u8         reserved_at_c8[0x18];
 
-	u8         reserved_12[0x20];
+	u8         reserved_at_e0[0x20];
 
-	u8         reserved_13[0x8];
+	u8         reserved_at_100[0x8];
 	u8         last_notified_index[0x18];
 
-	u8         reserved_14[0x8];
+	u8         reserved_at_120[0x8];
 	u8         last_solicit_index[0x18];
 
-	u8         reserved_15[0x8];
+	u8         reserved_at_140[0x8];
 	u8         consumer_counter[0x18];
 
-	u8         reserved_16[0x8];
+	u8         reserved_at_160[0x8];
 	u8         producer_counter[0x18];
 
-	u8         reserved_17[0x40];
+	u8         reserved_at_180[0x40];
 
 	u8         dbr_addr[0x40];
 };
@@ -2580,16 +2580,16 @@
 	struct mlx5_ifc_cong_control_802_1qau_rp_bits cong_control_802_1qau_rp;
 	struct mlx5_ifc_cong_control_r_roce_ecn_rp_bits cong_control_r_roce_ecn_rp;
 	struct mlx5_ifc_cong_control_r_roce_ecn_np_bits cong_control_r_roce_ecn_np;
-	u8         reserved_0[0x800];
+	u8         reserved_at_0[0x800];
 };
 
 struct mlx5_ifc_query_adapter_param_block_bits {
-	u8         reserved_0[0xc0];
+	u8         reserved_at_0[0xc0];
 
-	u8         reserved_1[0x8];
+	u8         reserved_at_c0[0x8];
 	u8         ieee_vendor_id[0x18];
 
-	u8         reserved_2[0x10];
+	u8         reserved_at_e0[0x10];
 	u8         vsd_vendor_id[0x10];
 
 	u8         vsd[208][0x8];
@@ -2600,14 +2600,14 @@
 union mlx5_ifc_modify_field_select_resize_field_select_auto_bits {
 	struct mlx5_ifc_modify_field_select_bits modify_field_select;
 	struct mlx5_ifc_resize_field_select_bits resize_field_select;
-	u8         reserved_0[0x20];
+	u8         reserved_at_0[0x20];
 };
 
 union mlx5_ifc_field_select_802_1_r_roce_auto_bits {
 	struct mlx5_ifc_field_select_802_1qau_rp_bits field_select_802_1qau_rp;
 	struct mlx5_ifc_field_select_r_roce_rp_bits field_select_r_roce_rp;
 	struct mlx5_ifc_field_select_r_roce_np_bits field_select_r_roce_np;
-	u8         reserved_0[0x20];
+	u8         reserved_at_0[0x20];
 };
 
 union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits {
@@ -2619,7 +2619,7 @@
 	struct mlx5_ifc_eth_per_prio_grp_data_layout_bits eth_per_prio_grp_data_layout;
 	struct mlx5_ifc_eth_per_traffic_grp_data_layout_bits eth_per_traffic_grp_data_layout;
 	struct mlx5_ifc_phys_layer_cntrs_bits phys_layer_cntrs;
-	u8         reserved_0[0x7c0];
+	u8         reserved_at_0[0x7c0];
 };
 
 union mlx5_ifc_event_auto_bits {
@@ -2635,23 +2635,23 @@
 	struct mlx5_ifc_db_bf_congestion_event_bits db_bf_congestion_event;
 	struct mlx5_ifc_stall_vl_event_bits stall_vl_event;
 	struct mlx5_ifc_cmd_inter_comp_event_bits cmd_inter_comp_event;
-	u8         reserved_0[0xe0];
+	u8         reserved_at_0[0xe0];
 };
 
 struct mlx5_ifc_health_buffer_bits {
-	u8         reserved_0[0x100];
+	u8         reserved_at_0[0x100];
 
 	u8         assert_existptr[0x20];
 
 	u8         assert_callra[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_140[0x40];
 
 	u8         fw_version[0x20];
 
 	u8         hw_id[0x20];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_1c0[0x20];
 
 	u8         irisc_index[0x8];
 	u8         synd[0x8];
@@ -2660,20 +2660,20 @@
 
 struct mlx5_ifc_register_loopback_control_bits {
 	u8         no_lb[0x1];
-	u8         reserved_0[0x7];
+	u8         reserved_at_1[0x7];
 	u8         port[0x8];
-	u8         reserved_1[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_2[0x60];
+	u8         reserved_at_20[0x60];
 };
 
 struct mlx5_ifc_teardown_hca_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 enum {
@@ -2683,108 +2683,108 @@
 
 struct mlx5_ifc_teardown_hca_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x10];
+	u8         reserved_at_40[0x10];
 	u8         profile[0x10];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_sqerr2rts_qp_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_sqerr2rts_qp_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         qpn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 
 	u8         opt_param_mask[0x20];
 
-	u8         reserved_4[0x20];
+	u8         reserved_at_a0[0x20];
 
 	struct mlx5_ifc_qpc_bits qpc;
 
-	u8         reserved_5[0x80];
+	u8         reserved_at_800[0x80];
 };
 
 struct mlx5_ifc_sqd2rts_qp_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_sqd2rts_qp_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         qpn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 
 	u8         opt_param_mask[0x20];
 
-	u8         reserved_4[0x20];
+	u8         reserved_at_a0[0x20];
 
 	struct mlx5_ifc_qpc_bits qpc;
 
-	u8         reserved_5[0x80];
+	u8         reserved_at_800[0x80];
 };
 
 struct mlx5_ifc_set_roce_address_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_set_roce_address_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
 	u8         roce_address_index[0x10];
-	u8         reserved_2[0x10];
+	u8         reserved_at_50[0x10];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 
 	struct mlx5_ifc_roce_addr_layout_bits roce_address;
 };
 
 struct mlx5_ifc_set_mad_demux_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 enum {
@@ -2794,89 +2794,89 @@
 
 struct mlx5_ifc_set_mad_demux_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_40[0x20];
 
-	u8         reserved_3[0x6];
+	u8         reserved_at_60[0x6];
 	u8         demux_mode[0x2];
-	u8         reserved_4[0x18];
+	u8         reserved_at_68[0x18];
 };
 
 struct mlx5_ifc_set_l2_table_entry_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_set_l2_table_entry_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x60];
+	u8         reserved_at_40[0x60];
 
-	u8         reserved_3[0x8];
+	u8         reserved_at_a0[0x8];
 	u8         table_index[0x18];
 
-	u8         reserved_4[0x20];
+	u8         reserved_at_c0[0x20];
 
-	u8         reserved_5[0x13];
+	u8         reserved_at_e0[0x13];
 	u8         vlan_valid[0x1];
 	u8         vlan[0xc];
 
 	struct mlx5_ifc_mac_address_layout_bits mac_address;
 
-	u8         reserved_6[0xc0];
+	u8         reserved_at_140[0xc0];
 };
 
 struct mlx5_ifc_set_issi_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_set_issi_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x10];
+	u8         reserved_at_40[0x10];
 	u8         current_issi[0x10];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_set_hca_cap_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_set_hca_cap_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 
 	union mlx5_ifc_hca_cap_union_bits capability;
 };
@@ -2890,156 +2890,156 @@
 
 struct mlx5_ifc_set_fte_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_set_fte_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 
 	u8         table_type[0x8];
-	u8         reserved_3[0x18];
+	u8         reserved_at_88[0x18];
 
-	u8         reserved_4[0x8];
+	u8         reserved_at_a0[0x8];
 	u8         table_id[0x18];
 
-	u8         reserved_5[0x18];
+	u8         reserved_at_c0[0x18];
 	u8         modify_enable_mask[0x8];
 
-	u8         reserved_6[0x20];
+	u8         reserved_at_e0[0x20];
 
 	u8         flow_index[0x20];
 
-	u8         reserved_7[0xe0];
+	u8         reserved_at_120[0xe0];
 
 	struct mlx5_ifc_flow_context_bits flow_context;
 };
 
 struct mlx5_ifc_rts2rts_qp_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_rts2rts_qp_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         qpn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 
 	u8         opt_param_mask[0x20];
 
-	u8         reserved_4[0x20];
+	u8         reserved_at_a0[0x20];
 
 	struct mlx5_ifc_qpc_bits qpc;
 
-	u8         reserved_5[0x80];
+	u8         reserved_at_800[0x80];
 };
 
 struct mlx5_ifc_rtr2rts_qp_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_rtr2rts_qp_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         qpn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 
 	u8         opt_param_mask[0x20];
 
-	u8         reserved_4[0x20];
+	u8         reserved_at_a0[0x20];
 
 	struct mlx5_ifc_qpc_bits qpc;
 
-	u8         reserved_5[0x80];
+	u8         reserved_at_800[0x80];
 };
 
 struct mlx5_ifc_rst2init_qp_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_rst2init_qp_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         qpn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 
 	u8         opt_param_mask[0x20];
 
-	u8         reserved_4[0x20];
+	u8         reserved_at_a0[0x20];
 
 	struct mlx5_ifc_qpc_bits qpc;
 
-	u8         reserved_5[0x80];
+	u8         reserved_at_800[0x80];
 };
 
 struct mlx5_ifc_query_xrc_srq_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 
 	struct mlx5_ifc_xrc_srqc_bits xrc_srq_context_entry;
 
-	u8         reserved_2[0x600];
+	u8         reserved_at_280[0x600];
 
 	u8         pas[0][0x40];
 };
 
 struct mlx5_ifc_query_xrc_srq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         xrc_srqn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 enum {
@@ -3049,13 +3049,13 @@
 
 struct mlx5_ifc_query_vport_state_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x20];
+	u8         reserved_at_40[0x20];
 
-	u8         reserved_2[0x18];
+	u8         reserved_at_60[0x18];
 	u8         admin_state[0x4];
 	u8         state[0x4];
 };
@@ -3067,25 +3067,25 @@
 
 struct mlx5_ifc_query_vport_state_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
 	u8         other_vport[0x1];
-	u8         reserved_2[0xf];
+	u8         reserved_at_41[0xf];
 	u8         vport_number[0x10];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_query_vport_counter_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 
 	struct mlx5_ifc_traffic_counter_bits received_errors;
 
@@ -3111,7 +3111,7 @@
 
 	struct mlx5_ifc_traffic_counter_bits transmitted_eth_multicast;
 
-	u8         reserved_2[0xa00];
+	u8         reserved_at_680[0xa00];
 };
 
 enum {
@@ -3120,328 +3120,328 @@
 
 struct mlx5_ifc_query_vport_counter_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
 	u8         other_vport[0x1];
-	u8         reserved_2[0xf];
+	u8         reserved_at_41[0xf];
 	u8         vport_number[0x10];
 
-	u8         reserved_3[0x60];
+	u8         reserved_at_60[0x60];
 
 	u8         clear[0x1];
-	u8         reserved_4[0x1f];
+	u8         reserved_at_c1[0x1f];
 
-	u8         reserved_5[0x20];
+	u8         reserved_at_e0[0x20];
 };
 
 struct mlx5_ifc_query_tis_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 
 	struct mlx5_ifc_tisc_bits tis_context;
 };
 
 struct mlx5_ifc_query_tis_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         tisn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_query_tir_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0xc0];
+	u8         reserved_at_40[0xc0];
 
 	struct mlx5_ifc_tirc_bits tir_context;
 };
 
 struct mlx5_ifc_query_tir_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         tirn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_query_srq_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 
 	struct mlx5_ifc_srqc_bits srq_context_entry;
 
-	u8         reserved_2[0x600];
+	u8         reserved_at_280[0x600];
 
 	u8         pas[0][0x40];
 };
 
 struct mlx5_ifc_query_srq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         srqn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_query_sq_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0xc0];
+	u8         reserved_at_40[0xc0];
 
 	struct mlx5_ifc_sqc_bits sq_context;
 };
 
 struct mlx5_ifc_query_sq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         sqn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_query_special_contexts_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x20];
+	u8         reserved_at_40[0x20];
 
 	u8         resd_lkey[0x20];
 };
 
 struct mlx5_ifc_query_special_contexts_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_query_rqt_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0xc0];
+	u8         reserved_at_40[0xc0];
 
 	struct mlx5_ifc_rqtc_bits rqt_context;
 };
 
 struct mlx5_ifc_query_rqt_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         rqtn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_query_rq_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0xc0];
+	u8         reserved_at_40[0xc0];
 
 	struct mlx5_ifc_rqc_bits rq_context;
 };
 
 struct mlx5_ifc_query_rq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         rqn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_query_roce_address_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 
 	struct mlx5_ifc_roce_addr_layout_bits roce_address;
 };
 
 struct mlx5_ifc_query_roce_address_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
 	u8         roce_address_index[0x10];
-	u8         reserved_2[0x10];
+	u8         reserved_at_50[0x10];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_query_rmp_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0xc0];
+	u8         reserved_at_40[0xc0];
 
 	struct mlx5_ifc_rmpc_bits rmp_context;
 };
 
 struct mlx5_ifc_query_rmp_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         rmpn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_query_qp_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 
 	u8         opt_param_mask[0x20];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_a0[0x20];
 
 	struct mlx5_ifc_qpc_bits qpc;
 
-	u8         reserved_3[0x80];
+	u8         reserved_at_800[0x80];
 
 	u8         pas[0][0x40];
 };
 
 struct mlx5_ifc_query_qp_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         qpn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_query_q_counter_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 
 	u8         rx_write_requests[0x20];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_a0[0x20];
 
 	u8         rx_read_requests[0x20];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_e0[0x20];
 
 	u8         rx_atomic_requests[0x20];
 
-	u8         reserved_4[0x20];
+	u8         reserved_at_120[0x20];
 
 	u8         rx_dct_connect[0x20];
 
-	u8         reserved_5[0x20];
+	u8         reserved_at_160[0x20];
 
 	u8         out_of_buffer[0x20];
 
-	u8         reserved_6[0x20];
+	u8         reserved_at_1a0[0x20];
 
 	u8         out_of_sequence[0x20];
 
-	u8         reserved_7[0x620];
+	u8         reserved_at_1e0[0x620];
 };
 
 struct mlx5_ifc_query_q_counter_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x80];
+	u8         reserved_at_40[0x80];
 
 	u8         clear[0x1];
-	u8         reserved_3[0x1f];
+	u8         reserved_at_c1[0x1f];
 
-	u8         reserved_4[0x18];
+	u8         reserved_at_e0[0x18];
 	u8         counter_set_id[0x8];
 };
 
 struct mlx5_ifc_query_pages_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_40[0x10];
 	u8         function_id[0x10];
 
 	u8         num_pages[0x20];
@@ -3455,55 +3455,55 @@
 
 struct mlx5_ifc_query_pages_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x10];
+	u8         reserved_at_40[0x10];
 	u8         function_id[0x10];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_query_nic_vport_context_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 
 	struct mlx5_ifc_nic_vport_context_bits nic_vport_context;
 };
 
 struct mlx5_ifc_query_nic_vport_context_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
 	u8         other_vport[0x1];
-	u8         reserved_2[0xf];
+	u8         reserved_at_41[0xf];
 	u8         vport_number[0x10];
 
-	u8         reserved_3[0x5];
+	u8         reserved_at_60[0x5];
 	u8         allowed_list_type[0x3];
-	u8         reserved_4[0x18];
+	u8         reserved_at_68[0x18];
 };
 
 struct mlx5_ifc_query_mkey_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 
 	struct mlx5_ifc_mkc_bits memory_key_mkey_entry;
 
-	u8         reserved_2[0x600];
+	u8         reserved_at_280[0x600];
 
 	u8         bsf0_klm0_pas_mtt0_1[16][0x8];
 
@@ -3512,265 +3512,265 @@
 
 struct mlx5_ifc_query_mkey_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         mkey_index[0x18];
 
 	u8         pg_access[0x1];
-	u8         reserved_3[0x1f];
+	u8         reserved_at_61[0x1f];
 };
 
 struct mlx5_ifc_query_mad_demux_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 
 	u8         mad_dumux_parameters_block[0x20];
 };
 
 struct mlx5_ifc_query_mad_demux_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_query_l2_table_entry_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0xa0];
+	u8         reserved_at_40[0xa0];
 
-	u8         reserved_2[0x13];
+	u8         reserved_at_e0[0x13];
 	u8         vlan_valid[0x1];
 	u8         vlan[0xc];
 
 	struct mlx5_ifc_mac_address_layout_bits mac_address;
 
-	u8         reserved_3[0xc0];
+	u8         reserved_at_140[0xc0];
 };
 
 struct mlx5_ifc_query_l2_table_entry_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x60];
+	u8         reserved_at_40[0x60];
 
-	u8         reserved_3[0x8];
+	u8         reserved_at_a0[0x8];
 	u8         table_index[0x18];
 
-	u8         reserved_4[0x140];
+	u8         reserved_at_c0[0x140];
 };
 
 struct mlx5_ifc_query_issi_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_40[0x10];
 	u8         current_issi[0x10];
 
-	u8         reserved_2[0xa0];
+	u8         reserved_at_60[0xa0];
 
-	u8         supported_issi_reserved[76][0x8];
+	u8         reserved_at_100[76][0x8];
 	u8         supported_issi_dw0[0x20];
 };
 
 struct mlx5_ifc_query_issi_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_query_hca_vport_pkey_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 
 	struct mlx5_ifc_pkey_bits pkey[0];
 };
 
 struct mlx5_ifc_query_hca_vport_pkey_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
 	u8         other_vport[0x1];
-	u8         reserved_2[0xb];
+	u8         reserved_at_41[0xb];
 	u8         port_num[0x4];
 	u8         vport_number[0x10];
 
-	u8         reserved_3[0x10];
+	u8         reserved_at_60[0x10];
 	u8         pkey_index[0x10];
 };
 
 struct mlx5_ifc_query_hca_vport_gid_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x20];
+	u8         reserved_at_40[0x20];
 
 	u8         gids_num[0x10];
-	u8         reserved_2[0x10];
+	u8         reserved_at_70[0x10];
 
 	struct mlx5_ifc_array128_auto_bits gid[0];
 };
 
 struct mlx5_ifc_query_hca_vport_gid_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
 	u8         other_vport[0x1];
-	u8         reserved_2[0xb];
+	u8         reserved_at_41[0xb];
 	u8         port_num[0x4];
 	u8         vport_number[0x10];
 
-	u8         reserved_3[0x10];
+	u8         reserved_at_60[0x10];
 	u8         gid_index[0x10];
 };
 
 struct mlx5_ifc_query_hca_vport_context_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 
 	struct mlx5_ifc_hca_vport_context_bits hca_vport_context;
 };
 
 struct mlx5_ifc_query_hca_vport_context_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
 	u8         other_vport[0x1];
-	u8         reserved_2[0xb];
+	u8         reserved_at_41[0xb];
 	u8         port_num[0x4];
 	u8         vport_number[0x10];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_query_hca_cap_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 
 	union mlx5_ifc_hca_cap_union_bits capability;
 };
 
 struct mlx5_ifc_query_hca_cap_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_query_flow_table_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x80];
+	u8         reserved_at_40[0x80];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_c0[0x8];
 	u8         level[0x8];
-	u8         reserved_3[0x8];
+	u8         reserved_at_d0[0x8];
 	u8         log_size[0x8];
 
-	u8         reserved_4[0x120];
+	u8         reserved_at_e0[0x120];
 };
 
 struct mlx5_ifc_query_flow_table_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 
 	u8         table_type[0x8];
-	u8         reserved_3[0x18];
+	u8         reserved_at_88[0x18];
 
-	u8         reserved_4[0x8];
+	u8         reserved_at_a0[0x8];
 	u8         table_id[0x18];
 
-	u8         reserved_5[0x140];
+	u8         reserved_at_c0[0x140];
 };
 
 struct mlx5_ifc_query_fte_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x1c0];
+	u8         reserved_at_40[0x1c0];
 
 	struct mlx5_ifc_flow_context_bits flow_context;
 };
 
 struct mlx5_ifc_query_fte_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 
 	u8         table_type[0x8];
-	u8         reserved_3[0x18];
+	u8         reserved_at_88[0x18];
 
-	u8         reserved_4[0x8];
+	u8         reserved_at_a0[0x8];
 	u8         table_id[0x18];
 
-	u8         reserved_5[0x40];
+	u8         reserved_at_c0[0x40];
 
 	u8         flow_index[0x20];
 
-	u8         reserved_6[0xe0];
+	u8         reserved_at_120[0xe0];
 };
 
 enum {
@@ -3781,84 +3781,84 @@
 
 struct mlx5_ifc_query_flow_group_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0xa0];
+	u8         reserved_at_40[0xa0];
 
 	u8         start_flow_index[0x20];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_100[0x20];
 
 	u8         end_flow_index[0x20];
 
-	u8         reserved_3[0xa0];
+	u8         reserved_at_140[0xa0];
 
-	u8         reserved_4[0x18];
+	u8         reserved_at_1e0[0x18];
 	u8         match_criteria_enable[0x8];
 
 	struct mlx5_ifc_fte_match_param_bits match_criteria;
 
-	u8         reserved_5[0xe00];
+	u8         reserved_at_1200[0xe00];
 };
 
 struct mlx5_ifc_query_flow_group_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 
 	u8         table_type[0x8];
-	u8         reserved_3[0x18];
+	u8         reserved_at_88[0x18];
 
-	u8         reserved_4[0x8];
+	u8         reserved_at_a0[0x8];
 	u8         table_id[0x18];
 
 	u8         group_id[0x20];
 
-	u8         reserved_5[0x120];
+	u8         reserved_at_e0[0x120];
 };
 
 struct mlx5_ifc_query_esw_vport_context_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 
 	struct mlx5_ifc_esw_vport_context_bits esw_vport_context;
 };
 
 struct mlx5_ifc_query_esw_vport_context_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
 	u8         other_vport[0x1];
-	u8         reserved_2[0xf];
+	u8         reserved_at_41[0xf];
 	u8         vport_number[0x10];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_modify_esw_vport_context_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_esw_vport_context_fields_select_bits {
-	u8         reserved[0x1c];
+	u8         reserved_at_0[0x1c];
 	u8         vport_cvlan_insert[0x1];
 	u8         vport_svlan_insert[0x1];
 	u8         vport_cvlan_strip[0x1];
@@ -3867,13 +3867,13 @@
 
 struct mlx5_ifc_modify_esw_vport_context_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
 	u8         other_vport[0x1];
-	u8         reserved_2[0xf];
+	u8         reserved_at_41[0xf];
 	u8         vport_number[0x10];
 
 	struct mlx5_ifc_esw_vport_context_fields_select_bits field_select;
@@ -3883,124 +3883,124 @@
 
 struct mlx5_ifc_query_eq_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 
 	struct mlx5_ifc_eqc_bits eq_context_entry;
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_280[0x40];
 
 	u8         event_bitmask[0x40];
 
-	u8         reserved_3[0x580];
+	u8         reserved_at_300[0x580];
 
 	u8         pas[0][0x40];
 };
 
 struct mlx5_ifc_query_eq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x18];
+	u8         reserved_at_40[0x18];
 	u8         eq_number[0x8];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_query_dct_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 
 	struct mlx5_ifc_dctc_bits dct_context_entry;
 
-	u8         reserved_2[0x180];
+	u8         reserved_at_280[0x180];
 };
 
 struct mlx5_ifc_query_dct_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         dctn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_query_cq_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 
 	struct mlx5_ifc_cqc_bits cq_context;
 
-	u8         reserved_2[0x600];
+	u8         reserved_at_280[0x600];
 
 	u8         pas[0][0x40];
 };
 
 struct mlx5_ifc_query_cq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         cqn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_query_cong_status_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x20];
+	u8         reserved_at_40[0x20];
 
 	u8         enable[0x1];
 	u8         tag_enable[0x1];
-	u8         reserved_2[0x1e];
+	u8         reserved_at_62[0x1e];
 };
 
 struct mlx5_ifc_query_cong_status_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x18];
+	u8         reserved_at_40[0x18];
 	u8         priority[0x4];
 	u8         cong_protocol[0x4];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_query_cong_statistics_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 
 	u8         cur_flows[0x20];
 
@@ -4014,7 +4014,7 @@
 
 	u8         cnp_handled_low[0x20];
 
-	u8         reserved_2[0x100];
+	u8         reserved_at_140[0x100];
 
 	u8         time_stamp_high[0x20];
 
@@ -4030,453 +4030,455 @@
 
 	u8         cnps_sent_low[0x20];
 
-	u8         reserved_3[0x560];
+	u8         reserved_at_320[0x560];
 };
 
 struct mlx5_ifc_query_cong_statistics_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
 	u8         clear[0x1];
-	u8         reserved_2[0x1f];
+	u8         reserved_at_41[0x1f];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_query_cong_params_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 
 	union mlx5_ifc_cong_control_roce_ecn_auto_bits congestion_parameters;
 };
 
 struct mlx5_ifc_query_cong_params_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x1c];
+	u8         reserved_at_40[0x1c];
 	u8         cong_protocol[0x4];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_query_adapter_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 
 	struct mlx5_ifc_query_adapter_param_block_bits query_adapter_struct;
 };
 
 struct mlx5_ifc_query_adapter_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_qp_2rst_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_qp_2rst_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         qpn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_qp_2err_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_qp_2err_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         qpn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_page_fault_resume_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_page_fault_resume_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
 	u8         error[0x1];
-	u8         reserved_2[0x4];
+	u8         reserved_at_41[0x4];
 	u8         rdma[0x1];
 	u8         read_write[0x1];
 	u8         req_res[0x1];
 	u8         qpn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_nop_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_nop_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_modify_vport_state_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_modify_vport_state_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
 	u8         other_vport[0x1];
-	u8         reserved_2[0xf];
+	u8         reserved_at_41[0xf];
 	u8         vport_number[0x10];
 
-	u8         reserved_3[0x18];
+	u8         reserved_at_60[0x18];
 	u8         admin_state[0x4];
-	u8         reserved_4[0x4];
+	u8         reserved_at_7c[0x4];
 };
 
 struct mlx5_ifc_modify_tis_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_modify_tis_bitmask_bits {
-	u8         reserved_0[0x20];
+	u8         reserved_at_0[0x20];
 
-	u8         reserved_1[0x1f];
+	u8         reserved_at_20[0x1f];
 	u8         prio[0x1];
 };
 
 struct mlx5_ifc_modify_tis_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         tisn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 
 	struct mlx5_ifc_modify_tis_bitmask_bits bitmask;
 
-	u8         reserved_4[0x40];
+	u8         reserved_at_c0[0x40];
 
 	struct mlx5_ifc_tisc_bits ctx;
 };
 
 struct mlx5_ifc_modify_tir_bitmask_bits {
-	u8	   reserved_0[0x20];
+	u8	   reserved_at_0[0x20];
 
-	u8         reserved_1[0x1b];
+	u8         reserved_at_20[0x1b];
 	u8         self_lb_en[0x1];
-	u8         reserved_2[0x3];
+	u8         reserved_at_3c[0x1];
+	u8         hash[0x1];
+	u8         reserved_at_3e[0x1];
 	u8         lro[0x1];
 };
 
 struct mlx5_ifc_modify_tir_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_modify_tir_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         tirn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 
 	struct mlx5_ifc_modify_tir_bitmask_bits bitmask;
 
-	u8         reserved_4[0x40];
+	u8         reserved_at_c0[0x40];
 
 	struct mlx5_ifc_tirc_bits ctx;
 };
 
 struct mlx5_ifc_modify_sq_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_modify_sq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
 	u8         sq_state[0x4];
-	u8         reserved_2[0x4];
+	u8         reserved_at_44[0x4];
 	u8         sqn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 
 	u8         modify_bitmask[0x40];
 
-	u8         reserved_4[0x40];
+	u8         reserved_at_c0[0x40];
 
 	struct mlx5_ifc_sqc_bits ctx;
 };
 
 struct mlx5_ifc_modify_rqt_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_rqt_bitmask_bits {
-	u8	   reserved[0x20];
+	u8	   reserved_at_0[0x20];
 
-	u8         reserved1[0x1f];
+	u8         reserved_at_20[0x1f];
 	u8         rqn_list[0x1];
 };
 
 struct mlx5_ifc_modify_rqt_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         rqtn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 
 	struct mlx5_ifc_rqt_bitmask_bits bitmask;
 
-	u8         reserved_4[0x40];
+	u8         reserved_at_c0[0x40];
 
 	struct mlx5_ifc_rqtc_bits ctx;
 };
 
 struct mlx5_ifc_modify_rq_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_modify_rq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
 	u8         rq_state[0x4];
-	u8         reserved_2[0x4];
+	u8         reserved_at_44[0x4];
 	u8         rqn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 
 	u8         modify_bitmask[0x40];
 
-	u8         reserved_4[0x40];
+	u8         reserved_at_c0[0x40];
 
 	struct mlx5_ifc_rqc_bits ctx;
 };
 
 struct mlx5_ifc_modify_rmp_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_rmp_bitmask_bits {
-	u8	   reserved[0x20];
+	u8	   reserved_at_0[0x20];
 
-	u8         reserved1[0x1f];
+	u8         reserved_at_20[0x1f];
 	u8         lwm[0x1];
 };
 
 struct mlx5_ifc_modify_rmp_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
 	u8         rmp_state[0x4];
-	u8         reserved_2[0x4];
+	u8         reserved_at_44[0x4];
 	u8         rmpn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 
 	struct mlx5_ifc_rmp_bitmask_bits bitmask;
 
-	u8         reserved_4[0x40];
+	u8         reserved_at_c0[0x40];
 
 	struct mlx5_ifc_rmpc_bits ctx;
 };
 
 struct mlx5_ifc_modify_nic_vport_context_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_modify_nic_vport_field_select_bits {
-	u8         reserved_0[0x19];
+	u8         reserved_at_0[0x19];
 	u8         mtu[0x1];
 	u8         change_event[0x1];
 	u8         promisc[0x1];
 	u8         permanent_address[0x1];
 	u8         addresses_list[0x1];
 	u8         roce_en[0x1];
-	u8         reserved_1[0x1];
+	u8         reserved_at_1f[0x1];
 };
 
 struct mlx5_ifc_modify_nic_vport_context_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
 	u8         other_vport[0x1];
-	u8         reserved_2[0xf];
+	u8         reserved_at_41[0xf];
 	u8         vport_number[0x10];
 
 	struct mlx5_ifc_modify_nic_vport_field_select_bits field_select;
 
-	u8         reserved_3[0x780];
+	u8         reserved_at_80[0x780];
 
 	struct mlx5_ifc_nic_vport_context_bits nic_vport_context;
 };
 
 struct mlx5_ifc_modify_hca_vport_context_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_modify_hca_vport_context_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
 	u8         other_vport[0x1];
-	u8         reserved_2[0xb];
+	u8         reserved_at_41[0xb];
 	u8         port_num[0x4];
 	u8         vport_number[0x10];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 
 	struct mlx5_ifc_hca_vport_context_bits hca_vport_context;
 };
 
 struct mlx5_ifc_modify_cq_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 enum {
@@ -4486,83 +4488,83 @@
 
 struct mlx5_ifc_modify_cq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         cqn[0x18];
 
 	union mlx5_ifc_modify_field_select_resize_field_select_auto_bits modify_field_select_resize_field_select;
 
 	struct mlx5_ifc_cqc_bits cq_context;
 
-	u8         reserved_3[0x600];
+	u8         reserved_at_280[0x600];
 
 	u8         pas[0][0x40];
 };
 
 struct mlx5_ifc_modify_cong_status_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_modify_cong_status_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x18];
+	u8         reserved_at_40[0x18];
 	u8         priority[0x4];
 	u8         cong_protocol[0x4];
 
 	u8         enable[0x1];
 	u8         tag_enable[0x1];
-	u8         reserved_3[0x1e];
+	u8         reserved_at_62[0x1e];
 };
 
 struct mlx5_ifc_modify_cong_params_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_modify_cong_params_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x1c];
+	u8         reserved_at_40[0x1c];
 	u8         cong_protocol[0x4];
 
 	union mlx5_ifc_field_select_802_1_r_roce_auto_bits field_select;
 
-	u8         reserved_3[0x80];
+	u8         reserved_at_80[0x80];
 
 	union mlx5_ifc_cong_control_roce_ecn_auto_bits congestion_parameters;
 };
 
 struct mlx5_ifc_manage_pages_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
 	u8         output_num_entries[0x20];
 
-	u8         reserved_1[0x20];
+	u8         reserved_at_60[0x20];
 
 	u8         pas[0][0x40];
 };
@@ -4575,12 +4577,12 @@
 
 struct mlx5_ifc_manage_pages_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x10];
+	u8         reserved_at_40[0x10];
 	u8         function_id[0x10];
 
 	u8         input_num_entries[0x20];
@@ -4590,117 +4592,117 @@
 
 struct mlx5_ifc_mad_ifc_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 
 	u8         response_mad_packet[256][0x8];
 };
 
 struct mlx5_ifc_mad_ifc_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
 	u8         remote_lid[0x10];
-	u8         reserved_2[0x8];
+	u8         reserved_at_50[0x8];
 	u8         port[0x8];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 
 	u8         mad[256][0x8];
 };
 
 struct mlx5_ifc_init_hca_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_init_hca_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_init2rtr_qp_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_init2rtr_qp_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         qpn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 
 	u8         opt_param_mask[0x20];
 
-	u8         reserved_4[0x20];
+	u8         reserved_at_a0[0x20];
 
 	struct mlx5_ifc_qpc_bits qpc;
 
-	u8         reserved_5[0x80];
+	u8         reserved_at_800[0x80];
 };
 
 struct mlx5_ifc_init2init_qp_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_init2init_qp_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         qpn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 
 	u8         opt_param_mask[0x20];
 
-	u8         reserved_4[0x20];
+	u8         reserved_at_a0[0x20];
 
 	struct mlx5_ifc_qpc_bits qpc;
 
-	u8         reserved_5[0x80];
+	u8         reserved_at_800[0x80];
 };
 
 struct mlx5_ifc_get_dropped_packet_log_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 
 	u8         packet_headers_log[128][0x8];
 
@@ -4709,1029 +4711,1029 @@
 
 struct mlx5_ifc_get_dropped_packet_log_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_gen_eqe_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x18];
+	u8         reserved_at_40[0x18];
 	u8         eq_number[0x8];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 
 	u8         eqe[64][0x8];
 };
 
 struct mlx5_ifc_gen_eq_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_enable_hca_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x20];
+	u8         reserved_at_40[0x20];
 };
 
 struct mlx5_ifc_enable_hca_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x10];
+	u8         reserved_at_40[0x10];
 	u8         function_id[0x10];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_drain_dct_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_drain_dct_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         dctn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_disable_hca_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x20];
+	u8         reserved_at_40[0x20];
 };
 
 struct mlx5_ifc_disable_hca_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x10];
+	u8         reserved_at_40[0x10];
 	u8         function_id[0x10];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_detach_from_mcg_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_detach_from_mcg_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         qpn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 
 	u8         multicast_gid[16][0x8];
 };
 
 struct mlx5_ifc_destroy_xrc_srq_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_destroy_xrc_srq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         xrc_srqn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_destroy_tis_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_destroy_tis_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         tisn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_destroy_tir_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_destroy_tir_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         tirn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_destroy_srq_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_destroy_srq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         srqn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_destroy_sq_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_destroy_sq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         sqn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_destroy_rqt_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_destroy_rqt_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         rqtn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_destroy_rq_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_destroy_rq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         rqn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_destroy_rmp_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_destroy_rmp_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         rmpn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_destroy_qp_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_destroy_qp_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         qpn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_destroy_psv_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_destroy_psv_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         psvn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_destroy_mkey_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_destroy_mkey_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         mkey_index[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_destroy_flow_table_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_destroy_flow_table_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 
 	u8         table_type[0x8];
-	u8         reserved_3[0x18];
+	u8         reserved_at_88[0x18];
 
-	u8         reserved_4[0x8];
+	u8         reserved_at_a0[0x8];
 	u8         table_id[0x18];
 
-	u8         reserved_5[0x140];
+	u8         reserved_at_c0[0x140];
 };
 
 struct mlx5_ifc_destroy_flow_group_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_destroy_flow_group_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 
 	u8         table_type[0x8];
-	u8         reserved_3[0x18];
+	u8         reserved_at_88[0x18];
 
-	u8         reserved_4[0x8];
+	u8         reserved_at_a0[0x8];
 	u8         table_id[0x18];
 
 	u8         group_id[0x20];
 
-	u8         reserved_5[0x120];
+	u8         reserved_at_e0[0x120];
 };
 
 struct mlx5_ifc_destroy_eq_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_destroy_eq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x18];
+	u8         reserved_at_40[0x18];
 	u8         eq_number[0x8];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_destroy_dct_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_destroy_dct_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         dctn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_destroy_cq_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_destroy_cq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         cqn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_delete_vxlan_udp_dport_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_delete_vxlan_udp_dport_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_40[0x20];
 
-	u8         reserved_3[0x10];
+	u8         reserved_at_60[0x10];
 	u8         vxlan_udp_port[0x10];
 };
 
 struct mlx5_ifc_delete_l2_table_entry_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_delete_l2_table_entry_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x60];
+	u8         reserved_at_40[0x60];
 
-	u8         reserved_3[0x8];
+	u8         reserved_at_a0[0x8];
 	u8         table_index[0x18];
 
-	u8         reserved_4[0x140];
+	u8         reserved_at_c0[0x140];
 };
 
 struct mlx5_ifc_delete_fte_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_delete_fte_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 
 	u8         table_type[0x8];
-	u8         reserved_3[0x18];
+	u8         reserved_at_88[0x18];
 
-	u8         reserved_4[0x8];
+	u8         reserved_at_a0[0x8];
 	u8         table_id[0x18];
 
-	u8         reserved_5[0x40];
+	u8         reserved_at_c0[0x40];
 
 	u8         flow_index[0x20];
 
-	u8         reserved_6[0xe0];
+	u8         reserved_at_120[0xe0];
 };
 
 struct mlx5_ifc_dealloc_xrcd_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_dealloc_xrcd_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         xrcd[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_dealloc_uar_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_dealloc_uar_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         uar[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_dealloc_transport_domain_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_dealloc_transport_domain_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         transport_domain[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_dealloc_q_counter_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_dealloc_q_counter_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x18];
+	u8         reserved_at_40[0x18];
 	u8         counter_set_id[0x8];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_dealloc_pd_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_dealloc_pd_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         pd[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_create_xrc_srq_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x8];
+	u8         reserved_at_40[0x8];
 	u8         xrc_srqn[0x18];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_create_xrc_srq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 
 	struct mlx5_ifc_xrc_srqc_bits xrc_srq_context_entry;
 
-	u8         reserved_3[0x600];
+	u8         reserved_at_280[0x600];
 
 	u8         pas[0][0x40];
 };
 
 struct mlx5_ifc_create_tis_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x8];
+	u8         reserved_at_40[0x8];
 	u8         tisn[0x18];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_create_tis_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0xc0];
+	u8         reserved_at_40[0xc0];
 
 	struct mlx5_ifc_tisc_bits ctx;
 };
 
 struct mlx5_ifc_create_tir_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x8];
+	u8         reserved_at_40[0x8];
 	u8         tirn[0x18];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_create_tir_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0xc0];
+	u8         reserved_at_40[0xc0];
 
 	struct mlx5_ifc_tirc_bits ctx;
 };
 
 struct mlx5_ifc_create_srq_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x8];
+	u8         reserved_at_40[0x8];
 	u8         srqn[0x18];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_create_srq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 
 	struct mlx5_ifc_srqc_bits srq_context_entry;
 
-	u8         reserved_3[0x600];
+	u8         reserved_at_280[0x600];
 
 	u8         pas[0][0x40];
 };
 
 struct mlx5_ifc_create_sq_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x8];
+	u8         reserved_at_40[0x8];
 	u8         sqn[0x18];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_create_sq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0xc0];
+	u8         reserved_at_40[0xc0];
 
 	struct mlx5_ifc_sqc_bits ctx;
 };
 
 struct mlx5_ifc_create_rqt_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x8];
+	u8         reserved_at_40[0x8];
 	u8         rqtn[0x18];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_create_rqt_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0xc0];
+	u8         reserved_at_40[0xc0];
 
 	struct mlx5_ifc_rqtc_bits rqt_context;
 };
 
 struct mlx5_ifc_create_rq_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x8];
+	u8         reserved_at_40[0x8];
 	u8         rqn[0x18];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_create_rq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0xc0];
+	u8         reserved_at_40[0xc0];
 
 	struct mlx5_ifc_rqc_bits ctx;
 };
 
 struct mlx5_ifc_create_rmp_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x8];
+	u8         reserved_at_40[0x8];
 	u8         rmpn[0x18];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_create_rmp_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0xc0];
+	u8         reserved_at_40[0xc0];
 
 	struct mlx5_ifc_rmpc_bits ctx;
 };
 
 struct mlx5_ifc_create_qp_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x8];
+	u8         reserved_at_40[0x8];
 	u8         qpn[0x18];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_create_qp_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 
 	u8         opt_param_mask[0x20];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_a0[0x20];
 
 	struct mlx5_ifc_qpc_bits qpc;
 
-	u8         reserved_4[0x80];
+	u8         reserved_at_800[0x80];
 
 	u8         pas[0][0x40];
 };
 
 struct mlx5_ifc_create_psv_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_80[0x8];
 	u8         psv0_index[0x18];
 
-	u8         reserved_3[0x8];
+	u8         reserved_at_a0[0x8];
 	u8         psv1_index[0x18];
 
-	u8         reserved_4[0x8];
+	u8         reserved_at_c0[0x8];
 	u8         psv2_index[0x18];
 
-	u8         reserved_5[0x8];
+	u8         reserved_at_e0[0x8];
 	u8         psv3_index[0x18];
 };
 
 struct mlx5_ifc_create_psv_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
 	u8         num_psv[0x4];
-	u8         reserved_2[0x4];
+	u8         reserved_at_44[0x4];
 	u8         pd[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_create_mkey_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x8];
+	u8         reserved_at_40[0x8];
 	u8         mkey_index[0x18];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_create_mkey_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_40[0x20];
 
 	u8         pg_access[0x1];
-	u8         reserved_3[0x1f];
+	u8         reserved_at_61[0x1f];
 
 	struct mlx5_ifc_mkc_bits memory_key_mkey_entry;
 
-	u8         reserved_4[0x80];
+	u8         reserved_at_280[0x80];
 
 	u8         translations_octword_actual_size[0x20];
 
-	u8         reserved_5[0x560];
+	u8         reserved_at_320[0x560];
 
 	u8         klm_pas_mtt[0][0x20];
 };
 
 struct mlx5_ifc_create_flow_table_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x8];
+	u8         reserved_at_40[0x8];
 	u8         table_id[0x18];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_create_flow_table_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 
 	u8         table_type[0x8];
-	u8         reserved_3[0x18];
+	u8         reserved_at_88[0x18];
 
-	u8         reserved_4[0x20];
+	u8         reserved_at_a0[0x20];
 
-	u8         reserved_5[0x4];
+	u8         reserved_at_c0[0x4];
 	u8         table_miss_mode[0x4];
 	u8         level[0x8];
-	u8         reserved_6[0x8];
+	u8         reserved_at_d0[0x8];
 	u8         log_size[0x8];
 
-	u8         reserved_7[0x8];
+	u8         reserved_at_e0[0x8];
 	u8         table_miss_id[0x18];
 
-	u8         reserved_8[0x100];
+	u8         reserved_at_100[0x100];
 };
 
 struct mlx5_ifc_create_flow_group_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x8];
+	u8         reserved_at_40[0x8];
 	u8         group_id[0x18];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 enum {
@@ -5742,134 +5744,134 @@
 
 struct mlx5_ifc_create_flow_group_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 
 	u8         table_type[0x8];
-	u8         reserved_3[0x18];
+	u8         reserved_at_88[0x18];
 
-	u8         reserved_4[0x8];
+	u8         reserved_at_a0[0x8];
 	u8         table_id[0x18];
 
-	u8         reserved_5[0x20];
+	u8         reserved_at_c0[0x20];
 
 	u8         start_flow_index[0x20];
 
-	u8         reserved_6[0x20];
+	u8         reserved_at_100[0x20];
 
 	u8         end_flow_index[0x20];
 
-	u8         reserved_7[0xa0];
+	u8         reserved_at_140[0xa0];
 
-	u8         reserved_8[0x18];
+	u8         reserved_at_1e0[0x18];
 	u8         match_criteria_enable[0x8];
 
 	struct mlx5_ifc_fte_match_param_bits match_criteria;
 
-	u8         reserved_9[0xe00];
+	u8         reserved_at_1200[0xe00];
 };
 
 struct mlx5_ifc_create_eq_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x18];
+	u8         reserved_at_40[0x18];
 	u8         eq_number[0x8];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_create_eq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 
 	struct mlx5_ifc_eqc_bits eq_context_entry;
 
-	u8         reserved_3[0x40];
+	u8         reserved_at_280[0x40];
 
 	u8         event_bitmask[0x40];
 
-	u8         reserved_4[0x580];
+	u8         reserved_at_300[0x580];
 
 	u8         pas[0][0x40];
 };
 
 struct mlx5_ifc_create_dct_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x8];
+	u8         reserved_at_40[0x8];
 	u8         dctn[0x18];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_create_dct_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 
 	struct mlx5_ifc_dctc_bits dct_context_entry;
 
-	u8         reserved_3[0x180];
+	u8         reserved_at_280[0x180];
 };
 
 struct mlx5_ifc_create_cq_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x8];
+	u8         reserved_at_40[0x8];
 	u8         cqn[0x18];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_create_cq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 
 	struct mlx5_ifc_cqc_bits cq_context;
 
-	u8         reserved_3[0x600];
+	u8         reserved_at_280[0x600];
 
 	u8         pas[0][0x40];
 };
 
 struct mlx5_ifc_config_int_moderation_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x4];
+	u8         reserved_at_40[0x4];
 	u8         min_delay[0xc];
 	u8         int_vector[0x10];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 enum {
@@ -5879,49 +5881,49 @@
 
 struct mlx5_ifc_config_int_moderation_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x4];
+	u8         reserved_at_40[0x4];
 	u8         min_delay[0xc];
 	u8         int_vector[0x10];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_attach_to_mcg_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_attach_to_mcg_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         qpn[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 
 	u8         multicast_gid[16][0x8];
 };
 
 struct mlx5_ifc_arm_xrc_srq_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 enum {
@@ -5930,25 +5932,25 @@
 
 struct mlx5_ifc_arm_xrc_srq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         xrc_srqn[0x18];
 
-	u8         reserved_3[0x10];
+	u8         reserved_at_60[0x10];
 	u8         lwm[0x10];
 };
 
 struct mlx5_ifc_arm_rq_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 enum {
@@ -5957,179 +5959,179 @@
 
 struct mlx5_ifc_arm_rq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         srq_number[0x18];
 
-	u8         reserved_3[0x10];
+	u8         reserved_at_60[0x10];
 	u8         lwm[0x10];
 };
 
 struct mlx5_ifc_arm_dct_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_arm_dct_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_40[0x8];
 	u8         dct_number[0x18];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_alloc_xrcd_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x8];
+	u8         reserved_at_40[0x8];
 	u8         xrcd[0x18];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_alloc_xrcd_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_alloc_uar_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x8];
+	u8         reserved_at_40[0x8];
 	u8         uar[0x18];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_alloc_uar_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_alloc_transport_domain_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x8];
+	u8         reserved_at_40[0x8];
 	u8         transport_domain[0x18];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_alloc_transport_domain_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_alloc_q_counter_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x18];
+	u8         reserved_at_40[0x18];
 	u8         counter_set_id[0x8];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_alloc_q_counter_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_alloc_pd_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x8];
+	u8         reserved_at_40[0x8];
 	u8         pd[0x18];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_alloc_pd_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_add_vxlan_udp_dport_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_add_vxlan_udp_dport_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_40[0x20];
 
-	u8         reserved_3[0x10];
+	u8         reserved_at_60[0x10];
 	u8         vxlan_udp_port[0x10];
 };
 
 struct mlx5_ifc_access_register_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 
 	u8         register_data[0][0x20];
 };
@@ -6141,12 +6143,12 @@
 
 struct mlx5_ifc_access_register_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x10];
+	u8         reserved_at_40[0x10];
 	u8         register_id[0x10];
 
 	u8         argument[0x20];
@@ -6159,24 +6161,24 @@
 	u8         version[0x4];
 	u8         local_port[0x8];
 	u8         pnat[0x2];
-	u8         reserved_0[0x2];
+	u8         reserved_at_12[0x2];
 	u8         lane[0x4];
-	u8         reserved_1[0x8];
+	u8         reserved_at_18[0x8];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_20[0x20];
 
-	u8         reserved_3[0x7];
+	u8         reserved_at_40[0x7];
 	u8         polarity[0x1];
 	u8         ob_tap0[0x8];
 	u8         ob_tap1[0x8];
 	u8         ob_tap2[0x8];
 
-	u8         reserved_4[0xc];
+	u8         reserved_at_60[0xc];
 	u8         ob_preemp_mode[0x4];
 	u8         ob_reg[0x8];
 	u8         ob_bias[0x8];
 
-	u8         reserved_5[0x20];
+	u8         reserved_at_80[0x20];
 };
 
 struct mlx5_ifc_slrg_reg_bits {
@@ -6184,36 +6186,36 @@
 	u8         version[0x4];
 	u8         local_port[0x8];
 	u8         pnat[0x2];
-	u8         reserved_0[0x2];
+	u8         reserved_at_12[0x2];
 	u8         lane[0x4];
-	u8         reserved_1[0x8];
+	u8         reserved_at_18[0x8];
 
 	u8         time_to_link_up[0x10];
-	u8         reserved_2[0xc];
+	u8         reserved_at_30[0xc];
 	u8         grade_lane_speed[0x4];
 
 	u8         grade_version[0x8];
 	u8         grade[0x18];
 
-	u8         reserved_3[0x4];
+	u8         reserved_at_60[0x4];
 	u8         height_grade_type[0x4];
 	u8         height_grade[0x18];
 
 	u8         height_dz[0x10];
 	u8         height_dv[0x10];
 
-	u8         reserved_4[0x10];
+	u8         reserved_at_a0[0x10];
 	u8         height_sigma[0x10];
 
-	u8         reserved_5[0x20];
+	u8         reserved_at_c0[0x20];
 
-	u8         reserved_6[0x4];
+	u8         reserved_at_e0[0x4];
 	u8         phase_grade_type[0x4];
 	u8         phase_grade[0x18];
 
-	u8         reserved_7[0x8];
+	u8         reserved_at_100[0x8];
 	u8         phase_eo_pos[0x8];
-	u8         reserved_8[0x8];
+	u8         reserved_at_110[0x8];
 	u8         phase_eo_neg[0x8];
 
 	u8         ffe_set_tested[0x10];
@@ -6221,70 +6223,70 @@
 };
 
 struct mlx5_ifc_pvlc_reg_bits {
-	u8         reserved_0[0x8];
+	u8         reserved_at_0[0x8];
 	u8         local_port[0x8];
-	u8         reserved_1[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_2[0x1c];
+	u8         reserved_at_20[0x1c];
 	u8         vl_hw_cap[0x4];
 
-	u8         reserved_3[0x1c];
+	u8         reserved_at_40[0x1c];
 	u8         vl_admin[0x4];
 
-	u8         reserved_4[0x1c];
+	u8         reserved_at_60[0x1c];
 	u8         vl_operational[0x4];
 };
 
 struct mlx5_ifc_pude_reg_bits {
 	u8         swid[0x8];
 	u8         local_port[0x8];
-	u8         reserved_0[0x4];
+	u8         reserved_at_10[0x4];
 	u8         admin_status[0x4];
-	u8         reserved_1[0x4];
+	u8         reserved_at_18[0x4];
 	u8         oper_status[0x4];
 
-	u8         reserved_2[0x60];
+	u8         reserved_at_20[0x60];
 };
 
 struct mlx5_ifc_ptys_reg_bits {
-	u8         reserved_0[0x8];
+	u8         reserved_at_0[0x8];
 	u8         local_port[0x8];
-	u8         reserved_1[0xd];
+	u8         reserved_at_10[0xd];
 	u8         proto_mask[0x3];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_20[0x40];
 
 	u8         eth_proto_capability[0x20];
 
 	u8         ib_link_width_capability[0x10];
 	u8         ib_proto_capability[0x10];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_a0[0x20];
 
 	u8         eth_proto_admin[0x20];
 
 	u8         ib_link_width_admin[0x10];
 	u8         ib_proto_admin[0x10];
 
-	u8         reserved_4[0x20];
+	u8         reserved_at_100[0x20];
 
 	u8         eth_proto_oper[0x20];
 
 	u8         ib_link_width_oper[0x10];
 	u8         ib_proto_oper[0x10];
 
-	u8         reserved_5[0x20];
+	u8         reserved_at_160[0x20];
 
 	u8         eth_proto_lp_advertise[0x20];
 
-	u8         reserved_6[0x60];
+	u8         reserved_at_1a0[0x60];
 };
 
 struct mlx5_ifc_ptas_reg_bits {
-	u8         reserved_0[0x20];
+	u8         reserved_at_0[0x20];
 
 	u8         algorithm_options[0x10];
-	u8         reserved_1[0x4];
+	u8         reserved_at_30[0x4];
 	u8         repetitions_mode[0x4];
 	u8         num_of_repetitions[0x8];
 
@@ -6310,13 +6312,13 @@
 	u8         ndeo_error_threshold[0x10];
 
 	u8         mixer_offset_step_size[0x10];
-	u8         reserved_2[0x8];
+	u8         reserved_at_110[0x8];
 	u8         mix90_phase_for_voltage_bath[0x8];
 
 	u8         mixer_offset_start[0x10];
 	u8         mixer_offset_end[0x10];
 
-	u8         reserved_3[0x15];
+	u8         reserved_at_140[0x15];
 	u8         ber_test_time[0xb];
 };
 
@@ -6324,154 +6326,154 @@
 	u8         swid[0x8];
 	u8         local_port[0x8];
 	u8         sub_port[0x8];
-	u8         reserved_0[0x8];
+	u8         reserved_at_18[0x8];
 
-	u8         reserved_1[0x20];
+	u8         reserved_at_20[0x20];
 };
 
 struct mlx5_ifc_pqdr_reg_bits {
-	u8         reserved_0[0x8];
+	u8         reserved_at_0[0x8];
 	u8         local_port[0x8];
-	u8         reserved_1[0x5];
+	u8         reserved_at_10[0x5];
 	u8         prio[0x3];
-	u8         reserved_2[0x6];
+	u8         reserved_at_18[0x6];
 	u8         mode[0x2];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_20[0x20];
 
-	u8         reserved_4[0x10];
+	u8         reserved_at_40[0x10];
 	u8         min_threshold[0x10];
 
-	u8         reserved_5[0x10];
+	u8         reserved_at_60[0x10];
 	u8         max_threshold[0x10];
 
-	u8         reserved_6[0x10];
+	u8         reserved_at_80[0x10];
 	u8         mark_probability_denominator[0x10];
 
-	u8         reserved_7[0x60];
+	u8         reserved_at_a0[0x60];
 };
 
 struct mlx5_ifc_ppsc_reg_bits {
-	u8         reserved_0[0x8];
+	u8         reserved_at_0[0x8];
 	u8         local_port[0x8];
-	u8         reserved_1[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_2[0x60];
+	u8         reserved_at_20[0x60];
 
-	u8         reserved_3[0x1c];
+	u8         reserved_at_80[0x1c];
 	u8         wrps_admin[0x4];
 
-	u8         reserved_4[0x1c];
+	u8         reserved_at_a0[0x1c];
 	u8         wrps_status[0x4];
 
-	u8         reserved_5[0x8];
+	u8         reserved_at_c0[0x8];
 	u8         up_threshold[0x8];
-	u8         reserved_6[0x8];
+	u8         reserved_at_d0[0x8];
 	u8         down_threshold[0x8];
 
-	u8         reserved_7[0x20];
+	u8         reserved_at_e0[0x20];
 
-	u8         reserved_8[0x1c];
+	u8         reserved_at_100[0x1c];
 	u8         srps_admin[0x4];
 
-	u8         reserved_9[0x1c];
+	u8         reserved_at_120[0x1c];
 	u8         srps_status[0x4];
 
-	u8         reserved_10[0x40];
+	u8         reserved_at_140[0x40];
 };
 
 struct mlx5_ifc_pplr_reg_bits {
-	u8         reserved_0[0x8];
+	u8         reserved_at_0[0x8];
 	u8         local_port[0x8];
-	u8         reserved_1[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_20[0x8];
 	u8         lb_cap[0x8];
-	u8         reserved_3[0x8];
+	u8         reserved_at_30[0x8];
 	u8         lb_en[0x8];
 };
 
 struct mlx5_ifc_pplm_reg_bits {
-	u8         reserved_0[0x8];
+	u8         reserved_at_0[0x8];
 	u8         local_port[0x8];
-	u8         reserved_1[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_20[0x20];
 
 	u8         port_profile_mode[0x8];
 	u8         static_port_profile[0x8];
 	u8         active_port_profile[0x8];
-	u8         reserved_3[0x8];
+	u8         reserved_at_58[0x8];
 
 	u8         retransmission_active[0x8];
 	u8         fec_mode_active[0x18];
 
-	u8         reserved_4[0x20];
+	u8         reserved_at_80[0x20];
 };
 
 struct mlx5_ifc_ppcnt_reg_bits {
 	u8         swid[0x8];
 	u8         local_port[0x8];
 	u8         pnat[0x2];
-	u8         reserved_0[0x8];
+	u8         reserved_at_12[0x8];
 	u8         grp[0x6];
 
 	u8         clr[0x1];
-	u8         reserved_1[0x1c];
+	u8         reserved_at_21[0x1c];
 	u8         prio_tc[0x3];
 
 	union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits counter_set;
 };
 
 struct mlx5_ifc_ppad_reg_bits {
-	u8         reserved_0[0x3];
+	u8         reserved_at_0[0x3];
 	u8         single_mac[0x1];
-	u8         reserved_1[0x4];
+	u8         reserved_at_4[0x4];
 	u8         local_port[0x8];
 	u8         mac_47_32[0x10];
 
 	u8         mac_31_0[0x20];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_pmtu_reg_bits {
-	u8         reserved_0[0x8];
+	u8         reserved_at_0[0x8];
 	u8         local_port[0x8];
-	u8         reserved_1[0x10];
+	u8         reserved_at_10[0x10];
 
 	u8         max_mtu[0x10];
-	u8         reserved_2[0x10];
+	u8         reserved_at_30[0x10];
 
 	u8         admin_mtu[0x10];
-	u8         reserved_3[0x10];
+	u8         reserved_at_50[0x10];
 
 	u8         oper_mtu[0x10];
-	u8         reserved_4[0x10];
+	u8         reserved_at_70[0x10];
 };
 
 struct mlx5_ifc_pmpr_reg_bits {
-	u8         reserved_0[0x8];
+	u8         reserved_at_0[0x8];
 	u8         module[0x8];
-	u8         reserved_1[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_2[0x18];
+	u8         reserved_at_20[0x18];
 	u8         attenuation_5g[0x8];
 
-	u8         reserved_3[0x18];
+	u8         reserved_at_40[0x18];
 	u8         attenuation_7g[0x8];
 
-	u8         reserved_4[0x18];
+	u8         reserved_at_60[0x18];
 	u8         attenuation_12g[0x8];
 };
 
 struct mlx5_ifc_pmpe_reg_bits {
-	u8         reserved_0[0x8];
+	u8         reserved_at_0[0x8];
 	u8         module[0x8];
-	u8         reserved_1[0xc];
+	u8         reserved_at_10[0xc];
 	u8         module_status[0x4];
 
-	u8         reserved_2[0x60];
+	u8         reserved_at_20[0x60];
 };
 
 struct mlx5_ifc_pmpc_reg_bits {
@@ -6479,20 +6481,20 @@
 };
 
 struct mlx5_ifc_pmlpn_reg_bits {
-	u8         reserved_0[0x4];
+	u8         reserved_at_0[0x4];
 	u8         mlpn_status[0x4];
 	u8         local_port[0x8];
-	u8         reserved_1[0x10];
+	u8         reserved_at_10[0x10];
 
 	u8         e[0x1];
-	u8         reserved_2[0x1f];
+	u8         reserved_at_21[0x1f];
 };
 
 struct mlx5_ifc_pmlp_reg_bits {
 	u8         rxtx[0x1];
-	u8         reserved_0[0x7];
+	u8         reserved_at_1[0x7];
 	u8         local_port[0x8];
-	u8         reserved_1[0x8];
+	u8         reserved_at_10[0x8];
 	u8         width[0x8];
 
 	u8         lane0_module_mapping[0x20];
@@ -6503,36 +6505,36 @@
 
 	u8         lane3_module_mapping[0x20];
 
-	u8         reserved_2[0x160];
+	u8         reserved_at_a0[0x160];
 };
 
 struct mlx5_ifc_pmaos_reg_bits {
-	u8         reserved_0[0x8];
+	u8         reserved_at_0[0x8];
 	u8         module[0x8];
-	u8         reserved_1[0x4];
+	u8         reserved_at_10[0x4];
 	u8         admin_status[0x4];
-	u8         reserved_2[0x4];
+	u8         reserved_at_18[0x4];
 	u8         oper_status[0x4];
 
 	u8         ase[0x1];
 	u8         ee[0x1];
-	u8         reserved_3[0x1c];
+	u8         reserved_at_22[0x1c];
 	u8         e[0x2];
 
-	u8         reserved_4[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_plpc_reg_bits {
-	u8         reserved_0[0x4];
+	u8         reserved_at_0[0x4];
 	u8         profile_id[0xc];
-	u8         reserved_1[0x4];
+	u8         reserved_at_10[0x4];
 	u8         proto_mask[0x4];
-	u8         reserved_2[0x8];
+	u8         reserved_at_18[0x8];
 
-	u8         reserved_3[0x10];
+	u8         reserved_at_20[0x10];
 	u8         lane_speed[0x10];
 
-	u8         reserved_4[0x17];
+	u8         reserved_at_40[0x17];
 	u8         lpbf[0x1];
 	u8         fec_mode_policy[0x8];
 
@@ -6545,44 +6547,44 @@
 	u8         retransmission_request_admin[0x8];
 	u8         fec_mode_request_admin[0x18];
 
-	u8         reserved_5[0x80];
+	u8         reserved_at_c0[0x80];
 };
 
 struct mlx5_ifc_plib_reg_bits {
-	u8         reserved_0[0x8];
+	u8         reserved_at_0[0x8];
 	u8         local_port[0x8];
-	u8         reserved_1[0x8];
+	u8         reserved_at_10[0x8];
 	u8         ib_port[0x8];
 
-	u8         reserved_2[0x60];
+	u8         reserved_at_20[0x60];
 };
 
 struct mlx5_ifc_plbf_reg_bits {
-	u8         reserved_0[0x8];
+	u8         reserved_at_0[0x8];
 	u8         local_port[0x8];
-	u8         reserved_1[0xd];
+	u8         reserved_at_10[0xd];
 	u8         lbf_mode[0x3];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_20[0x20];
 };
 
 struct mlx5_ifc_pipg_reg_bits {
-	u8         reserved_0[0x8];
+	u8         reserved_at_0[0x8];
 	u8         local_port[0x8];
-	u8         reserved_1[0x10];
+	u8         reserved_at_10[0x10];
 
 	u8         dic[0x1];
-	u8         reserved_2[0x19];
+	u8         reserved_at_21[0x19];
 	u8         ipg[0x4];
-	u8         reserved_3[0x2];
+	u8         reserved_at_3e[0x2];
 };
 
 struct mlx5_ifc_pifr_reg_bits {
-	u8         reserved_0[0x8];
+	u8         reserved_at_0[0x8];
 	u8         local_port[0x8];
-	u8         reserved_1[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_2[0xe0];
+	u8         reserved_at_20[0xe0];
 
 	u8         port_filter[8][0x20];
 
@@ -6590,36 +6592,36 @@
 };
 
 struct mlx5_ifc_pfcc_reg_bits {
-	u8         reserved_0[0x8];
+	u8         reserved_at_0[0x8];
 	u8         local_port[0x8];
-	u8         reserved_1[0x10];
+	u8         reserved_at_10[0x10];
 
 	u8         ppan[0x4];
-	u8         reserved_2[0x4];
+	u8         reserved_at_24[0x4];
 	u8         prio_mask_tx[0x8];
-	u8         reserved_3[0x8];
+	u8         reserved_at_30[0x8];
 	u8         prio_mask_rx[0x8];
 
 	u8         pptx[0x1];
 	u8         aptx[0x1];
-	u8         reserved_4[0x6];
+	u8         reserved_at_42[0x6];
 	u8         pfctx[0x8];
-	u8         reserved_5[0x10];
+	u8         reserved_at_50[0x10];
 
 	u8         pprx[0x1];
 	u8         aprx[0x1];
-	u8         reserved_6[0x6];
+	u8         reserved_at_62[0x6];
 	u8         pfcrx[0x8];
-	u8         reserved_7[0x10];
+	u8         reserved_at_70[0x10];
 
-	u8         reserved_8[0x80];
+	u8         reserved_at_80[0x80];
 };
 
 struct mlx5_ifc_pelc_reg_bits {
 	u8         op[0x4];
-	u8         reserved_0[0x4];
+	u8         reserved_at_4[0x4];
 	u8         local_port[0x8];
-	u8         reserved_1[0x10];
+	u8         reserved_at_10[0x10];
 
 	u8         op_admin[0x8];
 	u8         op_capability[0x8];
@@ -6634,28 +6636,28 @@
 
 	u8         active[0x40];
 
-	u8         reserved_2[0x80];
+	u8         reserved_at_140[0x80];
 };
 
 struct mlx5_ifc_peir_reg_bits {
-	u8         reserved_0[0x8];
+	u8         reserved_at_0[0x8];
 	u8         local_port[0x8];
-	u8         reserved_1[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_2[0xc];
+	u8         reserved_at_20[0xc];
 	u8         error_count[0x4];
-	u8         reserved_3[0x10];
+	u8         reserved_at_30[0x10];
 
-	u8         reserved_4[0xc];
+	u8         reserved_at_40[0xc];
 	u8         lane[0x4];
-	u8         reserved_5[0x8];
+	u8         reserved_at_50[0x8];
 	u8         error_type[0x8];
 };
 
 struct mlx5_ifc_pcap_reg_bits {
-	u8         reserved_0[0x8];
+	u8         reserved_at_0[0x8];
 	u8         local_port[0x8];
-	u8         reserved_1[0x10];
+	u8         reserved_at_10[0x10];
 
 	u8         port_capability_mask[4][0x20];
 };
@@ -6663,46 +6665,46 @@
 struct mlx5_ifc_paos_reg_bits {
 	u8         swid[0x8];
 	u8         local_port[0x8];
-	u8         reserved_0[0x4];
+	u8         reserved_at_10[0x4];
 	u8         admin_status[0x4];
-	u8         reserved_1[0x4];
+	u8         reserved_at_18[0x4];
 	u8         oper_status[0x4];
 
 	u8         ase[0x1];
 	u8         ee[0x1];
-	u8         reserved_2[0x1c];
+	u8         reserved_at_22[0x1c];
 	u8         e[0x2];
 
-	u8         reserved_3[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_pamp_reg_bits {
-	u8         reserved_0[0x8];
+	u8         reserved_at_0[0x8];
 	u8         opamp_group[0x8];
-	u8         reserved_1[0xc];
+	u8         reserved_at_10[0xc];
 	u8         opamp_group_type[0x4];
 
 	u8         start_index[0x10];
-	u8         reserved_2[0x4];
+	u8         reserved_at_30[0x4];
 	u8         num_of_indices[0xc];
 
 	u8         index_data[18][0x10];
 };
 
 struct mlx5_ifc_lane_2_module_mapping_bits {
-	u8         reserved_0[0x6];
+	u8         reserved_at_0[0x6];
 	u8         rx_lane[0x2];
-	u8         reserved_1[0x6];
+	u8         reserved_at_8[0x6];
 	u8         tx_lane[0x2];
-	u8         reserved_2[0x8];
+	u8         reserved_at_10[0x8];
 	u8         module[0x8];
 };
 
 struct mlx5_ifc_bufferx_reg_bits {
-	u8         reserved_0[0x6];
+	u8         reserved_at_0[0x6];
 	u8         lossy[0x1];
 	u8         epsb[0x1];
-	u8         reserved_1[0xc];
+	u8         reserved_at_8[0xc];
 	u8         size[0xc];
 
 	u8         xoff_threshold[0x10];
@@ -6714,21 +6716,21 @@
 };
 
 struct mlx5_ifc_register_power_settings_bits {
-	u8         reserved_0[0x18];
+	u8         reserved_at_0[0x18];
 	u8         power_settings_level[0x8];
 
-	u8         reserved_1[0x60];
+	u8         reserved_at_20[0x60];
 };
 
 struct mlx5_ifc_register_host_endianness_bits {
 	u8         he[0x1];
-	u8         reserved_0[0x1f];
+	u8         reserved_at_1[0x1f];
 
-	u8         reserved_1[0x60];
+	u8         reserved_at_20[0x60];
 };
 
 struct mlx5_ifc_umr_pointer_desc_argument_bits {
-	u8         reserved_0[0x20];
+	u8         reserved_at_0[0x20];
 
 	u8         mkey[0x20];
 
@@ -6741,7 +6743,7 @@
 	u8         dc_key[0x40];
 
 	u8         ext[0x1];
-	u8         reserved_0[0x7];
+	u8         reserved_at_41[0x7];
 	u8         destination_qp_dct[0x18];
 
 	u8         static_rate[0x4];
@@ -6750,7 +6752,7 @@
 	u8         mlid[0x7];
 	u8         rlid_udp_sport[0x10];
 
-	u8         reserved_1[0x20];
+	u8         reserved_at_80[0x20];
 
 	u8         rmac_47_16[0x20];
 
@@ -6758,9 +6760,9 @@
 	u8         tclass[0x8];
 	u8         hop_limit[0x8];
 
-	u8         reserved_2[0x1];
+	u8         reserved_at_e0[0x1];
 	u8         grh[0x1];
-	u8         reserved_3[0x2];
+	u8         reserved_at_e2[0x2];
 	u8         src_addr_index[0x8];
 	u8         flow_label[0x14];
 
@@ -6768,27 +6770,27 @@
 };
 
 struct mlx5_ifc_pages_req_event_bits {
-	u8         reserved_0[0x10];
+	u8         reserved_at_0[0x10];
 	u8         function_id[0x10];
 
 	u8         num_pages[0x20];
 
-	u8         reserved_1[0xa0];
+	u8         reserved_at_40[0xa0];
 };
 
 struct mlx5_ifc_eqe_bits {
-	u8         reserved_0[0x8];
+	u8         reserved_at_0[0x8];
 	u8         event_type[0x8];
-	u8         reserved_1[0x8];
+	u8         reserved_at_10[0x8];
 	u8         event_sub_type[0x8];
 
-	u8         reserved_2[0xe0];
+	u8         reserved_at_20[0xe0];
 
 	union mlx5_ifc_event_auto_bits event_data;
 
-	u8         reserved_3[0x10];
+	u8         reserved_at_1e0[0x10];
 	u8         signature[0x8];
-	u8         reserved_4[0x7];
+	u8         reserved_at_1f8[0x7];
 	u8         owner[0x1];
 };
 
@@ -6798,14 +6800,14 @@
 
 struct mlx5_ifc_cmd_queue_entry_bits {
 	u8         type[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         input_length[0x20];
 
 	u8         input_mailbox_pointer_63_32[0x20];
 
 	u8         input_mailbox_pointer_31_9[0x17];
-	u8         reserved_1[0x9];
+	u8         reserved_at_77[0x9];
 
 	u8         command_input_inline_data[16][0x8];
 
@@ -6814,20 +6816,20 @@
 	u8         output_mailbox_pointer_63_32[0x20];
 
 	u8         output_mailbox_pointer_31_9[0x17];
-	u8         reserved_2[0x9];
+	u8         reserved_at_1b7[0x9];
 
 	u8         output_length[0x20];
 
 	u8         token[0x8];
 	u8         signature[0x8];
-	u8         reserved_3[0x8];
+	u8         reserved_at_1f0[0x8];
 	u8         status[0x7];
 	u8         ownership[0x1];
 };
 
 struct mlx5_ifc_cmd_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
@@ -6836,9 +6838,9 @@
 
 struct mlx5_ifc_cmd_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
 	u8         command[0][0x20];
@@ -6847,16 +6849,16 @@
 struct mlx5_ifc_cmd_if_box_bits {
 	u8         mailbox_data[512][0x8];
 
-	u8         reserved_0[0x180];
+	u8         reserved_at_1000[0x180];
 
 	u8         next_pointer_63_32[0x20];
 
 	u8         next_pointer_31_10[0x16];
-	u8         reserved_1[0xa];
+	u8         reserved_at_11b6[0xa];
 
 	u8         block_number[0x20];
 
-	u8         reserved_2[0x8];
+	u8         reserved_at_11e0[0x8];
 	u8         token[0x8];
 	u8         ctrl_signature[0x8];
 	u8         signature[0x8];
@@ -6866,7 +6868,7 @@
 	u8         ptag_63_32[0x20];
 
 	u8         ptag_31_8[0x18];
-	u8         reserved_0[0x6];
+	u8         reserved_at_38[0x6];
 	u8         wr_en[0x1];
 	u8         rd_en[0x1];
 };
@@ -6904,38 +6906,38 @@
 	u8         cmd_interface_rev[0x10];
 	u8         fw_rev_subminor[0x10];
 
-	u8         reserved_0[0x40];
+	u8         reserved_at_40[0x40];
 
 	u8         cmdq_phy_addr_63_32[0x20];
 
 	u8         cmdq_phy_addr_31_12[0x14];
-	u8         reserved_1[0x2];
+	u8         reserved_at_b4[0x2];
 	u8         nic_interface[0x2];
 	u8         log_cmdq_size[0x4];
 	u8         log_cmdq_stride[0x4];
 
 	u8         command_doorbell_vector[0x20];
 
-	u8         reserved_2[0xf00];
+	u8         reserved_at_e0[0xf00];
 
 	u8         initializing[0x1];
-	u8         reserved_3[0x4];
+	u8         reserved_at_fe1[0x4];
 	u8         nic_interface_supported[0x3];
-	u8         reserved_4[0x18];
+	u8         reserved_at_fe8[0x18];
 
 	struct mlx5_ifc_health_buffer_bits health_buffer;
 
 	u8         no_dram_nic_offset[0x20];
 
-	u8         reserved_5[0x6e40];
+	u8         reserved_at_1220[0x6e40];
 
-	u8         reserved_6[0x1f];
+	u8         reserved_at_8060[0x1f];
 	u8         clear_int[0x1];
 
 	u8         health_syndrome[0x8];
 	u8         health_counter[0x18];
 
-	u8         reserved_7[0x17fc0];
+	u8         reserved_at_80a0[0x17fc0];
 };
 
 union mlx5_ifc_ports_control_registers_document_bits {
@@ -6980,44 +6982,44 @@
 	struct mlx5_ifc_pvlc_reg_bits pvlc_reg;
 	struct mlx5_ifc_slrg_reg_bits slrg_reg;
 	struct mlx5_ifc_sltp_reg_bits sltp_reg;
-	u8         reserved_0[0x60e0];
+	u8         reserved_at_0[0x60e0];
 };
 
 union mlx5_ifc_debug_enhancements_document_bits {
 	struct mlx5_ifc_health_buffer_bits health_buffer;
-	u8         reserved_0[0x200];
+	u8         reserved_at_0[0x200];
 };
 
 union mlx5_ifc_uplink_pci_interface_document_bits {
 	struct mlx5_ifc_initial_seg_bits initial_seg;
-	u8         reserved_0[0x20060];
+	u8         reserved_at_0[0x20060];
 };
 
 struct mlx5_ifc_set_flow_table_root_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_set_flow_table_root_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x40];
+	u8         reserved_at_40[0x40];
 
 	u8         table_type[0x8];
-	u8         reserved_3[0x18];
+	u8         reserved_at_88[0x18];
 
-	u8         reserved_4[0x8];
+	u8         reserved_at_a0[0x8];
 	u8         table_id[0x18];
 
-	u8         reserved_5[0x140];
+	u8         reserved_at_c0[0x140];
 };
 
 enum {
@@ -7026,39 +7028,39 @@
 
 struct mlx5_ifc_modify_flow_table_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_modify_flow_table_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x20];
+	u8         reserved_at_40[0x20];
 
-	u8         reserved_3[0x10];
+	u8         reserved_at_60[0x10];
 	u8         modify_field_select[0x10];
 
 	u8         table_type[0x8];
-	u8         reserved_4[0x18];
+	u8         reserved_at_88[0x18];
 
-	u8         reserved_5[0x8];
+	u8         reserved_at_a0[0x8];
 	u8         table_id[0x18];
 
-	u8         reserved_6[0x4];
+	u8         reserved_at_c0[0x4];
 	u8         table_miss_mode[0x4];
-	u8         reserved_7[0x18];
+	u8         reserved_at_c8[0x18];
 
-	u8         reserved_8[0x8];
+	u8         reserved_at_e0[0x8];
 	u8         table_miss_id[0x18];
 
-	u8         reserved_9[0x100];
+	u8         reserved_at_100[0x100];
 };
 
 #endif /* MLX5_IFC_H */

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 516e149..3579d1e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h

@@ -387,7 +387,8 @@
 	REGION_MIXED,
 };
 
-int region_intersects(resource_size_t offset, size_t size, const char *type);
+int region_intersects(resource_size_t offset, size_t size, unsigned long flags,
+		      unsigned long desc);
 
 /* Support for virtually mapped pages */
 struct page *vmalloc_to_page(const void *addr);
@@ -2138,6 +2139,8 @@
 int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *);
 int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
 			unsigned long pfn);
+int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
+			unsigned long pfn, pgprot_t pgprot);
 int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
 			pfn_t pfn);
 int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len);

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 624b78b..944b2b3 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h

@@ -566,10 +566,26 @@
 }
 #endif
 
-struct vm_special_mapping
-{
-	const char *name;
+struct vm_fault;
+
+struct vm_special_mapping {
+	const char *name;	/* The name, e.g. "[vdso]". */
+
+	/*
+	 * If .fault is not provided, this points to a
+	 * NULL-terminated array of pages that back the special mapping.
+	 *
+	 * This must not be NULL unless .fault is provided.
+	 */
 	struct page **pages;
+
+	/*
+	 * If non-NULL, then this is called to resolve page faults
+	 * on the special mapping.  If used, .pages is not checked.
+	 */
+	int (*fault)(const struct vm_special_mapping *sm,
+		     struct vm_area_struct *vma,
+		     struct vm_fault *vmf);
 };
 
 enum tlb_flush_reason {

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 289c231..5440b7b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h

@@ -3718,7 +3718,7 @@
 void *netdev_lower_get_next(struct net_device *dev,
 				struct list_head **iter);
 #define netdev_for_each_lower_dev(dev, ldev, iter) \
-	for (iter = &(dev)->adj_list.lower, \
+	for (iter = (dev)->adj_list.lower.next, \
 	     ldev = netdev_lower_get_next(dev, &(iter)); \
 	     ldev; \
 	     ldev = netdev_lower_get_next(dev, &(iter)))

diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 48e0320..67300f8 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h

@@ -550,9 +550,7 @@
 
 static inline loff_t nfs_size_to_loff_t(__u64 size)
 {
-	if (size > (__u64) OFFSET_MAX - 1)
-		return OFFSET_MAX - 1;
-	return (loff_t) size;
+	return min_t(u64, size, OFFSET_MAX);
 }
 
 static inline ino_t

diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 791098a..d320906 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h

@@ -275,6 +275,7 @@
 	size_t layoutupdate_len;
 	struct page *layoutupdate_page;
 	struct page **layoutupdate_pages;
+	__be32 *start_p;
 };
 
 struct nfs4_layoutcommit_res {

diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index d14a4c3..4149868 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h

@@ -47,6 +47,8 @@
  * runtime initialization.
  */
 
+struct notifier_block;
+
 typedef	int (*notifier_fn_t)(struct notifier_block *nb,
 			unsigned long action, void *data);
 

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 27df4a6..2771625 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h

@@ -988,23 +988,6 @@
 	return pdev->is_managed;
 }
 
-static inline void pci_set_managed_irq(struct pci_dev *pdev, unsigned int irq)
-{
-	pdev->irq = irq;
-	pdev->irq_managed = 1;
-}
-
-static inline void pci_reset_managed_irq(struct pci_dev *pdev)
-{
-	pdev->irq = 0;
-	pdev->irq_managed = 0;
-}
-
-static inline bool pci_has_managed_irq(struct pci_dev *pdev)
-{
-	return pdev->irq_managed && pdev->irq > 0;
-}
-
 void pci_disable_device(struct pci_dev *dev);
 
 extern unsigned int pcibios_max_latency;

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index b35a61a..79ec7bb 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h

@@ -397,6 +397,7 @@
  * enum perf_event_active_state - the states of a event
  */
 enum perf_event_active_state {
+	PERF_EVENT_STATE_DEAD		= -4,
 	PERF_EVENT_STATE_EXIT		= -3,
 	PERF_EVENT_STATE_ERROR		= -2,
 	PERF_EVENT_STATE_OFF		= -1,
@@ -467,6 +468,7 @@
 	int				group_flags;
 	struct perf_event		*group_leader;
 	struct pmu			*pmu;
+	void				*pmu_private;
 
 	enum perf_event_active_state	state;
 	unsigned int			attach_state;
@@ -905,7 +907,7 @@
 	}
 }
 
-extern struct static_key_deferred perf_sched_events;
+extern struct static_key_false perf_sched_events;
 
 static __always_inline bool
 perf_sw_migrate_enabled(void)
@@ -924,7 +926,7 @@
 static inline void perf_event_task_sched_in(struct task_struct *prev,
 					    struct task_struct *task)
 {
-	if (static_key_false(&perf_sched_events.key))
+	if (static_branch_unlikely(&perf_sched_events))
 		__perf_event_task_sched_in(prev, task);
 
 	if (perf_sw_migrate_enabled() && task->sched_migrated) {
@@ -941,7 +943,7 @@
 {
 	perf_sw_event_sched(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 0);
 
-	if (static_key_false(&perf_sched_events.key))
+	if (static_branch_unlikely(&perf_sched_events))
 		__perf_event_task_sched_out(prev, next);
 }
 
@@ -1108,12 +1110,6 @@
 static inline int perf_event_release_kernel(struct perf_event *event)	{ return 0; }
 #endif
 
-#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_NO_HZ_FULL)
-extern bool perf_event_can_stop_tick(void);
-#else
-static inline bool perf_event_can_stop_tick(void)			{ return true; }
-#endif
-
 #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
 extern void perf_restore_debug_store(void);
 #else

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 907f3fd..62d44c1 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h

@@ -128,9 +128,6 @@
 void run_posix_cpu_timers(struct task_struct *task);
 void posix_cpu_timers_exit(struct task_struct *task);
 void posix_cpu_timers_exit_group(struct task_struct *task);
-
-bool posix_cpu_timers_can_stop_tick(struct task_struct *tsk);
-
 void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx,
 			   cputime_t *newval, cputime_t *oldval);
 

diff --git a/include/linux/power/bq27xxx_battery.h b/include/linux/power/bq27xxx_battery.h
index 998d8f1..b50c049 100644
--- a/include/linux/power/bq27xxx_battery.h
+++ b/include/linux/power/bq27xxx_battery.h

@@ -49,6 +49,7 @@
 
 struct bq27xxx_device_info {
 	struct device *dev;
+	int id;
 	enum bq27xxx_chip chip;
 	const char *name;
 	struct bq27xxx_access_methods bus;

diff --git a/include/linux/pps_kernel.h b/include/linux/pps_kernel.h
index 54bf148..35ac903 100644
--- a/include/linux/pps_kernel.h
+++ b/include/linux/pps_kernel.h

@@ -111,22 +111,17 @@
 	kt->nsec = ts.tv_nsec;
 }
 
+static inline void pps_get_ts(struct pps_event_time *ts)
+{
+	struct system_time_snapshot snap;
+
+	ktime_get_snapshot(&snap);
+	ts->ts_real = ktime_to_timespec64(snap.real);
 #ifdef CONFIG_NTP_PPS
-
-static inline void pps_get_ts(struct pps_event_time *ts)
-{
-	ktime_get_raw_and_real_ts64(&ts->ts_raw, &ts->ts_real);
+	ts->ts_raw = ktime_to_timespec64(snap.raw);
+#endif
 }
 
-#else /* CONFIG_NTP_PPS */
-
-static inline void pps_get_ts(struct pps_event_time *ts)
-{
-	ktime_get_real_ts64(&ts->ts_real);
-}
-
-#endif /* CONFIG_NTP_PPS */
-
 /* Subtract known time delay from PPS event time(s) */
 static inline void pps_sub_ts(struct pps_event_time *ts, struct timespec64 delta)
 {

diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h
index b8b7306..6b15e16 100644
--- a/include/linux/ptp_clock_kernel.h
+++ b/include/linux/ptp_clock_kernel.h

@@ -38,6 +38,7 @@
 	};
 };
 
+struct system_device_crosststamp;
 /**
  * struct ptp_clock_info - decribes a PTP hardware clock
  *
@@ -67,6 +68,11 @@
  * @gettime64:  Reads the current time from the hardware clock.
  *              parameter ts: Holds the result.
  *
+ * @getcrosststamp:  Reads the current time from the hardware clock and
+ *                   system clock simultaneously.
+ *                   parameter cts: Contains timestamp (device,system) pair,
+ *                   where system time is realtime and monotonic.
+ *
  * @settime64:  Set the current time on the hardware clock.
  *              parameter ts: Time value to set.
  *
@@ -105,6 +111,8 @@
 	int (*adjfreq)(struct ptp_clock_info *ptp, s32 delta);
 	int (*adjtime)(struct ptp_clock_info *ptp, s64 delta);
 	int (*gettime64)(struct ptp_clock_info *ptp, struct timespec64 *ts);
+	int (*getcrosststamp)(struct ptp_clock_info *ptp,
+			      struct system_device_crosststamp *cts);
 	int (*settime64)(struct ptp_clock_info *p, const struct timespec64 *ts);
 	int (*enable)(struct ptp_clock_info *ptp,
 		      struct ptp_clock_request *request, int on);

diff --git a/include/linux/random.h b/include/linux/random.h
index a75840c..9c29122 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h

@@ -34,6 +34,7 @@
 #endif
 
 unsigned int get_random_int(void);
+unsigned long get_random_long(void);
 unsigned long randomize_range(unsigned long start, unsigned long end, unsigned long len);
 
 u32 prandom_u32(void);

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 14e6f47..2657aff 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h

@@ -332,9 +332,7 @@
 void rcu_sched_qs(void);
 void rcu_bh_qs(void);
 void rcu_check_callbacks(int user);
-struct notifier_block;
-int rcu_cpu_notify(struct notifier_block *self,
-		   unsigned long action, void *hcpu);
+void rcu_report_dead(unsigned int cpu);
 
 #ifndef CONFIG_TINY_RCU
 void rcu_end_inkernel_boot(void);
@@ -360,8 +358,6 @@
 #else
 static inline void rcu_user_enter(void) { }
 static inline void rcu_user_exit(void) { }
-static inline void rcu_user_hooks_switch(struct task_struct *prev,
-					 struct task_struct *next) { }
 #endif /* CONFIG_NO_HZ_FULL */
 
 #ifdef CONFIG_RCU_NOCB_CPU

diff --git a/include/linux/sched.h b/include/linux/sched.h
index a10494a..c617ea1 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h

@@ -182,8 +182,6 @@
 static inline void update_cpu_load_nohz(int active) { }
 #endif
 
-extern unsigned long get_parent_ip(unsigned long addr);
-
 extern void dump_cpu_task(int cpu);
 
 struct seq_file;
@@ -719,6 +717,10 @@
 	/* Earliest-expiration cache. */
 	struct task_cputime cputime_expires;
 
+#ifdef CONFIG_NO_HZ_FULL
+	unsigned long tick_dep_mask;
+#endif
+
 	struct list_head cpu_timers[3];
 
 	struct pid *tty_old_pgrp;
@@ -920,6 +922,10 @@
 #endif
 }
 
+#ifdef CONFIG_SCHEDSTATS
+void force_schedstat_enabled(void);
+#endif
+
 enum cpu_idle_type {
 	CPU_IDLE,
 	CPU_NOT_IDLE,
@@ -1289,6 +1295,8 @@
 	unsigned long timeout;
 	unsigned long watchdog_stamp;
 	unsigned int time_slice;
+	unsigned short on_rq;
+	unsigned short on_list;
 
 	struct sched_rt_entity *back;
 #ifdef CONFIG_RT_GROUP_SCHED
@@ -1329,10 +1337,6 @@
 	 * task has to wait for a replenishment to be performed at the
 	 * next firing of dl_timer.
 	 *
-	 * @dl_new tells if a new instance arrived. If so we must
-	 * start executing it with full runtime and reset its absolute
-	 * deadline;
-	 *
 	 * @dl_boosted tells if we are boosted due to DI. If so we are
 	 * outside bandwidth enforcement mechanism (but only until we
 	 * exit the critical section);
@@ -1340,7 +1344,7 @@
 	 * @dl_yielded tells if task gave up the cpu before consuming
 	 * all its available runtime during the last job.
 	 */
-	int dl_throttled, dl_new, dl_boosted, dl_yielded;
+	int dl_throttled, dl_boosted, dl_yielded;
 
 	/*
 	 * Bandwidth enforcement timer. Each -deadline task has its
@@ -1542,6 +1546,10 @@
 		VTIME_SYS,
 	} vtime_snap_whence;
 #endif
+
+#ifdef CONFIG_NO_HZ_FULL
+	unsigned long tick_dep_mask;
+#endif
 	unsigned long nvcsw, nivcsw; /* context switch counts */
 	u64 start_time;		/* monotonic time in nsec */
 	u64 real_start_time;	/* boot based time in nsec */
@@ -2356,10 +2364,7 @@
 #endif
 
 #ifdef CONFIG_NO_HZ_FULL
-extern bool sched_can_stop_tick(void);
 extern u64 scheduler_tick_max_deferment(void);
-#else
-static inline bool sched_can_stop_tick(void) { return false; }
 #endif
 
 #ifdef CONFIG_SCHED_AUTOGROUP

diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index c9e4731..4f080ab 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h

@@ -95,4 +95,8 @@
 				 void __user *buffer, size_t *lenp,
 				 loff_t *ppos);
 
+extern int sysctl_schedstats(struct ctl_table *table, int write,
+				 void __user *buffer, size_t *lenp,
+				 loff_t *ppos);
+
 #endif /* _SCHED_SYSCTL_H */

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 4ce9ff7..d3fcd45 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h

@@ -1985,6 +1985,30 @@
 	skb->tail += len;
 }
 
+/**
+ *	skb_tailroom_reserve - adjust reserved_tailroom
+ *	@skb: buffer to alter
+ *	@mtu: maximum amount of headlen permitted
+ *	@needed_tailroom: minimum amount of reserved_tailroom
+ *
+ *	Set reserved_tailroom so that headlen can be as large as possible but
+ *	not larger than mtu and tailroom cannot be smaller than
+ *	needed_tailroom.
+ *	The required headroom should already have been reserved before using
+ *	this function.
+ */
+static inline void skb_tailroom_reserve(struct sk_buff *skb, unsigned int mtu,
+					unsigned int needed_tailroom)
+{
+	SKB_LINEAR_ASSERT(skb);
+	if (mtu < skb_tailroom(skb) - needed_tailroom)
+		/* use at most mtu */
+		skb->reserved_tailroom = skb_tailroom(skb) - mtu;
+	else
+		/* use up to all available space */
+		skb->reserved_tailroom = needed_tailroom;
+}
+
 #define ENCAP_TYPE_ETHER	0
 #define ENCAP_TYPE_IPPROTO	1
 

diff --git a/include/linux/soc/ti/knav_dma.h b/include/linux/soc/ti/knav_dma.h
index 343c13a..35cb926 100644
--- a/include/linux/soc/ti/knav_dma.h
+++ b/include/linux/soc/ti/knav_dma.h

@@ -44,6 +44,7 @@
 
 #define KNAV_DMA_NUM_EPIB_WORDS			4
 #define KNAV_DMA_NUM_PS_WORDS			16
+#define KNAV_DMA_NUM_SW_DATA_WORDS		4
 #define KNAV_DMA_FDQ_PER_CHAN			4
 
 /* Tx channel scheduling priority */
@@ -142,6 +143,7 @@
  * @orig_buff:			buff pointer since 'buff' can be overwritten
  * @epib:			Extended packet info block
  * @psdata:			Protocol specific
+ * @sw_data:			Software private data not touched by h/w
  */
 struct knav_dma_desc {
 	__le32	desc_info;
@@ -154,7 +156,7 @@
 	__le32	orig_buff;
 	__le32	epib[KNAV_DMA_NUM_EPIB_WORDS];
 	__le32	psdata[KNAV_DMA_NUM_PS_WORDS];
-	__le32	pad[4];
+	u32	sw_data[KNAV_DMA_NUM_SW_DATA_WORDS];
 } ____cacheline_aligned;
 
 #if IS_ENABLED(CONFIG_KEYSTONE_NAVIGATOR_DMA)

diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index f5f80c5..dc8eb63 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h

@@ -99,8 +99,23 @@
 	}
 
 /*
- * define and init a srcu struct at build time.
- * dont't call init_srcu_struct() nor cleanup_srcu_struct() on it.
+ * Define and initialize a srcu struct at build time.
+ * Do -not- call init_srcu_struct() nor cleanup_srcu_struct() on it.
+ *
+ * Note that although DEFINE_STATIC_SRCU() hides the name from other
+ * files, the per-CPU variable rules nevertheless require that the
+ * chosen name be globally unique.  These rules also prohibit use of
+ * DEFINE_STATIC_SRCU() within a function.  If these rules are too
+ * restrictive, declare the srcu_struct manually.  For example, in
+ * each file:
+ *
+ *	static struct srcu_struct my_srcu;
+ *
+ * Then, before the first use of each my_srcu, manually initialize it:
+ *
+ *	init_srcu_struct(&my_srcu);
+ *
+ * See include/linux/percpu-defs.h for the rules on per-CPU variables.
  */
 #define __DEFINE_SRCU(name, is_static)					\
 	static DEFINE_PER_CPU(struct srcu_struct_array, name##_srcu_array);\

diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index eead8ab..881a79d 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h

@@ -100,6 +100,7 @@
 	int interface;
 	struct stmmac_mdio_bus_data *mdio_bus_data;
 	struct device_node *phy_node;
+	struct device_node *mdio_node;
 	struct stmmac_dma_cfg *dma_cfg;
 	int clk_csr;
 	int has_gmac;

diff --git a/include/linux/swait.h b/include/linux/swait.h
new file mode 100644
index 0000000..c1f9c62
--- /dev/null
+++ b/include/linux/swait.h

@@ -0,0 +1,172 @@
+#ifndef _LINUX_SWAIT_H
+#define _LINUX_SWAIT_H
+
+#include <linux/list.h>
+#include <linux/stddef.h>
+#include <linux/spinlock.h>
+#include <asm/current.h>
+
+/*
+ * Simple wait queues
+ *
+ * While these are very similar to the other/complex wait queues (wait.h) the
+ * most important difference is that the simple waitqueue allows for
+ * deterministic behaviour -- IOW it has strictly bounded IRQ and lock hold
+ * times.
+ *
+ * In order to make this so, we had to drop a fair number of features of the
+ * other waitqueue code; notably:
+ *
+ *  - mixing INTERRUPTIBLE and UNINTERRUPTIBLE sleeps on the same waitqueue;
+ *    all wakeups are TASK_NORMAL in order to avoid O(n) lookups for the right
+ *    sleeper state.
+ *
+ *  - the exclusive mode; because this requires preserving the list order
+ *    and this is hard.
+ *
+ *  - custom wake functions; because you cannot give any guarantees about
+ *    random code.
+ *
+ * As a side effect of this; the data structures are slimmer.
+ *
+ * One would recommend using this wait queue where possible.
+ */
+
+struct task_struct;
+
+struct swait_queue_head {
+	raw_spinlock_t		lock;
+	struct list_head	task_list;
+};
+
+struct swait_queue {
+	struct task_struct	*task;
+	struct list_head	task_list;
+};
+
+#define __SWAITQUEUE_INITIALIZER(name) {				\
+	.task		= current,					\
+	.task_list	= LIST_HEAD_INIT((name).task_list),		\
+}
+
+#define DECLARE_SWAITQUEUE(name)					\
+	struct swait_queue name = __SWAITQUEUE_INITIALIZER(name)
+
+#define __SWAIT_QUEUE_HEAD_INITIALIZER(name) {				\
+	.lock		= __RAW_SPIN_LOCK_UNLOCKED(name.lock),		\
+	.task_list	= LIST_HEAD_INIT((name).task_list),		\
+}
+
+#define DECLARE_SWAIT_QUEUE_HEAD(name)					\
+	struct swait_queue_head name = __SWAIT_QUEUE_HEAD_INITIALIZER(name)
+
+extern void __init_swait_queue_head(struct swait_queue_head *q, const char *name,
+				    struct lock_class_key *key);
+
+#define init_swait_queue_head(q)				\
+	do {							\
+		static struct lock_class_key __key;		\
+		__init_swait_queue_head((q), #q, &__key);	\
+	} while (0)
+
+#ifdef CONFIG_LOCKDEP
+# define __SWAIT_QUEUE_HEAD_INIT_ONSTACK(name)			\
+	({ init_swait_queue_head(&name); name; })
+# define DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(name)			\
+	struct swait_queue_head name = __SWAIT_QUEUE_HEAD_INIT_ONSTACK(name)
+#else
+# define DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(name)			\
+	DECLARE_SWAIT_QUEUE_HEAD(name)
+#endif
+
+static inline int swait_active(struct swait_queue_head *q)
+{
+	return !list_empty(&q->task_list);
+}
+
+extern void swake_up(struct swait_queue_head *q);
+extern void swake_up_all(struct swait_queue_head *q);
+extern void swake_up_locked(struct swait_queue_head *q);
+
+extern void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait);
+extern void prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait, int state);
+extern long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queue *wait, int state);
+
+extern void __finish_swait(struct swait_queue_head *q, struct swait_queue *wait);
+extern void finish_swait(struct swait_queue_head *q, struct swait_queue *wait);
+
+/* as per ___wait_event() but for swait, therefore "exclusive == 0" */
+#define ___swait_event(wq, condition, state, ret, cmd)			\
+({									\
+	struct swait_queue __wait;					\
+	long __ret = ret;						\
+									\
+	INIT_LIST_HEAD(&__wait.task_list);				\
+	for (;;) {							\
+		long __int = prepare_to_swait_event(&wq, &__wait, state);\
+									\
+		if (condition)						\
+			break;						\
+									\
+		if (___wait_is_interruptible(state) && __int) {		\
+			__ret = __int;					\
+			break;						\
+		}							\
+									\
+		cmd;							\
+	}								\
+	finish_swait(&wq, &__wait);					\
+	__ret;								\
+})
+
+#define __swait_event(wq, condition)					\
+	(void)___swait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0,	\
+			    schedule())
+
+#define swait_event(wq, condition)					\
+do {									\
+	if (condition)							\
+		break;							\
+	__swait_event(wq, condition);					\
+} while (0)
+
+#define __swait_event_timeout(wq, condition, timeout)			\
+	___swait_event(wq, ___wait_cond_timeout(condition),		\
+		      TASK_UNINTERRUPTIBLE, timeout,			\
+		      __ret = schedule_timeout(__ret))
+
+#define swait_event_timeout(wq, condition, timeout)			\
+({									\
+	long __ret = timeout;						\
+	if (!___wait_cond_timeout(condition))				\
+		__ret = __swait_event_timeout(wq, condition, timeout);	\
+	__ret;								\
+})
+
+#define __swait_event_interruptible(wq, condition)			\
+	___swait_event(wq, condition, TASK_INTERRUPTIBLE, 0,		\
+		      schedule())
+
+#define swait_event_interruptible(wq, condition)			\
+({									\
+	int __ret = 0;							\
+	if (!(condition))						\
+		__ret = __swait_event_interruptible(wq, condition);	\
+	__ret;								\
+})
+
+#define __swait_event_interruptible_timeout(wq, condition, timeout)	\
+	___swait_event(wq, ___wait_cond_timeout(condition),		\
+		      TASK_INTERRUPTIBLE, timeout,			\
+		      __ret = schedule_timeout(__ret))
+
+#define swait_event_interruptible_timeout(wq, condition, timeout)	\
+({									\
+	long __ret = timeout;						\
+	if (!___wait_cond_timeout(condition))				\
+		__ret = __swait_event_interruptible_timeout(wq,		\
+						condition, timeout);	\
+	__ret;								\
+})
+
+#endif /* _LINUX_SWAIT_H */

diff --git a/include/linux/tick.h b/include/linux/tick.h
index 97fd4e5..21f7364 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h

@@ -97,6 +97,19 @@
 	tick_broadcast_oneshot_control(TICK_BROADCAST_EXIT);
 }
 
+enum tick_dep_bits {
+	TICK_DEP_BIT_POSIX_TIMER	= 0,
+	TICK_DEP_BIT_PERF_EVENTS	= 1,
+	TICK_DEP_BIT_SCHED		= 2,
+	TICK_DEP_BIT_CLOCK_UNSTABLE	= 3
+};
+
+#define TICK_DEP_MASK_NONE		0
+#define TICK_DEP_MASK_POSIX_TIMER	(1 << TICK_DEP_BIT_POSIX_TIMER)
+#define TICK_DEP_MASK_PERF_EVENTS	(1 << TICK_DEP_BIT_PERF_EVENTS)
+#define TICK_DEP_MASK_SCHED		(1 << TICK_DEP_BIT_SCHED)
+#define TICK_DEP_MASK_CLOCK_UNSTABLE	(1 << TICK_DEP_BIT_CLOCK_UNSTABLE)
+
 #ifdef CONFIG_NO_HZ_COMMON
 extern int tick_nohz_enabled;
 extern int tick_nohz_tick_stopped(void);
@@ -154,9 +167,73 @@
 	return cpumask_any_and(housekeeping_mask, cpu_online_mask);
 }
 
-extern void tick_nohz_full_kick(void);
+extern void tick_nohz_dep_set(enum tick_dep_bits bit);
+extern void tick_nohz_dep_clear(enum tick_dep_bits bit);
+extern void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit);
+extern void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit);
+extern void tick_nohz_dep_set_task(struct task_struct *tsk,
+				   enum tick_dep_bits bit);
+extern void tick_nohz_dep_clear_task(struct task_struct *tsk,
+				     enum tick_dep_bits bit);
+extern void tick_nohz_dep_set_signal(struct signal_struct *signal,
+				     enum tick_dep_bits bit);
+extern void tick_nohz_dep_clear_signal(struct signal_struct *signal,
+				       enum tick_dep_bits bit);
+
+/*
+ * The below are tick_nohz_[set,clear]_dep() wrappers that optimize off-cases
+ * on top of static keys.
+ */
+static inline void tick_dep_set(enum tick_dep_bits bit)
+{
+	if (tick_nohz_full_enabled())
+		tick_nohz_dep_set(bit);
+}
+
+static inline void tick_dep_clear(enum tick_dep_bits bit)
+{
+	if (tick_nohz_full_enabled())
+		tick_nohz_dep_clear(bit);
+}
+
+static inline void tick_dep_set_cpu(int cpu, enum tick_dep_bits bit)
+{
+	if (tick_nohz_full_cpu(cpu))
+		tick_nohz_dep_set_cpu(cpu, bit);
+}
+
+static inline void tick_dep_clear_cpu(int cpu, enum tick_dep_bits bit)
+{
+	if (tick_nohz_full_cpu(cpu))
+		tick_nohz_dep_clear_cpu(cpu, bit);
+}
+
+static inline void tick_dep_set_task(struct task_struct *tsk,
+				     enum tick_dep_bits bit)
+{
+	if (tick_nohz_full_enabled())
+		tick_nohz_dep_set_task(tsk, bit);
+}
+static inline void tick_dep_clear_task(struct task_struct *tsk,
+				       enum tick_dep_bits bit)
+{
+	if (tick_nohz_full_enabled())
+		tick_nohz_dep_clear_task(tsk, bit);
+}
+static inline void tick_dep_set_signal(struct signal_struct *signal,
+				       enum tick_dep_bits bit)
+{
+	if (tick_nohz_full_enabled())
+		tick_nohz_dep_set_signal(signal, bit);
+}
+static inline void tick_dep_clear_signal(struct signal_struct *signal,
+					 enum tick_dep_bits bit)
+{
+	if (tick_nohz_full_enabled())
+		tick_nohz_dep_clear_signal(signal, bit);
+}
+
 extern void tick_nohz_full_kick_cpu(int cpu);
-extern void tick_nohz_full_kick_all(void);
 extern void __tick_nohz_task_switch(void);
 #else
 static inline int housekeeping_any_cpu(void)
@@ -166,9 +243,21 @@
 static inline bool tick_nohz_full_enabled(void) { return false; }
 static inline bool tick_nohz_full_cpu(int cpu) { return false; }
 static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) { }
+
+static inline void tick_dep_set(enum tick_dep_bits bit) { }
+static inline void tick_dep_clear(enum tick_dep_bits bit) { }
+static inline void tick_dep_set_cpu(int cpu, enum tick_dep_bits bit) { }
+static inline void tick_dep_clear_cpu(int cpu, enum tick_dep_bits bit) { }
+static inline void tick_dep_set_task(struct task_struct *tsk,
+				     enum tick_dep_bits bit) { }
+static inline void tick_dep_clear_task(struct task_struct *tsk,
+				       enum tick_dep_bits bit) { }
+static inline void tick_dep_set_signal(struct signal_struct *signal,
+				       enum tick_dep_bits bit) { }
+static inline void tick_dep_clear_signal(struct signal_struct *signal,
+					 enum tick_dep_bits bit) { }
+
 static inline void tick_nohz_full_kick_cpu(int cpu) { }
-static inline void tick_nohz_full_kick(void) { }
-static inline void tick_nohz_full_kick_all(void) { }
 static inline void __tick_nohz_task_switch(void) { }
 #endif
 

diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
index 2524722..e880054 100644
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h

@@ -50,6 +50,7 @@
  * @offs_tai:		Offset clock monotonic -> clock tai
  * @tai_offset:		The current UTC to TAI offset in seconds
  * @clock_was_set_seq:	The sequence number of clock was set events
+ * @cs_was_changed_seq:	The sequence number of clocksource change events
  * @next_leap_ktime:	CLOCK_MONOTONIC time value of a pending leap-second
  * @raw_time:		Monotonic raw base time in timespec64 format
  * @cycle_interval:	Number of clock cycles in one NTP interval
@@ -91,6 +92,7 @@
 	ktime_t			offs_tai;
 	s32			tai_offset;
 	unsigned int		clock_was_set_seq;
+	u8			cs_was_changed_seq;
 	ktime_t			next_leap_ktime;
 	struct timespec64	raw_time;
 

diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index ec89d84..96f37be 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h

@@ -267,6 +267,64 @@
 				        struct timespec64 *ts_real);
 
 /*
+ * struct system_time_snapshot - simultaneous raw/real time capture with
+ *	counter value
+ * @cycles:	Clocksource counter value to produce the system times
+ * @real:	Realtime system time
+ * @raw:	Monotonic raw system time
+ * @clock_was_set_seq:	The sequence number of clock was set events
+ * @cs_was_changed_seq:	The sequence number of clocksource change events
+ */
+struct system_time_snapshot {
+	cycle_t		cycles;
+	ktime_t		real;
+	ktime_t		raw;
+	unsigned int	clock_was_set_seq;
+	u8		cs_was_changed_seq;
+};
+
+/*
+ * struct system_device_crosststamp - system/device cross-timestamp
+ *	(syncronized capture)
+ * @device:		Device time
+ * @sys_realtime:	Realtime simultaneous with device time
+ * @sys_monoraw:	Monotonic raw simultaneous with device time
+ */
+struct system_device_crosststamp {
+	ktime_t device;
+	ktime_t sys_realtime;
+	ktime_t sys_monoraw;
+};
+
+/*
+ * struct system_counterval_t - system counter value with the pointer to the
+ *	corresponding clocksource
+ * @cycles:	System counter value
+ * @cs:		Clocksource corresponding to system counter value. Used by
+ *	timekeeping code to verify comparibility of two cycle values
+ */
+struct system_counterval_t {
+	cycle_t			cycles;
+	struct clocksource	*cs;
+};
+
+/*
+ * Get cross timestamp between system clock and device clock
+ */
+extern int get_device_system_crosststamp(
+			int (*get_time_fn)(ktime_t *device_time,
+				struct system_counterval_t *system_counterval,
+				void *ctx),
+			void *ctx,
+			struct system_time_snapshot *history,
+			struct system_device_crosststamp *xtstamp);
+
+/*
+ * Simultaneously snapshot realtime and monotonic raw clocks
+ */
+extern void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot);
+
+/*
  * Persistent clock related interfaces
  */
 extern int persistent_clock_is_local;

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 429fdfc..925730b 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h

@@ -568,6 +568,8 @@
 	FILTER_DYN_STRING,
 	FILTER_PTR_STRING,
 	FILTER_TRACE_FN,
+	FILTER_COMM,
+	FILTER_CPU,
 };
 
 extern int trace_event_raw_init(struct trace_event_call *call);

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index acfdbf3..be586c6 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h

@@ -134,9 +134,6 @@
 		void *it_func;						\
 		void *__data;						\
 									\
-		if (!cpu_online(raw_smp_processor_id()))		\
-			return;						\
-									\
 		if (!(cond))						\
 			return;						\
 		prercu;							\
@@ -343,15 +340,19 @@
  * "void *__data, proto" as the callback prototype.
  */
 #define DECLARE_TRACE_NOARGS(name)					\
-		__DECLARE_TRACE(name, void, , 1, void *__data, __data)
+	__DECLARE_TRACE(name, void, ,					\
+			cpu_online(raw_smp_processor_id()),		\
+			void *__data, __data)
 
 #define DECLARE_TRACE(name, proto, args)				\
-		__DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), 1,	\
-				PARAMS(void *__data, proto),		\
-				PARAMS(__data, args))
+	__DECLARE_TRACE(name, PARAMS(proto), PARAMS(args),		\
+			cpu_online(raw_smp_processor_id()),		\
+			PARAMS(void *__data, proto),			\
+			PARAMS(__data, args))
 
 #define DECLARE_TRACE_CONDITION(name, proto, args, cond)		\
-	__DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), PARAMS(cond), \
+	__DECLARE_TRACE(name, PARAMS(proto), PARAMS(args),		\
+			cpu_online(raw_smp_processor_id()) && (PARAMS(cond)), \
 			PARAMS(void *__data, proto),			\
 			PARAMS(__data, args))
 

diff --git a/include/linux/wait.h b/include/linux/wait.h
index ae71a76..27d7a0a 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h

@@ -338,7 +338,7 @@
 			    schedule(); try_to_freeze())
 
 /**
- * wait_event - sleep (or freeze) until a condition gets true
+ * wait_event_freezable - sleep (or freeze) until a condition gets true
  * @wq: the waitqueue to wait on
  * @condition: a C expression for the event to wait for
  *

diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index b333c94..d0b5ca5 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h

@@ -198,6 +198,7 @@
 void wbc_detach_inode(struct writeback_control *wbc);
 void wbc_account_io(struct writeback_control *wbc, struct page *page,
 		    size_t bytes);
+void cgroup_writeback_umount(void);
 
 /**
  * inode_attach_wb - associate an inode with its wb
@@ -301,6 +302,10 @@
 {
 }
 
+static inline void cgroup_writeback_umount(void)
+{
+}
+
 #endif	/* CONFIG_CGROUP_WRITEBACK */
 
 /*

diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 481fe1c..49dcad4 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h

@@ -270,8 +270,9 @@
 					    struct sock *newsk,
 					    const struct request_sock *req);
 
-void inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req,
-			      struct sock *child);
+struct sock *inet_csk_reqsk_queue_add(struct sock *sk,
+				      struct request_sock *req,
+				      struct sock *child);
 void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
 				   unsigned long timeout);
 struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child,

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 7029527..4079fc1 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h

@@ -61,6 +61,7 @@
 	struct rtable __rcu		*fnhe_rth_input;
 	struct rtable __rcu		*fnhe_rth_output;
 	unsigned long			fnhe_stamp;
+	struct rcu_head			rcu;
 };
 
 struct fnhe_hash_bucket {

diff --git a/include/net/iw_handler.h b/include/net/iw_handler.h
index 8f81bbb..e0f4109 100644
--- a/include/net/iw_handler.h
+++ b/include/net/iw_handler.h

@@ -439,6 +439,12 @@
 /* Send a single event to user space */
 void wireless_send_event(struct net_device *dev, unsigned int cmd,
 			 union iwreq_data *wrqu, const char *extra);
+#ifdef CONFIG_WEXT_CORE
+/* flush all previous wext events - if work is done from netdev notifiers */
+void wireless_nlevent_flush(void);
+#else
+static inline void wireless_nlevent_flush(void) {}
+#endif
 
 /* We may need a function to send a stream of events to user space.
  * More on that later... */

diff --git a/include/sound/hdaudio.h b/include/sound/hdaudio.h
index e2b712c..c21c38c 100644
--- a/include/sound/hdaudio.h
+++ b/include/sound/hdaudio.h

@@ -343,7 +343,7 @@
 void snd_hdac_bus_exit_link_reset(struct hdac_bus *bus);
 
 void snd_hdac_bus_update_rirb(struct hdac_bus *bus);
-void snd_hdac_bus_handle_stream_irq(struct hdac_bus *bus, unsigned int status,
+int snd_hdac_bus_handle_stream_irq(struct hdac_bus *bus, unsigned int status,
 				    void (*ack)(struct hdac_bus *,
 						struct hdac_stream *));
 

diff --git a/include/trace/events/asoc.h b/include/trace/events/asoc.h
index 317a1ed..9130dd5 100644
--- a/include/trace/events/asoc.h
+++ b/include/trace/events/asoc.h

@@ -231,13 +231,13 @@
 	TP_ARGS(jack, mask, val),
 
 	TP_STRUCT__entry(
-		__string(	name,		jack->jack->name	)
+		__string(	name,		jack->jack->id		)
 		__field(	int,		mask			)
 		__field(	int,		val			)
 	),
 
 	TP_fast_assign(
-		__assign_str(name, jack->jack->name);
+		__assign_str(name, jack->jack->id);
 		__entry->mask = mask;
 		__entry->val = val;
 	),
@@ -253,12 +253,12 @@
 	TP_ARGS(jack, val),
 
 	TP_STRUCT__entry(
-		__string(	name,		jack->jack->name	)
+		__string(	name,		jack->jack->id		)
 		__field(	int,		val			)
 	),
 
 	TP_fast_assign(
-		__assign_str(name, jack->jack->name);
+		__assign_str(name, jack->jack->id);
 		__entry->val = val;
 	),
 

diff --git a/include/trace/events/cpuhp.h b/include/trace/events/cpuhp.h
new file mode 100644
index 0000000..a72bd93
--- /dev/null
+++ b/include/trace/events/cpuhp.h

@@ -0,0 +1,66 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM cpuhp
+
+#if !defined(_TRACE_CPUHP_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_CPUHP_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(cpuhp_enter,
+
+	TP_PROTO(unsigned int cpu,
+		 int target,
+		 int idx,
+		 int (*fun)(unsigned int)),
+
+	TP_ARGS(cpu, target, idx, fun),
+
+	TP_STRUCT__entry(
+		__field( unsigned int,	cpu		)
+		__field( int,		target		)
+		__field( int,		idx		)
+		__field( void *,	fun		)
+	),
+
+	TP_fast_assign(
+		__entry->cpu	= cpu;
+		__entry->target	= target;
+		__entry->idx	= idx;
+		__entry->fun	= fun;
+	),
+
+	TP_printk("cpu: %04u target: %3d step: %3d (%pf)",
+		  __entry->cpu, __entry->target, __entry->idx, __entry->fun)
+);
+
+TRACE_EVENT(cpuhp_exit,
+
+	TP_PROTO(unsigned int cpu,
+		 int state,
+		 int idx,
+		 int ret),
+
+	TP_ARGS(cpu, state, idx, ret),
+
+	TP_STRUCT__entry(
+		__field( unsigned int,	cpu		)
+		__field( int,		state		)
+		__field( int,		idx		)
+		__field( int,		ret		)
+	),
+
+	TP_fast_assign(
+		__entry->cpu	= cpu;
+		__entry->state	= state;
+		__entry->idx	= idx;
+		__entry->ret	= ret;
+	),
+
+	TP_printk(" cpu: %04u  state: %3d step: %3d ret: %d",
+		  __entry->cpu, __entry->state, __entry->idx,  __entry->ret)
+);
+
+#endif
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>

diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h
index 073b9ac..5144013 100644
--- a/include/trace/events/timer.h
+++ b/include/trace/events/timer.h

@@ -328,23 +328,49 @@
 );
 
 #ifdef CONFIG_NO_HZ_COMMON
+
+#define TICK_DEP_NAMES					\
+		tick_dep_name(NONE)			\
+		tick_dep_name(POSIX_TIMER)		\
+		tick_dep_name(PERF_EVENTS)		\
+		tick_dep_name(SCHED)			\
+		tick_dep_name_end(CLOCK_UNSTABLE)
+
+#undef tick_dep_name
+#undef tick_dep_name_end
+
+#define tick_dep_name(sdep) TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep);
+#define tick_dep_name_end(sdep)  TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep);
+
+TICK_DEP_NAMES
+
+#undef tick_dep_name
+#undef tick_dep_name_end
+
+#define tick_dep_name(sdep) { TICK_DEP_MASK_##sdep, #sdep },
+#define tick_dep_name_end(sdep) { TICK_DEP_MASK_##sdep, #sdep }
+
+#define show_tick_dep_name(val)				\
+	__print_symbolic(val, TICK_DEP_NAMES)
+
 TRACE_EVENT(tick_stop,
 
-	TP_PROTO(int success, char *error_msg),
+	TP_PROTO(int success, int dependency),
 
-	TP_ARGS(success, error_msg),
+	TP_ARGS(success, dependency),
 
 	TP_STRUCT__entry(
 		__field( int ,		success	)
-		__string( msg, 		error_msg )
+		__field( int ,		dependency )
 	),
 
 	TP_fast_assign(
 		__entry->success	= success;
-		__assign_str(msg, error_msg);
+		__entry->dependency	= dependency;
 	),
 
-	TP_printk("success=%s msg=%s",  __entry->success ? "yes" : "no", __get_str(msg))
+	TP_printk("success=%d dependency=%s",  __entry->success, \
+			show_tick_dep_name(__entry->dependency))
 );
 #endif
 

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index aa6f857..5df4881 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h

@@ -292,6 +292,9 @@
 /* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */
 #define BPF_F_TUNINFO_IPV6		(1ULL << 0)
 
+/* BPF_FUNC_skb_set_tunnel_key flags. */
+#define BPF_F_ZERO_CSUM_TX		(1ULL << 1)
+
 /* user accessible mirror of in-kernel sk_buff.
  * new fields can only be added to the end of this structure
  */

diff --git a/include/uapi/linux/media.h b/include/uapi/linux/media.h
index 1e3c8cb..a8e3a8c 100644
--- a/include/uapi/linux/media.h
+++ b/include/uapi/linux/media.h

@@ -66,27 +66,33 @@
 /*
  * DVB entities
  */
-#define MEDIA_ENT_F_DTV_DEMOD		(MEDIA_ENT_F_BASE + 1)
-#define MEDIA_ENT_F_TS_DEMUX		(MEDIA_ENT_F_BASE + 2)
-#define MEDIA_ENT_F_DTV_CA		(MEDIA_ENT_F_BASE + 3)
-#define MEDIA_ENT_F_DTV_NET_DECAP	(MEDIA_ENT_F_BASE + 4)
+#define MEDIA_ENT_F_DTV_DEMOD		(MEDIA_ENT_F_BASE + 0x00001)
+#define MEDIA_ENT_F_TS_DEMUX		(MEDIA_ENT_F_BASE + 0x00002)
+#define MEDIA_ENT_F_DTV_CA		(MEDIA_ENT_F_BASE + 0x00003)
+#define MEDIA_ENT_F_DTV_NET_DECAP	(MEDIA_ENT_F_BASE + 0x00004)
+
+/*
+ * I/O entities
+ */
+#define MEDIA_ENT_F_IO_DTV		(MEDIA_ENT_F_BASE + 0x01001)
+#define MEDIA_ENT_F_IO_VBI		(MEDIA_ENT_F_BASE + 0x01002)
+#define MEDIA_ENT_F_IO_SWRADIO		(MEDIA_ENT_F_BASE + 0x01003)
 
 /*
  * Connectors
  */
 /* It is a responsibility of the entity drivers to add connectors and links */
-#define MEDIA_ENT_F_CONN_RF		(MEDIA_ENT_F_BASE + 21)
-#define MEDIA_ENT_F_CONN_SVIDEO		(MEDIA_ENT_F_BASE + 22)
-#define MEDIA_ENT_F_CONN_COMPOSITE	(MEDIA_ENT_F_BASE + 23)
-/* For internal test signal generators and other debug connectors */
-#define MEDIA_ENT_F_CONN_TEST		(MEDIA_ENT_F_BASE + 24)
+#ifdef __KERNEL__
+	/*
+	 * For now, it should not be used in userspace, as some
+	 * definitions may change
+	 */
 
-/*
- * I/O entities
- */
-#define MEDIA_ENT_F_IO_DTV  		(MEDIA_ENT_F_BASE + 31)
-#define MEDIA_ENT_F_IO_VBI  		(MEDIA_ENT_F_BASE + 32)
-#define MEDIA_ENT_F_IO_SWRADIO		(MEDIA_ENT_F_BASE + 33)
+#define MEDIA_ENT_F_CONN_RF		(MEDIA_ENT_F_BASE + 0x30001)
+#define MEDIA_ENT_F_CONN_SVIDEO		(MEDIA_ENT_F_BASE + 0x30002)
+#define MEDIA_ENT_F_CONN_COMPOSITE	(MEDIA_ENT_F_BASE + 0x30003)
+
+#endif
 
 /*
  * Don't touch on those. The ranges MEDIA_ENT_F_OLD_BASE and
@@ -114,7 +120,7 @@
 
 #define MEDIA_ENT_F_V4L2_SUBDEV_UNKNOWN	MEDIA_ENT_F_OLD_SUBDEV_BASE
 
-#ifndef __KERNEL__
+#if !defined(__KERNEL__) || defined(__NEED_MEDIA_LEGACY_API)
 
 /*
  * Legacy symbols used to avoid userspace compilation breakages
@@ -127,6 +133,10 @@
 #define MEDIA_ENT_TYPE_MASK		0x00ff0000
 #define MEDIA_ENT_SUBTYPE_MASK		0x0000ffff
 
+/* End of the old subdev reserved numberspace */
+#define MEDIA_ENT_T_DEVNODE_UNKNOWN	(MEDIA_ENT_T_DEVNODE | \
+					 MEDIA_ENT_SUBTYPE_MASK)
+
 #define MEDIA_ENT_T_DEVNODE		MEDIA_ENT_F_OLD_BASE
 #define MEDIA_ENT_T_DEVNODE_V4L		MEDIA_ENT_F_IO_V4L
 #define MEDIA_ENT_T_DEVNODE_FB		(MEDIA_ENT_T_DEVNODE + 2)
@@ -291,14 +301,14 @@
 	__u32 id;
 	char name[64];		/* FIXME: move to a property? (RFC says so) */
 	__u32 function;		/* Main function of the entity */
-	__u16 reserved[12];
-};
+	__u32 reserved[6];
+} __attribute__ ((packed));
 
 /* Should match the specific fields at media_intf_devnode */
 struct media_v2_intf_devnode {
 	__u32 major;
 	__u32 minor;
-};
+} __attribute__ ((packed));
 
 struct media_v2_interface {
 	__u32 id;
@@ -310,22 +320,22 @@
 		struct media_v2_intf_devnode devnode;
 		__u32 raw[16];
 	};
-};
+} __attribute__ ((packed));
 
 struct media_v2_pad {
 	__u32 id;
 	__u32 entity_id;
 	__u32 flags;
-	__u16 reserved[9];
-};
+	__u32 reserved[5];
+} __attribute__ ((packed));
 
 struct media_v2_link {
 	__u32 id;
 	__u32 source_id;
 	__u32 sink_id;
 	__u32 flags;
-	__u32 reserved[5];
-};
+	__u32 reserved[6];
+} __attribute__ ((packed));
 
 struct media_v2_topology {
 	__u64 topology_version;
@@ -345,7 +355,7 @@
 	__u32 num_links;
 	__u32 reserved4;
 	__u64 ptr_links;
-};
+} __attribute__ ((packed));
 
 static inline void __user *media_get_uptr(__u64 arg)
 {

diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h
index 5b4a4be..cc68b921 100644
--- a/include/uapi/linux/ndctl.h
+++ b/include/uapi/linux/ndctl.h

@@ -66,14 +66,18 @@
 	__u64 length;
 	__u32 status;
 	__u32 max_ars_out;
+	__u32 clear_err_unit;
+	__u32 reserved;
 } __packed;
 
 struct nd_cmd_ars_start {
 	__u64 address;
 	__u64 length;
 	__u16 type;
-	__u8 reserved[6];
+	__u8 flags;
+	__u8 reserved[5];
 	__u32 status;
+	__u32 scrub_time;
 } __packed;
 
 struct nd_cmd_ars_status {
@@ -81,11 +85,14 @@
 	__u32 out_length;
 	__u64 address;
 	__u64 length;
+	__u64 restart_address;
+	__u64 restart_length;
 	__u16 type;
+	__u16 flags;
 	__u32 num_records;
 	struct nd_ars_record {
 		__u32 handle;
-		__u32 flags;
+		__u32 reserved;
 		__u64 err_address;
 		__u64 length;
 	} __packed records[0];

diff --git a/include/uapi/linux/ptp_clock.h b/include/uapi/linux/ptp_clock.h
index f0b7bfe..ac6dded 100644
--- a/include/uapi/linux/ptp_clock.h
+++ b/include/uapi/linux/ptp_clock.h

@@ -51,7 +51,9 @@
 	int n_per_out; /* Number of programmable periodic signals. */
 	int pps;       /* Whether the clock supports a PPS callback. */
 	int n_pins;    /* Number of input/output pins. */
-	int rsv[14];   /* Reserved for future use. */
+	/* Whether the clock supports precise system-device cross timestamps */
+	int cross_timestamping;
+	int rsv[13];   /* Reserved for future use. */
 };
 
 struct ptp_extts_request {
@@ -81,6 +83,13 @@
 	struct ptp_clock_time ts[2 * PTP_MAX_SAMPLES + 1];
 };
 
+struct ptp_sys_offset_precise {
+	struct ptp_clock_time device;
+	struct ptp_clock_time sys_realtime;
+	struct ptp_clock_time sys_monoraw;
+	unsigned int rsv[4];    /* Reserved for future use. */
+};
+
 enum ptp_pin_function {
 	PTP_PF_NONE,
 	PTP_PF_EXTTS,
@@ -124,6 +133,8 @@
 #define PTP_SYS_OFFSET     _IOW(PTP_CLK_MAGIC, 5, struct ptp_sys_offset)
 #define PTP_PIN_GETFUNC    _IOWR(PTP_CLK_MAGIC, 6, struct ptp_pin_desc)
 #define PTP_PIN_SETFUNC    _IOW(PTP_CLK_MAGIC, 7, struct ptp_pin_desc)
+#define PTP_SYS_OFFSET_PRECISE \
+	_IOWR(PTP_CLK_MAGIC, 8, struct ptp_sys_offset_precise)
 
 struct ptp_extts_event {
 	struct ptp_clock_time t; /* Time event occured. */

diff --git a/init/main.c b/init/main.c
index 58c9e37..8dc93df 100644
--- a/init/main.c
+++ b/init/main.c

@@ -93,9 +93,6 @@
 extern void init_IRQ(void);
 extern void fork_init(void);
 extern void radix_tree_init(void);
-#ifndef CONFIG_DEBUG_RODATA
-static inline void mark_rodata_ro(void) { }
-#endif
 
 /*
  * Debug helper: via this flag we know that we are in 'early bootup code'
@@ -388,7 +385,6 @@
 	int pid;
 
 	rcu_scheduler_starting();
-	smpboot_thread_init();
 	/*
 	 * We need to spawn init first so that it obtains pid 1, however
 	 * the init task will end up wanting to create kthreads, which, if
@@ -452,20 +448,6 @@
 	done = 1;
 }
 
-/*
- *	Activate the first processor.
- */
-
-static void __init boot_cpu_init(void)
-{
-	int cpu = smp_processor_id();
-	/* Mark the boot cpu "present", "online" etc for SMP and UP case */
-	set_cpu_online(cpu, true);
-	set_cpu_active(cpu, true);
-	set_cpu_present(cpu, true);
-	set_cpu_possible(cpu, true);
-}
-
 void __init __weak smp_setup_processor_id(void)
 {
 }
@@ -499,11 +481,6 @@
 	char *command_line;
 	char *after_dashes;
 
-	/*
-	 * Need to run as early as possible, to initialize the
-	 * lockdep hash:
-	 */
-	lockdep_init();
 	set_task_stack_end_magic(&init_task);
 	smp_setup_processor_id();
 	debug_objects_early_init();
@@ -530,6 +507,7 @@
 	setup_command_line(command_line);
 	setup_nr_cpu_ids();
 	setup_per_cpu_areas();
+	boot_cpu_state_init();
 	smp_prepare_boot_cpu();	/* arch-specific boot-cpu hooks */
 
 	build_all_zonelists(NULL, NULL);
@@ -929,6 +907,28 @@
 
 static noinline void __init kernel_init_freeable(void);
 
+#ifdef CONFIG_DEBUG_RODATA
+static bool rodata_enabled = true;
+static int __init set_debug_rodata(char *str)
+{
+	return strtobool(str, &rodata_enabled);
+}
+__setup("rodata=", set_debug_rodata);
+
+static void mark_readonly(void)
+{
+	if (rodata_enabled)
+		mark_rodata_ro();
+	else
+		pr_info("Kernel memory protection disabled.\n");
+}
+#else
+static inline void mark_readonly(void)
+{
+	pr_warn("This architecture does not have kernel memory protection.\n");
+}
+#endif
+
 static int __ref kernel_init(void *unused)
 {
 	int ret;
@@ -937,7 +937,7 @@
 	/* need to finish all async __init code before freeing the memory */
 	async_synchronize_full();
 	free_initmem();
-	mark_rodata_ro();
+	mark_readonly();
 	system_state = SYSTEM_RUNNING;
 	numa_default_policy();
 

diff --git a/kernel/cpu.c b/kernel/cpu.c
index 5b9d396..6ea42e8 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c

@@ -22,13 +22,88 @@
 #include <linux/lockdep.h>
 #include <linux/tick.h>
 #include <linux/irq.h>
+#include <linux/smpboot.h>
+
 #include <trace/events/power.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/cpuhp.h>
 
 #include "smpboot.h"
 
+/**
+ * cpuhp_cpu_state - Per cpu hotplug state storage
+ * @state:	The current cpu state
+ * @target:	The target state
+ * @thread:	Pointer to the hotplug thread
+ * @should_run:	Thread should execute
+ * @cb_stat:	The state for a single callback (install/uninstall)
+ * @cb:		Single callback function (install/uninstall)
+ * @result:	Result of the operation
+ * @done:	Signal completion to the issuer of the task
+ */
+struct cpuhp_cpu_state {
+	enum cpuhp_state	state;
+	enum cpuhp_state	target;
+#ifdef CONFIG_SMP
+	struct task_struct	*thread;
+	bool			should_run;
+	enum cpuhp_state	cb_state;
+	int			(*cb)(unsigned int cpu);
+	int			result;
+	struct completion	done;
+#endif
+};
+
+static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state);
+
+/**
+ * cpuhp_step - Hotplug state machine step
+ * @name:	Name of the step
+ * @startup:	Startup function of the step
+ * @teardown:	Teardown function of the step
+ * @skip_onerr:	Do not invoke the functions on error rollback
+ *		Will go away once the notifiers	are gone
+ * @cant_stop:	Bringup/teardown can't be stopped at this step
+ */
+struct cpuhp_step {
+	const char	*name;
+	int		(*startup)(unsigned int cpu);
+	int		(*teardown)(unsigned int cpu);
+	bool		skip_onerr;
+	bool		cant_stop;
+};
+
+static DEFINE_MUTEX(cpuhp_state_mutex);
+static struct cpuhp_step cpuhp_bp_states[];
+static struct cpuhp_step cpuhp_ap_states[];
+
+/**
+ * cpuhp_invoke_callback _ Invoke the callbacks for a given state
+ * @cpu:	The cpu for which the callback should be invoked
+ * @step:	The step in the state machine
+ * @cb:		The callback function to invoke
+ *
+ * Called from cpu hotplug and from the state register machinery
+ */
+static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state step,
+				 int (*cb)(unsigned int))
+{
+	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+	int ret = 0;
+
+	if (cb) {
+		trace_cpuhp_enter(cpu, st->target, step, cb);
+		ret = cb(cpu);
+		trace_cpuhp_exit(cpu, st->state, step, ret);
+	}
+	return ret;
+}
+
 #ifdef CONFIG_SMP
 /* Serializes the updates to cpu_online_mask, cpu_present_mask */
 static DEFINE_MUTEX(cpu_add_remove_lock);
+bool cpuhp_tasks_frozen;
+EXPORT_SYMBOL_GPL(cpuhp_tasks_frozen);
 
 /*
  * The following two APIs (cpu_maps_update_begin/done) must be used when
@@ -207,31 +282,281 @@
 	return raw_notifier_chain_register(&cpu_chain, nb);
 }
 
-static int __cpu_notify(unsigned long val, void *v, int nr_to_call,
+static int __cpu_notify(unsigned long val, unsigned int cpu, int nr_to_call,
 			int *nr_calls)
 {
+	unsigned long mod = cpuhp_tasks_frozen ? CPU_TASKS_FROZEN : 0;
+	void *hcpu = (void *)(long)cpu;
+
 	int ret;
 
-	ret = __raw_notifier_call_chain(&cpu_chain, val, v, nr_to_call,
+	ret = __raw_notifier_call_chain(&cpu_chain, val | mod, hcpu, nr_to_call,
 					nr_calls);
 
 	return notifier_to_errno(ret);
 }
 
-static int cpu_notify(unsigned long val, void *v)
+static int cpu_notify(unsigned long val, unsigned int cpu)
 {
-	return __cpu_notify(val, v, -1, NULL);
+	return __cpu_notify(val, cpu, -1, NULL);
+}
+
+/* Notifier wrappers for transitioning to state machine */
+static int notify_prepare(unsigned int cpu)
+{
+	int nr_calls = 0;
+	int ret;
+
+	ret = __cpu_notify(CPU_UP_PREPARE, cpu, -1, &nr_calls);
+	if (ret) {
+		nr_calls--;
+		printk(KERN_WARNING "%s: attempt to bring up CPU %u failed\n",
+				__func__, cpu);
+		__cpu_notify(CPU_UP_CANCELED, cpu, nr_calls, NULL);
+	}
+	return ret;
+}
+
+static int notify_online(unsigned int cpu)
+{
+	cpu_notify(CPU_ONLINE, cpu);
+	return 0;
+}
+
+static int notify_starting(unsigned int cpu)
+{
+	cpu_notify(CPU_STARTING, cpu);
+	return 0;
+}
+
+static int bringup_wait_for_ap(unsigned int cpu)
+{
+	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+
+	wait_for_completion(&st->done);
+	return st->result;
+}
+
+static int bringup_cpu(unsigned int cpu)
+{
+	struct task_struct *idle = idle_thread_get(cpu);
+	int ret;
+
+	/* Arch-specific enabling code. */
+	ret = __cpu_up(cpu, idle);
+	if (ret) {
+		cpu_notify(CPU_UP_CANCELED, cpu);
+		return ret;
+	}
+	ret = bringup_wait_for_ap(cpu);
+	BUG_ON(!cpu_online(cpu));
+	return ret;
+}
+
+/*
+ * Hotplug state machine related functions
+ */
+static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st,
+			  struct cpuhp_step *steps)
+{
+	for (st->state++; st->state < st->target; st->state++) {
+		struct cpuhp_step *step = steps + st->state;
+
+		if (!step->skip_onerr)
+			cpuhp_invoke_callback(cpu, st->state, step->startup);
+	}
+}
+
+static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
+				struct cpuhp_step *steps, enum cpuhp_state target)
+{
+	enum cpuhp_state prev_state = st->state;
+	int ret = 0;
+
+	for (; st->state > target; st->state--) {
+		struct cpuhp_step *step = steps + st->state;
+
+		ret = cpuhp_invoke_callback(cpu, st->state, step->teardown);
+		if (ret) {
+			st->target = prev_state;
+			undo_cpu_down(cpu, st, steps);
+			break;
+		}
+	}
+	return ret;
+}
+
+static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st,
+			struct cpuhp_step *steps)
+{
+	for (st->state--; st->state > st->target; st->state--) {
+		struct cpuhp_step *step = steps + st->state;
+
+		if (!step->skip_onerr)
+			cpuhp_invoke_callback(cpu, st->state, step->teardown);
+	}
+}
+
+static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
+			      struct cpuhp_step *steps, enum cpuhp_state target)
+{
+	enum cpuhp_state prev_state = st->state;
+	int ret = 0;
+
+	while (st->state < target) {
+		struct cpuhp_step *step;
+
+		st->state++;
+		step = steps + st->state;
+		ret = cpuhp_invoke_callback(cpu, st->state, step->startup);
+		if (ret) {
+			st->target = prev_state;
+			undo_cpu_up(cpu, st, steps);
+			break;
+		}
+	}
+	return ret;
+}
+
+/*
+ * The cpu hotplug threads manage the bringup and teardown of the cpus
+ */
+static void cpuhp_create(unsigned int cpu)
+{
+	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+
+	init_completion(&st->done);
+}
+
+static int cpuhp_should_run(unsigned int cpu)
+{
+	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
+
+	return st->should_run;
+}
+
+/* Execute the teardown callbacks. Used to be CPU_DOWN_PREPARE */
+static int cpuhp_ap_offline(unsigned int cpu, struct cpuhp_cpu_state *st)
+{
+	enum cpuhp_state target = max((int)st->target, CPUHP_TEARDOWN_CPU);
+
+	return cpuhp_down_callbacks(cpu, st, cpuhp_ap_states, target);
+}
+
+/* Execute the online startup callbacks. Used to be CPU_ONLINE */
+static int cpuhp_ap_online(unsigned int cpu, struct cpuhp_cpu_state *st)
+{
+	return cpuhp_up_callbacks(cpu, st, cpuhp_ap_states, st->target);
+}
+
+/*
+ * Execute teardown/startup callbacks on the plugged cpu. Also used to invoke
+ * callbacks when a state gets [un]installed at runtime.
+ */
+static void cpuhp_thread_fun(unsigned int cpu)
+{
+	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
+	int ret = 0;
+
+	/*
+	 * Paired with the mb() in cpuhp_kick_ap_work and
+	 * cpuhp_invoke_ap_callback, so the work set is consistent visible.
+	 */
+	smp_mb();
+	if (!st->should_run)
+		return;
+
+	st->should_run = false;
+
+	/* Single callback invocation for [un]install ? */
+	if (st->cb) {
+		if (st->cb_state < CPUHP_AP_ONLINE) {
+			local_irq_disable();
+			ret = cpuhp_invoke_callback(cpu, st->cb_state, st->cb);
+			local_irq_enable();
+		} else {
+			ret = cpuhp_invoke_callback(cpu, st->cb_state, st->cb);
+		}
+	} else {
+		/* Cannot happen .... */
+		BUG_ON(st->state < CPUHP_AP_ONLINE_IDLE);
+
+		/* Regular hotplug work */
+		if (st->state < st->target)
+			ret = cpuhp_ap_online(cpu, st);
+		else if (st->state > st->target)
+			ret = cpuhp_ap_offline(cpu, st);
+	}
+	st->result = ret;
+	complete(&st->done);
+}
+
+/* Invoke a single callback on a remote cpu */
+static int cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state,
+				    int (*cb)(unsigned int))
+{
+	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+
+	if (!cpu_online(cpu))
+		return 0;
+
+	st->cb_state = state;
+	st->cb = cb;
+	/*
+	 * Make sure the above stores are visible before should_run becomes
+	 * true. Paired with the mb() above in cpuhp_thread_fun()
+	 */
+	smp_mb();
+	st->should_run = true;
+	wake_up_process(st->thread);
+	wait_for_completion(&st->done);
+	return st->result;
+}
+
+/* Regular hotplug invocation of the AP hotplug thread */
+static void __cpuhp_kick_ap_work(struct cpuhp_cpu_state *st)
+{
+	st->result = 0;
+	st->cb = NULL;
+	/*
+	 * Make sure the above stores are visible before should_run becomes
+	 * true. Paired with the mb() above in cpuhp_thread_fun()
+	 */
+	smp_mb();
+	st->should_run = true;
+	wake_up_process(st->thread);
+}
+
+static int cpuhp_kick_ap_work(unsigned int cpu)
+{
+	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+	enum cpuhp_state state = st->state;
+
+	trace_cpuhp_enter(cpu, st->target, state, cpuhp_kick_ap_work);
+	__cpuhp_kick_ap_work(st);
+	wait_for_completion(&st->done);
+	trace_cpuhp_exit(cpu, st->state, state, st->result);
+	return st->result;
+}
+
+static struct smp_hotplug_thread cpuhp_threads = {
+	.store			= &cpuhp_state.thread,
+	.create			= &cpuhp_create,
+	.thread_should_run	= cpuhp_should_run,
+	.thread_fn		= cpuhp_thread_fun,
+	.thread_comm		= "cpuhp/%u",
+	.selfparking		= true,
+};
+
+void __init cpuhp_threads_init(void)
+{
+	BUG_ON(smpboot_register_percpu_thread(&cpuhp_threads));
+	kthread_unpark(this_cpu_read(cpuhp_state.thread));
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
-
-static void cpu_notify_nofail(unsigned long val, void *v)
-{
-	BUG_ON(cpu_notify(val, v));
-}
 EXPORT_SYMBOL(register_cpu_notifier);
 EXPORT_SYMBOL(__register_cpu_notifier);
-
 void unregister_cpu_notifier(struct notifier_block *nb)
 {
 	cpu_maps_update_begin();
@@ -311,57 +636,60 @@
 	read_unlock(&tasklist_lock);
 }
 
-struct take_cpu_down_param {
-	unsigned long mod;
-	void *hcpu;
-};
+static void cpu_notify_nofail(unsigned long val, unsigned int cpu)
+{
+	BUG_ON(cpu_notify(val, cpu));
+}
+
+static int notify_down_prepare(unsigned int cpu)
+{
+	int err, nr_calls = 0;
+
+	err = __cpu_notify(CPU_DOWN_PREPARE, cpu, -1, &nr_calls);
+	if (err) {
+		nr_calls--;
+		__cpu_notify(CPU_DOWN_FAILED, cpu, nr_calls, NULL);
+		pr_warn("%s: attempt to take down CPU %u failed\n",
+				__func__, cpu);
+	}
+	return err;
+}
+
+static int notify_dying(unsigned int cpu)
+{
+	cpu_notify(CPU_DYING, cpu);
+	return 0;
+}
 
 /* Take this CPU down. */
 static int take_cpu_down(void *_param)
 {
-	struct take_cpu_down_param *param = _param;
-	int err;
+	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
+	enum cpuhp_state target = max((int)st->target, CPUHP_AP_OFFLINE);
+	int err, cpu = smp_processor_id();
 
 	/* Ensure this CPU doesn't handle any more interrupts. */
 	err = __cpu_disable();
 	if (err < 0)
 		return err;
 
-	cpu_notify(CPU_DYING | param->mod, param->hcpu);
+	/* Invoke the former CPU_DYING callbacks */
+	for (; st->state > target; st->state--) {
+		struct cpuhp_step *step = cpuhp_ap_states + st->state;
+
+		cpuhp_invoke_callback(cpu, st->state, step->teardown);
+	}
 	/* Give up timekeeping duties */
 	tick_handover_do_timer();
 	/* Park the stopper thread */
-	stop_machine_park((long)param->hcpu);
+	stop_machine_park(cpu);
 	return 0;
 }
 
-/* Requires cpu_add_remove_lock to be held */
-static int _cpu_down(unsigned int cpu, int tasks_frozen)
+static int takedown_cpu(unsigned int cpu)
 {
-	int err, nr_calls = 0;
-	void *hcpu = (void *)(long)cpu;
-	unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
-	struct take_cpu_down_param tcd_param = {
-		.mod = mod,
-		.hcpu = hcpu,
-	};
-
-	if (num_online_cpus() == 1)
-		return -EBUSY;
-
-	if (!cpu_online(cpu))
-		return -EINVAL;
-
-	cpu_hotplug_begin();
-
-	err = __cpu_notify(CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls);
-	if (err) {
-		nr_calls--;
-		__cpu_notify(CPU_DOWN_FAILED | mod, hcpu, nr_calls, NULL);
-		pr_warn("%s: attempt to take down CPU %u failed\n",
-			__func__, cpu);
-		goto out_release;
-	}
+	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+	int err;
 
 	/*
 	 * By now we've cleared cpu_active_mask, wait for all preempt-disabled
@@ -378,6 +706,8 @@
 	else
 		synchronize_rcu();
 
+	/* Park the smpboot threads */
+	kthread_park(per_cpu_ptr(&cpuhp_state, cpu)->thread);
 	smpboot_park_threads(cpu);
 
 	/*
@@ -389,12 +719,12 @@
 	/*
 	 * So now all preempt/rcu users must observe !cpu_active().
 	 */
-	err = stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
+	err = stop_machine(take_cpu_down, NULL, cpumask_of(cpu));
 	if (err) {
 		/* CPU didn't die: tell everyone.  Can't complain. */
-		cpu_notify_nofail(CPU_DOWN_FAILED | mod, hcpu);
+		cpu_notify_nofail(CPU_DOWN_FAILED, cpu);
 		irq_unlock_sparse();
-		goto out_release;
+		return err;
 	}
 	BUG_ON(cpu_online(cpu));
 
@@ -405,10 +735,8 @@
 	 *
 	 * Wait for the stop thread to go away.
 	 */
-	while (!per_cpu(cpu_dead_idle, cpu))
-		cpu_relax();
-	smp_mb(); /* Read from cpu_dead_idle before __cpu_die(). */
-	per_cpu(cpu_dead_idle, cpu) = false;
+	wait_for_completion(&st->done);
+	BUG_ON(st->state != CPUHP_AP_IDLE_DEAD);
 
 	/* Interrupts are moved away from the dying cpu, reenable alloc/free */
 	irq_unlock_sparse();
@@ -417,20 +745,104 @@
 	/* This actually kills the CPU. */
 	__cpu_die(cpu);
 
-	/* CPU is completely dead: tell everyone.  Too late to complain. */
 	tick_cleanup_dead_cpu(cpu);
-	cpu_notify_nofail(CPU_DEAD | mod, hcpu);
-
-	check_for_tasks(cpu);
-
-out_release:
-	cpu_hotplug_done();
-	if (!err)
-		cpu_notify_nofail(CPU_POST_DEAD | mod, hcpu);
-	return err;
+	return 0;
 }
 
-int cpu_down(unsigned int cpu)
+static int notify_dead(unsigned int cpu)
+{
+	cpu_notify_nofail(CPU_DEAD, cpu);
+	check_for_tasks(cpu);
+	return 0;
+}
+
+static void cpuhp_complete_idle_dead(void *arg)
+{
+	struct cpuhp_cpu_state *st = arg;
+
+	complete(&st->done);
+}
+
+void cpuhp_report_idle_dead(void)
+{
+	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
+
+	BUG_ON(st->state != CPUHP_AP_OFFLINE);
+	rcu_report_dead(smp_processor_id());
+	st->state = CPUHP_AP_IDLE_DEAD;
+	/*
+	 * We cannot call complete after rcu_report_dead() so we delegate it
+	 * to an online cpu.
+	 */
+	smp_call_function_single(cpumask_first(cpu_online_mask),
+				 cpuhp_complete_idle_dead, st, 0);
+}
+
+#else
+#define notify_down_prepare	NULL
+#define takedown_cpu		NULL
+#define notify_dead		NULL
+#define notify_dying		NULL
+#endif
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+/* Requires cpu_add_remove_lock to be held */
+static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
+			   enum cpuhp_state target)
+{
+	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+	int prev_state, ret = 0;
+	bool hasdied = false;
+
+	if (num_online_cpus() == 1)
+		return -EBUSY;
+
+	if (!cpu_present(cpu))
+		return -EINVAL;
+
+	cpu_hotplug_begin();
+
+	cpuhp_tasks_frozen = tasks_frozen;
+
+	prev_state = st->state;
+	st->target = target;
+	/*
+	 * If the current CPU state is in the range of the AP hotplug thread,
+	 * then we need to kick the thread.
+	 */
+	if (st->state > CPUHP_TEARDOWN_CPU) {
+		ret = cpuhp_kick_ap_work(cpu);
+		/*
+		 * The AP side has done the error rollback already. Just
+		 * return the error code..
+		 */
+		if (ret)
+			goto out;
+
+		/*
+		 * We might have stopped still in the range of the AP hotplug
+		 * thread. Nothing to do anymore.
+		 */
+		if (st->state > CPUHP_TEARDOWN_CPU)
+			goto out;
+	}
+	/*
+	 * The AP brought itself down to CPUHP_TEARDOWN_CPU. So we need
+	 * to do the further cleanups.
+	 */
+	ret = cpuhp_down_callbacks(cpu, st, cpuhp_bp_states, target);
+
+	hasdied = prev_state != st->state && st->state == CPUHP_OFFLINE;
+out:
+	cpu_hotplug_done();
+	/* This post dead nonsense must die */
+	if (!ret && hasdied)
+		cpu_notify_nofail(CPU_POST_DEAD, cpu);
+	return ret;
+}
+
+static int do_cpu_down(unsigned int cpu, enum cpuhp_state target)
 {
 	int err;
 
@@ -441,100 +853,131 @@
 		goto out;
 	}
 
-	err = _cpu_down(cpu, 0);
+	err = _cpu_down(cpu, 0, target);
 
 out:
 	cpu_maps_update_done();
 	return err;
 }
+int cpu_down(unsigned int cpu)
+{
+	return do_cpu_down(cpu, CPUHP_OFFLINE);
+}
 EXPORT_SYMBOL(cpu_down);
 #endif /*CONFIG_HOTPLUG_CPU*/
 
-/*
- * Unpark per-CPU smpboot kthreads at CPU-online time.
+/**
+ * notify_cpu_starting(cpu) - call the CPU_STARTING notifiers
+ * @cpu: cpu that just started
+ *
+ * This function calls the cpu_chain notifiers with CPU_STARTING.
+ * It must be called by the arch code on the new cpu, before the new cpu
+ * enables interrupts and before the "boot" cpu returns from __cpu_up().
  */
-static int smpboot_thread_call(struct notifier_block *nfb,
-			       unsigned long action, void *hcpu)
+void notify_cpu_starting(unsigned int cpu)
 {
-	int cpu = (long)hcpu;
+	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+	enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE);
 
-	switch (action & ~CPU_TASKS_FROZEN) {
+	while (st->state < target) {
+		struct cpuhp_step *step;
 
-	case CPU_DOWN_FAILED:
-	case CPU_ONLINE:
-		smpboot_unpark_threads(cpu);
-		break;
-
-	default:
-		break;
+		st->state++;
+		step = cpuhp_ap_states + st->state;
+		cpuhp_invoke_callback(cpu, st->state, step->startup);
 	}
-
-	return NOTIFY_OK;
 }
 
-static struct notifier_block smpboot_thread_notifier = {
-	.notifier_call = smpboot_thread_call,
-	.priority = CPU_PRI_SMPBOOT,
-};
-
-void smpboot_thread_init(void)
+/*
+ * Called from the idle task. We need to set active here, so we can kick off
+ * the stopper thread and unpark the smpboot threads. If the target state is
+ * beyond CPUHP_AP_ONLINE_IDLE we kick cpuhp thread and let it bring up the
+ * cpu further.
+ */
+void cpuhp_online_idle(enum cpuhp_state state)
 {
-	register_cpu_notifier(&smpboot_thread_notifier);
+	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
+	unsigned int cpu = smp_processor_id();
+
+	/* Happens for the boot cpu */
+	if (state != CPUHP_AP_ONLINE_IDLE)
+		return;
+
+	st->state = CPUHP_AP_ONLINE_IDLE;
+
+	/* The cpu is marked online, set it active now */
+	set_cpu_active(cpu, true);
+	/* Unpark the stopper thread and the hotplug thread of this cpu */
+	stop_machine_unpark(cpu);
+	kthread_unpark(st->thread);
+
+	/* Should we go further up ? */
+	if (st->target > CPUHP_AP_ONLINE_IDLE)
+		__cpuhp_kick_ap_work(st);
+	else
+		complete(&st->done);
 }
 
 /* Requires cpu_add_remove_lock to be held */
-static int _cpu_up(unsigned int cpu, int tasks_frozen)
+static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
 {
-	int ret, nr_calls = 0;
-	void *hcpu = (void *)(long)cpu;
-	unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
+	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
 	struct task_struct *idle;
+	int ret = 0;
 
 	cpu_hotplug_begin();
 
-	if (cpu_online(cpu) || !cpu_present(cpu)) {
+	if (!cpu_present(cpu)) {
 		ret = -EINVAL;
 		goto out;
 	}
 
-	idle = idle_thread_get(cpu);
-	if (IS_ERR(idle)) {
-		ret = PTR_ERR(idle);
-		goto out;
-	}
-
-	ret = smpboot_create_threads(cpu);
-	if (ret)
+	/*
+	 * The caller of do_cpu_up might have raced with another
+	 * caller. Ignore it for now.
+	 */
+	if (st->state >= target)
 		goto out;
 
-	ret = __cpu_notify(CPU_UP_PREPARE | mod, hcpu, -1, &nr_calls);
-	if (ret) {
-		nr_calls--;
-		pr_warn("%s: attempt to bring up CPU %u failed\n",
-			__func__, cpu);
-		goto out_notify;
+	if (st->state == CPUHP_OFFLINE) {
+		/* Let it fail before we try to bring the cpu up */
+		idle = idle_thread_get(cpu);
+		if (IS_ERR(idle)) {
+			ret = PTR_ERR(idle);
+			goto out;
+		}
 	}
 
-	/* Arch-specific enabling code. */
-	ret = __cpu_up(cpu, idle);
+	cpuhp_tasks_frozen = tasks_frozen;
 
-	if (ret != 0)
-		goto out_notify;
-	BUG_ON(!cpu_online(cpu));
+	st->target = target;
+	/*
+	 * If the current CPU state is in the range of the AP hotplug thread,
+	 * then we need to kick the thread once more.
+	 */
+	if (st->state > CPUHP_BRINGUP_CPU) {
+		ret = cpuhp_kick_ap_work(cpu);
+		/*
+		 * The AP side has done the error rollback already. Just
+		 * return the error code..
+		 */
+		if (ret)
+			goto out;
+	}
 
-	/* Now call notifier in preparation. */
-	cpu_notify(CPU_ONLINE | mod, hcpu);
-
-out_notify:
-	if (ret != 0)
-		__cpu_notify(CPU_UP_CANCELED | mod, hcpu, nr_calls, NULL);
+	/*
+	 * Try to reach the target state. We max out on the BP at
+	 * CPUHP_BRINGUP_CPU. After that the AP hotplug thread is
+	 * responsible for bringing it up to the target state.
+	 */
+	target = min((int)target, CPUHP_BRINGUP_CPU);
+	ret = cpuhp_up_callbacks(cpu, st, cpuhp_bp_states, target);
 out:
 	cpu_hotplug_done();
-
 	return ret;
 }
 
-int cpu_up(unsigned int cpu)
+static int do_cpu_up(unsigned int cpu, enum cpuhp_state target)
 {
 	int err = 0;
 
@@ -558,12 +1001,16 @@
 		goto out;
 	}
 
-	err = _cpu_up(cpu, 0);
-
+	err = _cpu_up(cpu, 0, target);
 out:
 	cpu_maps_update_done();
 	return err;
 }
+
+int cpu_up(unsigned int cpu)
+{
+	return do_cpu_up(cpu, CPUHP_ONLINE);
+}
 EXPORT_SYMBOL_GPL(cpu_up);
 
 #ifdef CONFIG_PM_SLEEP_SMP
@@ -586,7 +1033,7 @@
 		if (cpu == first_cpu)
 			continue;
 		trace_suspend_resume(TPS("CPU_OFF"), cpu, true);
-		error = _cpu_down(cpu, 1);
+		error = _cpu_down(cpu, 1, CPUHP_OFFLINE);
 		trace_suspend_resume(TPS("CPU_OFF"), cpu, false);
 		if (!error)
 			cpumask_set_cpu(cpu, frozen_cpus);
@@ -636,7 +1083,7 @@
 
 	for_each_cpu(cpu, frozen_cpus) {
 		trace_suspend_resume(TPS("CPU_ON"), cpu, true);
-		error = _cpu_up(cpu, 1);
+		error = _cpu_up(cpu, 1, CPUHP_ONLINE);
 		trace_suspend_resume(TPS("CPU_ON"), cpu, false);
 		if (!error) {
 			pr_info("CPU%d is up\n", cpu);
@@ -709,26 +1156,463 @@
 
 #endif /* CONFIG_PM_SLEEP_SMP */
 
-/**
- * notify_cpu_starting(cpu) - call the CPU_STARTING notifiers
- * @cpu: cpu that just started
- *
- * This function calls the cpu_chain notifiers with CPU_STARTING.
- * It must be called by the arch code on the new cpu, before the new cpu
- * enables interrupts and before the "boot" cpu returns from __cpu_up().
- */
-void notify_cpu_starting(unsigned int cpu)
-{
-	unsigned long val = CPU_STARTING;
+#endif /* CONFIG_SMP */
 
-#ifdef CONFIG_PM_SLEEP_SMP
-	if (frozen_cpus != NULL && cpumask_test_cpu(cpu, frozen_cpus))
-		val = CPU_STARTING_FROZEN;
-#endif /* CONFIG_PM_SLEEP_SMP */
-	cpu_notify(val, (void *)(long)cpu);
+/* Boot processor state steps */
+static struct cpuhp_step cpuhp_bp_states[] = {
+	[CPUHP_OFFLINE] = {
+		.name			= "offline",
+		.startup		= NULL,
+		.teardown		= NULL,
+	},
+#ifdef CONFIG_SMP
+	[CPUHP_CREATE_THREADS]= {
+		.name			= "threads:create",
+		.startup		= smpboot_create_threads,
+		.teardown		= NULL,
+		.cant_stop		= true,
+	},
+	/*
+	 * Preparatory and dead notifiers. Will be replaced once the notifiers
+	 * are converted to states.
+	 */
+	[CPUHP_NOTIFY_PREPARE] = {
+		.name			= "notify:prepare",
+		.startup		= notify_prepare,
+		.teardown		= notify_dead,
+		.skip_onerr		= true,
+		.cant_stop		= true,
+	},
+	/* Kicks the plugged cpu into life */
+	[CPUHP_BRINGUP_CPU] = {
+		.name			= "cpu:bringup",
+		.startup		= bringup_cpu,
+		.teardown		= NULL,
+		.cant_stop		= true,
+	},
+	/*
+	 * Handled on controll processor until the plugged processor manages
+	 * this itself.
+	 */
+	[CPUHP_TEARDOWN_CPU] = {
+		.name			= "cpu:teardown",
+		.startup		= NULL,
+		.teardown		= takedown_cpu,
+		.cant_stop		= true,
+	},
+#endif
+};
+
+/* Application processor state steps */
+static struct cpuhp_step cpuhp_ap_states[] = {
+#ifdef CONFIG_SMP
+	/* Final state before CPU kills itself */
+	[CPUHP_AP_IDLE_DEAD] = {
+		.name			= "idle:dead",
+	},
+	/*
+	 * Last state before CPU enters the idle loop to die. Transient state
+	 * for synchronization.
+	 */
+	[CPUHP_AP_OFFLINE] = {
+		.name			= "ap:offline",
+		.cant_stop		= true,
+	},
+	/*
+	 * Low level startup/teardown notifiers. Run with interrupts
+	 * disabled. Will be removed once the notifiers are converted to
+	 * states.
+	 */
+	[CPUHP_AP_NOTIFY_STARTING] = {
+		.name			= "notify:starting",
+		.startup		= notify_starting,
+		.teardown		= notify_dying,
+		.skip_onerr		= true,
+		.cant_stop		= true,
+	},
+	/* Entry state on starting. Interrupts enabled from here on. Transient
+	 * state for synchronsization */
+	[CPUHP_AP_ONLINE] = {
+		.name			= "ap:online",
+	},
+	/* Handle smpboot threads park/unpark */
+	[CPUHP_AP_SMPBOOT_THREADS] = {
+		.name			= "smpboot:threads",
+		.startup		= smpboot_unpark_threads,
+		.teardown		= NULL,
+	},
+	/*
+	 * Online/down_prepare notifiers. Will be removed once the notifiers
+	 * are converted to states.
+	 */
+	[CPUHP_AP_NOTIFY_ONLINE] = {
+		.name			= "notify:online",
+		.startup		= notify_online,
+		.teardown		= notify_down_prepare,
+	},
+#endif
+	/*
+	 * The dynamically registered state space is here
+	 */
+
+	/* CPU is fully up and running. */
+	[CPUHP_ONLINE] = {
+		.name			= "online",
+		.startup		= NULL,
+		.teardown		= NULL,
+	},
+};
+
+/* Sanity check for callbacks */
+static int cpuhp_cb_check(enum cpuhp_state state)
+{
+	if (state <= CPUHP_OFFLINE || state >= CPUHP_ONLINE)
+		return -EINVAL;
+	return 0;
 }
 
-#endif /* CONFIG_SMP */
+static bool cpuhp_is_ap_state(enum cpuhp_state state)
+{
+	/*
+	 * The extra check for CPUHP_TEARDOWN_CPU is only for documentation
+	 * purposes as that state is handled explicitely in cpu_down.
+	 */
+	return state > CPUHP_BRINGUP_CPU && state != CPUHP_TEARDOWN_CPU;
+}
+
+static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state)
+{
+	struct cpuhp_step *sp;
+
+	sp = cpuhp_is_ap_state(state) ? cpuhp_ap_states : cpuhp_bp_states;
+	return sp + state;
+}
+
+static void cpuhp_store_callbacks(enum cpuhp_state state,
+				  const char *name,
+				  int (*startup)(unsigned int cpu),
+				  int (*teardown)(unsigned int cpu))
+{
+	/* (Un)Install the callbacks for further cpu hotplug operations */
+	struct cpuhp_step *sp;
+
+	mutex_lock(&cpuhp_state_mutex);
+	sp = cpuhp_get_step(state);
+	sp->startup = startup;
+	sp->teardown = teardown;
+	sp->name = name;
+	mutex_unlock(&cpuhp_state_mutex);
+}
+
+static void *cpuhp_get_teardown_cb(enum cpuhp_state state)
+{
+	return cpuhp_get_step(state)->teardown;
+}
+
+/*
+ * Call the startup/teardown function for a step either on the AP or
+ * on the current CPU.
+ */
+static int cpuhp_issue_call(int cpu, enum cpuhp_state state,
+			    int (*cb)(unsigned int), bool bringup)
+{
+	int ret;
+
+	if (!cb)
+		return 0;
+	/*
+	 * The non AP bound callbacks can fail on bringup. On teardown
+	 * e.g. module removal we crash for now.
+	 */
+#ifdef CONFIG_SMP
+	if (cpuhp_is_ap_state(state))
+		ret = cpuhp_invoke_ap_callback(cpu, state, cb);
+	else
+		ret = cpuhp_invoke_callback(cpu, state, cb);
+#else
+	ret = cpuhp_invoke_callback(cpu, state, cb);
+#endif
+	BUG_ON(ret && !bringup);
+	return ret;
+}
+
+/*
+ * Called from __cpuhp_setup_state on a recoverable failure.
+ *
+ * Note: The teardown callbacks for rollback are not allowed to fail!
+ */
+static void cpuhp_rollback_install(int failedcpu, enum cpuhp_state state,
+				   int (*teardown)(unsigned int cpu))
+{
+	int cpu;
+
+	if (!teardown)
+		return;
+
+	/* Roll back the already executed steps on the other cpus */
+	for_each_present_cpu(cpu) {
+		struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+		int cpustate = st->state;
+
+		if (cpu >= failedcpu)
+			break;
+
+		/* Did we invoke the startup call on that cpu ? */
+		if (cpustate >= state)
+			cpuhp_issue_call(cpu, state, teardown, false);
+	}
+}
+
+/*
+ * Returns a free for dynamic slot assignment of the Online state. The states
+ * are protected by the cpuhp_slot_states mutex and an empty slot is identified
+ * by having no name assigned.
+ */
+static int cpuhp_reserve_state(enum cpuhp_state state)
+{
+	enum cpuhp_state i;
+
+	mutex_lock(&cpuhp_state_mutex);
+	for (i = CPUHP_AP_ONLINE_DYN; i <= CPUHP_AP_ONLINE_DYN_END; i++) {
+		if (cpuhp_ap_states[i].name)
+			continue;
+
+		cpuhp_ap_states[i].name = "Reserved";
+		mutex_unlock(&cpuhp_state_mutex);
+		return i;
+	}
+	mutex_unlock(&cpuhp_state_mutex);
+	WARN(1, "No more dynamic states available for CPU hotplug\n");
+	return -ENOSPC;
+}
+
+/**
+ * __cpuhp_setup_state - Setup the callbacks for an hotplug machine state
+ * @state:	The state to setup
+ * @invoke:	If true, the startup function is invoked for cpus where
+ *		cpu state >= @state
+ * @startup:	startup callback function
+ * @teardown:	teardown callback function
+ *
+ * Returns 0 if successful, otherwise a proper error code
+ */
+int __cpuhp_setup_state(enum cpuhp_state state,
+			const char *name, bool invoke,
+			int (*startup)(unsigned int cpu),
+			int (*teardown)(unsigned int cpu))
+{
+	int cpu, ret = 0;
+	int dyn_state = 0;
+
+	if (cpuhp_cb_check(state) || !name)
+		return -EINVAL;
+
+	get_online_cpus();
+
+	/* currently assignments for the ONLINE state are possible */
+	if (state == CPUHP_AP_ONLINE_DYN) {
+		dyn_state = 1;
+		ret = cpuhp_reserve_state(state);
+		if (ret < 0)
+			goto out;
+		state = ret;
+	}
+
+	cpuhp_store_callbacks(state, name, startup, teardown);
+
+	if (!invoke || !startup)
+		goto out;
+
+	/*
+	 * Try to call the startup callback for each present cpu
+	 * depending on the hotplug state of the cpu.
+	 */
+	for_each_present_cpu(cpu) {
+		struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+		int cpustate = st->state;
+
+		if (cpustate < state)
+			continue;
+
+		ret = cpuhp_issue_call(cpu, state, startup, true);
+		if (ret) {
+			cpuhp_rollback_install(cpu, state, teardown);
+			cpuhp_store_callbacks(state, NULL, NULL, NULL);
+			goto out;
+		}
+	}
+out:
+	put_online_cpus();
+	if (!ret && dyn_state)
+		return state;
+	return ret;
+}
+EXPORT_SYMBOL(__cpuhp_setup_state);
+
+/**
+ * __cpuhp_remove_state - Remove the callbacks for an hotplug machine state
+ * @state:	The state to remove
+ * @invoke:	If true, the teardown function is invoked for cpus where
+ *		cpu state >= @state
+ *
+ * The teardown callback is currently not allowed to fail. Think
+ * about module removal!
+ */
+void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
+{
+	int (*teardown)(unsigned int cpu) = cpuhp_get_teardown_cb(state);
+	int cpu;
+
+	BUG_ON(cpuhp_cb_check(state));
+
+	get_online_cpus();
+
+	if (!invoke || !teardown)
+		goto remove;
+
+	/*
+	 * Call the teardown callback for each present cpu depending
+	 * on the hotplug state of the cpu. This function is not
+	 * allowed to fail currently!
+	 */
+	for_each_present_cpu(cpu) {
+		struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+		int cpustate = st->state;
+
+		if (cpustate >= state)
+			cpuhp_issue_call(cpu, state, teardown, false);
+	}
+remove:
+	cpuhp_store_callbacks(state, NULL, NULL, NULL);
+	put_online_cpus();
+}
+EXPORT_SYMBOL(__cpuhp_remove_state);
+
+#if defined(CONFIG_SYSFS) && defined(CONFIG_HOTPLUG_CPU)
+static ssize_t show_cpuhp_state(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
+
+	return sprintf(buf, "%d\n", st->state);
+}
+static DEVICE_ATTR(state, 0444, show_cpuhp_state, NULL);
+
+static ssize_t write_cpuhp_target(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t count)
+{
+	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
+	struct cpuhp_step *sp;
+	int target, ret;
+
+	ret = kstrtoint(buf, 10, &target);
+	if (ret)
+		return ret;
+
+#ifdef CONFIG_CPU_HOTPLUG_STATE_CONTROL
+	if (target < CPUHP_OFFLINE || target > CPUHP_ONLINE)
+		return -EINVAL;
+#else
+	if (target != CPUHP_OFFLINE && target != CPUHP_ONLINE)
+		return -EINVAL;
+#endif
+
+	ret = lock_device_hotplug_sysfs();
+	if (ret)
+		return ret;
+
+	mutex_lock(&cpuhp_state_mutex);
+	sp = cpuhp_get_step(target);
+	ret = !sp->name || sp->cant_stop ? -EINVAL : 0;
+	mutex_unlock(&cpuhp_state_mutex);
+	if (ret)
+		return ret;
+
+	if (st->state < target)
+		ret = do_cpu_up(dev->id, target);
+	else
+		ret = do_cpu_down(dev->id, target);
+
+	unlock_device_hotplug();
+	return ret ? ret : count;
+}
+
+static ssize_t show_cpuhp_target(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
+
+	return sprintf(buf, "%d\n", st->target);
+}
+static DEVICE_ATTR(target, 0644, show_cpuhp_target, write_cpuhp_target);
+
+static struct attribute *cpuhp_cpu_attrs[] = {
+	&dev_attr_state.attr,
+	&dev_attr_target.attr,
+	NULL
+};
+
+static struct attribute_group cpuhp_cpu_attr_group = {
+	.attrs = cpuhp_cpu_attrs,
+	.name = "hotplug",
+	NULL
+};
+
+static ssize_t show_cpuhp_states(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	ssize_t cur, res = 0;
+	int i;
+
+	mutex_lock(&cpuhp_state_mutex);
+	for (i = CPUHP_OFFLINE; i <= CPUHP_ONLINE; i++) {
+		struct cpuhp_step *sp = cpuhp_get_step(i);
+
+		if (sp->name) {
+			cur = sprintf(buf, "%3d: %s\n", i, sp->name);
+			buf += cur;
+			res += cur;
+		}
+	}
+	mutex_unlock(&cpuhp_state_mutex);
+	return res;
+}
+static DEVICE_ATTR(states, 0444, show_cpuhp_states, NULL);
+
+static struct attribute *cpuhp_cpu_root_attrs[] = {
+	&dev_attr_states.attr,
+	NULL
+};
+
+static struct attribute_group cpuhp_cpu_root_attr_group = {
+	.attrs = cpuhp_cpu_root_attrs,
+	.name = "hotplug",
+	NULL
+};
+
+static int __init cpuhp_sysfs_init(void)
+{
+	int cpu, ret;
+
+	ret = sysfs_create_group(&cpu_subsys.dev_root->kobj,
+				 &cpuhp_cpu_root_attr_group);
+	if (ret)
+		return ret;
+
+	for_each_possible_cpu(cpu) {
+		struct device *dev = get_cpu_device(cpu);
+
+		if (!dev)
+			continue;
+		ret = sysfs_create_group(&dev->kobj, &cpuhp_cpu_attr_group);
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+device_initcall(cpuhp_sysfs_init);
+#endif
 
 /*
  * cpu_bit_bitmap[] is a special, "compressed" data structure that
@@ -789,3 +1673,25 @@
 {
 	cpumask_copy(&__cpu_online_mask, src);
 }
+
+/*
+ * Activate the first processor.
+ */
+void __init boot_cpu_init(void)
+{
+	int cpu = smp_processor_id();
+
+	/* Mark the boot cpu "present", "online" etc for SMP and UP case */
+	set_cpu_online(cpu, true);
+	set_cpu_active(cpu, true);
+	set_cpu_present(cpu, true);
+	set_cpu_possible(cpu, true);
+}
+
+/*
+ * Must be called _AFTER_ setting up the per_cpu areas
+ */
+void __init boot_cpu_state_init(void)
+{
+	per_cpu_ptr(&cpuhp_state, smp_processor_id())->state = CPUHP_ONLINE;
+}

diff --git a/kernel/debug/kdb/kdb_bp.c b/kernel/debug/kdb/kdb_bp.c
index e1dbf4a..90ff129 100644
--- a/kernel/debug/kdb/kdb_bp.c
+++ b/kernel/debug/kdb/kdb_bp.c

@@ -153,13 +153,11 @@
 	} else {
 		kdb_printf("%s: failed to set breakpoint at 0x%lx\n",
 			   __func__, bp->bp_addr);
-#ifdef CONFIG_DEBUG_RODATA
 		if (!bp->bp_type) {
 			kdb_printf("Software breakpoints are unavailable.\n"
-				   "  Change the kernel CONFIG_DEBUG_RODATA=n\n"
+				   "  Boot the kernel with rodata=off\n"
 				   "  OR use hw breaks: help bph\n");
 		}
-#endif
 		return 1;
 	}
 	return 0;

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 0d58522..712570d 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c

@@ -64,8 +64,17 @@
 	struct task_struct *p = tfc->p;
 
 	if (p) {
-		tfc->ret = -EAGAIN;
-		if (task_cpu(p) != smp_processor_id() || !task_curr(p))
+		/* -EAGAIN */
+		if (task_cpu(p) != smp_processor_id())
+			return;
+
+		/*
+		 * Now that we're on right CPU with IRQs disabled, we can test
+		 * if we hit the right task without races.
+		 */
+
+		tfc->ret = -ESRCH; /* No such (running) process */
+		if (p != current)
 			return;
 	}
 
@@ -92,13 +101,17 @@
 		.p	= p,
 		.func	= func,
 		.info	= info,
-		.ret	= -ESRCH, /* No such (running) process */
+		.ret	= -EAGAIN,
 	};
+	int ret;
 
-	if (task_curr(p))
-		smp_call_function_single(task_cpu(p), remote_function, &data, 1);
+	do {
+		ret = smp_call_function_single(task_cpu(p), remote_function, &data, 1);
+		if (!ret)
+			ret = data.ret;
+	} while (ret == -EAGAIN);
 
-	return data.ret;
+	return ret;
 }
 
 /**
@@ -169,19 +182,6 @@
  *    rely on ctx->is_active and therefore cannot use event_function_call().
  *    See perf_install_in_context().
  *
- * This is because we need a ctx->lock serialized variable (ctx->is_active)
- * to reliably determine if a particular task/context is scheduled in. The
- * task_curr() use in task_function_call() is racy in that a remote context
- * switch is not a single atomic operation.
- *
- * As is, the situation is 'safe' because we set rq->curr before we do the
- * actual context switch. This means that task_curr() will fail early, but
- * we'll continue spinning on ctx->is_active until we've passed
- * perf_event_task_sched_out().
- *
- * Without this ctx->lock serialized variable we could have race where we find
- * the task (and hence the context) would not be active while in fact they are.
- *
  * If ctx->nr_events, then ctx->is_active and cpuctx->task_ctx are set.
  */
 
@@ -212,7 +212,7 @@
 	 */
 	if (ctx->task) {
 		if (ctx->task != current) {
-			ret = -EAGAIN;
+			ret = -ESRCH;
 			goto unlock;
 		}
 
@@ -276,10 +276,10 @@
 		return;
 	}
 
-again:
 	if (task == TASK_TOMBSTONE)
 		return;
 
+again:
 	if (!task_function_call(task, event_function, &efs))
 		return;
 
@@ -289,13 +289,15 @@
 	 * a concurrent perf_event_context_sched_out().
 	 */
 	task = ctx->task;
-	if (task != TASK_TOMBSTONE) {
-		if (ctx->is_active) {
-			raw_spin_unlock_irq(&ctx->lock);
-			goto again;
-		}
-		func(event, NULL, ctx, data);
+	if (task == TASK_TOMBSTONE) {
+		raw_spin_unlock_irq(&ctx->lock);
+		return;
 	}
+	if (ctx->is_active) {
+		raw_spin_unlock_irq(&ctx->lock);
+		goto again;
+	}
+	func(event, NULL, ctx, data);
 	raw_spin_unlock_irq(&ctx->lock);
 }
 
@@ -314,6 +316,7 @@
 enum event_type_t {
 	EVENT_FLEXIBLE = 0x1,
 	EVENT_PINNED = 0x2,
+	EVENT_TIME = 0x4,
 	EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED,
 };
 
@@ -321,7 +324,13 @@
  * perf_sched_events : >0 events exist
  * perf_cgroup_events: >0 per-cpu cgroup events exist on this cpu
  */
-struct static_key_deferred perf_sched_events __read_mostly;
+
+static void perf_sched_delayed(struct work_struct *work);
+DEFINE_STATIC_KEY_FALSE(perf_sched_events);
+static DECLARE_DELAYED_WORK(perf_sched_work, perf_sched_delayed);
+static DEFINE_MUTEX(perf_sched_mutex);
+static atomic_t perf_sched_count;
+
 static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
 static DEFINE_PER_CPU(int, perf_sched_cb_usages);
 
@@ -1288,16 +1297,18 @@
 
 /*
  * Update the total_time_enabled and total_time_running fields for a event.
- * The caller of this function needs to hold the ctx->lock.
  */
 static void update_event_times(struct perf_event *event)
 {
 	struct perf_event_context *ctx = event->ctx;
 	u64 run_end;
 
+	lockdep_assert_held(&ctx->lock);
+
 	if (event->state < PERF_EVENT_STATE_INACTIVE ||
 	    event->group_leader->state < PERF_EVENT_STATE_INACTIVE)
 		return;
+
 	/*
 	 * in cgroup mode, time_enabled represents
 	 * the time the event was enabled AND active
@@ -1645,7 +1656,7 @@
 
 static bool is_orphaned_event(struct perf_event *event)
 {
-	return event->state == PERF_EVENT_STATE_EXIT;
+	return event->state == PERF_EVENT_STATE_DEAD;
 }
 
 static inline int pmu_filter_match(struct perf_event *event)
@@ -1690,14 +1701,14 @@
 
 	perf_pmu_disable(event->pmu);
 
+	event->tstamp_stopped = tstamp;
+	event->pmu->del(event, 0);
+	event->oncpu = -1;
 	event->state = PERF_EVENT_STATE_INACTIVE;
 	if (event->pending_disable) {
 		event->pending_disable = 0;
 		event->state = PERF_EVENT_STATE_OFF;
 	}
-	event->tstamp_stopped = tstamp;
-	event->pmu->del(event, 0);
-	event->oncpu = -1;
 
 	if (!is_software_event(event))
 		cpuctx->active_oncpu--;
@@ -1732,7 +1743,6 @@
 }
 
 #define DETACH_GROUP	0x01UL
-#define DETACH_STATE	0x02UL
 
 /*
  * Cross CPU call to remove a performance event
@@ -1752,8 +1762,6 @@
 	if (flags & DETACH_GROUP)
 		perf_group_detach(event);
 	list_del_event(event, ctx);
-	if (flags & DETACH_STATE)
-		event->state = PERF_EVENT_STATE_EXIT;
 
 	if (!ctx->nr_events && ctx->is_active) {
 		ctx->is_active = 0;
@@ -2063,14 +2071,27 @@
 	event->tstamp_stopped = tstamp;
 }
 
-static void task_ctx_sched_out(struct perf_cpu_context *cpuctx,
-			       struct perf_event_context *ctx);
+static void ctx_sched_out(struct perf_event_context *ctx,
+			  struct perf_cpu_context *cpuctx,
+			  enum event_type_t event_type);
 static void
 ctx_sched_in(struct perf_event_context *ctx,
 	     struct perf_cpu_context *cpuctx,
 	     enum event_type_t event_type,
 	     struct task_struct *task);
 
+static void task_ctx_sched_out(struct perf_cpu_context *cpuctx,
+			       struct perf_event_context *ctx)
+{
+	if (!cpuctx->task_ctx)
+		return;
+
+	if (WARN_ON_ONCE(ctx != cpuctx->task_ctx))
+		return;
+
+	ctx_sched_out(ctx, cpuctx, EVENT_ALL);
+}
+
 static void perf_event_sched_in(struct perf_cpu_context *cpuctx,
 				struct perf_event_context *ctx,
 				struct task_struct *task)
@@ -2097,49 +2118,68 @@
 /*
  * Cross CPU call to install and enable a performance event
  *
- * Must be called with ctx->mutex held
+ * Very similar to remote_function() + event_function() but cannot assume that
+ * things like ctx->is_active and cpuctx->task_ctx are set.
  */
 static int  __perf_install_in_context(void *info)
 {
-	struct perf_event_context *ctx = info;
+	struct perf_event *event = info;
+	struct perf_event_context *ctx = event->ctx;
 	struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
 	struct perf_event_context *task_ctx = cpuctx->task_ctx;
+	bool activate = true;
+	int ret = 0;
 
 	raw_spin_lock(&cpuctx->ctx.lock);
 	if (ctx->task) {
 		raw_spin_lock(&ctx->lock);
-		/*
-		 * If we hit the 'wrong' task, we've since scheduled and
-		 * everything should be sorted, nothing to do!
-		 */
 		task_ctx = ctx;
-		if (ctx->task != current)
+
+		/* If we're on the wrong CPU, try again */
+		if (task_cpu(ctx->task) != smp_processor_id()) {
+			ret = -ESRCH;
 			goto unlock;
+		}
 
 		/*
-		 * If task_ctx is set, it had better be to us.
+		 * If we're on the right CPU, see if the task we target is
+		 * current, if not we don't have to activate the ctx, a future
+		 * context switch will do that for us.
 		 */
-		WARN_ON_ONCE(cpuctx->task_ctx != ctx && cpuctx->task_ctx);
+		if (ctx->task != current)
+			activate = false;
+		else
+			WARN_ON_ONCE(cpuctx->task_ctx && cpuctx->task_ctx != ctx);
+
 	} else if (task_ctx) {
 		raw_spin_lock(&task_ctx->lock);
 	}
 
-	ctx_resched(cpuctx, task_ctx);
+	if (activate) {
+		ctx_sched_out(ctx, cpuctx, EVENT_TIME);
+		add_event_to_ctx(event, ctx);
+		ctx_resched(cpuctx, task_ctx);
+	} else {
+		add_event_to_ctx(event, ctx);
+	}
+
 unlock:
 	perf_ctx_unlock(cpuctx, task_ctx);
 
-	return 0;
+	return ret;
 }
 
 /*
- * Attach a performance event to a context
+ * Attach a performance event to a context.
+ *
+ * Very similar to event_function_call, see comment there.
  */
 static void
 perf_install_in_context(struct perf_event_context *ctx,
 			struct perf_event *event,
 			int cpu)
 {
-	struct task_struct *task = NULL;
+	struct task_struct *task = READ_ONCE(ctx->task);
 
 	lockdep_assert_held(&ctx->mutex);
 
@@ -2147,40 +2187,46 @@
 	if (event->cpu != -1)
 		event->cpu = cpu;
 
+	if (!task) {
+		cpu_function_call(cpu, __perf_install_in_context, event);
+		return;
+	}
+
+	/*
+	 * Should not happen, we validate the ctx is still alive before calling.
+	 */
+	if (WARN_ON_ONCE(task == TASK_TOMBSTONE))
+		return;
+
 	/*
 	 * Installing events is tricky because we cannot rely on ctx->is_active
 	 * to be set in case this is the nr_events 0 -> 1 transition.
-	 *
-	 * So what we do is we add the event to the list here, which will allow
-	 * a future context switch to DTRT and then send a racy IPI. If the IPI
-	 * fails to hit the right task, this means a context switch must have
-	 * happened and that will have taken care of business.
 	 */
+again:
+	/*
+	 * Cannot use task_function_call() because we need to run on the task's
+	 * CPU regardless of whether its current or not.
+	 */
+	if (!cpu_function_call(task_cpu(task), __perf_install_in_context, event))
+		return;
+
 	raw_spin_lock_irq(&ctx->lock);
 	task = ctx->task;
-	/*
-	 * Worse, we cannot even rely on the ctx actually existing anymore. If
-	 * between find_get_context() and perf_install_in_context() the task
-	 * went through perf_event_exit_task() its dead and we should not be
-	 * adding new events.
-	 */
-	if (task == TASK_TOMBSTONE) {
+	if (WARN_ON_ONCE(task == TASK_TOMBSTONE)) {
+		/*
+		 * Cannot happen because we already checked above (which also
+		 * cannot happen), and we hold ctx->mutex, which serializes us
+		 * against perf_event_exit_task_context().
+		 */
 		raw_spin_unlock_irq(&ctx->lock);
 		return;
 	}
-	update_context_time(ctx);
-	/*
-	 * Update cgrp time only if current cgrp matches event->cgrp.
-	 * Must be done before calling add_event_to_ctx().
-	 */
-	update_cgrp_time_from_event(event);
-	add_event_to_ctx(event, ctx);
 	raw_spin_unlock_irq(&ctx->lock);
-
-	if (task)
-		task_function_call(task, __perf_install_in_context, ctx);
-	else
-		cpu_function_call(cpu, __perf_install_in_context, ctx);
+	/*
+	 * Since !ctx->is_active doesn't mean anything, we must IPI
+	 * unconditionally.
+	 */
+	goto again;
 }
 
 /*
@@ -2219,17 +2265,18 @@
 	    event->state <= PERF_EVENT_STATE_ERROR)
 		return;
 
-	update_context_time(ctx);
+	if (ctx->is_active)
+		ctx_sched_out(ctx, cpuctx, EVENT_TIME);
+
 	__perf_event_mark_enabled(event);
 
 	if (!ctx->is_active)
 		return;
 
 	if (!event_filter_match(event)) {
-		if (is_cgroup_event(event)) {
-			perf_cgroup_set_timestamp(current, ctx); // XXX ?
+		if (is_cgroup_event(event))
 			perf_cgroup_defer_enabled(event);
-		}
+		ctx_sched_in(ctx, cpuctx, EVENT_TIME, current);
 		return;
 	}
 
@@ -2237,8 +2284,10 @@
 	 * If the event is in a group and isn't the group leader,
 	 * then don't put it on unless the group is on.
 	 */
-	if (leader != event && leader->state != PERF_EVENT_STATE_ACTIVE)
+	if (leader != event && leader->state != PERF_EVENT_STATE_ACTIVE) {
+		ctx_sched_in(ctx, cpuctx, EVENT_TIME, current);
 		return;
+	}
 
 	task_ctx = cpuctx->task_ctx;
 	if (ctx->task)
@@ -2344,24 +2393,33 @@
 	}
 
 	ctx->is_active &= ~event_type;
+	if (!(ctx->is_active & EVENT_ALL))
+		ctx->is_active = 0;
+
 	if (ctx->task) {
 		WARN_ON_ONCE(cpuctx->task_ctx != ctx);
 		if (!ctx->is_active)
 			cpuctx->task_ctx = NULL;
 	}
 
-	update_context_time(ctx);
-	update_cgrp_time_from_cpuctx(cpuctx);
-	if (!ctx->nr_active)
+	is_active ^= ctx->is_active; /* changed bits */
+
+	if (is_active & EVENT_TIME) {
+		/* update (and stop) ctx time */
+		update_context_time(ctx);
+		update_cgrp_time_from_cpuctx(cpuctx);
+	}
+
+	if (!ctx->nr_active || !(is_active & EVENT_ALL))
 		return;
 
 	perf_pmu_disable(ctx->pmu);
-	if ((is_active & EVENT_PINNED) && (event_type & EVENT_PINNED)) {
+	if (is_active & EVENT_PINNED) {
 		list_for_each_entry(event, &ctx->pinned_groups, group_entry)
 			group_sched_out(event, cpuctx, ctx);
 	}
 
-	if ((is_active & EVENT_FLEXIBLE) && (event_type & EVENT_FLEXIBLE)) {
+	if (is_active & EVENT_FLEXIBLE) {
 		list_for_each_entry(event, &ctx->flexible_groups, group_entry)
 			group_sched_out(event, cpuctx, ctx);
 	}
@@ -2641,18 +2699,6 @@
 		perf_cgroup_sched_out(task, next);
 }
 
-static void task_ctx_sched_out(struct perf_cpu_context *cpuctx,
-			       struct perf_event_context *ctx)
-{
-	if (!cpuctx->task_ctx)
-		return;
-
-	if (WARN_ON_ONCE(ctx != cpuctx->task_ctx))
-		return;
-
-	ctx_sched_out(ctx, cpuctx, EVENT_ALL);
-}
-
 /*
  * Called with IRQs disabled
  */
@@ -2735,7 +2781,7 @@
 	if (likely(!ctx->nr_events))
 		return;
 
-	ctx->is_active |= event_type;
+	ctx->is_active |= (event_type | EVENT_TIME);
 	if (ctx->task) {
 		if (!is_active)
 			cpuctx->task_ctx = ctx;
@@ -2743,18 +2789,24 @@
 			WARN_ON_ONCE(cpuctx->task_ctx != ctx);
 	}
 
-	now = perf_clock();
-	ctx->timestamp = now;
-	perf_cgroup_set_timestamp(task, ctx);
+	is_active ^= ctx->is_active; /* changed bits */
+
+	if (is_active & EVENT_TIME) {
+		/* start ctx time */
+		now = perf_clock();
+		ctx->timestamp = now;
+		perf_cgroup_set_timestamp(task, ctx);
+	}
+
 	/*
 	 * First go through the list and put on any pinned groups
 	 * in order to give them the best chance of going on.
 	 */
-	if (!(is_active & EVENT_PINNED) && (event_type & EVENT_PINNED))
+	if (is_active & EVENT_PINNED)
 		ctx_pinned_sched_in(ctx, cpuctx);
 
 	/* Then walk through the lower prio flexible groups */
-	if (!(is_active & EVENT_FLEXIBLE) && (event_type & EVENT_FLEXIBLE))
+	if (is_active & EVENT_FLEXIBLE)
 		ctx_flexible_sched_in(ctx, cpuctx);
 }
 
@@ -3060,17 +3112,6 @@
 	return rotate;
 }
 
-#ifdef CONFIG_NO_HZ_FULL
-bool perf_event_can_stop_tick(void)
-{
-	if (atomic_read(&nr_freq_events) ||
-	    __this_cpu_read(perf_throttled_count))
-		return false;
-	else
-		return true;
-}
-#endif
-
 void perf_event_task_tick(void)
 {
 	struct list_head *head = this_cpu_ptr(&active_ctx_list);
@@ -3081,6 +3122,7 @@
 
 	__this_cpu_inc(perf_throttled_seq);
 	throttled = __this_cpu_xchg(perf_throttled_count, 0);
+	tick_dep_clear_cpu(smp_processor_id(), TICK_DEP_BIT_PERF_EVENTS);
 
 	list_for_each_entry_safe(ctx, tmp, head, active_ctx_list)
 		perf_adjust_freq_unthr_context(ctx, throttled);
@@ -3120,6 +3162,7 @@
 
 	cpuctx = __get_cpu_context(ctx);
 	perf_ctx_lock(cpuctx, ctx);
+	ctx_sched_out(ctx, cpuctx, EVENT_TIME);
 	list_for_each_entry(event, &ctx->event_list, event_entry)
 		enabled |= event_enable_on_exec(event, ctx);
 
@@ -3511,6 +3554,28 @@
 		atomic_dec(&per_cpu(perf_cgroup_events, cpu));
 }
 
+#ifdef CONFIG_NO_HZ_FULL
+static DEFINE_SPINLOCK(nr_freq_lock);
+#endif
+
+static void unaccount_freq_event_nohz(void)
+{
+#ifdef CONFIG_NO_HZ_FULL
+	spin_lock(&nr_freq_lock);
+	if (atomic_dec_and_test(&nr_freq_events))
+		tick_nohz_dep_clear(TICK_DEP_BIT_PERF_EVENTS);
+	spin_unlock(&nr_freq_lock);
+#endif
+}
+
+static void unaccount_freq_event(void)
+{
+	if (tick_nohz_full_enabled())
+		unaccount_freq_event_nohz();
+	else
+		atomic_dec(&nr_freq_events);
+}
+
 static void unaccount_event(struct perf_event *event)
 {
 	bool dec = false;
@@ -3527,7 +3592,7 @@
 	if (event->attr.task)
 		atomic_dec(&nr_task_events);
 	if (event->attr.freq)
-		atomic_dec(&nr_freq_events);
+		unaccount_freq_event();
 	if (event->attr.context_switch) {
 		dec = true;
 		atomic_dec(&nr_switch_events);
@@ -3537,12 +3602,22 @@
 	if (has_branch_stack(event))
 		dec = true;
 
-	if (dec)
-		static_key_slow_dec_deferred(&perf_sched_events);
+	if (dec) {
+		if (!atomic_add_unless(&perf_sched_count, -1, 1))
+			schedule_delayed_work(&perf_sched_work, HZ);
+	}
 
 	unaccount_event_cpu(event, event->cpu);
 }
 
+static void perf_sched_delayed(struct work_struct *work)
+{
+	mutex_lock(&perf_sched_mutex);
+	if (atomic_dec_and_test(&perf_sched_count))
+		static_branch_disable(&perf_sched_events);
+	mutex_unlock(&perf_sched_mutex);
+}
+
 /*
  * The following implement mutual exclusion of events on "exclusive" pmus
  * (PERF_PMU_CAP_EXCLUSIVE). Such pmus can only have one event scheduled
@@ -3752,30 +3827,42 @@
  */
 int perf_event_release_kernel(struct perf_event *event)
 {
-	struct perf_event_context *ctx;
+	struct perf_event_context *ctx = event->ctx;
 	struct perf_event *child, *tmp;
 
+	/*
+	 * If we got here through err_file: fput(event_file); we will not have
+	 * attached to a context yet.
+	 */
+	if (!ctx) {
+		WARN_ON_ONCE(event->attach_state &
+				(PERF_ATTACH_CONTEXT|PERF_ATTACH_GROUP));
+		goto no_ctx;
+	}
+
 	if (!is_kernel_event(event))
 		perf_remove_from_owner(event);
 
 	ctx = perf_event_ctx_lock(event);
 	WARN_ON_ONCE(ctx->parent_ctx);
-	perf_remove_from_context(event, DETACH_GROUP | DETACH_STATE);
-	perf_event_ctx_unlock(event, ctx);
+	perf_remove_from_context(event, DETACH_GROUP);
 
+	raw_spin_lock_irq(&ctx->lock);
 	/*
-	 * At this point we must have event->state == PERF_EVENT_STATE_EXIT,
-	 * either from the above perf_remove_from_context() or through
-	 * perf_event_exit_event().
+	 * Mark this even as STATE_DEAD, there is no external reference to it
+	 * anymore.
 	 *
-	 * Therefore, anybody acquiring event->child_mutex after the below
-	 * loop _must_ also see this, most importantly inherit_event() which
-	 * will avoid placing more children on the list.
+	 * Anybody acquiring event->child_mutex after the below loop _must_
+	 * also see this, most importantly inherit_event() which will avoid
+	 * placing more children on the list.
 	 *
 	 * Thus this guarantees that we will in fact observe and kill _ALL_
 	 * child events.
 	 */
-	WARN_ON_ONCE(event->state != PERF_EVENT_STATE_EXIT);
+	event->state = PERF_EVENT_STATE_DEAD;
+	raw_spin_unlock_irq(&ctx->lock);
+
+	perf_event_ctx_unlock(event, ctx);
 
 again:
 	mutex_lock(&event->child_mutex);
@@ -3830,8 +3917,8 @@
 	}
 	mutex_unlock(&event->child_mutex);
 
-	/* Must be the last reference */
-	put_event(event);
+no_ctx:
+	put_event(event); /* Must be the 'last' reference */
 	return 0;
 }
 EXPORT_SYMBOL_GPL(perf_event_release_kernel);
@@ -3988,7 +4075,7 @@
 {
 	bool no_children;
 
-	if (event->state != PERF_EVENT_STATE_EXIT)
+	if (event->state > PERF_EVENT_STATE_EXIT)
 		return false;
 
 	mutex_lock(&event->child_mutex);
@@ -6349,9 +6436,9 @@
 		if (unlikely(throttle
 			     && hwc->interrupts >= max_samples_per_tick)) {
 			__this_cpu_inc(perf_throttled_count);
+			tick_dep_set_cpu(smp_processor_id(), TICK_DEP_BIT_PERF_EVENTS);
 			hwc->interrupts = MAX_INTERRUPTS;
 			perf_log_throttle(event, 0);
-			tick_nohz_full_kick();
 			ret = 1;
 		}
 	}
@@ -6710,7 +6797,7 @@
 	kfree_rcu(hlist, rcu_head);
 }
 
-static void swevent_hlist_put_cpu(struct perf_event *event, int cpu)
+static void swevent_hlist_put_cpu(int cpu)
 {
 	struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
 
@@ -6722,15 +6809,15 @@
 	mutex_unlock(&swhash->hlist_mutex);
 }
 
-static void swevent_hlist_put(struct perf_event *event)
+static void swevent_hlist_put(void)
 {
 	int cpu;
 
 	for_each_possible_cpu(cpu)
-		swevent_hlist_put_cpu(event, cpu);
+		swevent_hlist_put_cpu(cpu);
 }
 
-static int swevent_hlist_get_cpu(struct perf_event *event, int cpu)
+static int swevent_hlist_get_cpu(int cpu)
 {
 	struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
 	int err = 0;
@@ -6753,14 +6840,13 @@
 	return err;
 }
 
-static int swevent_hlist_get(struct perf_event *event)
+static int swevent_hlist_get(void)
 {
-	int err;
-	int cpu, failed_cpu;
+	int err, cpu, failed_cpu;
 
 	get_online_cpus();
 	for_each_possible_cpu(cpu) {
-		err = swevent_hlist_get_cpu(event, cpu);
+		err = swevent_hlist_get_cpu(cpu);
 		if (err) {
 			failed_cpu = cpu;
 			goto fail;
@@ -6773,7 +6859,7 @@
 	for_each_possible_cpu(cpu) {
 		if (cpu == failed_cpu)
 			break;
-		swevent_hlist_put_cpu(event, cpu);
+		swevent_hlist_put_cpu(cpu);
 	}
 
 	put_online_cpus();
@@ -6789,7 +6875,7 @@
 	WARN_ON(event->parent);
 
 	static_key_slow_dec(&perf_swevent_enabled[event_id]);
-	swevent_hlist_put(event);
+	swevent_hlist_put();
 }
 
 static int perf_swevent_init(struct perf_event *event)
@@ -6820,7 +6906,7 @@
 	if (!event->parent) {
 		int err;
 
-		err = swevent_hlist_get(event);
+		err = swevent_hlist_get();
 		if (err)
 			return err;
 
@@ -7741,6 +7827,27 @@
 		atomic_inc(&per_cpu(perf_cgroup_events, cpu));
 }
 
+/* Freq events need the tick to stay alive (see perf_event_task_tick). */
+static void account_freq_event_nohz(void)
+{
+#ifdef CONFIG_NO_HZ_FULL
+	/* Lock so we don't race with concurrent unaccount */
+	spin_lock(&nr_freq_lock);
+	if (atomic_inc_return(&nr_freq_events) == 1)
+		tick_nohz_dep_set(TICK_DEP_BIT_PERF_EVENTS);
+	spin_unlock(&nr_freq_lock);
+#endif
+}
+
+static void account_freq_event(void)
+{
+	if (tick_nohz_full_enabled())
+		account_freq_event_nohz();
+	else
+		atomic_inc(&nr_freq_events);
+}
+
+
 static void account_event(struct perf_event *event)
 {
 	bool inc = false;
@@ -7756,10 +7863,8 @@
 		atomic_inc(&nr_comm_events);
 	if (event->attr.task)
 		atomic_inc(&nr_task_events);
-	if (event->attr.freq) {
-		if (atomic_inc_return(&nr_freq_events) == 1)
-			tick_nohz_full_kick_all();
-	}
+	if (event->attr.freq)
+		account_freq_event();
 	if (event->attr.context_switch) {
 		atomic_inc(&nr_switch_events);
 		inc = true;
@@ -7769,8 +7874,28 @@
 	if (is_cgroup_event(event))
 		inc = true;
 
-	if (inc)
-		static_key_slow_inc(&perf_sched_events.key);
+	if (inc) {
+		if (atomic_inc_not_zero(&perf_sched_count))
+			goto enabled;
+
+		mutex_lock(&perf_sched_mutex);
+		if (!atomic_read(&perf_sched_count)) {
+			static_branch_enable(&perf_sched_events);
+			/*
+			 * Guarantee that all CPUs observe they key change and
+			 * call the perf scheduling hooks before proceeding to
+			 * install events that need them.
+			 */
+			synchronize_sched();
+		}
+		/*
+		 * Now that we have waited for the sync_sched(), allow further
+		 * increments to by-pass the mutex.
+		 */
+		atomic_inc(&perf_sched_count);
+		mutex_unlock(&perf_sched_mutex);
+	}
+enabled:
 
 	account_event_cpu(event, event->cpu);
 }
@@ -7906,6 +8031,9 @@
 		}
 	}
 
+	/* symmetric to unaccount_event() in _free_event() */
+	account_event(event);
+
 	return event;
 
 err_per_task:
@@ -8269,8 +8397,6 @@
 		}
 	}
 
-	account_event(event);
-
 	/*
 	 * Special case software events and allow them to be part of
 	 * any hardware group.
@@ -8389,10 +8515,19 @@
 	if (move_group) {
 		gctx = group_leader->ctx;
 		mutex_lock_double(&gctx->mutex, &ctx->mutex);
+		if (gctx->task == TASK_TOMBSTONE) {
+			err = -ESRCH;
+			goto err_locked;
+		}
 	} else {
 		mutex_lock(&ctx->mutex);
 	}
 
+	if (ctx->task == TASK_TOMBSTONE) {
+		err = -ESRCH;
+		goto err_locked;
+	}
+
 	if (!perf_event_validate_size(event)) {
 		err = -E2BIG;
 		goto err_locked;
@@ -8509,7 +8644,12 @@
 	perf_unpin_context(ctx);
 	put_ctx(ctx);
 err_alloc:
-	free_event(event);
+	/*
+	 * If event_file is set, the fput() above will have called ->release()
+	 * and that will take care of freeing the event.
+	 */
+	if (!event_file)
+		free_event(event);
 err_cpus:
 	put_online_cpus();
 err_task:
@@ -8553,8 +8693,6 @@
 	/* Mark owner so we could distinguish it from user events. */
 	event->owner = TASK_TOMBSTONE;
 
-	account_event(event);
-
 	ctx = find_get_context(event->pmu, task, event);
 	if (IS_ERR(ctx)) {
 		err = PTR_ERR(ctx);
@@ -8563,12 +8701,14 @@
 
 	WARN_ON_ONCE(ctx->parent_ctx);
 	mutex_lock(&ctx->mutex);
+	if (ctx->task == TASK_TOMBSTONE) {
+		err = -ESRCH;
+		goto err_unlock;
+	}
+
 	if (!exclusive_event_installable(event, ctx)) {
-		mutex_unlock(&ctx->mutex);
-		perf_unpin_context(ctx);
-		put_ctx(ctx);
 		err = -EBUSY;
-		goto err_free;
+		goto err_unlock;
 	}
 
 	perf_install_in_context(ctx, event, cpu);
@@ -8577,6 +8717,10 @@
 
 	return event;
 
+err_unlock:
+	mutex_unlock(&ctx->mutex);
+	perf_unpin_context(ctx);
+	put_ctx(ctx);
 err_free:
 	free_event(event);
 err:
@@ -8695,7 +8839,7 @@
 	if (parent_event)
 		perf_group_detach(child_event);
 	list_del_event(child_event, child_ctx);
-	child_event->state = PERF_EVENT_STATE_EXIT; /* see perf_event_release_kernel() */
+	child_event->state = PERF_EVENT_STATE_EXIT; /* is_event_hup() */
 	raw_spin_unlock_irq(&child_ctx->lock);
 
 	/*
@@ -9313,9 +9457,6 @@
 	ret = init_hw_breakpoint();
 	WARN(ret, "hw_breakpoint initialization failed with: %d", ret);
 
-	/* do not patch jump label more than once per second */
-	jump_label_rate_limit(&perf_sched_events, HZ);
-
 	/*
 	 * Build time assertion that we keep the data_head at the intended
 	 * location.  IOW, validation we got the __reserved[] size right.
@@ -9335,6 +9476,7 @@
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(perf_event_sysfs_show);
 
 static int __init perf_event_sysfs_init(void)
 {

diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 0167679..5f6ce93 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c

@@ -1178,6 +1178,7 @@
 		goto free_area;
 
 	area->xol_mapping.name = "[uprobes]";
+	area->xol_mapping.fault = NULL;
 	area->xol_mapping.pages = area->pages;
 	area->pages[0] = alloc_page(GFP_HIGHUSER);
 	if (!area->pages[0])

diff --git a/kernel/futex.c b/kernel/futex.c
index 5d6ce64..a5d2e74 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c

@@ -124,16 +124,16 @@
  *   futex_wait(futex, val);
  *
  *   waiters++; (a)
- *   mb(); (A) <-- paired with -.
- *                              |
- *   lock(hash_bucket(futex));  |
- *                              |
- *   uval = *futex;             |
- *                              |        *futex = newval;
- *                              |        sys_futex(WAKE, futex);
- *                              |          futex_wake(futex);
- *                              |
- *                              `------->  mb(); (B)
+ *   smp_mb(); (A) <-- paired with -.
+ *                                  |
+ *   lock(hash_bucket(futex));      |
+ *                                  |
+ *   uval = *futex;                 |
+ *                                  |        *futex = newval;
+ *                                  |        sys_futex(WAKE, futex);
+ *                                  |          futex_wake(futex);
+ *                                  |
+ *                                  `--------> smp_mb(); (B)
  *   if (uval == val)
  *     queue();
  *     unlock(hash_bucket(futex));
@@ -334,7 +334,7 @@
 	/*
 	 * Ensure futex_get_mm() implies a full barrier such that
 	 * get_futex_key() implies a full barrier. This is relied upon
-	 * as full barrier (B), see the ordering comment above.
+	 * as smp_mb(); (B), see the ordering comment above.
 	 */
 	smp_mb__after_atomic();
 }
@@ -407,10 +407,10 @@
 
 	switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
 	case FUT_OFF_INODE:
-		ihold(key->shared.inode); /* implies MB (B) */
+		ihold(key->shared.inode); /* implies smp_mb(); (B) */
 		break;
 	case FUT_OFF_MMSHARED:
-		futex_get_mm(key); /* implies MB (B) */
+		futex_get_mm(key); /* implies smp_mb(); (B) */
 		break;
 	default:
 		/*
@@ -418,7 +418,7 @@
 		 * mm, therefore the only purpose of calling get_futex_key_refs
 		 * is because we need the barrier for the lockless waiter check.
 		 */
-		smp_mb(); /* explicit MB (B) */
+		smp_mb(); /* explicit smp_mb(); (B) */
 	}
 }
 
@@ -497,7 +497,7 @@
 	if (!fshared) {
 		key->private.mm = mm;
 		key->private.address = address;
-		get_futex_key_refs(key);  /* implies MB (B) */
+		get_futex_key_refs(key);  /* implies smp_mb(); (B) */
 		return 0;
 	}
 
@@ -520,7 +520,20 @@
 	else
 		err = 0;
 
-	lock_page(page);
+	/*
+	 * The treatment of mapping from this point on is critical. The page
+	 * lock protects many things but in this context the page lock
+	 * stabilizes mapping, prevents inode freeing in the shared
+	 * file-backed region case and guards against movement to swap cache.
+	 *
+	 * Strictly speaking the page lock is not needed in all cases being
+	 * considered here and page lock forces unnecessarily serialization
+	 * From this point on, mapping will be re-verified if necessary and
+	 * page lock will be acquired only if it is unavoidable
+	 */
+	page = compound_head(page);
+	mapping = READ_ONCE(page->mapping);
+
 	/*
 	 * If page->mapping is NULL, then it cannot be a PageAnon
 	 * page; but it might be the ZERO_PAGE or in the gate area or
@@ -536,19 +549,31 @@
 	 * shmem_writepage move it from filecache to swapcache beneath us:
 	 * an unlikely race, but we do need to retry for page->mapping.
 	 */
-	mapping = compound_head(page)->mapping;
-	if (!mapping) {
-		int shmem_swizzled = PageSwapCache(page);
+	if (unlikely(!mapping)) {
+		int shmem_swizzled;
+
+		/*
+		 * Page lock is required to identify which special case above
+		 * applies. If this is really a shmem page then the page lock
+		 * will prevent unexpected transitions.
+		 */
+		lock_page(page);
+		shmem_swizzled = PageSwapCache(page) || page->mapping;
 		unlock_page(page);
 		put_page(page);
+
 		if (shmem_swizzled)
 			goto again;
+
 		return -EFAULT;
 	}
 
 	/*
 	 * Private mappings are handled in a simple way.
 	 *
+	 * If the futex key is stored on an anonymous page, then the associated
+	 * object is the mm which is implicitly pinned by the calling process.
+	 *
 	 * NOTE: When userspace waits on a MAP_SHARED mapping, even if
 	 * it's a read-only handle, it's expected that futexes attach to
 	 * the object not the particular process.
@@ -566,16 +591,74 @@
 		key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
 		key->private.mm = mm;
 		key->private.address = address;
+
+		get_futex_key_refs(key); /* implies smp_mb(); (B) */
+
 	} else {
+		struct inode *inode;
+
+		/*
+		 * The associated futex object in this case is the inode and
+		 * the page->mapping must be traversed. Ordinarily this should
+		 * be stabilised under page lock but it's not strictly
+		 * necessary in this case as we just want to pin the inode, not
+		 * update the radix tree or anything like that.
+		 *
+		 * The RCU read lock is taken as the inode is finally freed
+		 * under RCU. If the mapping still matches expectations then the
+		 * mapping->host can be safely accessed as being a valid inode.
+		 */
+		rcu_read_lock();
+
+		if (READ_ONCE(page->mapping) != mapping) {
+			rcu_read_unlock();
+			put_page(page);
+
+			goto again;
+		}
+
+		inode = READ_ONCE(mapping->host);
+		if (!inode) {
+			rcu_read_unlock();
+			put_page(page);
+
+			goto again;
+		}
+
+		/*
+		 * Take a reference unless it is about to be freed. Previously
+		 * this reference was taken by ihold under the page lock
+		 * pinning the inode in place so i_lock was unnecessary. The
+		 * only way for this check to fail is if the inode was
+		 * truncated in parallel so warn for now if this happens.
+		 *
+		 * We are not calling into get_futex_key_refs() in file-backed
+		 * cases, therefore a successful atomic_inc return below will
+		 * guarantee that get_futex_key() will still imply smp_mb(); (B).
+		 */
+		if (WARN_ON_ONCE(!atomic_inc_not_zero(&inode->i_count))) {
+			rcu_read_unlock();
+			put_page(page);
+
+			goto again;
+		}
+
+		/* Should be impossible but lets be paranoid for now */
+		if (WARN_ON_ONCE(inode->i_mapping != mapping)) {
+			err = -EFAULT;
+			rcu_read_unlock();
+			iput(inode);
+
+			goto out;
+		}
+
 		key->both.offset |= FUT_OFF_INODE; /* inode-based key */
-		key->shared.inode = mapping->host;
+		key->shared.inode = inode;
 		key->shared.pgoff = basepage_index(page);
+		rcu_read_unlock();
 	}
 
-	get_futex_key_refs(key); /* implies MB (B) */
-
 out:
-	unlock_page(page);
 	put_page(page);
 	return err;
 }
@@ -1864,7 +1947,7 @@
 
 	q->lock_ptr = &hb->lock;
 
-	spin_lock(&hb->lock); /* implies MB (A) */
+	spin_lock(&hb->lock); /* implies smp_mb(); (A) */
 	return hb;
 }
 
@@ -1927,8 +2010,12 @@
 
 	/* In the common case we don't take the spinlock, which is nice. */
 retry:
-	lock_ptr = q->lock_ptr;
-	barrier();
+	/*
+	 * q->lock_ptr can change between this read and the following spin_lock.
+	 * Use READ_ONCE to forbid the compiler from reloading q->lock_ptr and
+	 * optimizing lock_ptr out of the logic below.
+	 */
+	lock_ptr = READ_ONCE(q->lock_ptr);
 	if (lock_ptr != NULL) {
 		spin_lock(lock_ptr);
 		/*

diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
index 3b48dab..3bbfd6a 100644
--- a/kernel/irq/Kconfig
+++ b/kernel/irq/Kconfig

@@ -64,6 +64,10 @@
 	bool
 	select IRQ_DOMAIN
 
+# Generic IRQ IPI support
+config GENERIC_IRQ_IPI
+	bool
+
 # Generic MSI interrupt support
 config GENERIC_MSI_IRQ
 	bool

diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile
index 2fc9cbd..2ee42e9 100644
--- a/kernel/irq/Makefile
+++ b/kernel/irq/Makefile

@@ -8,3 +8,4 @@
 obj-$(CONFIG_GENERIC_IRQ_MIGRATION) += cpuhotplug.o
 obj-$(CONFIG_PM_SLEEP) += pm.o
 obj-$(CONFIG_GENERIC_MSI_IRQ) += msi.o
+obj-$(CONFIG_GENERIC_IRQ_IPI) += ipi.o

diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 5797909..2f9f2b0 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c

@@ -961,6 +961,7 @@
 	data = data->parent_data;
 	data->chip->irq_mask(data);
 }
+EXPORT_SYMBOL_GPL(irq_chip_mask_parent);
 
 /**
  * irq_chip_unmask_parent - Unmask the parent interrupt
@@ -971,6 +972,7 @@
 	data = data->parent_data;
 	data->chip->irq_unmask(data);
 }
+EXPORT_SYMBOL_GPL(irq_chip_unmask_parent);
 
 /**
  * irq_chip_eoi_parent - Invoke EOI on the parent interrupt
@@ -981,6 +983,7 @@
 	data = data->parent_data;
 	data->chip->irq_eoi(data);
 }
+EXPORT_SYMBOL_GPL(irq_chip_eoi_parent);
 
 /**
  * irq_chip_set_affinity_parent - Set affinity on the parent interrupt
@@ -1016,6 +1019,7 @@
 
 	return -ENOSYS;
 }
+EXPORT_SYMBOL_GPL(irq_chip_set_type_parent);
 
 /**
  * irq_chip_retrigger_hierarchy - Retrigger an interrupt in hardware

diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 57bff78..a15b548 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c

@@ -136,10 +136,9 @@
 {
 	irqreturn_t retval = IRQ_NONE;
 	unsigned int flags = 0, irq = desc->irq_data.irq;
-	struct irqaction *action = desc->action;
+	struct irqaction *action;
 
-	/* action might have become NULL since we dropped the lock */
-	while (action) {
+	for_each_action_of_desc(desc, action) {
 		irqreturn_t res;
 
 		trace_irq_handler_entry(irq, action);
@@ -173,7 +172,6 @@
 		}
 
 		retval |= res;
-		action = action->next;
 	}
 
 	add_interrupt_randomness(irq, flags);

diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index fcab63c..09be2c9 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h

@@ -131,6 +131,9 @@
 #define IRQ_GET_DESC_CHECK_GLOBAL	(_IRQ_DESC_CHECK)
 #define IRQ_GET_DESC_CHECK_PERCPU	(_IRQ_DESC_CHECK | _IRQ_DESC_PERCPU)
 
+#define for_each_action_of_desc(desc, act)			\
+	for (act = desc->act; act; act = act->next)
+
 struct irq_desc *
 __irq_get_desc_lock(unsigned int irq, unsigned long *flags, bool bus,
 		    unsigned int check);
@@ -160,6 +163,8 @@
 	__irq_put_desc_unlock(desc, flags, false);
 }
 
+#define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors)
+
 /*
  * Manipulation functions for irq_data.state
  */
@@ -188,6 +193,8 @@
 	return __irqd_to_state(d) & mask;
 }
 
+#undef __irqd_to_state
+
 static inline void kstat_incr_irqs_this_cpu(struct irq_desc *desc)
 {
 	__this_cpu_inc(*desc->kstat_irqs);

diff --git a/kernel/irq/ipi.c b/kernel/irq/ipi.c
new file mode 100644
index 0000000..c37f34b
--- /dev/null
+++ b/kernel/irq/ipi.c

@@ -0,0 +1,326 @@
+/*
+ * linux/kernel/irq/ipi.c
+ *
+ * Copyright (C) 2015 Imagination Technologies Ltd
+ * Author: Qais Yousef <qais.yousef@imgtec.com>
+ *
+ * This file contains driver APIs to the IPI subsystem.
+ */
+
+#define pr_fmt(fmt) "genirq/ipi: " fmt
+
+#include <linux/irqdomain.h>
+#include <linux/irq.h>
+
+/**
+ * irq_reserve_ipi() - Setup an IPI to destination cpumask
+ * @domain:	IPI domain
+ * @dest:	cpumask of cpus which can receive the IPI
+ *
+ * Allocate a virq that can be used to send IPI to any CPU in dest mask.
+ *
+ * On success it'll return linux irq number and 0 on failure
+ */
+unsigned int irq_reserve_ipi(struct irq_domain *domain,
+			     const struct cpumask *dest)
+{
+	unsigned int nr_irqs, offset;
+	struct irq_data *data;
+	int virq, i;
+
+	if (!domain ||!irq_domain_is_ipi(domain)) {
+		pr_warn("Reservation on a non IPI domain\n");
+		return 0;
+	}
+
+	if (!cpumask_subset(dest, cpu_possible_mask)) {
+		pr_warn("Reservation is not in possible_cpu_mask\n");
+		return 0;
+	}
+
+	nr_irqs = cpumask_weight(dest);
+	if (!nr_irqs) {
+		pr_warn("Reservation for empty destination mask\n");
+		return 0;
+	}
+
+	if (irq_domain_is_ipi_single(domain)) {
+		/*
+		 * If the underlying implementation uses a single HW irq on
+		 * all cpus then we only need a single Linux irq number for
+		 * it. We have no restrictions vs. the destination mask. The
+		 * underlying implementation can deal with holes nicely.
+		 */
+		nr_irqs = 1;
+		offset = 0;
+	} else {
+		unsigned int next;
+
+		/*
+		 * The IPI requires a seperate HW irq on each CPU. We require
+		 * that the destination mask is consecutive. If an
+		 * implementation needs to support holes, it can reserve
+		 * several IPI ranges.
+		 */
+		offset = cpumask_first(dest);
+		/*
+		 * Find a hole and if found look for another set bit after the
+		 * hole. For now we don't support this scenario.
+		 */
+		next = cpumask_next_zero(offset, dest);
+		if (next < nr_cpu_ids)
+			next = cpumask_next(next, dest);
+		if (next < nr_cpu_ids) {
+			pr_warn("Destination mask has holes\n");
+			return 0;
+		}
+	}
+
+	virq = irq_domain_alloc_descs(-1, nr_irqs, 0, NUMA_NO_NODE);
+	if (virq <= 0) {
+		pr_warn("Can't reserve IPI, failed to alloc descs\n");
+		return 0;
+	}
+
+	virq = __irq_domain_alloc_irqs(domain, virq, nr_irqs, NUMA_NO_NODE,
+				       (void *) dest, true);
+
+	if (virq <= 0) {
+		pr_warn("Can't reserve IPI, failed to alloc hw irqs\n");
+		goto free_descs;
+	}
+
+	for (i = 0; i < nr_irqs; i++) {
+		data = irq_get_irq_data(virq + i);
+		cpumask_copy(data->common->affinity, dest);
+		data->common->ipi_offset = offset;
+	}
+	return virq;
+
+free_descs:
+	irq_free_descs(virq, nr_irqs);
+	return 0;
+}
+
+/**
+ * irq_destroy_ipi() - unreserve an IPI that was previously allocated
+ * @irq:	linux irq number to be destroyed
+ *
+ * Return the IPIs allocated with irq_reserve_ipi() to the system destroying
+ * all virqs associated with them.
+ */
+void irq_destroy_ipi(unsigned int irq)
+{
+	struct irq_data *data = irq_get_irq_data(irq);
+	struct cpumask *ipimask = data ? irq_data_get_affinity_mask(data) : NULL;
+	struct irq_domain *domain;
+	unsigned int nr_irqs;
+
+	if (!irq || !data || !ipimask)
+		return;
+
+	domain = data->domain;
+	if (WARN_ON(domain == NULL))
+		return;
+
+	if (!irq_domain_is_ipi(domain)) {
+		pr_warn("Trying to destroy a non IPI domain!\n");
+		return;
+	}
+
+	if (irq_domain_is_ipi_per_cpu(domain))
+		nr_irqs = cpumask_weight(ipimask);
+	else
+		nr_irqs = 1;
+
+	irq_domain_free_irqs(irq, nr_irqs);
+}
+
+/**
+ * ipi_get_hwirq - Get the hwirq associated with an IPI to a cpu
+ * @irq:	linux irq number
+ * @cpu:	the target cpu
+ *
+ * When dealing with coprocessors IPI, we need to inform the coprocessor of
+ * the hwirq it needs to use to receive and send IPIs.
+ *
+ * Returns hwirq value on success and INVALID_HWIRQ on failure.
+ */
+irq_hw_number_t ipi_get_hwirq(unsigned int irq, unsigned int cpu)
+{
+	struct irq_data *data = irq_get_irq_data(irq);
+	struct cpumask *ipimask = data ? irq_data_get_affinity_mask(data) : NULL;
+
+	if (!data || !ipimask || cpu > nr_cpu_ids)
+		return INVALID_HWIRQ;
+
+	if (!cpumask_test_cpu(cpu, ipimask))
+		return INVALID_HWIRQ;
+
+	/*
+	 * Get the real hardware irq number if the underlying implementation
+	 * uses a seperate irq per cpu. If the underlying implementation uses
+	 * a single hardware irq for all cpus then the IPI send mechanism
+	 * needs to take care of the cpu destinations.
+	 */
+	if (irq_domain_is_ipi_per_cpu(data->domain))
+		data = irq_get_irq_data(irq + cpu - data->common->ipi_offset);
+
+	return data ? irqd_to_hwirq(data) : INVALID_HWIRQ;
+}
+EXPORT_SYMBOL_GPL(ipi_get_hwirq);
+
+static int ipi_send_verify(struct irq_chip *chip, struct irq_data *data,
+			   const struct cpumask *dest, unsigned int cpu)
+{
+	struct cpumask *ipimask = irq_data_get_affinity_mask(data);
+
+	if (!chip || !ipimask)
+		return -EINVAL;
+
+	if (!chip->ipi_send_single && !chip->ipi_send_mask)
+		return -EINVAL;
+
+	if (cpu > nr_cpu_ids)
+		return -EINVAL;
+
+	if (dest) {
+		if (!cpumask_subset(dest, ipimask))
+			return -EINVAL;
+	} else {
+		if (!cpumask_test_cpu(cpu, ipimask))
+			return -EINVAL;
+	}
+	return 0;
+}
+
+/**
+ * __ipi_send_single - send an IPI to a target Linux SMP CPU
+ * @desc:	pointer to irq_desc of the IRQ
+ * @cpu:	destination CPU, must in the destination mask passed to
+ *		irq_reserve_ipi()
+ *
+ * This function is for architecture or core code to speed up IPI sending. Not
+ * usable from driver code.
+ *
+ * Returns zero on success and negative error number on failure.
+ */
+int __ipi_send_single(struct irq_desc *desc, unsigned int cpu)
+{
+	struct irq_data *data = irq_desc_get_irq_data(desc);
+	struct irq_chip *chip = irq_data_get_irq_chip(data);
+
+#ifdef DEBUG
+	/*
+	 * Minimise the overhead by omitting the checks for Linux SMP IPIs.
+	 * Since the callers should be arch or core code which is generally
+	 * trusted, only check for errors when debugging.
+	 */
+	if (WARN_ON_ONCE(ipi_send_verify(chip, data, NULL, cpu)))
+		return -EINVAL;
+#endif
+	if (!chip->ipi_send_single) {
+		chip->ipi_send_mask(data, cpumask_of(cpu));
+		return 0;
+	}
+
+	/* FIXME: Store this information in irqdata flags */
+	if (irq_domain_is_ipi_per_cpu(data->domain) &&
+	    cpu != data->common->ipi_offset) {
+		/* use the correct data for that cpu */
+		unsigned irq = data->irq + cpu - data->common->ipi_offset;
+
+		data = irq_get_irq_data(irq);
+	}
+	chip->ipi_send_single(data, cpu);
+	return 0;
+}
+
+/**
+ * ipi_send_mask - send an IPI to target Linux SMP CPU(s)
+ * @desc:	pointer to irq_desc of the IRQ
+ * @dest:	dest CPU(s), must be a subset of the mask passed to
+ *		irq_reserve_ipi()
+ *
+ * This function is for architecture or core code to speed up IPI sending. Not
+ * usable from driver code.
+ *
+ * Returns zero on success and negative error number on failure.
+ */
+int __ipi_send_mask(struct irq_desc *desc, const struct cpumask *dest)
+{
+	struct irq_data *data = irq_desc_get_irq_data(desc);
+	struct irq_chip *chip = irq_data_get_irq_chip(data);
+	unsigned int cpu;
+
+#ifdef DEBUG
+	/*
+	 * Minimise the overhead by omitting the checks for Linux SMP IPIs.
+	 * Since the callers should be arch or core code which is generally
+	 * trusted, only check for errors when debugging.
+	 */
+	if (WARN_ON_ONCE(ipi_send_verify(chip, data, dest, 0)))
+		return -EINVAL;
+#endif
+	if (chip->ipi_send_mask) {
+		chip->ipi_send_mask(data, dest);
+		return 0;
+	}
+
+	if (irq_domain_is_ipi_per_cpu(data->domain)) {
+		unsigned int base = data->irq;
+
+		for_each_cpu(cpu, dest) {
+			unsigned irq = base + cpu - data->common->ipi_offset;
+
+			data = irq_get_irq_data(irq);
+			chip->ipi_send_single(data, cpu);
+		}
+	} else {
+		for_each_cpu(cpu, dest)
+			chip->ipi_send_single(data, cpu);
+	}
+	return 0;
+}
+
+/**
+ * ipi_send_single - Send an IPI to a single CPU
+ * @virq:	linux irq number from irq_reserve_ipi()
+ * @cpu:	destination CPU, must in the destination mask passed to
+ *		irq_reserve_ipi()
+ *
+ * Returns zero on success and negative error number on failure.
+ */
+int ipi_send_single(unsigned int virq, unsigned int cpu)
+{
+	struct irq_desc *desc = irq_to_desc(virq);
+	struct irq_data *data = desc ? irq_desc_get_irq_data(desc) : NULL;
+	struct irq_chip *chip = data ? irq_data_get_irq_chip(data) : NULL;
+
+	if (WARN_ON_ONCE(ipi_send_verify(chip, data, NULL, cpu)))
+		return -EINVAL;
+
+	return __ipi_send_single(desc, cpu);
+}
+EXPORT_SYMBOL_GPL(ipi_send_single);
+
+/**
+ * ipi_send_mask - Send an IPI to target CPU(s)
+ * @virq:	linux irq number from irq_reserve_ipi()
+ * @dest:	dest CPU(s), must be a subset of the mask passed to
+ *		irq_reserve_ipi()
+ *
+ * Returns zero on success and negative error number on failure.
+ */
+int ipi_send_mask(unsigned int virq, const struct cpumask *dest)
+{
+	struct irq_desc *desc = irq_to_desc(virq);
+	struct irq_data *data = desc ? irq_desc_get_irq_data(desc) : NULL;
+	struct irq_chip *chip = data ? irq_data_get_irq_chip(data) : NULL;
+
+	if (WARN_ON_ONCE(ipi_send_verify(chip, data, dest, 0)))
+		return -EINVAL;
+
+	return __ipi_send_mask(desc, dest);
+}
+EXPORT_SYMBOL_GPL(ipi_send_mask);

diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 0409da0..0ccd028 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c

@@ -24,10 +24,27 @@
 static struct lock_class_key irq_desc_lock_class;
 
 #if defined(CONFIG_SMP)
+static int __init irq_affinity_setup(char *str)
+{
+	zalloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT);
+	cpulist_parse(str, irq_default_affinity);
+	/*
+	 * Set at least the boot cpu. We don't want to end up with
+	 * bugreports caused by random comandline masks
+	 */
+	cpumask_set_cpu(smp_processor_id(), irq_default_affinity);
+	return 1;
+}
+__setup("irqaffinity=", irq_affinity_setup);
+
 static void __init init_irq_default_affinity(void)
 {
-	alloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT);
-	cpumask_setall(irq_default_affinity);
+#ifdef CONFIG_CPUMASK_OFFSTACK
+	if (!irq_default_affinity)
+		zalloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT);
+#endif
+	if (cpumask_empty(irq_default_affinity))
+		cpumask_setall(irq_default_affinity);
 }
 #else
 static void __init init_irq_default_affinity(void)

diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 3e56d2f0..3a519a0 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c

@@ -23,8 +23,6 @@
 static DEFINE_MUTEX(revmap_trees_mutex);
 static struct irq_domain *irq_default_domain;
 
-static int irq_domain_alloc_descs(int virq, unsigned int nr_irqs,
-				  irq_hw_number_t hwirq, int node);
 static void irq_domain_check_hierarchy(struct irq_domain *domain);
 
 struct irqchip_fwid {
@@ -840,8 +838,8 @@
 };
 EXPORT_SYMBOL_GPL(irq_domain_simple_ops);
 
-static int irq_domain_alloc_descs(int virq, unsigned int cnt,
-				  irq_hw_number_t hwirq, int node)
+int irq_domain_alloc_descs(int virq, unsigned int cnt, irq_hw_number_t hwirq,
+			   int node)
 {
 	unsigned int hint;
 
@@ -895,6 +893,7 @@
 
 	return domain;
 }
+EXPORT_SYMBOL_GPL(irq_domain_create_hierarchy);
 
 static void irq_domain_insert_irq(int virq)
 {
@@ -1045,6 +1044,7 @@
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(irq_domain_set_hwirq_and_chip);
 
 /**
  * irq_domain_set_info - Set the complete data for a @virq in @domain
@@ -1078,6 +1078,7 @@
 	irq_data->chip = &no_irq_chip;
 	irq_data->chip_data = NULL;
 }
+EXPORT_SYMBOL_GPL(irq_domain_reset_irq_data);
 
 /**
  * irq_domain_free_irqs_common - Clear irq_data and free the parent
@@ -1275,6 +1276,7 @@
 						       nr_irqs, arg);
 	return -ENOSYS;
 }
+EXPORT_SYMBOL_GPL(irq_domain_alloc_irqs_parent);
 
 /**
  * irq_domain_free_irqs_parent - Free interrupts from parent domain
@@ -1292,6 +1294,7 @@
 		irq_domain_free_irqs_recursive(domain->parent, irq_base,
 					       nr_irqs);
 }
+EXPORT_SYMBOL_GPL(irq_domain_free_irqs_parent);
 
 /**
  * irq_domain_activate_irq - Call domain_ops->activate recursively to activate

diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 8411872..3ddd229 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c

@@ -144,13 +144,11 @@
  */
 void irq_set_thread_affinity(struct irq_desc *desc)
 {
-	struct irqaction *action = desc->action;
+	struct irqaction *action;
 
-	while (action) {
+	for_each_action_of_desc(desc, action)
 		if (action->thread)
 			set_bit(IRQTF_AFFINITY, &action->thread_flags);
-		action = action->next;
-	}
 }
 
 #ifdef CONFIG_GENERIC_PENDING_IRQ
@@ -994,7 +992,7 @@
 		return;
 
 	raw_spin_lock_irqsave(&desc->lock, flags);
-	for (action = desc->action; action; action = action->next) {
+	for_each_action_of_desc(desc, action) {
 		if (action->dev_id == dev_id) {
 			if (action->thread)
 				__irq_wake_thread(desc, action);

diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index a2c02fd..4e1b947 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c

@@ -291,7 +291,7 @@
 	int ret = 1;
 
 	raw_spin_lock_irqsave(&desc->lock, flags);
-	for (action = desc->action ; action; action = action->next) {
+	for_each_action_of_desc(desc, action) {
 		if ((action != new_action) && action->name &&
 				!strcmp(new_action->name, action->name)) {
 			ret = 0;

diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index 3214417..5707f97 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c

@@ -211,14 +211,12 @@
 	 * desc->lock here. See synchronize_irq().
 	 */
 	raw_spin_lock_irqsave(&desc->lock, flags);
-	action = desc->action;
-	while (action) {
+	for_each_action_of_desc(desc, action) {
 		printk(KERN_ERR "[<%p>] %pf", action->handler, action->handler);
 		if (action->thread_fn)
 			printk(KERN_CONT " threaded [<%p>] %pf",
 					action->thread_fn, action->thread_fn);
 		printk(KERN_CONT "\n");
-		action = action->next;
 	}
 	raw_spin_unlock_irqrestore(&desc->lock, flags);
 }

diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index 8dc6591..8d34308 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c

@@ -66,13 +66,15 @@
 	.name  = "Crash kernel",
 	.start = 0,
 	.end   = 0,
-	.flags = IORESOURCE_BUSY | IORESOURCE_MEM
+	.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
+	.desc  = IORES_DESC_CRASH_KERNEL
 };
 struct resource crashk_low_res = {
 	.name  = "Crash kernel",
 	.start = 0,
 	.end   = 0,
-	.flags = IORESOURCE_BUSY | IORESOURCE_MEM
+	.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
+	.desc  = IORES_DESC_CRASH_KERNEL
 };
 
 int kexec_should_crash(struct task_struct *p)
@@ -959,7 +961,7 @@
 
 	ram_res->start = end;
 	ram_res->end = crashk_res.end;
-	ram_res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
+	ram_res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;
 	ram_res->name = "System RAM";
 
 	crashk_res.end = end - 1;

diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index 007b791..56b18eb 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c

@@ -524,10 +524,10 @@
 
 	/* Walk the RAM ranges and allocate a suitable range for the buffer */
 	if (image->type == KEXEC_TYPE_CRASH)
-		ret = walk_iomem_res("Crash kernel",
-				     IORESOURCE_MEM | IORESOURCE_BUSY,
-				     crashk_res.start, crashk_res.end, kbuf,
-				     locate_mem_hole_callback);
+		ret = walk_iomem_res_desc(crashk_res.desc,
+				IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY,
+				crashk_res.start, crashk_res.end, kbuf,
+				locate_mem_hole_callback);
 	else
 		ret = walk_system_ram_res(0, -1, kbuf,
 					  locate_mem_hole_callback);

diff --git a/kernel/latencytop.c b/kernel/latencytop.c
index a028127..b5c30d9 100644
--- a/kernel/latencytop.c
+++ b/kernel/latencytop.c

@@ -47,12 +47,12 @@
  * of times)
  */
 
-#include <linux/latencytop.h>
 #include <linux/kallsyms.h>
 #include <linux/seq_file.h>
 #include <linux/notifier.h>
 #include <linux/spinlock.h>
 #include <linux/proc_fs.h>
+#include <linux/latencytop.h>
 #include <linux/export.h>
 #include <linux/sched.h>
 #include <linux/list.h>
@@ -289,4 +289,16 @@
 	proc_create("latency_stats", 0644, NULL, &lstats_fops);
 	return 0;
 }
+
+int sysctl_latencytop(struct ctl_table *table, int write,
+			void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	int err;
+
+	err = proc_dointvec(table, write, buffer, lenp, ppos);
+	if (latencytop_enabled)
+		force_schedstat_enabled();
+
+	return err;
+}
 device_initcall(init_lstats_procfs);

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 716547f..f894a2c 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c

@@ -123,8 +123,6 @@
 	return ret;
 }
 
-static int lockdep_initialized;
-
 unsigned long nr_list_entries;
 static struct lock_list list_entries[MAX_LOCKDEP_ENTRIES];
 
@@ -434,19 +432,6 @@
 
 #ifdef CONFIG_DEBUG_LOCKDEP
 /*
- * We cannot printk in early bootup code. Not even early_printk()
- * might work. So we mark any initialization errors and printk
- * about it later on, in lockdep_info().
- */
-static int lockdep_init_error;
-static const char *lock_init_error;
-static unsigned long lockdep_init_trace_data[20];
-static struct stack_trace lockdep_init_trace = {
-	.max_entries = ARRAY_SIZE(lockdep_init_trace_data),
-	.entries = lockdep_init_trace_data,
-};
-
-/*
  * Various lockdep statistics:
  */
 DEFINE_PER_CPU(struct lockdep_stats, lockdep_stats);
@@ -669,20 +654,6 @@
 	struct hlist_head *hash_head;
 	struct lock_class *class;
 
-#ifdef CONFIG_DEBUG_LOCKDEP
-	/*
-	 * If the architecture calls into lockdep before initializing
-	 * the hashes then we'll warn about it later. (we cannot printk
-	 * right now)
-	 */
-	if (unlikely(!lockdep_initialized)) {
-		lockdep_init();
-		lockdep_init_error = 1;
-		lock_init_error = lock->name;
-		save_stack_trace(&lockdep_init_trace);
-	}
-#endif
-
 	if (unlikely(subclass >= MAX_LOCKDEP_SUBCLASSES)) {
 		debug_locks_off();
 		printk(KERN_ERR
@@ -2011,6 +1982,53 @@
 }
 
 /*
+ * Returns the index of the first held_lock of the current chain
+ */
+static inline int get_first_held_lock(struct task_struct *curr,
+					struct held_lock *hlock)
+{
+	int i;
+	struct held_lock *hlock_curr;
+
+	for (i = curr->lockdep_depth - 1; i >= 0; i--) {
+		hlock_curr = curr->held_locks + i;
+		if (hlock_curr->irq_context != hlock->irq_context)
+			break;
+
+	}
+
+	return ++i;
+}
+
+/*
+ * Checks whether the chain and the current held locks are consistent
+ * in depth and also in content. If they are not it most likely means
+ * that there was a collision during the calculation of the chain_key.
+ * Returns: 0 not passed, 1 passed
+ */
+static int check_no_collision(struct task_struct *curr,
+			struct held_lock *hlock,
+			struct lock_chain *chain)
+{
+#ifdef CONFIG_DEBUG_LOCKDEP
+	int i, j, id;
+
+	i = get_first_held_lock(curr, hlock);
+
+	if (DEBUG_LOCKS_WARN_ON(chain->depth != curr->lockdep_depth - (i - 1)))
+		return 0;
+
+	for (j = 0; j < chain->depth - 1; j++, i++) {
+		id = curr->held_locks[i].class_idx - 1;
+
+		if (DEBUG_LOCKS_WARN_ON(chain_hlocks[chain->base + j] != id))
+			return 0;
+	}
+#endif
+	return 1;
+}
+
+/*
  * Look up a dependency chain. If the key is not present yet then
  * add it and return 1 - in this case the new dependency chain is
  * validated. If the key is already hashed, return 0.
@@ -2023,7 +2041,6 @@
 	struct lock_class *class = hlock_class(hlock);
 	struct hlist_head *hash_head = chainhashentry(chain_key);
 	struct lock_chain *chain;
-	struct held_lock *hlock_curr;
 	int i, j;
 
 	/*
@@ -2041,6 +2058,9 @@
 		if (chain->chain_key == chain_key) {
 cache_hit:
 			debug_atomic_inc(chain_lookup_hits);
+			if (!check_no_collision(curr, hlock, chain))
+				return 0;
+
 			if (very_verbose(class))
 				printk("\nhash chain already cached, key: "
 					"%016Lx tail class: [%p] %s\n",
@@ -2078,13 +2098,7 @@
 	chain = lock_chains + nr_lock_chains++;
 	chain->chain_key = chain_key;
 	chain->irq_context = hlock->irq_context;
-	/* Find the first held_lock of current chain */
-	for (i = curr->lockdep_depth - 1; i >= 0; i--) {
-		hlock_curr = curr->held_locks + i;
-		if (hlock_curr->irq_context != hlock->irq_context)
-			break;
-	}
-	i++;
+	i = get_first_held_lock(curr, hlock);
 	chain->depth = curr->lockdep_depth + 1 - i;
 	if (likely(nr_chain_hlocks + chain->depth <= MAX_LOCKDEP_CHAIN_HLOCKS)) {
 		chain->base = nr_chain_hlocks;
@@ -2172,7 +2186,7 @@
 {
 #ifdef CONFIG_DEBUG_LOCKDEP
 	struct held_lock *hlock, *prev_hlock = NULL;
-	unsigned int i, id;
+	unsigned int i;
 	u64 chain_key = 0;
 
 	for (i = 0; i < curr->lockdep_depth; i++) {
@@ -2189,17 +2203,16 @@
 				(unsigned long long)hlock->prev_chain_key);
 			return;
 		}
-		id = hlock->class_idx - 1;
 		/*
 		 * Whoops ran out of static storage again?
 		 */
-		if (DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS))
+		if (DEBUG_LOCKS_WARN_ON(hlock->class_idx > MAX_LOCKDEP_KEYS))
 			return;
 
 		if (prev_hlock && (prev_hlock->irq_context !=
 							hlock->irq_context))
 			chain_key = 0;
-		chain_key = iterate_chain_key(chain_key, id);
+		chain_key = iterate_chain_key(chain_key, hlock->class_idx);
 		prev_hlock = hlock;
 	}
 	if (chain_key != curr->curr_chain_key) {
@@ -3077,7 +3090,7 @@
 	struct task_struct *curr = current;
 	struct lock_class *class = NULL;
 	struct held_lock *hlock;
-	unsigned int depth, id;
+	unsigned int depth;
 	int chain_head = 0;
 	int class_idx;
 	u64 chain_key;
@@ -3180,11 +3193,10 @@
 	 * The 'key ID' is what is the most compact key value to drive
 	 * the hash, not class->key.
 	 */
-	id = class - lock_classes;
 	/*
 	 * Whoops, we did it again.. ran straight out of our static allocation.
 	 */
-	if (DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS))
+	if (DEBUG_LOCKS_WARN_ON(class_idx > MAX_LOCKDEP_KEYS))
 		return 0;
 
 	chain_key = curr->curr_chain_key;
@@ -3202,7 +3214,7 @@
 		chain_key = 0;
 		chain_head = 1;
 	}
-	chain_key = iterate_chain_key(chain_key, id);
+	chain_key = iterate_chain_key(chain_key, class_idx);
 
 	if (nest_lock && !__lock_is_held(nest_lock))
 		return print_lock_nested_lock_not_held(curr, hlock, ip);
@@ -4013,28 +4025,6 @@
 	raw_local_irq_restore(flags);
 }
 
-void lockdep_init(void)
-{
-	int i;
-
-	/*
-	 * Some architectures have their own start_kernel()
-	 * code which calls lockdep_init(), while we also
-	 * call lockdep_init() from the start_kernel() itself,
-	 * and we want to initialize the hashes only once:
-	 */
-	if (lockdep_initialized)
-		return;
-
-	for (i = 0; i < CLASSHASH_SIZE; i++)
-		INIT_HLIST_HEAD(classhash_table + i);
-
-	for (i = 0; i < CHAINHASH_SIZE; i++)
-		INIT_HLIST_HEAD(chainhash_table + i);
-
-	lockdep_initialized = 1;
-}
-
 void __init lockdep_info(void)
 {
 	printk("Lock dependency validator: Copyright (c) 2006 Red Hat, Inc., Ingo Molnar\n");
@@ -4061,14 +4051,6 @@
 
 	printk(" per task-struct memory footprint: %lu bytes\n",
 		sizeof(struct held_lock) * MAX_LOCK_DEPTH);
-
-#ifdef CONFIG_DEBUG_LOCKDEP
-	if (lockdep_init_error) {
-		printk("WARNING: lockdep init error: lock '%s' was acquired before lockdep_init().\n", lock_init_error);
-		printk("Call stack leading to lockdep invocation was:\n");
-		print_stack_trace(&lockdep_init_trace, 0);
-	}
-#endif
 }
 
 static void

diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h
index 5b9102a..c835270 100644
--- a/kernel/locking/mcs_spinlock.h
+++ b/kernel/locking/mcs_spinlock.h

@@ -67,7 +67,13 @@
 	node->locked = 0;
 	node->next   = NULL;
 
-	prev = xchg_acquire(lock, node);
+	/*
+	 * We rely on the full barrier with global transitivity implied by the
+	 * below xchg() to order the initialization stores above against any
+	 * observation of @node. And to provide the ACQUIRE ordering associated
+	 * with a LOCK primitive.
+	 */
+	prev = xchg(lock, node);
 	if (likely(prev == NULL)) {
 		/*
 		 * Lock acquired, don't need to set node->locked to 1. Threads

diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 0551c21..e364b42 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c

@@ -716,6 +716,7 @@
 __mutex_unlock_common_slowpath(struct mutex *lock, int nested)
 {
 	unsigned long flags;
+	WAKE_Q(wake_q);
 
 	/*
 	 * As a performance measurement, release the lock before doing other
@@ -743,11 +744,11 @@
 					   struct mutex_waiter, list);
 
 		debug_mutex_wake_waiter(lock, waiter);
-
-		wake_up_process(waiter->task);
+		wake_q_add(&wake_q, waiter->task);
 	}
 
 	spin_unlock_mutex(&lock->wait_lock, flags);
+	wake_up_q(&wake_q);
 }
 
 /*

diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index 393d187..ce2f75e 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c

@@ -358,8 +358,7 @@
 	 * sequentiality; this is because not all clear_pending_set_locked()
 	 * implementations imply full barriers.
 	 */
-	while ((val = smp_load_acquire(&lock->val.counter)) & _Q_LOCKED_MASK)
-		cpu_relax();
+	smp_cond_acquire(!(atomic_read(&lock->val) & _Q_LOCKED_MASK));
 
 	/*
 	 * take ownership and clear the pending bit.
@@ -435,7 +434,7 @@
 	 *
 	 * The PV pv_wait_head_or_lock function, if active, will acquire
 	 * the lock and return a non-zero value. So we have to skip the
-	 * smp_load_acquire() call. As the next PV queue head hasn't been
+	 * smp_cond_acquire() call. As the next PV queue head hasn't been
 	 * designated yet, there is no way for the locked value to become
 	 * _Q_SLOW_VAL. So both the set_locked() and the
 	 * atomic_cmpxchg_relaxed() calls will be safe.
@@ -466,7 +465,7 @@
 			break;
 		}
 		/*
-		 * The smp_load_acquire() call above has provided the necessary
+		 * The smp_cond_acquire() call above has provided the necessary
 		 * acquire semantics required for locking. At most two
 		 * iterations of this loop may be ran.
 		 */

diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index 87bb235..21ede57 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h

@@ -55,6 +55,11 @@
 };
 
 /*
+ * Include queued spinlock statistics code
+ */
+#include "qspinlock_stat.h"
+
+/*
  * By replacing the regular queued_spin_trylock() with the function below,
  * it will be called once when a lock waiter enter the PV slowpath before
  * being queued. By allowing one lock stealing attempt here when the pending
@@ -65,9 +70,11 @@
 static inline bool pv_queued_spin_steal_lock(struct qspinlock *lock)
 {
 	struct __qspinlock *l = (void *)lock;
+	int ret = !(atomic_read(&lock->val) & _Q_LOCKED_PENDING_MASK) &&
+		   (cmpxchg(&l->locked, 0, _Q_LOCKED_VAL) == 0);
 
-	return !(atomic_read(&lock->val) & _Q_LOCKED_PENDING_MASK) &&
-		(cmpxchg(&l->locked, 0, _Q_LOCKED_VAL) == 0);
+	qstat_inc(qstat_pv_lock_stealing, ret);
+	return ret;
 }
 
 /*
@@ -138,11 +145,6 @@
 #endif /* _Q_PENDING_BITS == 8 */
 
 /*
- * Include queued spinlock statistics code
- */
-#include "qspinlock_stat.h"
-
-/*
  * Lock and MCS node addresses hash table for fast lookup
  *
  * Hashing is done on a per-cacheline basis to minimize the need to access
@@ -398,6 +400,11 @@
 	if (READ_ONCE(pn->state) == vcpu_hashed)
 		lp = (struct qspinlock **)1;
 
+	/*
+	 * Tracking # of slowpath locking operations
+	 */
+	qstat_inc(qstat_pv_lock_slowpath, true);
+
 	for (;; waitcnt++) {
 		/*
 		 * Set correct vCPU state to be used by queue node wait-early

diff --git a/kernel/locking/qspinlock_stat.h b/kernel/locking/qspinlock_stat.h
index 640dcec..eb2a2c9 100644
--- a/kernel/locking/qspinlock_stat.h
+++ b/kernel/locking/qspinlock_stat.h

@@ -22,6 +22,7 @@
  *   pv_kick_wake	- # of vCPU kicks used for computing pv_latency_wake
  *   pv_latency_kick	- average latency (ns) of vCPU kick operation
  *   pv_latency_wake	- average latency (ns) from vCPU kick to wakeup
+ *   pv_lock_slowpath	- # of locking operations via the slowpath
  *   pv_lock_stealing	- # of lock stealing operations
  *   pv_spurious_wakeup	- # of spurious wakeups
  *   pv_wait_again	- # of vCPU wait's that happened after a vCPU kick
@@ -45,6 +46,7 @@
 	qstat_pv_kick_wake,
 	qstat_pv_latency_kick,
 	qstat_pv_latency_wake,
+	qstat_pv_lock_slowpath,
 	qstat_pv_lock_stealing,
 	qstat_pv_spurious_wakeup,
 	qstat_pv_wait_again,
@@ -70,6 +72,7 @@
 	[qstat_pv_spurious_wakeup] = "pv_spurious_wakeup",
 	[qstat_pv_latency_kick]	   = "pv_latency_kick",
 	[qstat_pv_latency_wake]    = "pv_latency_wake",
+	[qstat_pv_lock_slowpath]   = "pv_lock_slowpath",
 	[qstat_pv_lock_stealing]   = "pv_lock_stealing",
 	[qstat_pv_wait_again]      = "pv_wait_again",
 	[qstat_pv_wait_early]      = "pv_wait_early",
@@ -279,19 +282,6 @@
 #define pv_kick(c)	__pv_kick(c)
 #define pv_wait(p, v)	__pv_wait(p, v)
 
-/*
- * PV unfair trylock count tracking function
- */
-static inline int qstat_spin_steal_lock(struct qspinlock *lock)
-{
-	int ret = pv_queued_spin_steal_lock(lock);
-
-	qstat_inc(qstat_pv_lock_stealing, ret);
-	return ret;
-}
-#undef  queued_spin_trylock
-#define queued_spin_trylock(l)	qstat_spin_steal_lock(l)
-
 #else /* CONFIG_QUEUED_LOCK_STAT */
 
 static inline void qstat_inc(enum qlock_stats stat, bool cond)	{ }

diff --git a/kernel/memremap.c b/kernel/memremap.c
index 7a1b5c3..fb9b887 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c

@@ -29,10 +29,10 @@
 
 static void *try_ram_remap(resource_size_t offset, size_t size)
 {
-	struct page *page = pfn_to_page(offset >> PAGE_SHIFT);
+	unsigned long pfn = PHYS_PFN(offset);
 
 	/* In the simple case just return the existing linear address */
-	if (!PageHighMem(page))
+	if (pfn_valid(pfn) && !PageHighMem(pfn_to_page(pfn)))
 		return __va(offset);
 	return NULL; /* fallback to ioremap_cache */
 }
@@ -47,7 +47,7 @@
  * being mapped does not have i/o side effects and the __iomem
  * annotation is not applicable.
  *
- * MEMREMAP_WB - matches the default mapping for "System RAM" on
+ * MEMREMAP_WB - matches the default mapping for System RAM on
  * the architecture.  This is usually a read-allocate write-back cache.
  * Morever, if MEMREMAP_WB is specified and the requested remap region is RAM
  * memremap() will bypass establishing a new mapping and instead return
@@ -56,11 +56,12 @@
  * MEMREMAP_WT - establish a mapping whereby writes either bypass the
  * cache or are written through to memory and never exist in a
  * cache-dirty state with respect to program visibility.  Attempts to
- * map "System RAM" with this mapping type will fail.
+ * map System RAM with this mapping type will fail.
  */
 void *memremap(resource_size_t offset, size_t size, unsigned long flags)
 {
-	int is_ram = region_intersects(offset, size, "System RAM");
+	int is_ram = region_intersects(offset, size,
+				       IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE);
 	void *addr = NULL;
 
 	if (is_ram == REGION_MIXED) {
@@ -76,7 +77,7 @@
 		 * MEMREMAP_WB is special in that it can be satisifed
 		 * from the direct map.  Some archs depend on the
 		 * capability of memremap() to autodetect cases where
-		 * the requested range is potentially in "System RAM"
+		 * the requested range is potentially in System RAM.
 		 */
 		if (is_ram == REGION_INTERSECTS)
 			addr = try_ram_remap(offset, size);
@@ -88,7 +89,7 @@
 	 * If we don't have a mapping yet and more request flags are
 	 * pending then we will be attempting to establish a new virtual
 	 * address mapping.  Enforce that this mapping is not aliasing
-	 * "System RAM"
+	 * System RAM.
 	 */
 	if (!addr && is_ram == REGION_INTERSECTS && flags) {
 		WARN_ONCE(1, "memremap attempted on ram %pa size: %#lx\n",
@@ -136,8 +137,10 @@
 	if (addr) {
 		*ptr = addr;
 		devres_add(dev, ptr);
-	} else
+	} else {
 		devres_free(ptr);
+		return ERR_PTR(-ENXIO);
+	}
 
 	return addr;
 }
@@ -268,13 +271,17 @@
 void *devm_memremap_pages(struct device *dev, struct resource *res,
 		struct percpu_ref *ref, struct vmem_altmap *altmap)
 {
-	int is_ram = region_intersects(res->start, resource_size(res),
-			"System RAM");
 	resource_size_t key, align_start, align_size, align_end;
 	struct dev_pagemap *pgmap;
 	struct page_map *page_map;
+	int error, nid, is_ram;
 	unsigned long pfn;
-	int error, nid;
+
+	align_start = res->start & ~(SECTION_SIZE - 1);
+	align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE)
+		- align_start;
+	is_ram = region_intersects(align_start, align_size,
+		IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE);
 
 	if (is_ram == REGION_MIXED) {
 		WARN_ONCE(1, "%s attempted on mixed region %pr\n",
@@ -312,8 +319,6 @@
 
 	mutex_lock(&pgmap_lock);
 	error = 0;
-	align_start = res->start & ~(SECTION_SIZE - 1);
-	align_size = ALIGN(resource_size(res), SECTION_SIZE);
 	align_end = align_start + align_size - 1;
 	for (key = align_start; key <= align_end; key += SECTION_SIZE) {
 		struct dev_pagemap *dup;
@@ -349,8 +354,13 @@
 	for_each_device_pfn(pfn, page_map) {
 		struct page *page = pfn_to_page(pfn);
 
-		/* ZONE_DEVICE pages must never appear on a slab lru */
-		list_force_poison(&page->lru);
+		/*
+		 * ZONE_DEVICE pages union ->lru with a ->pgmap back
+		 * pointer.  It is a bug if a ZONE_DEVICE page is ever
+		 * freed or placed on a driver-private list.  Seed the
+		 * storage with LIST_POISON* values.
+		 */
+		list_del(&page->lru);
 		page->pgmap = pgmap;
 	}
 	devres_add(dev, page_map);

diff --git a/kernel/profile.c b/kernel/profile.c
index 99513e1..5136969 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c

@@ -59,6 +59,7 @@
 
 	if (!strncmp(str, sleepstr, strlen(sleepstr))) {
 #ifdef CONFIG_SCHEDSTATS
+		force_schedstat_enabled();
 		prof_on = SLEEP_PROFILING;
 		if (str[strlen(sleepstr)] == ',')
 			str += strlen(sleepstr) + 1;

diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index d2988d0..65ae0e5 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c

@@ -932,12 +932,14 @@
 	int nsynctypes = 0;
 
 	VERBOSE_TOROUT_STRING("rcu_torture_writer task started");
-	pr_alert("%s" TORTURE_FLAG
-		 " Grace periods expedited from boot/sysfs for %s,\n",
-		 torture_type, cur_ops->name);
-	pr_alert("%s" TORTURE_FLAG
-		 " Testing of dynamic grace-period expediting diabled.\n",
-		 torture_type);
+	if (!can_expedite) {
+		pr_alert("%s" TORTURE_FLAG
+			 " Grace periods expedited from boot/sysfs for %s,\n",
+			 torture_type, cur_ops->name);
+		pr_alert("%s" TORTURE_FLAG
+			 " Disabled dynamic grace-period expediting.\n",
+			 torture_type);
+	}
 
 	/* Initialize synctype[] array.  If none set, take default. */
 	if (!gp_cond1 && !gp_exp1 && !gp_normal1 && !gp_sync1)

diff --git a/kernel/rcu/tiny_plugin.h b/kernel/rcu/tiny_plugin.h
index e492a52..196f030 100644
--- a/kernel/rcu/tiny_plugin.h
+++ b/kernel/rcu/tiny_plugin.h

@@ -23,7 +23,7 @@
  */
 
 #include <linux/kthread.h>
-#include <linux/module.h>
+#include <linux/init.h>
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
 
@@ -122,18 +122,7 @@
 	debugfs_remove_recursive(rcudir);
 	return 1;
 }
-
-static void __exit rcutiny_trace_cleanup(void)
-{
-	debugfs_remove_recursive(rcudir);
-}
-
-module_init(rcutiny_trace_init);
-module_exit(rcutiny_trace_cleanup);
-
-MODULE_AUTHOR("Paul E. McKenney");
-MODULE_DESCRIPTION("Read-Copy Update tracing for tiny implementation");
-MODULE_LICENSE("GPL");
+device_initcall(rcutiny_trace_init);
 
 static void check_cpu_stall(struct rcu_ctrlblk *rcp)
 {

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index e41dd413..9a535a8 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c

@@ -108,7 +108,6 @@
 RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh);
 
 static struct rcu_state *const rcu_state_p;
-static struct rcu_data __percpu *const rcu_data_p;
 LIST_HEAD(rcu_struct_flavors);
 
 /* Dump rcu_node combining tree at boot to verify correct setup. */
@@ -1083,13 +1082,12 @@
 	rcu_sysidle_check_cpu(rdp, isidle, maxj);
 	if ((rdp->dynticks_snap & 0x1) == 0) {
 		trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti"));
-		return 1;
-	} else {
 		if (ULONG_CMP_LT(READ_ONCE(rdp->gpnum) + ULONG_MAX / 4,
 				 rdp->mynode->gpnum))
 			WRITE_ONCE(rdp->gpwrap, true);
-		return 0;
+		return 1;
 	}
+	return 0;
 }
 
 /*
@@ -1173,15 +1171,16 @@
 			smp_mb(); /* ->cond_resched_completed before *rcrmp. */
 			WRITE_ONCE(*rcrmp,
 				   READ_ONCE(*rcrmp) + rdp->rsp->flavor_mask);
-			resched_cpu(rdp->cpu);  /* Force CPU into scheduler. */
-			rdp->rsp->jiffies_resched += 5; /* Enable beating. */
-		} else if (ULONG_CMP_GE(jiffies, rdp->rsp->jiffies_resched)) {
-			/* Time to beat on that CPU again! */
-			resched_cpu(rdp->cpu);  /* Force CPU into scheduler. */
-			rdp->rsp->jiffies_resched += 5; /* Re-enable beating. */
 		}
+		rdp->rsp->jiffies_resched += 5; /* Re-enable beating. */
 	}
 
+	/* And if it has been a really long time, kick the CPU as well. */
+	if (ULONG_CMP_GE(jiffies,
+			 rdp->rsp->gp_start + 2 * jiffies_till_sched_qs) ||
+	    ULONG_CMP_GE(jiffies, rdp->rsp->gp_start + jiffies_till_sched_qs))
+		resched_cpu(rdp->cpu);  /* Force CPU into scheduler. */
+
 	return 0;
 }
 
@@ -1246,7 +1245,7 @@
 				if (rnp->qsmask & (1UL << cpu))
 					dump_cpu_task(rnp->grplo + cpu);
 		}
-		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 	}
 }
 
@@ -1266,12 +1265,12 @@
 	raw_spin_lock_irqsave_rcu_node(rnp, flags);
 	delta = jiffies - READ_ONCE(rsp->jiffies_stall);
 	if (delta < RCU_STALL_RAT_DELAY || !rcu_gp_in_progress(rsp)) {
-		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 		return;
 	}
 	WRITE_ONCE(rsp->jiffies_stall,
 		   jiffies + 3 * rcu_jiffies_till_stall_check() + 3);
-	raw_spin_unlock_irqrestore(&rnp->lock, flags);
+	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 
 	/*
 	 * OK, time to rat on our buddy...
@@ -1292,7 +1291,7 @@
 					ndetected++;
 				}
 		}
-		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 	}
 
 	print_cpu_stall_info_end();
@@ -1357,7 +1356,7 @@
 	if (ULONG_CMP_GE(jiffies, READ_ONCE(rsp->jiffies_stall)))
 		WRITE_ONCE(rsp->jiffies_stall,
 			   jiffies + 3 * rcu_jiffies_till_stall_check() + 3);
-	raw_spin_unlock_irqrestore(&rnp->lock, flags);
+	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 
 	/*
 	 * Attempt to revive the RCU machinery by forcing a context switch.
@@ -1595,7 +1594,7 @@
 	}
 unlock_out:
 	if (rnp != rnp_root)
-		raw_spin_unlock(&rnp_root->lock);
+		raw_spin_unlock_rcu_node(rnp_root);
 out:
 	if (c_out != NULL)
 		*c_out = c;
@@ -1614,7 +1613,6 @@
 	int needmore;
 	struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
 
-	rcu_nocb_gp_cleanup(rsp, rnp);
 	rnp->need_future_gp[c & 0x1] = 0;
 	needmore = rnp->need_future_gp[(c + 1) & 0x1];
 	trace_rcu_future_gp(rnp, rdp, c,
@@ -1635,7 +1633,7 @@
 	    !READ_ONCE(rsp->gp_flags) ||
 	    !rsp->gp_kthread)
 		return;
-	wake_up(&rsp->gp_wq);
+	swake_up(&rsp->gp_wq);
 }
 
 /*
@@ -1815,7 +1813,7 @@
 		return;
 	}
 	needwake = __note_gp_changes(rsp, rnp, rdp);
-	raw_spin_unlock_irqrestore(&rnp->lock, flags);
+	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 	if (needwake)
 		rcu_gp_kthread_wake(rsp);
 }
@@ -1840,7 +1838,7 @@
 	raw_spin_lock_irq_rcu_node(rnp);
 	if (!READ_ONCE(rsp->gp_flags)) {
 		/* Spurious wakeup, tell caller to go back to sleep.  */
-		raw_spin_unlock_irq(&rnp->lock);
+		raw_spin_unlock_irq_rcu_node(rnp);
 		return false;
 	}
 	WRITE_ONCE(rsp->gp_flags, 0); /* Clear all flags: New grace period. */
@@ -1850,7 +1848,7 @@
 		 * Grace period already in progress, don't start another.
 		 * Not supposed to be able to happen.
 		 */
-		raw_spin_unlock_irq(&rnp->lock);
+		raw_spin_unlock_irq_rcu_node(rnp);
 		return false;
 	}
 
@@ -1859,7 +1857,7 @@
 	/* Record GP times before starting GP, hence smp_store_release(). */
 	smp_store_release(&rsp->gpnum, rsp->gpnum + 1);
 	trace_rcu_grace_period(rsp->name, rsp->gpnum, TPS("start"));
-	raw_spin_unlock_irq(&rnp->lock);
+	raw_spin_unlock_irq_rcu_node(rnp);
 
 	/*
 	 * Apply per-leaf buffered online and offline operations to the
@@ -1873,7 +1871,7 @@
 		if (rnp->qsmaskinit == rnp->qsmaskinitnext &&
 		    !rnp->wait_blkd_tasks) {
 			/* Nothing to do on this leaf rcu_node structure. */
-			raw_spin_unlock_irq(&rnp->lock);
+			raw_spin_unlock_irq_rcu_node(rnp);
 			continue;
 		}
 
@@ -1907,7 +1905,7 @@
 			rcu_cleanup_dead_rnp(rnp);
 		}
 
-		raw_spin_unlock_irq(&rnp->lock);
+		raw_spin_unlock_irq_rcu_node(rnp);
 	}
 
 	/*
@@ -1938,7 +1936,7 @@
 		trace_rcu_grace_period_init(rsp->name, rnp->gpnum,
 					    rnp->level, rnp->grplo,
 					    rnp->grphi, rnp->qsmask);
-		raw_spin_unlock_irq(&rnp->lock);
+		raw_spin_unlock_irq_rcu_node(rnp);
 		cond_resched_rcu_qs();
 		WRITE_ONCE(rsp->gp_activity, jiffies);
 	}
@@ -1996,7 +1994,7 @@
 		raw_spin_lock_irq_rcu_node(rnp);
 		WRITE_ONCE(rsp->gp_flags,
 			   READ_ONCE(rsp->gp_flags) & ~RCU_GP_FLAG_FQS);
-		raw_spin_unlock_irq(&rnp->lock);
+		raw_spin_unlock_irq_rcu_node(rnp);
 	}
 }
 
@@ -2010,6 +2008,7 @@
 	int nocb = 0;
 	struct rcu_data *rdp;
 	struct rcu_node *rnp = rcu_get_root(rsp);
+	struct swait_queue_head *sq;
 
 	WRITE_ONCE(rsp->gp_activity, jiffies);
 	raw_spin_lock_irq_rcu_node(rnp);
@@ -2025,7 +2024,7 @@
 	 * safe for us to drop the lock in order to mark the grace
 	 * period as completed in all of the rcu_node structures.
 	 */
-	raw_spin_unlock_irq(&rnp->lock);
+	raw_spin_unlock_irq_rcu_node(rnp);
 
 	/*
 	 * Propagate new ->completed value to rcu_node structures so
@@ -2046,7 +2045,9 @@
 			needgp = __note_gp_changes(rsp, rnp, rdp) || needgp;
 		/* smp_mb() provided by prior unlock-lock pair. */
 		nocb += rcu_future_gp_cleanup(rsp, rnp);
-		raw_spin_unlock_irq(&rnp->lock);
+		sq = rcu_nocb_gp_get(rnp);
+		raw_spin_unlock_irq_rcu_node(rnp);
+		rcu_nocb_gp_cleanup(sq);
 		cond_resched_rcu_qs();
 		WRITE_ONCE(rsp->gp_activity, jiffies);
 		rcu_gp_slow(rsp, gp_cleanup_delay);
@@ -2068,7 +2069,7 @@
 				       READ_ONCE(rsp->gpnum),
 				       TPS("newreq"));
 	}
-	raw_spin_unlock_irq(&rnp->lock);
+	raw_spin_unlock_irq_rcu_node(rnp);
 }
 
 /*
@@ -2092,7 +2093,7 @@
 					       READ_ONCE(rsp->gpnum),
 					       TPS("reqwait"));
 			rsp->gp_state = RCU_GP_WAIT_GPS;
-			wait_event_interruptible(rsp->gp_wq,
+			swait_event_interruptible(rsp->gp_wq,
 						 READ_ONCE(rsp->gp_flags) &
 						 RCU_GP_FLAG_INIT);
 			rsp->gp_state = RCU_GP_DONE_GPS;
@@ -2122,7 +2123,7 @@
 					       READ_ONCE(rsp->gpnum),
 					       TPS("fqswait"));
 			rsp->gp_state = RCU_GP_WAIT_FQS;
-			ret = wait_event_interruptible_timeout(rsp->gp_wq,
+			ret = swait_event_interruptible_timeout(rsp->gp_wq,
 					rcu_gp_fqs_check_wake(rsp, &gf), j);
 			rsp->gp_state = RCU_GP_DOING_FQS;
 			/* Locking provides needed memory barriers. */
@@ -2234,19 +2235,21 @@
 }
 
 /*
- * Report a full set of quiescent states to the specified rcu_state
- * data structure.  This involves cleaning up after the prior grace
- * period and letting rcu_start_gp() start up the next grace period
- * if one is needed.  Note that the caller must hold rnp->lock, which
- * is released before return.
+ * Report a full set of quiescent states to the specified rcu_state data
+ * structure.  Invoke rcu_gp_kthread_wake() to awaken the grace-period
+ * kthread if another grace period is required.  Whether we wake
+ * the grace-period kthread or it awakens itself for the next round
+ * of quiescent-state forcing, that kthread will clean up after the
+ * just-completed grace period.  Note that the caller must hold rnp->lock,
+ * which is released before return.
  */
 static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
 	__releases(rcu_get_root(rsp)->lock)
 {
 	WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
 	WRITE_ONCE(rsp->gp_flags, READ_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS);
-	raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags);
-	rcu_gp_kthread_wake(rsp);
+	raw_spin_unlock_irqrestore_rcu_node(rcu_get_root(rsp), flags);
+	swake_up(&rsp->gp_wq);  /* Memory barrier implied by swake_up() path. */
 }
 
 /*
@@ -2275,7 +2278,7 @@
 			 * Our bit has already been cleared, or the
 			 * relevant grace period is already over, so done.
 			 */
-			raw_spin_unlock_irqrestore(&rnp->lock, flags);
+			raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 			return;
 		}
 		WARN_ON_ONCE(oldmask); /* Any child must be all zeroed! */
@@ -2287,7 +2290,7 @@
 		if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) {
 
 			/* Other bits still set at this level, so done. */
-			raw_spin_unlock_irqrestore(&rnp->lock, flags);
+			raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 			return;
 		}
 		mask = rnp->grpmask;
@@ -2297,7 +2300,7 @@
 
 			break;
 		}
-		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 		rnp_c = rnp;
 		rnp = rnp->parent;
 		raw_spin_lock_irqsave_rcu_node(rnp, flags);
@@ -2329,7 +2332,7 @@
 
 	if (rcu_state_p == &rcu_sched_state || rsp != rcu_state_p ||
 	    rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) {
-		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 		return;  /* Still need more quiescent states! */
 	}
 
@@ -2346,19 +2349,14 @@
 	/* Report up the rest of the hierarchy, tracking current ->gpnum. */
 	gps = rnp->gpnum;
 	mask = rnp->grpmask;
-	raw_spin_unlock(&rnp->lock);	/* irqs remain disabled. */
+	raw_spin_unlock_rcu_node(rnp);	/* irqs remain disabled. */
 	raw_spin_lock_rcu_node(rnp_p);	/* irqs already disabled. */
 	rcu_report_qs_rnp(mask, rsp, rnp_p, gps, flags);
 }
 
 /*
  * Record a quiescent state for the specified CPU to that CPU's rcu_data
- * structure.  This must be either called from the specified CPU, or
- * called when the specified CPU is known to be offline (and when it is
- * also known that no other CPU is concurrently trying to help the offline
- * CPU).  The lastcomp argument is used to make sure we are still in the
- * grace period of interest.  We don't want to end the current grace period
- * based on quiescent states detected in an earlier grace period!
+ * structure.  This must be called from the specified CPU.
  */
 static void
 rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
@@ -2383,14 +2381,14 @@
 		 */
 		rdp->cpu_no_qs.b.norm = true;	/* need qs for new gp. */
 		rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr);
-		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 		return;
 	}
 	mask = rdp->grpmask;
 	if ((rnp->qsmask & mask) == 0) {
-		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 	} else {
-		rdp->core_needs_qs = 0;
+		rdp->core_needs_qs = false;
 
 		/*
 		 * This GP can't end until cpu checks in, so all of our
@@ -2599,36 +2597,15 @@
 		rnp->qsmaskinit &= ~mask;
 		rnp->qsmask &= ~mask;
 		if (rnp->qsmaskinit) {
-			raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
+			raw_spin_unlock_rcu_node(rnp);
+			/* irqs remain disabled. */
 			return;
 		}
-		raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
+		raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */
 	}
 }
 
 /*
- * The CPU is exiting the idle loop into the arch_cpu_idle_dead()
- * function.  We now remove it from the rcu_node tree's ->qsmaskinit
- * bit masks.
- */
-static void rcu_cleanup_dying_idle_cpu(int cpu, struct rcu_state *rsp)
-{
-	unsigned long flags;
-	unsigned long mask;
-	struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
-	struct rcu_node *rnp = rdp->mynode;  /* Outgoing CPU's rdp & rnp. */
-
-	if (!IS_ENABLED(CONFIG_HOTPLUG_CPU))
-		return;
-
-	/* Remove outgoing CPU from mask in the leaf rcu_node structure. */
-	mask = rdp->grpmask;
-	raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */
-	rnp->qsmaskinitnext &= ~mask;
-	raw_spin_unlock_irqrestore(&rnp->lock, flags);
-}
-
-/*
  * The CPU has been completely removed, and some other CPU is reporting
  * this fact from process context.  Do the remainder of the cleanup,
  * including orphaning the outgoing CPU's RCU callbacks, and also
@@ -2859,7 +2836,7 @@
 			rcu_report_qs_rnp(mask, rsp, rnp, rnp->gpnum, flags);
 		} else {
 			/* Nothing to do here, so just drop the lock. */
-			raw_spin_unlock_irqrestore(&rnp->lock, flags);
+			raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 		}
 	}
 }
@@ -2895,12 +2872,12 @@
 	raw_spin_unlock(&rnp_old->fqslock);
 	if (READ_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
 		rsp->n_force_qs_lh++;
-		raw_spin_unlock_irqrestore(&rnp_old->lock, flags);
+		raw_spin_unlock_irqrestore_rcu_node(rnp_old, flags);
 		return;  /* Someone beat us to it. */
 	}
 	WRITE_ONCE(rsp->gp_flags, READ_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS);
-	raw_spin_unlock_irqrestore(&rnp_old->lock, flags);
-	rcu_gp_kthread_wake(rsp);
+	raw_spin_unlock_irqrestore_rcu_node(rnp_old, flags);
+	swake_up(&rsp->gp_wq); /* Memory barrier implied by swake_up() path. */
 }
 
 /*
@@ -2925,7 +2902,7 @@
 	if (cpu_needs_another_gp(rsp, rdp)) {
 		raw_spin_lock_rcu_node(rcu_get_root(rsp)); /* irqs disabled. */
 		needwake = rcu_start_gp(rsp);
-		raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags);
+		raw_spin_unlock_irqrestore_rcu_node(rcu_get_root(rsp), flags);
 		if (needwake)
 			rcu_gp_kthread_wake(rsp);
 	} else {
@@ -3016,7 +2993,7 @@
 
 			raw_spin_lock_rcu_node(rnp_root);
 			needwake = rcu_start_gp(rsp);
-			raw_spin_unlock(&rnp_root->lock);
+			raw_spin_unlock_rcu_node(rnp_root);
 			if (needwake)
 				rcu_gp_kthread_wake(rsp);
 		} else {
@@ -3436,14 +3413,14 @@
 	rcu_for_each_leaf_node(rsp, rnp) {
 		raw_spin_lock_irqsave_rcu_node(rnp, flags);
 		if (rnp->expmaskinit == rnp->expmaskinitnext) {
-			raw_spin_unlock_irqrestore(&rnp->lock, flags);
+			raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 			continue;  /* No new CPUs, nothing to do. */
 		}
 
 		/* Update this node's mask, track old value for propagation. */
 		oldmask = rnp->expmaskinit;
 		rnp->expmaskinit = rnp->expmaskinitnext;
-		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 
 		/* If was already nonzero, nothing to propagate. */
 		if (oldmask)
@@ -3458,7 +3435,7 @@
 			if (rnp_up->expmaskinit)
 				done = true;
 			rnp_up->expmaskinit |= mask;
-			raw_spin_unlock_irqrestore(&rnp_up->lock, flags);
+			raw_spin_unlock_irqrestore_rcu_node(rnp_up, flags);
 			if (done)
 				break;
 			mask = rnp_up->grpmask;
@@ -3481,7 +3458,7 @@
 		raw_spin_lock_irqsave_rcu_node(rnp, flags);
 		WARN_ON_ONCE(rnp->expmask);
 		rnp->expmask = rnp->expmaskinit;
-		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 	}
 }
 
@@ -3522,19 +3499,19 @@
 			if (!rnp->expmask)
 				rcu_initiate_boost(rnp, flags);
 			else
-				raw_spin_unlock_irqrestore(&rnp->lock, flags);
+				raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 			break;
 		}
 		if (rnp->parent == NULL) {
-			raw_spin_unlock_irqrestore(&rnp->lock, flags);
+			raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 			if (wake) {
 				smp_mb(); /* EGP done before wake_up(). */
-				wake_up(&rsp->expedited_wq);
+				swake_up(&rsp->expedited_wq);
 			}
 			break;
 		}
 		mask = rnp->grpmask;
-		raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
+		raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled */
 		rnp = rnp->parent;
 		raw_spin_lock_rcu_node(rnp); /* irqs already disabled */
 		WARN_ON_ONCE(!(rnp->expmask & mask));
@@ -3569,7 +3546,7 @@
 
 	raw_spin_lock_irqsave_rcu_node(rnp, flags);
 	if (!(rnp->expmask & mask)) {
-		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 		return;
 	}
 	rnp->expmask &= ~mask;
@@ -3730,7 +3707,7 @@
 		 */
 		if (rcu_preempt_has_tasks(rnp))
 			rnp->exp_tasks = rnp->blkd_tasks.next;
-		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 
 		/* IPI the remaining CPUs for expedited quiescent state. */
 		mask = 1;
@@ -3747,7 +3724,7 @@
 			raw_spin_lock_irqsave_rcu_node(rnp, flags);
 			if (cpu_online(cpu) &&
 			    (rnp->expmask & mask)) {
-				raw_spin_unlock_irqrestore(&rnp->lock, flags);
+				raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 				schedule_timeout_uninterruptible(1);
 				if (cpu_online(cpu) &&
 				    (rnp->expmask & mask))
@@ -3756,7 +3733,7 @@
 			}
 			if (!(rnp->expmask & mask))
 				mask_ofl_ipi &= ~mask;
-			raw_spin_unlock_irqrestore(&rnp->lock, flags);
+			raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 		}
 		/* Report quiescent states for those that went offline. */
 		mask_ofl_test |= mask_ofl_ipi;
@@ -3780,7 +3757,7 @@
 	jiffies_start = jiffies;
 
 	for (;;) {
-		ret = wait_event_interruptible_timeout(
+		ret = swait_event_timeout(
 				rsp->expedited_wq,
 				sync_rcu_preempt_exp_done(rnp_root),
 				jiffies_stall);
@@ -3788,7 +3765,7 @@
 			return;
 		if (ret < 0) {
 			/* Hit a signal, disable CPU stall warnings. */
-			wait_event(rsp->expedited_wq,
+			swait_event(rsp->expedited_wq,
 				   sync_rcu_preempt_exp_done(rnp_root));
 			return;
 		}
@@ -4163,7 +4140,7 @@
 			return;
 		raw_spin_lock_rcu_node(rnp); /* Interrupts already disabled. */
 		rnp->qsmaskinit |= mask;
-		raw_spin_unlock(&rnp->lock); /* Interrupts remain disabled. */
+		raw_spin_unlock_rcu_node(rnp); /* Interrupts remain disabled. */
 	}
 }
 
@@ -4187,7 +4164,7 @@
 	rdp->rsp = rsp;
 	mutex_init(&rdp->exp_funnel_mutex);
 	rcu_boot_init_nocb_percpu_data(rdp);
-	raw_spin_unlock_irqrestore(&rnp->lock, flags);
+	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 }
 
 /*
@@ -4215,7 +4192,7 @@
 	rcu_sysidle_init_percpu_data(rdp->dynticks);
 	atomic_set(&rdp->dynticks->dynticks,
 		   (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1);
-	raw_spin_unlock(&rnp->lock);		/* irqs remain disabled. */
+	raw_spin_unlock_rcu_node(rnp);		/* irqs remain disabled. */
 
 	/*
 	 * Add CPU to leaf rcu_node pending-online bitmask.  Any needed
@@ -4236,7 +4213,7 @@
 	rdp->rcu_qs_ctr_snap = per_cpu(rcu_qs_ctr, cpu);
 	rdp->core_needs_qs = false;
 	trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl"));
-	raw_spin_unlock_irqrestore(&rnp->lock, flags);
+	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 }
 
 static void rcu_prepare_cpu(int cpu)
@@ -4247,6 +4224,46 @@
 		rcu_init_percpu_data(cpu, rsp);
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * The CPU is exiting the idle loop into the arch_cpu_idle_dead()
+ * function.  We now remove it from the rcu_node tree's ->qsmaskinit
+ * bit masks.
+ * The CPU is exiting the idle loop into the arch_cpu_idle_dead()
+ * function.  We now remove it from the rcu_node tree's ->qsmaskinit
+ * bit masks.
+ */
+static void rcu_cleanup_dying_idle_cpu(int cpu, struct rcu_state *rsp)
+{
+	unsigned long flags;
+	unsigned long mask;
+	struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
+	struct rcu_node *rnp = rdp->mynode;  /* Outgoing CPU's rdp & rnp. */
+
+	if (!IS_ENABLED(CONFIG_HOTPLUG_CPU))
+		return;
+
+	/* Remove outgoing CPU from mask in the leaf rcu_node structure. */
+	mask = rdp->grpmask;
+	raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */
+	rnp->qsmaskinitnext &= ~mask;
+	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+}
+
+void rcu_report_dead(unsigned int cpu)
+{
+	struct rcu_state *rsp;
+
+	/* QS for any half-done expedited RCU-sched GP. */
+	preempt_disable();
+	rcu_report_exp_rdp(&rcu_sched_state,
+			   this_cpu_ptr(rcu_sched_state.rda), true);
+	preempt_enable();
+	for_each_rcu_flavor(rsp)
+		rcu_cleanup_dying_idle_cpu(cpu, rsp);
+}
+#endif
+
 /*
  * Handle CPU online/offline notification events.
  */
@@ -4278,17 +4295,6 @@
 		for_each_rcu_flavor(rsp)
 			rcu_cleanup_dying_cpu(rsp);
 		break;
-	case CPU_DYING_IDLE:
-		/* QS for any half-done expedited RCU-sched GP. */
-		preempt_disable();
-		rcu_report_exp_rdp(&rcu_sched_state,
-				   this_cpu_ptr(rcu_sched_state.rda), true);
-		preempt_enable();
-
-		for_each_rcu_flavor(rsp) {
-			rcu_cleanup_dying_idle_cpu(cpu, rsp);
-		}
-		break;
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN:
 	case CPU_UP_CANCELED:
@@ -4358,7 +4364,7 @@
 			sp.sched_priority = kthread_prio;
 			sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
 		}
-		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 		wake_up_process(t);
 	}
 	rcu_spawn_nocb_kthreads();
@@ -4449,8 +4455,8 @@
 		cpustride *= levelspread[i];
 		rnp = rsp->level[i];
 		for (j = 0; j < levelcnt[i]; j++, rnp++) {
-			raw_spin_lock_init(&rnp->lock);
-			lockdep_set_class_and_name(&rnp->lock,
+			raw_spin_lock_init(&ACCESS_PRIVATE(rnp, lock));
+			lockdep_set_class_and_name(&ACCESS_PRIVATE(rnp, lock),
 						   &rcu_node_class[i], buf[i]);
 			raw_spin_lock_init(&rnp->fqslock);
 			lockdep_set_class_and_name(&rnp->fqslock,
@@ -4482,8 +4488,8 @@
 		}
 	}
 
-	init_waitqueue_head(&rsp->gp_wq);
-	init_waitqueue_head(&rsp->expedited_wq);
+	init_swait_queue_head(&rsp->gp_wq);
+	init_swait_queue_head(&rsp->expedited_wq);
 	rnp = rsp->level[rcu_num_lvls - 1];
 	for_each_possible_cpu(i) {
 		while (i > rnp->grphi)

diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 83360b4..df668c0 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h

@@ -27,6 +27,7 @@
 #include <linux/threads.h>
 #include <linux/cpumask.h>
 #include <linux/seqlock.h>
+#include <linux/swait.h>
 #include <linux/stop_machine.h>
 
 /*
@@ -149,8 +150,9 @@
  * Definition for node within the RCU grace-period-detection hierarchy.
  */
 struct rcu_node {
-	raw_spinlock_t lock;	/* Root rcu_node's lock protects some */
-				/*  rcu_state fields as well as following. */
+	raw_spinlock_t __private lock;	/* Root rcu_node's lock protects */
+					/*  some rcu_state fields as well as */
+					/*  following. */
 	unsigned long gpnum;	/* Current grace period for this node. */
 				/*  This will either be equal to or one */
 				/*  behind the root rcu_node's gpnum. */
@@ -243,7 +245,7 @@
 				/* Refused to boost: not sure why, though. */
 				/*  This can happen due to race conditions. */
 #ifdef CONFIG_RCU_NOCB_CPU
-	wait_queue_head_t nocb_gp_wq[2];
+	struct swait_queue_head nocb_gp_wq[2];
 				/* Place for rcu_nocb_kthread() to wait GP. */
 #endif /* #ifdef CONFIG_RCU_NOCB_CPU */
 	int need_future_gp[2];
@@ -399,7 +401,7 @@
 	atomic_long_t nocb_q_count_lazy; /*  invocation (all stages). */
 	struct rcu_head *nocb_follower_head; /* CBs ready to invoke. */
 	struct rcu_head **nocb_follower_tail;
-	wait_queue_head_t nocb_wq;	/* For nocb kthreads to sleep on. */
+	struct swait_queue_head nocb_wq; /* For nocb kthreads to sleep on. */
 	struct task_struct *nocb_kthread;
 	int nocb_defer_wakeup;		/* Defer wakeup of nocb_kthread. */
 
@@ -478,7 +480,7 @@
 	unsigned long gpnum;			/* Current gp number. */
 	unsigned long completed;		/* # of last completed gp. */
 	struct task_struct *gp_kthread;		/* Task for grace periods. */
-	wait_queue_head_t gp_wq;		/* Where GP task waits. */
+	struct swait_queue_head gp_wq;		/* Where GP task waits. */
 	short gp_flags;				/* Commands for GP task. */
 	short gp_state;				/* GP kthread sleep state. */
 
@@ -506,7 +508,7 @@
 	unsigned long expedited_sequence;	/* Take a ticket. */
 	atomic_long_t expedited_normal;		/* # fallbacks to normal. */
 	atomic_t expedited_need_qs;		/* # CPUs left to check in. */
-	wait_queue_head_t expedited_wq;		/* Wait for check-ins. */
+	struct swait_queue_head expedited_wq;	/* Wait for check-ins. */
 	int ncpus_snap;				/* # CPUs seen last time. */
 
 	unsigned long jiffies_force_qs;		/* Time at which to invoke */
@@ -621,7 +623,8 @@
 static void increment_cpu_stall_ticks(void);
 static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu);
 static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq);
-static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp);
+static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp);
+static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq);
 static void rcu_init_one_nocb(struct rcu_node *rnp);
 static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
 			    bool lazy, unsigned long flags);
@@ -680,7 +683,7 @@
 #endif /* #else #ifdef CONFIG_PPC */
 
 /*
- * Wrappers for the rcu_node::lock acquire.
+ * Wrappers for the rcu_node::lock acquire and release.
  *
  * Because the rcu_nodes form a tree, the tree traversal locking will observe
  * different lock values, this in turn means that an UNLOCK of one level
@@ -689,29 +692,48 @@
  *
  * In order to restore full ordering between tree levels, augment the regular
  * lock acquire functions with smp_mb__after_unlock_lock().
+ *
+ * As ->lock of struct rcu_node is a __private field, therefore one should use
+ * these wrappers rather than directly call raw_spin_{lock,unlock}* on ->lock.
  */
 static inline void raw_spin_lock_rcu_node(struct rcu_node *rnp)
 {
-	raw_spin_lock(&rnp->lock);
+	raw_spin_lock(&ACCESS_PRIVATE(rnp, lock));
 	smp_mb__after_unlock_lock();
 }
 
+static inline void raw_spin_unlock_rcu_node(struct rcu_node *rnp)
+{
+	raw_spin_unlock(&ACCESS_PRIVATE(rnp, lock));
+}
+
 static inline void raw_spin_lock_irq_rcu_node(struct rcu_node *rnp)
 {
-	raw_spin_lock_irq(&rnp->lock);
+	raw_spin_lock_irq(&ACCESS_PRIVATE(rnp, lock));
 	smp_mb__after_unlock_lock();
 }
 
-#define raw_spin_lock_irqsave_rcu_node(rnp, flags)	\
-do {							\
-	typecheck(unsigned long, flags);		\
-	raw_spin_lock_irqsave(&(rnp)->lock, flags);	\
-	smp_mb__after_unlock_lock();			\
+static inline void raw_spin_unlock_irq_rcu_node(struct rcu_node *rnp)
+{
+	raw_spin_unlock_irq(&ACCESS_PRIVATE(rnp, lock));
+}
+
+#define raw_spin_lock_irqsave_rcu_node(rnp, flags)			\
+do {									\
+	typecheck(unsigned long, flags);				\
+	raw_spin_lock_irqsave(&ACCESS_PRIVATE(rnp, lock), flags);	\
+	smp_mb__after_unlock_lock();					\
+} while (0)
+
+#define raw_spin_unlock_irqrestore_rcu_node(rnp, flags)			\
+do {									\
+	typecheck(unsigned long, flags);				\
+	raw_spin_unlock_irqrestore(&ACCESS_PRIVATE(rnp, lock), flags);	\
 } while (0)
 
 static inline bool raw_spin_trylock_rcu_node(struct rcu_node *rnp)
 {
-	bool locked = raw_spin_trylock(&rnp->lock);
+	bool locked = raw_spin_trylock(&ACCESS_PRIVATE(rnp, lock));
 
 	if (locked)
 		smp_mb__after_unlock_lock();

diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 9467a8b..efdf7b6 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h

@@ -235,7 +235,7 @@
 		rnp->gp_tasks = &t->rcu_node_entry;
 	if (!rnp->exp_tasks && (blkd_state & RCU_EXP_BLKD))
 		rnp->exp_tasks = &t->rcu_node_entry;
-	raw_spin_unlock(&rnp->lock); /* rrupts remain disabled. */
+	raw_spin_unlock_rcu_node(rnp); /* interrupts remain disabled. */
 
 	/*
 	 * Report the quiescent state for the expedited GP.  This expedited
@@ -489,7 +489,7 @@
 							 !!rnp->gp_tasks);
 			rcu_report_unblock_qs_rnp(rcu_state_p, rnp, flags);
 		} else {
-			raw_spin_unlock_irqrestore(&rnp->lock, flags);
+			raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 		}
 
 		/* Unboost if we were boosted. */
@@ -518,14 +518,14 @@
 
 	raw_spin_lock_irqsave_rcu_node(rnp, flags);
 	if (!rcu_preempt_blocked_readers_cgp(rnp)) {
-		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 		return;
 	}
 	t = list_entry(rnp->gp_tasks->prev,
 		       struct task_struct, rcu_node_entry);
 	list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry)
 		sched_show_task(t);
-	raw_spin_unlock_irqrestore(&rnp->lock, flags);
+	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 }
 
 /*
@@ -807,7 +807,6 @@
 #else /* #ifdef CONFIG_PREEMPT_RCU */
 
 static struct rcu_state *const rcu_state_p = &rcu_sched_state;
-static struct rcu_data __percpu *const rcu_data_p = &rcu_sched_data;
 
 /*
  * Tell them what RCU they are running.
@@ -991,7 +990,7 @@
 	 * might exit their RCU read-side critical sections on their own.
 	 */
 	if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL) {
-		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 		return 0;
 	}
 
@@ -1028,7 +1027,7 @@
 	 */
 	t = container_of(tb, struct task_struct, rcu_node_entry);
 	rt_mutex_init_proxy_locked(&rnp->boost_mtx, t);
-	raw_spin_unlock_irqrestore(&rnp->lock, flags);
+	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 	/* Lock only for side effect: boosts task t's priority. */
 	rt_mutex_lock(&rnp->boost_mtx);
 	rt_mutex_unlock(&rnp->boost_mtx);  /* Then keep lockdep happy. */
@@ -1088,7 +1087,7 @@
 
 	if (!rcu_preempt_blocked_readers_cgp(rnp) && rnp->exp_tasks == NULL) {
 		rnp->n_balk_exp_gp_tasks++;
-		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 		return;
 	}
 	if (rnp->exp_tasks != NULL ||
@@ -1098,13 +1097,13 @@
 	     ULONG_CMP_GE(jiffies, rnp->boost_time))) {
 		if (rnp->exp_tasks == NULL)
 			rnp->boost_tasks = rnp->gp_tasks;
-		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 		t = rnp->boost_kthread_task;
 		if (t)
 			rcu_wake_cond(t, rnp->boost_kthread_status);
 	} else {
 		rcu_initiate_boost_trace(rnp);
-		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 	}
 }
 
@@ -1172,7 +1171,7 @@
 		return PTR_ERR(t);
 	raw_spin_lock_irqsave_rcu_node(rnp, flags);
 	rnp->boost_kthread_task = t;
-	raw_spin_unlock_irqrestore(&rnp->lock, flags);
+	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 	sp.sched_priority = kthread_prio;
 	sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
 	wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */
@@ -1308,7 +1307,7 @@
 static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
 	__releases(rnp->lock)
 {
-	raw_spin_unlock_irqrestore(&rnp->lock, flags);
+	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 }
 
 static void invoke_rcu_callbacks_kthread(void)
@@ -1559,7 +1558,7 @@
 		rnp = rdp->mynode;
 		raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
 		needwake = rcu_accelerate_cbs(rsp, rnp, rdp);
-		raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
+		raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */
 		if (needwake)
 			rcu_gp_kthread_wake(rsp);
 	}
@@ -1811,9 +1810,9 @@
  * Wake up any no-CBs CPUs' kthreads that were waiting on the just-ended
  * grace period.
  */
-static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
+static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq)
 {
-	wake_up_all(&rnp->nocb_gp_wq[rnp->completed & 0x1]);
+	swake_up_all(sq);
 }
 
 /*
@@ -1829,10 +1828,15 @@
 	rnp->need_future_gp[(rnp->completed + 1) & 0x1] += nrq;
 }
 
+static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp)
+{
+	return &rnp->nocb_gp_wq[rnp->completed & 0x1];
+}
+
 static void rcu_init_one_nocb(struct rcu_node *rnp)
 {
-	init_waitqueue_head(&rnp->nocb_gp_wq[0]);
-	init_waitqueue_head(&rnp->nocb_gp_wq[1]);
+	init_swait_queue_head(&rnp->nocb_gp_wq[0]);
+	init_swait_queue_head(&rnp->nocb_gp_wq[1]);
 }
 
 #ifndef CONFIG_RCU_NOCB_CPU_ALL
@@ -1857,7 +1861,7 @@
 	if (READ_ONCE(rdp_leader->nocb_leader_sleep) || force) {
 		/* Prior smp_mb__after_atomic() orders against prior enqueue. */
 		WRITE_ONCE(rdp_leader->nocb_leader_sleep, false);
-		wake_up(&rdp_leader->nocb_wq);
+		swake_up(&rdp_leader->nocb_wq);
 	}
 }
 
@@ -2059,7 +2063,7 @@
 
 	raw_spin_lock_irqsave_rcu_node(rnp, flags);
 	needwake = rcu_start_future_gp(rnp, rdp, &c);
-	raw_spin_unlock_irqrestore(&rnp->lock, flags);
+	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 	if (needwake)
 		rcu_gp_kthread_wake(rdp->rsp);
 
@@ -2069,7 +2073,7 @@
 	 */
 	trace_rcu_future_gp(rnp, rdp, c, TPS("StartWait"));
 	for (;;) {
-		wait_event_interruptible(
+		swait_event_interruptible(
 			rnp->nocb_gp_wq[c & 0x1],
 			(d = ULONG_CMP_GE(READ_ONCE(rnp->completed), c)));
 		if (likely(d))
@@ -2097,7 +2101,7 @@
 	/* Wait for callbacks to appear. */
 	if (!rcu_nocb_poll) {
 		trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, "Sleep");
-		wait_event_interruptible(my_rdp->nocb_wq,
+		swait_event_interruptible(my_rdp->nocb_wq,
 				!READ_ONCE(my_rdp->nocb_leader_sleep));
 		/* Memory barrier handled by smp_mb() calls below and repoll. */
 	} else if (firsttime) {
@@ -2172,7 +2176,7 @@
 			 * List was empty, wake up the follower.
 			 * Memory barriers supplied by atomic_long_add().
 			 */
-			wake_up(&rdp->nocb_wq);
+			swake_up(&rdp->nocb_wq);
 		}
 	}
 
@@ -2193,7 +2197,7 @@
 		if (!rcu_nocb_poll) {
 			trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
 					    "FollowerSleep");
-			wait_event_interruptible(rdp->nocb_wq,
+			swait_event_interruptible(rdp->nocb_wq,
 						 READ_ONCE(rdp->nocb_follower_head));
 		} else if (firsttime) {
 			/* Don't drown trace log with "Poll"! */
@@ -2352,7 +2356,7 @@
 static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
 {
 	rdp->nocb_tail = &rdp->nocb_head;
-	init_waitqueue_head(&rdp->nocb_wq);
+	init_swait_queue_head(&rdp->nocb_wq);
 	rdp->nocb_follower_tail = &rdp->nocb_follower_head;
 }
 
@@ -2502,7 +2506,7 @@
 	return false;
 }
 
-static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
+static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq)
 {
 }
 
@@ -2510,6 +2514,11 @@
 {
 }
 
+static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp)
+{
+	return NULL;
+}
+
 static void rcu_init_one_nocb(struct rcu_node *rnp)
 {
 }

diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 76b94e1..ca828b4 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c

@@ -128,6 +128,7 @@
 {
 	return READ_ONCE(rcu_normal);
 }
+EXPORT_SYMBOL_GPL(rcu_gp_is_normal);
 
 static atomic_t rcu_expedited_nesting =
 	ATOMIC_INIT(IS_ENABLED(CONFIG_RCU_EXPEDITE_BOOT) ? 1 : 0);

diff --git a/kernel/resource.c b/kernel/resource.c
index 3669d1b..4d46605 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c

@@ -333,13 +333,13 @@
 EXPORT_SYMBOL(release_resource);
 
 /*
- * Finds the lowest iomem reosurce exists with-in [res->start.res->end)
- * the caller must specify res->start, res->end, res->flags and "name".
- * If found, returns 0, res is overwritten, if not found, returns -1.
- * This walks through whole tree and not just first level children
- * until and unless first_level_children_only is true.
+ * Finds the lowest iomem resource existing within [res->start.res->end).
+ * The caller must specify res->start, res->end, res->flags, and optionally
+ * desc.  If found, returns 0, res is overwritten, if not found, returns -1.
+ * This function walks the whole tree and not just first level children until
+ * and unless first_level_children_only is true.
  */
-static int find_next_iomem_res(struct resource *res, char *name,
+static int find_next_iomem_res(struct resource *res, unsigned long desc,
 			       bool first_level_children_only)
 {
 	resource_size_t start, end;
@@ -358,9 +358,9 @@
 	read_lock(&resource_lock);
 
 	for (p = iomem_resource.child; p; p = next_resource(p, sibling_only)) {
-		if (p->flags != res->flags)
+		if ((p->flags & res->flags) != res->flags)
 			continue;
-		if (name && strcmp(p->name, name))
+		if ((desc != IORES_DESC_NONE) && (desc != p->desc))
 			continue;
 		if (p->start > end) {
 			p = NULL;
@@ -385,15 +385,18 @@
  * Walks through iomem resources and calls func() with matching resource
  * ranges. This walks through whole tree and not just first level children.
  * All the memory ranges which overlap start,end and also match flags and
- * name are valid candidates.
+ * desc are valid candidates.
  *
- * @name: name of resource
- * @flags: resource flags
+ * @desc: I/O resource descriptor. Use IORES_DESC_NONE to skip @desc check.
+ * @flags: I/O resource flags
  * @start: start addr
  * @end: end addr
+ *
+ * NOTE: For a new descriptor search, define a new IORES_DESC in
+ * <linux/ioport.h> and set it in 'desc' of a target resource entry.
  */
-int walk_iomem_res(char *name, unsigned long flags, u64 start, u64 end,
-		void *arg, int (*func)(u64, u64, void *))
+int walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start,
+		u64 end, void *arg, int (*func)(u64, u64, void *))
 {
 	struct resource res;
 	u64 orig_end;
@@ -403,23 +406,27 @@
 	res.end = end;
 	res.flags = flags;
 	orig_end = res.end;
+
 	while ((res.start < res.end) &&
-		(!find_next_iomem_res(&res, name, false))) {
+		(!find_next_iomem_res(&res, desc, false))) {
+
 		ret = (*func)(res.start, res.end, arg);
 		if (ret)
 			break;
+
 		res.start = res.end + 1;
 		res.end = orig_end;
 	}
+
 	return ret;
 }
 
 /*
- * This function calls callback against all memory range of "System RAM"
- * which are marked as IORESOURCE_MEM and IORESOUCE_BUSY.
- * Now, this function is only for "System RAM". This function deals with
- * full ranges and not pfn. If resources are not pfn aligned, dealing
- * with pfn can truncate ranges.
+ * This function calls the @func callback against all memory ranges of type
+ * System RAM which are marked as IORESOURCE_SYSTEM_RAM and IORESOUCE_BUSY.
+ * Now, this function is only for System RAM, it deals with full ranges and
+ * not PFNs. If resources are not PFN-aligned, dealing with PFNs can truncate
+ * ranges.
  */
 int walk_system_ram_res(u64 start, u64 end, void *arg,
 				int (*func)(u64, u64, void *))
@@ -430,10 +437,10 @@
 
 	res.start = start;
 	res.end = end;
-	res.flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+	res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
 	orig_end = res.end;
 	while ((res.start < res.end) &&
-		(!find_next_iomem_res(&res, "System RAM", true))) {
+		(!find_next_iomem_res(&res, IORES_DESC_NONE, true))) {
 		ret = (*func)(res.start, res.end, arg);
 		if (ret)
 			break;
@@ -446,9 +453,9 @@
 #if !defined(CONFIG_ARCH_HAS_WALK_MEMORY)
 
 /*
- * This function calls callback against all memory range of "System RAM"
- * which are marked as IORESOURCE_MEM and IORESOUCE_BUSY.
- * Now, this function is only for "System RAM".
+ * This function calls the @func callback against all memory ranges of type
+ * System RAM which are marked as IORESOURCE_SYSTEM_RAM and IORESOUCE_BUSY.
+ * It is to be used only for System RAM.
  */
 int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,
 		void *arg, int (*func)(unsigned long, unsigned long, void *))
@@ -460,10 +467,10 @@
 
 	res.start = (u64) start_pfn << PAGE_SHIFT;
 	res.end = ((u64)(start_pfn + nr_pages) << PAGE_SHIFT) - 1;
-	res.flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+	res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
 	orig_end = res.end;
 	while ((res.start < res.end) &&
-		(find_next_iomem_res(&res, "System RAM", true) >= 0)) {
+		(find_next_iomem_res(&res, IORES_DESC_NONE, true) >= 0)) {
 		pfn = (res.start + PAGE_SIZE - 1) >> PAGE_SHIFT;
 		end_pfn = (res.end + 1) >> PAGE_SHIFT;
 		if (end_pfn > pfn)
@@ -484,7 +491,7 @@
 }
 /*
  * This generic page_is_ram() returns true if specified address is
- * registered as "System RAM" in iomem_resource list.
+ * registered as System RAM in iomem_resource list.
  */
 int __weak page_is_ram(unsigned long pfn)
 {
@@ -496,30 +503,34 @@
  * region_intersects() - determine intersection of region with known resources
  * @start: region start address
  * @size: size of region
- * @name: name of resource (in iomem_resource)
+ * @flags: flags of resource (in iomem_resource)
+ * @desc: descriptor of resource (in iomem_resource) or IORES_DESC_NONE
  *
  * Check if the specified region partially overlaps or fully eclipses a
- * resource identified by @name.  Return REGION_DISJOINT if the region
- * does not overlap @name, return REGION_MIXED if the region overlaps
- * @type and another resource, and return REGION_INTERSECTS if the
- * region overlaps @type and no other defined resource. Note, that
- * REGION_INTERSECTS is also returned in the case when the specified
- * region overlaps RAM and undefined memory holes.
+ * resource identified by @flags and @desc (optional with IORES_DESC_NONE).
+ * Return REGION_DISJOINT if the region does not overlap @flags/@desc,
+ * return REGION_MIXED if the region overlaps @flags/@desc and another
+ * resource, and return REGION_INTERSECTS if the region overlaps @flags/@desc
+ * and no other defined resource. Note that REGION_INTERSECTS is also
+ * returned in the case when the specified region overlaps RAM and undefined
+ * memory holes.
  *
  * region_intersect() is used by memory remapping functions to ensure
  * the user is not remapping RAM and is a vast speed up over walking
  * through the resource table page by page.
  */
-int region_intersects(resource_size_t start, size_t size, const char *name)
+int region_intersects(resource_size_t start, size_t size, unsigned long flags,
+		      unsigned long desc)
 {
-	unsigned long flags = IORESOURCE_MEM | IORESOURCE_BUSY;
 	resource_size_t end = start + size - 1;
 	int type = 0; int other = 0;
 	struct resource *p;
 
 	read_lock(&resource_lock);
 	for (p = iomem_resource.child; p ; p = p->sibling) {
-		bool is_type = strcmp(p->name, name) == 0 && p->flags == flags;
+		bool is_type = (((p->flags & flags) == flags) &&
+				((desc == IORES_DESC_NONE) ||
+				 (desc == p->desc)));
 
 		if (start >= p->start && start <= p->end)
 			is_type ? type++ : other++;
@@ -538,6 +549,7 @@
 
 	return REGION_DISJOINT;
 }
+EXPORT_SYMBOL_GPL(region_intersects);
 
 void __weak arch_remove_reservations(struct resource *avail)
 {
@@ -948,6 +960,7 @@
 	res->start = start;
 	res->end = end;
 	res->flags = IORESOURCE_BUSY;
+	res->desc = IORES_DESC_NONE;
 
 	while (1) {
 
@@ -982,6 +995,7 @@
 				next_res->start = conflict->end + 1;
 				next_res->end = end;
 				next_res->flags = IORESOURCE_BUSY;
+				next_res->desc = IORES_DESC_NONE;
 			}
 		} else {
 			res->start = conflict->end + 1;
@@ -1071,8 +1085,9 @@
 	res->name = name;
 	res->start = start;
 	res->end = start + n - 1;
-	res->flags = resource_type(parent);
+	res->flags = resource_type(parent) | resource_ext_type(parent);
 	res->flags |= IORESOURCE_BUSY | flags;
+	res->desc = IORES_DESC_NONE;
 
 	write_lock(&resource_lock);
 
@@ -1238,6 +1253,7 @@
 			new_res->start = end + 1;
 			new_res->end = res->end;
 			new_res->flags = res->flags;
+			new_res->desc = res->desc;
 			new_res->parent = res->parent;
 			new_res->sibling = res->sibling;
 			new_res->child = NULL;
@@ -1413,6 +1429,7 @@
 			res->start = io_start;
 			res->end = io_start + io_num - 1;
 			res->flags = IORESOURCE_BUSY;
+			res->desc = IORES_DESC_NONE;
 			res->child = NULL;
 			if (request_resource(res->start >= 0x10000 ? &iomem_resource : &ioport_resource, res) == 0)
 				reserved = x+1;

diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index 6768797..7d4cba2 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile

@@ -13,7 +13,7 @@
 
 obj-y += core.o loadavg.o clock.o cputime.o
 obj-y += idle_task.o fair.o rt.o deadline.o stop_task.o
-obj-y += wait.o completion.o idle.o
+obj-y += wait.o swait.o completion.o idle.o
 obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o
 obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o
 obj-$(CONFIG_SCHEDSTATS) += stats.o

diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
index bc54e84..fedb967 100644
--- a/kernel/sched/clock.c
+++ b/kernel/sched/clock.c

@@ -61,6 +61,7 @@
 #include <linux/static_key.h>
 #include <linux/workqueue.h>
 #include <linux/compiler.h>
+#include <linux/tick.h>
 
 /*
  * Scheduler clock - returns current time in nanosec units.
@@ -89,6 +90,8 @@
 {
 	if (!sched_clock_stable())
 		static_key_slow_inc(&__sched_clock_stable);
+
+	tick_dep_clear(TICK_DEP_BIT_CLOCK_UNSTABLE);
 }
 
 void set_sched_clock_stable(void)
@@ -108,6 +111,8 @@
 	/* XXX worry about clock continuity */
 	if (sched_clock_stable())
 		static_key_slow_dec(&__sched_clock_stable);
+
+	tick_dep_set(TICK_DEP_BIT_CLOCK_UNSTABLE);
 }
 
 static DECLARE_WORK(sched_clock_work, __clear_sched_clock_stable);

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 9503d59..ea8f49a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c

@@ -26,6 +26,7 @@
  *              Thomas Gleixner, Mike Kravetz
  */
 
+#include <linux/kasan.h>
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/nmi.h>
@@ -66,12 +67,10 @@
 #include <linux/pagemap.h>
 #include <linux/hrtimer.h>
 #include <linux/tick.h>
-#include <linux/debugfs.h>
 #include <linux/ctype.h>
 #include <linux/ftrace.h>
 #include <linux/slab.h>
 #include <linux/init_task.h>
-#include <linux/binfmts.h>
 #include <linux/context_tracking.h>
 #include <linux/compiler.h>
 
@@ -124,138 +123,6 @@
 
 #undef SCHED_FEAT
 
-#ifdef CONFIG_SCHED_DEBUG
-#define SCHED_FEAT(name, enabled)	\
-	#name ,
-
-static const char * const sched_feat_names[] = {
-#include "features.h"
-};
-
-#undef SCHED_FEAT
-
-static int sched_feat_show(struct seq_file *m, void *v)
-{
-	int i;
-
-	for (i = 0; i < __SCHED_FEAT_NR; i++) {
-		if (!(sysctl_sched_features & (1UL << i)))
-			seq_puts(m, "NO_");
-		seq_printf(m, "%s ", sched_feat_names[i]);
-	}
-	seq_puts(m, "\n");
-
-	return 0;
-}
-
-#ifdef HAVE_JUMP_LABEL
-
-#define jump_label_key__true  STATIC_KEY_INIT_TRUE
-#define jump_label_key__false STATIC_KEY_INIT_FALSE
-
-#define SCHED_FEAT(name, enabled)	\
-	jump_label_key__##enabled ,
-
-struct static_key sched_feat_keys[__SCHED_FEAT_NR] = {
-#include "features.h"
-};
-
-#undef SCHED_FEAT
-
-static void sched_feat_disable(int i)
-{
-	static_key_disable(&sched_feat_keys[i]);
-}
-
-static void sched_feat_enable(int i)
-{
-	static_key_enable(&sched_feat_keys[i]);
-}
-#else
-static void sched_feat_disable(int i) { };
-static void sched_feat_enable(int i) { };
-#endif /* HAVE_JUMP_LABEL */
-
-static int sched_feat_set(char *cmp)
-{
-	int i;
-	int neg = 0;
-
-	if (strncmp(cmp, "NO_", 3) == 0) {
-		neg = 1;
-		cmp += 3;
-	}
-
-	for (i = 0; i < __SCHED_FEAT_NR; i++) {
-		if (strcmp(cmp, sched_feat_names[i]) == 0) {
-			if (neg) {
-				sysctl_sched_features &= ~(1UL << i);
-				sched_feat_disable(i);
-			} else {
-				sysctl_sched_features |= (1UL << i);
-				sched_feat_enable(i);
-			}
-			break;
-		}
-	}
-
-	return i;
-}
-
-static ssize_t
-sched_feat_write(struct file *filp, const char __user *ubuf,
-		size_t cnt, loff_t *ppos)
-{
-	char buf[64];
-	char *cmp;
-	int i;
-	struct inode *inode;
-
-	if (cnt > 63)
-		cnt = 63;
-
-	if (copy_from_user(&buf, ubuf, cnt))
-		return -EFAULT;
-
-	buf[cnt] = 0;
-	cmp = strstrip(buf);
-
-	/* Ensure the static_key remains in a consistent state */
-	inode = file_inode(filp);
-	inode_lock(inode);
-	i = sched_feat_set(cmp);
-	inode_unlock(inode);
-	if (i == __SCHED_FEAT_NR)
-		return -EINVAL;
-
-	*ppos += cnt;
-
-	return cnt;
-}
-
-static int sched_feat_open(struct inode *inode, struct file *filp)
-{
-	return single_open(filp, sched_feat_show, NULL);
-}
-
-static const struct file_operations sched_feat_fops = {
-	.open		= sched_feat_open,
-	.write		= sched_feat_write,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
-static __init int sched_init_debug(void)
-{
-	debugfs_create_file("sched_features", 0644, NULL, NULL,
-			&sched_feat_fops);
-
-	return 0;
-}
-late_initcall(sched_init_debug);
-#endif /* CONFIG_SCHED_DEBUG */
-
 /*
  * Number of tasks to iterate in a single balance run.
  * Limited because this is done with IRQs disabled.
@@ -453,20 +320,6 @@
 }
 #endif	/* CONFIG_SCHED_HRTICK */
 
-/*
- * cmpxchg based fetch_or, macro so it works for different integer types
- */
-#define fetch_or(ptr, val)						\
-({	typeof(*(ptr)) __old, __val = *(ptr);				\
- 	for (;;) {							\
- 		__old = cmpxchg((ptr), __val, __val | (val));		\
- 		if (__old == __val)					\
- 			break;						\
- 		__val = __old;						\
- 	}								\
- 	__old;								\
-})
-
 #if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG)
 /*
  * Atomically set TIF_NEED_RESCHED and test for TIF_POLLING_NRFLAG,
@@ -715,31 +568,36 @@
 #endif /* CONFIG_NO_HZ_COMMON */
 
 #ifdef CONFIG_NO_HZ_FULL
-bool sched_can_stop_tick(void)
+bool sched_can_stop_tick(struct rq *rq)
 {
+	int fifo_nr_running;
+
+	/* Deadline tasks, even if single, need the tick */
+	if (rq->dl.dl_nr_running)
+		return false;
+
 	/*
-	 * FIFO realtime policy runs the highest priority task. Other runnable
-	 * tasks are of a lower priority. The scheduler tick does nothing.
+	 * FIFO realtime policy runs the highest priority task (after DEADLINE).
+	 * Other runnable tasks are of a lower priority. The scheduler tick
+	 * isn't needed.
 	 */
-	if (current->policy == SCHED_FIFO)
+	fifo_nr_running = rq->rt.rt_nr_running - rq->rt.rr_nr_running;
+	if (fifo_nr_running)
 		return true;
 
 	/*
 	 * Round-robin realtime tasks time slice with other tasks at the same
-	 * realtime priority. Is this task the only one at this priority?
+	 * realtime priority.
 	 */
-	if (current->policy == SCHED_RR) {
-		struct sched_rt_entity *rt_se = &current->rt;
-
-		return list_is_singular(&rt_se->run_list);
+	if (rq->rt.rr_nr_running) {
+		if (rq->rt.rr_nr_running == 1)
+			return true;
+		else
+			return false;
 	}
 
-	/*
-	 * More than one running task need preemption.
-	 * nr_running update is assumed to be visible
-	 * after IPI is sent from wakers.
-	 */
-	if (this_rq()->nr_running > 1)
+	/* Normal multitasking need periodic preemption checks */
+	if (rq->cfs.nr_running > 1)
 		return false;
 
 	return true;
@@ -2093,7 +1951,8 @@
 
 	ttwu_queue(p, cpu);
 stat:
-	ttwu_stat(p, cpu, wake_flags);
+	if (schedstat_enabled())
+		ttwu_stat(p, cpu, wake_flags);
 out:
 	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
 
@@ -2141,7 +2000,8 @@
 		ttwu_activate(rq, p, ENQUEUE_WAKEUP);
 
 	ttwu_do_wakeup(rq, p, 0);
-	ttwu_stat(p, smp_processor_id(), 0);
+	if (schedstat_enabled())
+		ttwu_stat(p, smp_processor_id(), 0);
 out:
 	raw_spin_unlock(&p->pi_lock);
 }
@@ -2183,7 +2043,6 @@
 	dl_se->dl_bw = 0;
 
 	dl_se->dl_throttled = 0;
-	dl_se->dl_new = 1;
 	dl_se->dl_yielded = 0;
 }
 
@@ -2210,6 +2069,7 @@
 #endif
 
 #ifdef CONFIG_SCHEDSTATS
+	/* Even if schedstat is disabled, there should not be garbage */
 	memset(&p->se.statistics, 0, sizeof(p->se.statistics));
 #endif
 
@@ -2218,6 +2078,10 @@
 	__dl_clear_params(p);
 
 	INIT_LIST_HEAD(&p->rt.run_list);
+	p->rt.timeout		= 0;
+	p->rt.time_slice	= sched_rr_timeslice;
+	p->rt.on_rq		= 0;
+	p->rt.on_list		= 0;
 
 #ifdef CONFIG_PREEMPT_NOTIFIERS
 	INIT_HLIST_HEAD(&p->preempt_notifiers);
@@ -2281,6 +2145,69 @@
 #endif
 #endif
 
+DEFINE_STATIC_KEY_FALSE(sched_schedstats);
+
+#ifdef CONFIG_SCHEDSTATS
+static void set_schedstats(bool enabled)
+{
+	if (enabled)
+		static_branch_enable(&sched_schedstats);
+	else
+		static_branch_disable(&sched_schedstats);
+}
+
+void force_schedstat_enabled(void)
+{
+	if (!schedstat_enabled()) {
+		pr_info("kernel profiling enabled schedstats, disable via kernel.sched_schedstats.\n");
+		static_branch_enable(&sched_schedstats);
+	}
+}
+
+static int __init setup_schedstats(char *str)
+{
+	int ret = 0;
+	if (!str)
+		goto out;
+
+	if (!strcmp(str, "enable")) {
+		set_schedstats(true);
+		ret = 1;
+	} else if (!strcmp(str, "disable")) {
+		set_schedstats(false);
+		ret = 1;
+	}
+out:
+	if (!ret)
+		pr_warn("Unable to parse schedstats=\n");
+
+	return ret;
+}
+__setup("schedstats=", setup_schedstats);
+
+#ifdef CONFIG_PROC_SYSCTL
+int sysctl_schedstats(struct ctl_table *table, int write,
+			 void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	struct ctl_table t;
+	int err;
+	int state = static_branch_likely(&sched_schedstats);
+
+	if (write && !capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	t = *table;
+	t.data = &state;
+	err = proc_dointvec_minmax(&t, write, buffer, lenp, ppos);
+	if (err < 0)
+		return err;
+	if (write)
+		set_schedstats(state);
+	return err;
+}
+#endif
+#endif
+
 /*
  * fork()/clone()-time setup:
  */
@@ -3010,16 +2937,6 @@
 }
 #endif
 
-notrace unsigned long get_parent_ip(unsigned long addr)
-{
-	if (in_lock_functions(addr)) {
-		addr = CALLER_ADDR2;
-		if (in_lock_functions(addr))
-			addr = CALLER_ADDR3;
-	}
-	return addr;
-}
-
 #if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
 				defined(CONFIG_PREEMPT_TRACER))
 
@@ -3041,7 +2958,7 @@
 				PREEMPT_MASK - 10);
 #endif
 	if (preempt_count() == val) {
-		unsigned long ip = get_parent_ip(CALLER_ADDR1);
+		unsigned long ip = get_lock_parent_ip();
 #ifdef CONFIG_DEBUG_PREEMPT
 		current->preempt_disable_ip = ip;
 #endif
@@ -3068,7 +2985,7 @@
 #endif
 
 	if (preempt_count() == val)
-		trace_preempt_on(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
+		trace_preempt_on(CALLER_ADDR0, get_lock_parent_ip());
 	__preempt_count_sub(val);
 }
 EXPORT_SYMBOL(preempt_count_sub);
@@ -3280,7 +3197,6 @@
 
 		trace_sched_switch(preempt, prev, next);
 		rq = context_switch(rq, prev, next); /* unlocks the rq */
-		cpu = cpu_of(rq);
 	} else {
 		lockdep_unpin_lock(&rq->lock);
 		raw_spin_unlock_irq(&rq->lock);
@@ -3466,7 +3382,7 @@
  */
 void rt_mutex_setprio(struct task_struct *p, int prio)
 {
-	int oldprio, queued, running, enqueue_flag = ENQUEUE_RESTORE;
+	int oldprio, queued, running, queue_flag = DEQUEUE_SAVE | DEQUEUE_MOVE;
 	struct rq *rq;
 	const struct sched_class *prev_class;
 
@@ -3494,11 +3410,15 @@
 
 	trace_sched_pi_setprio(p, prio);
 	oldprio = p->prio;
+
+	if (oldprio == prio)
+		queue_flag &= ~DEQUEUE_MOVE;
+
 	prev_class = p->sched_class;
 	queued = task_on_rq_queued(p);
 	running = task_current(rq, p);
 	if (queued)
-		dequeue_task(rq, p, DEQUEUE_SAVE);
+		dequeue_task(rq, p, queue_flag);
 	if (running)
 		put_prev_task(rq, p);
 
@@ -3516,7 +3436,7 @@
 		if (!dl_prio(p->normal_prio) ||
 		    (pi_task && dl_entity_preempt(&pi_task->dl, &p->dl))) {
 			p->dl.dl_boosted = 1;
-			enqueue_flag |= ENQUEUE_REPLENISH;
+			queue_flag |= ENQUEUE_REPLENISH;
 		} else
 			p->dl.dl_boosted = 0;
 		p->sched_class = &dl_sched_class;
@@ -3524,7 +3444,7 @@
 		if (dl_prio(oldprio))
 			p->dl.dl_boosted = 0;
 		if (oldprio < prio)
-			enqueue_flag |= ENQUEUE_HEAD;
+			queue_flag |= ENQUEUE_HEAD;
 		p->sched_class = &rt_sched_class;
 	} else {
 		if (dl_prio(oldprio))
@@ -3539,7 +3459,7 @@
 	if (running)
 		p->sched_class->set_curr_task(rq);
 	if (queued)
-		enqueue_task(rq, p, enqueue_flag);
+		enqueue_task(rq, p, queue_flag);
 
 	check_class_changed(rq, p, prev_class, oldprio);
 out_unlock:
@@ -3895,6 +3815,7 @@
 	const struct sched_class *prev_class;
 	struct rq *rq;
 	int reset_on_fork;
+	int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE;
 
 	/* may grab non-irq protected spin_locks */
 	BUG_ON(in_interrupt());
@@ -4077,17 +3998,14 @@
 		 * itself.
 		 */
 		new_effective_prio = rt_mutex_get_effective_prio(p, newprio);
-		if (new_effective_prio == oldprio) {
-			__setscheduler_params(p, attr);
-			task_rq_unlock(rq, p, &flags);
-			return 0;
-		}
+		if (new_effective_prio == oldprio)
+			queue_flags &= ~DEQUEUE_MOVE;
 	}
 
 	queued = task_on_rq_queued(p);
 	running = task_current(rq, p);
 	if (queued)
-		dequeue_task(rq, p, DEQUEUE_SAVE);
+		dequeue_task(rq, p, queue_flags);
 	if (running)
 		put_prev_task(rq, p);
 
@@ -4097,15 +4015,14 @@
 	if (running)
 		p->sched_class->set_curr_task(rq);
 	if (queued) {
-		int enqueue_flags = ENQUEUE_RESTORE;
 		/*
 		 * We enqueue to tail when the priority of a task is
 		 * increased (user space view).
 		 */
-		if (oldprio <= p->prio)
-			enqueue_flags |= ENQUEUE_HEAD;
+		if (oldprio < p->prio)
+			queue_flags |= ENQUEUE_HEAD;
 
-		enqueue_task(rq, p, enqueue_flags);
+		enqueue_task(rq, p, queue_flags);
 	}
 
 	check_class_changed(rq, p, prev_class, oldprio);
@@ -5096,6 +5013,8 @@
 	idle->state = TASK_RUNNING;
 	idle->se.exec_start = sched_clock();
 
+	kasan_unpoison_task_stack(idle);
+
 #ifdef CONFIG_SMP
 	/*
 	 * Its possible that init_idle() gets called multiple times on a task,
@@ -5405,183 +5324,6 @@
 }
 #endif /* CONFIG_HOTPLUG_CPU */
 
-#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
-
-static struct ctl_table sd_ctl_dir[] = {
-	{
-		.procname	= "sched_domain",
-		.mode		= 0555,
-	},
-	{}
-};
-
-static struct ctl_table sd_ctl_root[] = {
-	{
-		.procname	= "kernel",
-		.mode		= 0555,
-		.child		= sd_ctl_dir,
-	},
-	{}
-};
-
-static struct ctl_table *sd_alloc_ctl_entry(int n)
-{
-	struct ctl_table *entry =
-		kcalloc(n, sizeof(struct ctl_table), GFP_KERNEL);
-
-	return entry;
-}
-
-static void sd_free_ctl_entry(struct ctl_table **tablep)
-{
-	struct ctl_table *entry;
-
-	/*
-	 * In the intermediate directories, both the child directory and
-	 * procname are dynamically allocated and could fail but the mode
-	 * will always be set. In the lowest directory the names are
-	 * static strings and all have proc handlers.
-	 */
-	for (entry = *tablep; entry->mode; entry++) {
-		if (entry->child)
-			sd_free_ctl_entry(&entry->child);
-		if (entry->proc_handler == NULL)
-			kfree(entry->procname);
-	}
-
-	kfree(*tablep);
-	*tablep = NULL;
-}
-
-static int min_load_idx = 0;
-static int max_load_idx = CPU_LOAD_IDX_MAX-1;
-
-static void
-set_table_entry(struct ctl_table *entry,
-		const char *procname, void *data, int maxlen,
-		umode_t mode, proc_handler *proc_handler,
-		bool load_idx)
-{
-	entry->procname = procname;
-	entry->data = data;
-	entry->maxlen = maxlen;
-	entry->mode = mode;
-	entry->proc_handler = proc_handler;
-
-	if (load_idx) {
-		entry->extra1 = &min_load_idx;
-		entry->extra2 = &max_load_idx;
-	}
-}
-
-static struct ctl_table *
-sd_alloc_ctl_domain_table(struct sched_domain *sd)
-{
-	struct ctl_table *table = sd_alloc_ctl_entry(14);
-
-	if (table == NULL)
-		return NULL;
-
-	set_table_entry(&table[0], "min_interval", &sd->min_interval,
-		sizeof(long), 0644, proc_doulongvec_minmax, false);
-	set_table_entry(&table[1], "max_interval", &sd->max_interval,
-		sizeof(long), 0644, proc_doulongvec_minmax, false);
-	set_table_entry(&table[2], "busy_idx", &sd->busy_idx,
-		sizeof(int), 0644, proc_dointvec_minmax, true);
-	set_table_entry(&table[3], "idle_idx", &sd->idle_idx,
-		sizeof(int), 0644, proc_dointvec_minmax, true);
-	set_table_entry(&table[4], "newidle_idx", &sd->newidle_idx,
-		sizeof(int), 0644, proc_dointvec_minmax, true);
-	set_table_entry(&table[5], "wake_idx", &sd->wake_idx,
-		sizeof(int), 0644, proc_dointvec_minmax, true);
-	set_table_entry(&table[6], "forkexec_idx", &sd->forkexec_idx,
-		sizeof(int), 0644, proc_dointvec_minmax, true);
-	set_table_entry(&table[7], "busy_factor", &sd->busy_factor,
-		sizeof(int), 0644, proc_dointvec_minmax, false);
-	set_table_entry(&table[8], "imbalance_pct", &sd->imbalance_pct,
-		sizeof(int), 0644, proc_dointvec_minmax, false);
-	set_table_entry(&table[9], "cache_nice_tries",
-		&sd->cache_nice_tries,
-		sizeof(int), 0644, proc_dointvec_minmax, false);
-	set_table_entry(&table[10], "flags", &sd->flags,
-		sizeof(int), 0644, proc_dointvec_minmax, false);
-	set_table_entry(&table[11], "max_newidle_lb_cost",
-		&sd->max_newidle_lb_cost,
-		sizeof(long), 0644, proc_doulongvec_minmax, false);
-	set_table_entry(&table[12], "name", sd->name,
-		CORENAME_MAX_SIZE, 0444, proc_dostring, false);
-	/* &table[13] is terminator */
-
-	return table;
-}
-
-static struct ctl_table *sd_alloc_ctl_cpu_table(int cpu)
-{
-	struct ctl_table *entry, *table;
-	struct sched_domain *sd;
-	int domain_num = 0, i;
-	char buf[32];
-
-	for_each_domain(cpu, sd)
-		domain_num++;
-	entry = table = sd_alloc_ctl_entry(domain_num + 1);
-	if (table == NULL)
-		return NULL;
-
-	i = 0;
-	for_each_domain(cpu, sd) {
-		snprintf(buf, 32, "domain%d", i);
-		entry->procname = kstrdup(buf, GFP_KERNEL);
-		entry->mode = 0555;
-		entry->child = sd_alloc_ctl_domain_table(sd);
-		entry++;
-		i++;
-	}
-	return table;
-}
-
-static struct ctl_table_header *sd_sysctl_header;
-static void register_sched_domain_sysctl(void)
-{
-	int i, cpu_num = num_possible_cpus();
-	struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1);
-	char buf[32];
-
-	WARN_ON(sd_ctl_dir[0].child);
-	sd_ctl_dir[0].child = entry;
-
-	if (entry == NULL)
-		return;
-
-	for_each_possible_cpu(i) {
-		snprintf(buf, 32, "cpu%d", i);
-		entry->procname = kstrdup(buf, GFP_KERNEL);
-		entry->mode = 0555;
-		entry->child = sd_alloc_ctl_cpu_table(i);
-		entry++;
-	}
-
-	WARN_ON(sd_sysctl_header);
-	sd_sysctl_header = register_sysctl_table(sd_ctl_root);
-}
-
-/* may be called multiple times per register */
-static void unregister_sched_domain_sysctl(void)
-{
-	unregister_sysctl_table(sd_sysctl_header);
-	sd_sysctl_header = NULL;
-	if (sd_ctl_dir[0].child)
-		sd_free_ctl_entry(&sd_ctl_dir[0].child);
-}
-#else
-static void register_sched_domain_sysctl(void)
-{
-}
-static void unregister_sched_domain_sysctl(void)
-{
-}
-#endif /* CONFIG_SCHED_DEBUG && CONFIG_SYSCTL */
-
 static void set_rq_online(struct rq *rq)
 {
 	if (!rq->online) {
@@ -5692,16 +5434,6 @@
 		set_cpu_rq_start_time();
 		return NOTIFY_OK;
 
-	case CPU_ONLINE:
-		/*
-		 * At this point a starting CPU has marked itself as online via
-		 * set_cpu_online(). But it might not yet have marked itself
-		 * as active, which is essential from here on.
-		 */
-		set_cpu_active(cpu, true);
-		stop_machine_unpark(cpu);
-		return NOTIFY_OK;
-
 	case CPU_DOWN_FAILED:
 		set_cpu_active(cpu, true);
 		return NOTIFY_OK;
@@ -6173,11 +5905,16 @@
 /* Setup the mask of cpus configured for isolated domains */
 static int __init isolated_cpu_setup(char *str)
 {
+	int ret;
+
 	alloc_bootmem_cpumask_var(&cpu_isolated_map);
-	cpulist_parse(str, cpu_isolated_map);
+	ret = cpulist_parse(str, cpu_isolated_map);
+	if (ret) {
+		pr_err("sched: Error, all isolcpus= values must be between 0 and %d\n", nr_cpu_ids);
+		return 0;
+	}
 	return 1;
 }
-
 __setup("isolcpus=", isolated_cpu_setup);
 
 struct s_data {
@@ -7860,11 +7597,9 @@
 void sched_offline_group(struct task_group *tg)
 {
 	unsigned long flags;
-	int i;
 
 	/* end participation in shares distribution */
-	for_each_possible_cpu(i)
-		unregister_fair_sched_group(tg, i);
+	unregister_fair_sched_group(tg);
 
 	spin_lock_irqsave(&task_group_lock, flags);
 	list_del_rcu(&tg->list);
@@ -7890,7 +7625,7 @@
 	queued = task_on_rq_queued(tsk);
 
 	if (queued)
-		dequeue_task(rq, tsk, DEQUEUE_SAVE);
+		dequeue_task(rq, tsk, DEQUEUE_SAVE | DEQUEUE_MOVE);
 	if (unlikely(running))
 		put_prev_task(rq, tsk);
 
@@ -7914,7 +7649,7 @@
 	if (unlikely(running))
 		tsk->sched_class->set_curr_task(rq);
 	if (queued)
-		enqueue_task(rq, tsk, ENQUEUE_RESTORE);
+		enqueue_task(rq, tsk, ENQUEUE_RESTORE | ENQUEUE_MOVE);
 
 	task_rq_unlock(rq, tsk, &flags);
 }

diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index b2ab2ff..75f98c5 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c

@@ -262,21 +262,21 @@
 #ifdef CONFIG_PARAVIRT
 	if (static_key_false(&paravirt_steal_enabled)) {
 		u64 steal;
-		cputime_t steal_ct;
+		unsigned long steal_jiffies;
 
 		steal = paravirt_steal_clock(smp_processor_id());
 		steal -= this_rq()->prev_steal_time;
 
 		/*
-		 * cputime_t may be less precise than nsecs (eg: if it's
-		 * based on jiffies). Lets cast the result to cputime
+		 * steal is in nsecs but our caller is expecting steal
+		 * time in jiffies. Lets cast the result to jiffies
 		 * granularity and account the rest on the next rounds.
 		 */
-		steal_ct = nsecs_to_cputime(steal);
-		this_rq()->prev_steal_time += cputime_to_nsecs(steal_ct);
+		steal_jiffies = nsecs_to_jiffies(steal);
+		this_rq()->prev_steal_time += jiffies_to_nsecs(steal_jiffies);
 
-		account_steal_time(steal_ct);
-		return steal_ct;
+		account_steal_time(jiffies_to_cputime(steal_jiffies));
+		return steal_jiffies;
 	}
 #endif
 	return false;
@@ -668,26 +668,25 @@
 #endif /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
-static unsigned long long vtime_delta(struct task_struct *tsk)
+static cputime_t vtime_delta(struct task_struct *tsk)
 {
-	unsigned long long clock;
+	unsigned long now = READ_ONCE(jiffies);
 
-	clock = local_clock();
-	if (clock < tsk->vtime_snap)
+	if (time_before(now, (unsigned long)tsk->vtime_snap))
 		return 0;
 
-	return clock - tsk->vtime_snap;
+	return jiffies_to_cputime(now - tsk->vtime_snap);
 }
 
 static cputime_t get_vtime_delta(struct task_struct *tsk)
 {
-	unsigned long long delta = vtime_delta(tsk);
+	unsigned long now = READ_ONCE(jiffies);
+	unsigned long delta = now - tsk->vtime_snap;
 
 	WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_INACTIVE);
-	tsk->vtime_snap += delta;
+	tsk->vtime_snap = now;
 
-	/* CHECKME: always safe to convert nsecs to cputime? */
-	return nsecs_to_cputime(delta);
+	return jiffies_to_cputime(delta);
 }
 
 static void __vtime_account_system(struct task_struct *tsk)
@@ -699,6 +698,9 @@
 
 void vtime_account_system(struct task_struct *tsk)
 {
+	if (!vtime_delta(tsk))
+		return;
+
 	write_seqcount_begin(&tsk->vtime_seqcount);
 	__vtime_account_system(tsk);
 	write_seqcount_end(&tsk->vtime_seqcount);
@@ -707,7 +709,8 @@
 void vtime_gen_account_irq_exit(struct task_struct *tsk)
 {
 	write_seqcount_begin(&tsk->vtime_seqcount);
-	__vtime_account_system(tsk);
+	if (vtime_delta(tsk))
+		__vtime_account_system(tsk);
 	if (context_tracking_in_user())
 		tsk->vtime_snap_whence = VTIME_USER;
 	write_seqcount_end(&tsk->vtime_seqcount);
@@ -718,16 +721,19 @@
 	cputime_t delta_cpu;
 
 	write_seqcount_begin(&tsk->vtime_seqcount);
-	delta_cpu = get_vtime_delta(tsk);
 	tsk->vtime_snap_whence = VTIME_SYS;
-	account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu));
+	if (vtime_delta(tsk)) {
+		delta_cpu = get_vtime_delta(tsk);
+		account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu));
+	}
 	write_seqcount_end(&tsk->vtime_seqcount);
 }
 
 void vtime_user_enter(struct task_struct *tsk)
 {
 	write_seqcount_begin(&tsk->vtime_seqcount);
-	__vtime_account_system(tsk);
+	if (vtime_delta(tsk))
+		__vtime_account_system(tsk);
 	tsk->vtime_snap_whence = VTIME_USER;
 	write_seqcount_end(&tsk->vtime_seqcount);
 }
@@ -742,7 +748,8 @@
 	 * that can thus safely catch up with a tickless delta.
 	 */
 	write_seqcount_begin(&tsk->vtime_seqcount);
-	__vtime_account_system(tsk);
+	if (vtime_delta(tsk))
+		__vtime_account_system(tsk);
 	current->flags |= PF_VCPU;
 	write_seqcount_end(&tsk->vtime_seqcount);
 }
@@ -772,7 +779,7 @@
 
 	write_seqcount_begin(&current->vtime_seqcount);
 	current->vtime_snap_whence = VTIME_SYS;
-	current->vtime_snap = sched_clock_cpu(smp_processor_id());
+	current->vtime_snap = jiffies;
 	write_seqcount_end(&current->vtime_seqcount);
 }
 
@@ -783,7 +790,7 @@
 	local_irq_save(flags);
 	write_seqcount_begin(&t->vtime_seqcount);
 	t->vtime_snap_whence = VTIME_SYS;
-	t->vtime_snap = sched_clock_cpu(cpu);
+	t->vtime_snap = jiffies;
 	write_seqcount_end(&t->vtime_seqcount);
 	local_irq_restore(flags);
 }

diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index cd64c97..c7a036f 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c

@@ -352,7 +352,15 @@
 	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
 	struct rq *rq = rq_of_dl_rq(dl_rq);
 
-	WARN_ON(!dl_se->dl_new || dl_se->dl_throttled);
+	WARN_ON(dl_time_before(rq_clock(rq), dl_se->deadline));
+
+	/*
+	 * We are racing with the deadline timer. So, do nothing because
+	 * the deadline timer handler will take care of properly recharging
+	 * the runtime and postponing the deadline
+	 */
+	if (dl_se->dl_throttled)
+		return;
 
 	/*
 	 * We use the regular wall clock time to set deadlines in the
@@ -361,7 +369,6 @@
 	 */
 	dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
 	dl_se->runtime = pi_se->dl_runtime;
-	dl_se->dl_new = 0;
 }
 
 /*
@@ -399,6 +406,9 @@
 		dl_se->runtime = pi_se->dl_runtime;
 	}
 
+	if (dl_se->dl_yielded && dl_se->runtime > 0)
+		dl_se->runtime = 0;
+
 	/*
 	 * We keep moving the deadline away until we get some
 	 * available runtime for the entity. This ensures correct
@@ -420,7 +430,7 @@
 	 * entity.
 	 */
 	if (dl_time_before(dl_se->deadline, rq_clock(rq))) {
-		printk_deferred_once("sched: DL replenish lagged to much\n");
+		printk_deferred_once("sched: DL replenish lagged too much\n");
 		dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
 		dl_se->runtime = pi_se->dl_runtime;
 	}
@@ -500,15 +510,6 @@
 	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
 	struct rq *rq = rq_of_dl_rq(dl_rq);
 
-	/*
-	 * The arrival of a new instance needs special treatment, i.e.,
-	 * the actual scheduling parameters have to be "renewed".
-	 */
-	if (dl_se->dl_new) {
-		setup_new_dl_entity(dl_se, pi_se);
-		return;
-	}
-
 	if (dl_time_before(dl_se->deadline, rq_clock(rq)) ||
 	    dl_entity_overflow(dl_se, pi_se, rq_clock(rq))) {
 		dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
@@ -605,16 +606,6 @@
 	}
 
 	/*
-	 * This is possible if switched_from_dl() raced against a running
-	 * callback that took the above !dl_task() path and we've since then
-	 * switched back into SCHED_DEADLINE.
-	 *
-	 * There's nothing to do except drop our task reference.
-	 */
-	if (dl_se->dl_new)
-		goto unlock;
-
-	/*
 	 * The task might have been boosted by someone else and might be in the
 	 * boosting/deboosting path, its not throttled.
 	 */
@@ -735,8 +726,11 @@
 	 * approach need further study.
 	 */
 	delta_exec = rq_clock_task(rq) - curr->se.exec_start;
-	if (unlikely((s64)delta_exec <= 0))
+	if (unlikely((s64)delta_exec <= 0)) {
+		if (unlikely(dl_se->dl_yielded))
+			goto throttle;
 		return;
+	}
 
 	schedstat_set(curr->se.statistics.exec_max,
 		      max(curr->se.statistics.exec_max, delta_exec));
@@ -749,8 +743,10 @@
 
 	sched_rt_avg_update(rq, delta_exec);
 
-	dl_se->runtime -= dl_se->dl_yielded ? 0 : delta_exec;
-	if (dl_runtime_exceeded(dl_se)) {
+	dl_se->runtime -= delta_exec;
+
+throttle:
+	if (dl_runtime_exceeded(dl_se) || dl_se->dl_yielded) {
 		dl_se->dl_throttled = 1;
 		__dequeue_task_dl(rq, curr, 0);
 		if (unlikely(dl_se->dl_boosted || !start_dl_timer(curr)))
@@ -917,7 +913,7 @@
 	 * parameters of the task might need updating. Otherwise,
 	 * we want a replenishment of its runtime.
 	 */
-	if (dl_se->dl_new || flags & ENQUEUE_WAKEUP)
+	if (flags & ENQUEUE_WAKEUP)
 		update_dl_entity(dl_se, pi_se);
 	else if (flags & ENQUEUE_REPLENISH)
 		replenish_dl_entity(dl_se, pi_se);
@@ -994,18 +990,14 @@
  */
 static void yield_task_dl(struct rq *rq)
 {
-	struct task_struct *p = rq->curr;
-
 	/*
 	 * We make the task go to sleep until its current deadline by
 	 * forcing its runtime to zero. This way, update_curr_dl() stops
 	 * it and the bandwidth timer will wake it up and will give it
 	 * new scheduling parameters (thanks to dl_yielded=1).
 	 */
-	if (p->dl.runtime > 0) {
-		rq->curr->dl.dl_yielded = 1;
-		p->dl.runtime = 0;
-	}
+	rq->curr->dl.dl_yielded = 1;
+
 	update_rq_clock(rq);
 	update_curr_dl(rq);
 	/*
@@ -1722,6 +1714,9 @@
  */
 static void switched_to_dl(struct rq *rq, struct task_struct *p)
 {
+	if (dl_time_before(p->dl.deadline, rq_clock(rq)))
+		setup_new_dl_entity(&p->dl, &p->dl);
+
 	if (task_on_rq_queued(p) && rq->curr != p) {
 #ifdef CONFIG_SMP
 		if (p->nr_cpus_allowed > 1 && rq->dl.overloaded)
@@ -1768,8 +1763,7 @@
 		 */
 		resched_curr(rq);
 #endif /* CONFIG_SMP */
-	} else
-		switched_to_dl(rq, p);
+	}
 }
 
 const struct sched_class dl_sched_class = {

diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 6415117..4fbc3bd 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c

@@ -16,6 +16,7 @@
 #include <linux/kallsyms.h>
 #include <linux/utsname.h>
 #include <linux/mempolicy.h>
+#include <linux/debugfs.h>
 
 #include "sched.h"
 
@@ -58,6 +59,309 @@
 
 #define SPLIT_NS(x) nsec_high(x), nsec_low(x)
 
+#define SCHED_FEAT(name, enabled)	\
+	#name ,
+
+static const char * const sched_feat_names[] = {
+#include "features.h"
+};
+
+#undef SCHED_FEAT
+
+static int sched_feat_show(struct seq_file *m, void *v)
+{
+	int i;
+
+	for (i = 0; i < __SCHED_FEAT_NR; i++) {
+		if (!(sysctl_sched_features & (1UL << i)))
+			seq_puts(m, "NO_");
+		seq_printf(m, "%s ", sched_feat_names[i]);
+	}
+	seq_puts(m, "\n");
+
+	return 0;
+}
+
+#ifdef HAVE_JUMP_LABEL
+
+#define jump_label_key__true  STATIC_KEY_INIT_TRUE
+#define jump_label_key__false STATIC_KEY_INIT_FALSE
+
+#define SCHED_FEAT(name, enabled)	\
+	jump_label_key__##enabled ,
+
+struct static_key sched_feat_keys[__SCHED_FEAT_NR] = {
+#include "features.h"
+};
+
+#undef SCHED_FEAT
+
+static void sched_feat_disable(int i)
+{
+	static_key_disable(&sched_feat_keys[i]);
+}
+
+static void sched_feat_enable(int i)
+{
+	static_key_enable(&sched_feat_keys[i]);
+}
+#else
+static void sched_feat_disable(int i) { };
+static void sched_feat_enable(int i) { };
+#endif /* HAVE_JUMP_LABEL */
+
+static int sched_feat_set(char *cmp)
+{
+	int i;
+	int neg = 0;
+
+	if (strncmp(cmp, "NO_", 3) == 0) {
+		neg = 1;
+		cmp += 3;
+	}
+
+	for (i = 0; i < __SCHED_FEAT_NR; i++) {
+		if (strcmp(cmp, sched_feat_names[i]) == 0) {
+			if (neg) {
+				sysctl_sched_features &= ~(1UL << i);
+				sched_feat_disable(i);
+			} else {
+				sysctl_sched_features |= (1UL << i);
+				sched_feat_enable(i);
+			}
+			break;
+		}
+	}
+
+	return i;
+}
+
+static ssize_t
+sched_feat_write(struct file *filp, const char __user *ubuf,
+		size_t cnt, loff_t *ppos)
+{
+	char buf[64];
+	char *cmp;
+	int i;
+	struct inode *inode;
+
+	if (cnt > 63)
+		cnt = 63;
+
+	if (copy_from_user(&buf, ubuf, cnt))
+		return -EFAULT;
+
+	buf[cnt] = 0;
+	cmp = strstrip(buf);
+
+	/* Ensure the static_key remains in a consistent state */
+	inode = file_inode(filp);
+	inode_lock(inode);
+	i = sched_feat_set(cmp);
+	inode_unlock(inode);
+	if (i == __SCHED_FEAT_NR)
+		return -EINVAL;
+
+	*ppos += cnt;
+
+	return cnt;
+}
+
+static int sched_feat_open(struct inode *inode, struct file *filp)
+{
+	return single_open(filp, sched_feat_show, NULL);
+}
+
+static const struct file_operations sched_feat_fops = {
+	.open		= sched_feat_open,
+	.write		= sched_feat_write,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static __init int sched_init_debug(void)
+{
+	debugfs_create_file("sched_features", 0644, NULL, NULL,
+			&sched_feat_fops);
+
+	return 0;
+}
+late_initcall(sched_init_debug);
+
+#ifdef CONFIG_SMP
+
+#ifdef CONFIG_SYSCTL
+
+static struct ctl_table sd_ctl_dir[] = {
+	{
+		.procname	= "sched_domain",
+		.mode		= 0555,
+	},
+	{}
+};
+
+static struct ctl_table sd_ctl_root[] = {
+	{
+		.procname	= "kernel",
+		.mode		= 0555,
+		.child		= sd_ctl_dir,
+	},
+	{}
+};
+
+static struct ctl_table *sd_alloc_ctl_entry(int n)
+{
+	struct ctl_table *entry =
+		kcalloc(n, sizeof(struct ctl_table), GFP_KERNEL);
+
+	return entry;
+}
+
+static void sd_free_ctl_entry(struct ctl_table **tablep)
+{
+	struct ctl_table *entry;
+
+	/*
+	 * In the intermediate directories, both the child directory and
+	 * procname are dynamically allocated and could fail but the mode
+	 * will always be set. In the lowest directory the names are
+	 * static strings and all have proc handlers.
+	 */
+	for (entry = *tablep; entry->mode; entry++) {
+		if (entry->child)
+			sd_free_ctl_entry(&entry->child);
+		if (entry->proc_handler == NULL)
+			kfree(entry->procname);
+	}
+
+	kfree(*tablep);
+	*tablep = NULL;
+}
+
+static int min_load_idx = 0;
+static int max_load_idx = CPU_LOAD_IDX_MAX-1;
+
+static void
+set_table_entry(struct ctl_table *entry,
+		const char *procname, void *data, int maxlen,
+		umode_t mode, proc_handler *proc_handler,
+		bool load_idx)
+{
+	entry->procname = procname;
+	entry->data = data;
+	entry->maxlen = maxlen;
+	entry->mode = mode;
+	entry->proc_handler = proc_handler;
+
+	if (load_idx) {
+		entry->extra1 = &min_load_idx;
+		entry->extra2 = &max_load_idx;
+	}
+}
+
+static struct ctl_table *
+sd_alloc_ctl_domain_table(struct sched_domain *sd)
+{
+	struct ctl_table *table = sd_alloc_ctl_entry(14);
+
+	if (table == NULL)
+		return NULL;
+
+	set_table_entry(&table[0], "min_interval", &sd->min_interval,
+		sizeof(long), 0644, proc_doulongvec_minmax, false);
+	set_table_entry(&table[1], "max_interval", &sd->max_interval,
+		sizeof(long), 0644, proc_doulongvec_minmax, false);
+	set_table_entry(&table[2], "busy_idx", &sd->busy_idx,
+		sizeof(int), 0644, proc_dointvec_minmax, true);
+	set_table_entry(&table[3], "idle_idx", &sd->idle_idx,
+		sizeof(int), 0644, proc_dointvec_minmax, true);
+	set_table_entry(&table[4], "newidle_idx", &sd->newidle_idx,
+		sizeof(int), 0644, proc_dointvec_minmax, true);
+	set_table_entry(&table[5], "wake_idx", &sd->wake_idx,
+		sizeof(int), 0644, proc_dointvec_minmax, true);
+	set_table_entry(&table[6], "forkexec_idx", &sd->forkexec_idx,
+		sizeof(int), 0644, proc_dointvec_minmax, true);
+	set_table_entry(&table[7], "busy_factor", &sd->busy_factor,
+		sizeof(int), 0644, proc_dointvec_minmax, false);
+	set_table_entry(&table[8], "imbalance_pct", &sd->imbalance_pct,
+		sizeof(int), 0644, proc_dointvec_minmax, false);
+	set_table_entry(&table[9], "cache_nice_tries",
+		&sd->cache_nice_tries,
+		sizeof(int), 0644, proc_dointvec_minmax, false);
+	set_table_entry(&table[10], "flags", &sd->flags,
+		sizeof(int), 0644, proc_dointvec_minmax, false);
+	set_table_entry(&table[11], "max_newidle_lb_cost",
+		&sd->max_newidle_lb_cost,
+		sizeof(long), 0644, proc_doulongvec_minmax, false);
+	set_table_entry(&table[12], "name", sd->name,
+		CORENAME_MAX_SIZE, 0444, proc_dostring, false);
+	/* &table[13] is terminator */
+
+	return table;
+}
+
+static struct ctl_table *sd_alloc_ctl_cpu_table(int cpu)
+{
+	struct ctl_table *entry, *table;
+	struct sched_domain *sd;
+	int domain_num = 0, i;
+	char buf[32];
+
+	for_each_domain(cpu, sd)
+		domain_num++;
+	entry = table = sd_alloc_ctl_entry(domain_num + 1);
+	if (table == NULL)
+		return NULL;
+
+	i = 0;
+	for_each_domain(cpu, sd) {
+		snprintf(buf, 32, "domain%d", i);
+		entry->procname = kstrdup(buf, GFP_KERNEL);
+		entry->mode = 0555;
+		entry->child = sd_alloc_ctl_domain_table(sd);
+		entry++;
+		i++;
+	}
+	return table;
+}
+
+static struct ctl_table_header *sd_sysctl_header;
+void register_sched_domain_sysctl(void)
+{
+	int i, cpu_num = num_possible_cpus();
+	struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1);
+	char buf[32];
+
+	WARN_ON(sd_ctl_dir[0].child);
+	sd_ctl_dir[0].child = entry;
+
+	if (entry == NULL)
+		return;
+
+	for_each_possible_cpu(i) {
+		snprintf(buf, 32, "cpu%d", i);
+		entry->procname = kstrdup(buf, GFP_KERNEL);
+		entry->mode = 0555;
+		entry->child = sd_alloc_ctl_cpu_table(i);
+		entry++;
+	}
+
+	WARN_ON(sd_sysctl_header);
+	sd_sysctl_header = register_sysctl_table(sd_ctl_root);
+}
+
+/* may be called multiple times per register */
+void unregister_sched_domain_sysctl(void)
+{
+	unregister_sysctl_table(sd_sysctl_header);
+	sd_sysctl_header = NULL;
+	if (sd_ctl_dir[0].child)
+		sd_free_ctl_entry(&sd_ctl_dir[0].child);
+}
+#endif /* CONFIG_SYSCTL */
+#endif /* CONFIG_SMP */
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
 static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group *tg)
 {
@@ -75,16 +379,18 @@
 	PN(se->vruntime);
 	PN(se->sum_exec_runtime);
 #ifdef CONFIG_SCHEDSTATS
-	PN(se->statistics.wait_start);
-	PN(se->statistics.sleep_start);
-	PN(se->statistics.block_start);
-	PN(se->statistics.sleep_max);
-	PN(se->statistics.block_max);
-	PN(se->statistics.exec_max);
-	PN(se->statistics.slice_max);
-	PN(se->statistics.wait_max);
-	PN(se->statistics.wait_sum);
-	P(se->statistics.wait_count);
+	if (schedstat_enabled()) {
+		PN(se->statistics.wait_start);
+		PN(se->statistics.sleep_start);
+		PN(se->statistics.block_start);
+		PN(se->statistics.sleep_max);
+		PN(se->statistics.block_max);
+		PN(se->statistics.exec_max);
+		PN(se->statistics.slice_max);
+		PN(se->statistics.wait_max);
+		PN(se->statistics.wait_sum);
+		P(se->statistics.wait_count);
+	}
 #endif
 	P(se->load.weight);
 #ifdef CONFIG_SMP
@@ -122,10 +428,12 @@
 		(long long)(p->nvcsw + p->nivcsw),
 		p->prio);
 #ifdef CONFIG_SCHEDSTATS
-	SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld",
-		SPLIT_NS(p->se.statistics.wait_sum),
-		SPLIT_NS(p->se.sum_exec_runtime),
-		SPLIT_NS(p->se.statistics.sum_sleep_runtime));
+	if (schedstat_enabled()) {
+		SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld",
+			SPLIT_NS(p->se.statistics.wait_sum),
+			SPLIT_NS(p->se.sum_exec_runtime),
+			SPLIT_NS(p->se.statistics.sum_sleep_runtime));
+	}
 #else
 	SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld",
 		0LL, 0L,
@@ -258,8 +566,17 @@
 
 void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq)
 {
+	struct dl_bw *dl_bw;
+
 	SEQ_printf(m, "\ndl_rq[%d]:\n", cpu);
 	SEQ_printf(m, "  .%-30s: %ld\n", "dl_nr_running", dl_rq->dl_nr_running);
+#ifdef CONFIG_SMP
+	dl_bw = &cpu_rq(cpu)->rd->dl_bw;
+#else
+	dl_bw = &dl_rq->dl_bw;
+#endif
+	SEQ_printf(m, "  .%-30s: %lld\n", "dl_bw->bw", dl_bw->bw);
+	SEQ_printf(m, "  .%-30s: %lld\n", "dl_bw->total_bw", dl_bw->total_bw);
 }
 
 extern __read_mostly int sched_clock_running;
@@ -313,17 +630,18 @@
 #define P(n) SEQ_printf(m, "  .%-30s: %d\n", #n, rq->n);
 #define P64(n) SEQ_printf(m, "  .%-30s: %Ld\n", #n, rq->n);
 
-	P(yld_count);
-
-	P(sched_count);
-	P(sched_goidle);
 #ifdef CONFIG_SMP
 	P64(avg_idle);
 	P64(max_idle_balance_cost);
 #endif
 
-	P(ttwu_count);
-	P(ttwu_local);
+	if (schedstat_enabled()) {
+		P(yld_count);
+		P(sched_count);
+		P(sched_goidle);
+		P(ttwu_count);
+		P(ttwu_local);
+	}
 
 #undef P
 #undef P64
@@ -569,38 +887,39 @@
 	nr_switches = p->nvcsw + p->nivcsw;
 
 #ifdef CONFIG_SCHEDSTATS
-	PN(se.statistics.sum_sleep_runtime);
-	PN(se.statistics.wait_start);
-	PN(se.statistics.sleep_start);
-	PN(se.statistics.block_start);
-	PN(se.statistics.sleep_max);
-	PN(se.statistics.block_max);
-	PN(se.statistics.exec_max);
-	PN(se.statistics.slice_max);
-	PN(se.statistics.wait_max);
-	PN(se.statistics.wait_sum);
-	P(se.statistics.wait_count);
-	PN(se.statistics.iowait_sum);
-	P(se.statistics.iowait_count);
 	P(se.nr_migrations);
-	P(se.statistics.nr_migrations_cold);
-	P(se.statistics.nr_failed_migrations_affine);
-	P(se.statistics.nr_failed_migrations_running);
-	P(se.statistics.nr_failed_migrations_hot);
-	P(se.statistics.nr_forced_migrations);
-	P(se.statistics.nr_wakeups);
-	P(se.statistics.nr_wakeups_sync);
-	P(se.statistics.nr_wakeups_migrate);
-	P(se.statistics.nr_wakeups_local);
-	P(se.statistics.nr_wakeups_remote);
-	P(se.statistics.nr_wakeups_affine);
-	P(se.statistics.nr_wakeups_affine_attempts);
-	P(se.statistics.nr_wakeups_passive);
-	P(se.statistics.nr_wakeups_idle);
 
-	{
+	if (schedstat_enabled()) {
 		u64 avg_atom, avg_per_cpu;
 
+		PN(se.statistics.sum_sleep_runtime);
+		PN(se.statistics.wait_start);
+		PN(se.statistics.sleep_start);
+		PN(se.statistics.block_start);
+		PN(se.statistics.sleep_max);
+		PN(se.statistics.block_max);
+		PN(se.statistics.exec_max);
+		PN(se.statistics.slice_max);
+		PN(se.statistics.wait_max);
+		PN(se.statistics.wait_sum);
+		P(se.statistics.wait_count);
+		PN(se.statistics.iowait_sum);
+		P(se.statistics.iowait_count);
+		P(se.statistics.nr_migrations_cold);
+		P(se.statistics.nr_failed_migrations_affine);
+		P(se.statistics.nr_failed_migrations_running);
+		P(se.statistics.nr_failed_migrations_hot);
+		P(se.statistics.nr_forced_migrations);
+		P(se.statistics.nr_wakeups);
+		P(se.statistics.nr_wakeups_sync);
+		P(se.statistics.nr_wakeups_migrate);
+		P(se.statistics.nr_wakeups_local);
+		P(se.statistics.nr_wakeups_remote);
+		P(se.statistics.nr_wakeups_affine);
+		P(se.statistics.nr_wakeups_affine_attempts);
+		P(se.statistics.nr_wakeups_passive);
+		P(se.statistics.nr_wakeups_idle);
+
 		avg_atom = p->se.sum_exec_runtime;
 		if (nr_switches)
 			avg_atom = div64_ul(avg_atom, nr_switches);

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 56b7d4b..3313052 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c

@@ -20,8 +20,8 @@
  *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
  */
 
-#include <linux/latencytop.h>
 #include <linux/sched.h>
+#include <linux/latencytop.h>
 #include <linux/cpumask.h>
 #include <linux/cpuidle.h>
 #include <linux/slab.h>
@@ -755,7 +755,9 @@
 update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	struct task_struct *p;
-	u64 delta = rq_clock(rq_of(cfs_rq)) - se->statistics.wait_start;
+	u64 delta;
+
+	delta = rq_clock(rq_of(cfs_rq)) - se->statistics.wait_start;
 
 	if (entity_is_task(se)) {
 		p = task_of(se);
@@ -776,22 +778,12 @@
 	se->statistics.wait_sum += delta;
 	se->statistics.wait_start = 0;
 }
-#else
-static inline void
-update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
-{
-}
-
-static inline void
-update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
-{
-}
-#endif
 
 /*
  * Task is being enqueued - update stats:
  */
-static void update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
+static inline void
+update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	/*
 	 * Are we enqueueing a waiting task? (for current tasks
@@ -802,7 +794,7 @@
 }
 
 static inline void
-update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
+update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
 	/*
 	 * Mark the end of the wait period if dequeueing a
@@ -810,7 +802,40 @@
 	 */
 	if (se != cfs_rq->curr)
 		update_stats_wait_end(cfs_rq, se);
+
+	if (flags & DEQUEUE_SLEEP) {
+		if (entity_is_task(se)) {
+			struct task_struct *tsk = task_of(se);
+
+			if (tsk->state & TASK_INTERRUPTIBLE)
+				se->statistics.sleep_start = rq_clock(rq_of(cfs_rq));
+			if (tsk->state & TASK_UNINTERRUPTIBLE)
+				se->statistics.block_start = rq_clock(rq_of(cfs_rq));
+		}
+	}
+
 }
+#else
+static inline void
+update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+}
+
+static inline void
+update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+}
+
+static inline void
+update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+}
+
+static inline void
+update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
+{
+}
+#endif
 
 /*
  * We are picking a new current task - update its stats:
@@ -907,10 +932,11 @@
 	spinlock_t lock; /* nr_tasks, tasks */
 	int nr_tasks;
 	pid_t gid;
+	int active_nodes;
 
 	struct rcu_head rcu;
-	nodemask_t active_nodes;
 	unsigned long total_faults;
+	unsigned long max_faults_cpu;
 	/*
 	 * Faults_cpu is used to decide whether memory should move
 	 * towards the CPU. As a consequence, these stats are weighted
@@ -969,6 +995,18 @@
 		group->faults_cpu[task_faults_idx(NUMA_MEM, nid, 1)];
 }
 
+/*
+ * A node triggering more than 1/3 as many NUMA faults as the maximum is
+ * considered part of a numa group's pseudo-interleaving set. Migrations
+ * between these nodes are slowed down, to allow things to settle down.
+ */
+#define ACTIVE_NODE_FRACTION 3
+
+static bool numa_is_active_node(int nid, struct numa_group *ng)
+{
+	return group_faults_cpu(ng, nid) * ACTIVE_NODE_FRACTION > ng->max_faults_cpu;
+}
+
 /* Handle placement on systems where not all nodes are directly connected. */
 static unsigned long score_nearby_nodes(struct task_struct *p, int nid,
 					int maxdist, bool task)
@@ -1118,27 +1156,23 @@
 		return true;
 
 	/*
-	 * Do not migrate if the destination is not a node that
-	 * is actively used by this numa group.
+	 * Destination node is much more heavily used than the source
+	 * node? Allow migration.
 	 */
-	if (!node_isset(dst_nid, ng->active_nodes))
-		return false;
-
-	/*
-	 * Source is a node that is not actively used by this
-	 * numa group, while the destination is. Migrate.
-	 */
-	if (!node_isset(src_nid, ng->active_nodes))
+	if (group_faults_cpu(ng, dst_nid) > group_faults_cpu(ng, src_nid) *
+					ACTIVE_NODE_FRACTION)
 		return true;
 
 	/*
-	 * Both source and destination are nodes in active
-	 * use by this numa group. Maximize memory bandwidth
-	 * by migrating from more heavily used groups, to less
-	 * heavily used ones, spreading the load around.
-	 * Use a 1/4 hysteresis to avoid spurious page movement.
+	 * Distribute memory according to CPU & memory use on each node,
+	 * with 3/4 hysteresis to avoid unnecessary memory migrations:
+	 *
+	 * faults_cpu(dst)   3   faults_cpu(src)
+	 * --------------- * - > ---------------
+	 * faults_mem(dst)   4   faults_mem(src)
 	 */
-	return group_faults(p, dst_nid) < (group_faults(p, src_nid) * 3 / 4);
+	return group_faults_cpu(ng, dst_nid) * group_faults(p, src_nid) * 3 >
+	       group_faults_cpu(ng, src_nid) * group_faults(p, dst_nid) * 4;
 }
 
 static unsigned long weighted_cpuload(const int cpu);
@@ -1484,7 +1518,7 @@
 
 		.best_task = NULL,
 		.best_imp = 0,
-		.best_cpu = -1
+		.best_cpu = -1,
 	};
 	struct sched_domain *sd;
 	unsigned long taskweight, groupweight;
@@ -1536,8 +1570,7 @@
 	 *   multiple NUMA nodes; in order to better consolidate the group,
 	 *   we need to check other locations.
 	 */
-	if (env.best_cpu == -1 || (p->numa_group &&
-			nodes_weight(p->numa_group->active_nodes) > 1)) {
+	if (env.best_cpu == -1 || (p->numa_group && p->numa_group->active_nodes > 1)) {
 		for_each_online_node(nid) {
 			if (nid == env.src_nid || nid == p->numa_preferred_nid)
 				continue;
@@ -1572,12 +1605,14 @@
 	 * trying for a better one later. Do not set the preferred node here.
 	 */
 	if (p->numa_group) {
+		struct numa_group *ng = p->numa_group;
+
 		if (env.best_cpu == -1)
 			nid = env.src_nid;
 		else
 			nid = env.dst_nid;
 
-		if (node_isset(nid, p->numa_group->active_nodes))
+		if (ng->active_nodes > 1 && numa_is_active_node(env.dst_nid, ng))
 			sched_setnuma(p, env.dst_nid);
 	}
 
@@ -1627,20 +1662,15 @@
 }
 
 /*
- * Find the nodes on which the workload is actively running. We do this by
+ * Find out how many nodes on the workload is actively running on. Do this by
  * tracking the nodes from which NUMA hinting faults are triggered. This can
  * be different from the set of nodes where the workload's memory is currently
  * located.
- *
- * The bitmask is used to make smarter decisions on when to do NUMA page
- * migrations, To prevent flip-flopping, and excessive page migrations, nodes
- * are added when they cause over 6/16 of the maximum number of faults, but
- * only removed when they drop below 3/16.
  */
-static void update_numa_active_node_mask(struct numa_group *numa_group)
+static void numa_group_count_active_nodes(struct numa_group *numa_group)
 {
 	unsigned long faults, max_faults = 0;
-	int nid;
+	int nid, active_nodes = 0;
 
 	for_each_online_node(nid) {
 		faults = group_faults_cpu(numa_group, nid);
@@ -1650,12 +1680,12 @@
 
 	for_each_online_node(nid) {
 		faults = group_faults_cpu(numa_group, nid);
-		if (!node_isset(nid, numa_group->active_nodes)) {
-			if (faults > max_faults * 6 / 16)
-				node_set(nid, numa_group->active_nodes);
-		} else if (faults < max_faults * 3 / 16)
-			node_clear(nid, numa_group->active_nodes);
+		if (faults * ACTIVE_NODE_FRACTION > max_faults)
+			active_nodes++;
 	}
+
+	numa_group->max_faults_cpu = max_faults;
+	numa_group->active_nodes = active_nodes;
 }
 
 /*
@@ -1946,7 +1976,7 @@
 	update_task_scan_period(p, fault_types[0], fault_types[1]);
 
 	if (p->numa_group) {
-		update_numa_active_node_mask(p->numa_group);
+		numa_group_count_active_nodes(p->numa_group);
 		spin_unlock_irq(group_lock);
 		max_nid = preferred_group_nid(p, max_group_nid);
 	}
@@ -1990,14 +2020,14 @@
 			return;
 
 		atomic_set(&grp->refcount, 1);
+		grp->active_nodes = 1;
+		grp->max_faults_cpu = 0;
 		spin_lock_init(&grp->lock);
 		grp->gid = p->pid;
 		/* Second half of the array tracks nids where faults happen */
 		grp->faults_cpu = grp->faults + NR_NUMA_HINT_FAULT_TYPES *
 						nr_node_ids;
 
-		node_set(task_node(current), grp->active_nodes);
-
 		for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++)
 			grp->faults[i] = p->numa_faults[i];
 
@@ -2111,6 +2141,7 @@
 	bool migrated = flags & TNF_MIGRATED;
 	int cpu_node = task_node(current);
 	int local = !!(flags & TNF_FAULT_LOCAL);
+	struct numa_group *ng;
 	int priv;
 
 	if (!static_branch_likely(&sched_numa_balancing))
@@ -2151,9 +2182,10 @@
 	 * actively using should be counted as local. This allows the
 	 * scan rate to slow down when a workload has settled down.
 	 */
-	if (!priv && !local && p->numa_group &&
-			node_isset(cpu_node, p->numa_group->active_nodes) &&
-			node_isset(mem_node, p->numa_group->active_nodes))
+	ng = p->numa_group;
+	if (!priv && !local && ng && ng->active_nodes > 1 &&
+				numa_is_active_node(cpu_node, ng) &&
+				numa_is_active_node(mem_node, ng))
 		local = 1;
 
 	task_numa_placement(p);
@@ -3102,6 +3134,26 @@
 
 static void check_enqueue_throttle(struct cfs_rq *cfs_rq);
 
+static inline void check_schedstat_required(void)
+{
+#ifdef CONFIG_SCHEDSTATS
+	if (schedstat_enabled())
+		return;
+
+	/* Force schedstat enabled if a dependent tracepoint is active */
+	if (trace_sched_stat_wait_enabled()    ||
+			trace_sched_stat_sleep_enabled()   ||
+			trace_sched_stat_iowait_enabled()  ||
+			trace_sched_stat_blocked_enabled() ||
+			trace_sched_stat_runtime_enabled())  {
+		pr_warn_once("Scheduler tracepoints stat_sleep, stat_iowait, "
+			     "stat_blocked and stat_runtime require the "
+			     "kernel parameter schedstats=enabled or "
+			     "kernel.sched_schedstats=1\n");
+	}
+#endif
+}
+
 static void
 enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
@@ -3122,11 +3174,15 @@
 
 	if (flags & ENQUEUE_WAKEUP) {
 		place_entity(cfs_rq, se, 0);
-		enqueue_sleeper(cfs_rq, se);
+		if (schedstat_enabled())
+			enqueue_sleeper(cfs_rq, se);
 	}
 
-	update_stats_enqueue(cfs_rq, se);
-	check_spread(cfs_rq, se);
+	check_schedstat_required();
+	if (schedstat_enabled()) {
+		update_stats_enqueue(cfs_rq, se);
+		check_spread(cfs_rq, se);
+	}
 	if (se != cfs_rq->curr)
 		__enqueue_entity(cfs_rq, se);
 	se->on_rq = 1;
@@ -3193,19 +3249,8 @@
 	update_curr(cfs_rq);
 	dequeue_entity_load_avg(cfs_rq, se);
 
-	update_stats_dequeue(cfs_rq, se);
-	if (flags & DEQUEUE_SLEEP) {
-#ifdef CONFIG_SCHEDSTATS
-		if (entity_is_task(se)) {
-			struct task_struct *tsk = task_of(se);
-
-			if (tsk->state & TASK_INTERRUPTIBLE)
-				se->statistics.sleep_start = rq_clock(rq_of(cfs_rq));
-			if (tsk->state & TASK_UNINTERRUPTIBLE)
-				se->statistics.block_start = rq_clock(rq_of(cfs_rq));
-		}
-#endif
-	}
+	if (schedstat_enabled())
+		update_stats_dequeue(cfs_rq, se, flags);
 
 	clear_buddies(cfs_rq, se);
 
@@ -3279,7 +3324,8 @@
 		 * a CPU. So account for the time it spent waiting on the
 		 * runqueue.
 		 */
-		update_stats_wait_end(cfs_rq, se);
+		if (schedstat_enabled())
+			update_stats_wait_end(cfs_rq, se);
 		__dequeue_entity(cfs_rq, se);
 		update_load_avg(se, 1);
 	}
@@ -3292,7 +3338,7 @@
 	 * least twice that of our own weight (i.e. dont track it
 	 * when there are only lesser-weight tasks around):
 	 */
-	if (rq_of(cfs_rq)->load.weight >= 2*se->load.weight) {
+	if (schedstat_enabled() && rq_of(cfs_rq)->load.weight >= 2*se->load.weight) {
 		se->statistics.slice_max = max(se->statistics.slice_max,
 			se->sum_exec_runtime - se->prev_sum_exec_runtime);
 	}
@@ -3375,9 +3421,13 @@
 	/* throttle cfs_rqs exceeding runtime */
 	check_cfs_rq_runtime(cfs_rq);
 
-	check_spread(cfs_rq, prev);
+	if (schedstat_enabled()) {
+		check_spread(cfs_rq, prev);
+		if (prev->on_rq)
+			update_stats_wait_start(cfs_rq, prev);
+	}
+
 	if (prev->on_rq) {
-		update_stats_wait_start(cfs_rq, prev);
 		/* Put 'current' back into the tree. */
 		__enqueue_entity(cfs_rq, prev);
 		/* in !on_rq case, update occurred at dequeue */
@@ -4459,9 +4509,17 @@
 
 		/* scale is effectively 1 << i now, and >> i divides by scale */
 
-		old_load = this_rq->cpu_load[i] - tickless_load;
+		old_load = this_rq->cpu_load[i];
 		old_load = decay_load_missed(old_load, pending_updates - 1, i);
-		old_load += tickless_load;
+		if (tickless_load) {
+			old_load -= decay_load_missed(tickless_load, pending_updates - 1, i);
+			/*
+			 * old_load can never be a negative value because a
+			 * decayed tickless_load cannot be greater than the
+			 * original tickless_load.
+			 */
+			old_load += tickless_load;
+		}
 		new_load = this_load;
 		/*
 		 * Round up the averaging division if load is increasing. This
@@ -4484,6 +4542,25 @@
 }
 
 #ifdef CONFIG_NO_HZ_COMMON
+static void __update_cpu_load_nohz(struct rq *this_rq,
+				   unsigned long curr_jiffies,
+				   unsigned long load,
+				   int active)
+{
+	unsigned long pending_updates;
+
+	pending_updates = curr_jiffies - this_rq->last_load_update_tick;
+	if (pending_updates) {
+		this_rq->last_load_update_tick = curr_jiffies;
+		/*
+		 * In the regular NOHZ case, we were idle, this means load 0.
+		 * In the NOHZ_FULL case, we were non-idle, we should consider
+		 * its weighted load.
+		 */
+		__update_cpu_load(this_rq, load, pending_updates, active);
+	}
+}
+
 /*
  * There is no sane way to deal with nohz on smp when using jiffies because the
  * cpu doing the jiffies update might drift wrt the cpu doing the jiffy reading
@@ -4501,22 +4578,15 @@
  * Called from nohz_idle_balance() to update the load ratings before doing the
  * idle balance.
  */
-static void update_idle_cpu_load(struct rq *this_rq)
+static void update_cpu_load_idle(struct rq *this_rq)
 {
-	unsigned long curr_jiffies = READ_ONCE(jiffies);
-	unsigned long load = weighted_cpuload(cpu_of(this_rq));
-	unsigned long pending_updates;
-
 	/*
 	 * bail if there's load or we're actually up-to-date.
 	 */
-	if (load || curr_jiffies == this_rq->last_load_update_tick)
+	if (weighted_cpuload(cpu_of(this_rq)))
 		return;
 
-	pending_updates = curr_jiffies - this_rq->last_load_update_tick;
-	this_rq->last_load_update_tick = curr_jiffies;
-
-	__update_cpu_load(this_rq, load, pending_updates, 0);
+	__update_cpu_load_nohz(this_rq, READ_ONCE(jiffies), 0, 0);
 }
 
 /*
@@ -4527,22 +4597,12 @@
 	struct rq *this_rq = this_rq();
 	unsigned long curr_jiffies = READ_ONCE(jiffies);
 	unsigned long load = active ? weighted_cpuload(cpu_of(this_rq)) : 0;
-	unsigned long pending_updates;
 
 	if (curr_jiffies == this_rq->last_load_update_tick)
 		return;
 
 	raw_spin_lock(&this_rq->lock);
-	pending_updates = curr_jiffies - this_rq->last_load_update_tick;
-	if (pending_updates) {
-		this_rq->last_load_update_tick = curr_jiffies;
-		/*
-		 * In the regular NOHZ case, we were idle, this means load 0.
-		 * In the NOHZ_FULL case, we were non-idle, we should consider
-		 * its weighted load.
-		 */
-		__update_cpu_load(this_rq, load, pending_updates, active);
-	}
+	__update_cpu_load_nohz(this_rq, curr_jiffies, load, active);
 	raw_spin_unlock(&this_rq->lock);
 }
 #endif /* CONFIG_NO_HZ */
@@ -4554,7 +4614,7 @@
 {
 	unsigned long load = weighted_cpuload(cpu_of(this_rq));
 	/*
-	 * See the mess around update_idle_cpu_load() / update_cpu_load_nohz().
+	 * See the mess around update_cpu_load_idle() / update_cpu_load_nohz().
 	 */
 	this_rq->last_load_update_tick = jiffies;
 	__update_cpu_load(this_rq, load, 1, 1);
@@ -7848,7 +7908,7 @@
 		if (time_after_eq(jiffies, rq->next_balance)) {
 			raw_spin_lock_irq(&rq->lock);
 			update_rq_clock(rq);
-			update_idle_cpu_load(rq);
+			update_cpu_load_idle(rq);
 			raw_spin_unlock_irq(&rq->lock);
 			rebalance_domains(rq, CPU_IDLE);
 		}
@@ -8234,11 +8294,8 @@
 	for_each_possible_cpu(i) {
 		if (tg->cfs_rq)
 			kfree(tg->cfs_rq[i]);
-		if (tg->se) {
-			if (tg->se[i])
-				remove_entity_load_avg(tg->se[i]);
+		if (tg->se)
 			kfree(tg->se[i]);
-		}
 	}
 
 	kfree(tg->cfs_rq);
@@ -8286,21 +8343,29 @@
 	return 0;
 }
 
-void unregister_fair_sched_group(struct task_group *tg, int cpu)
+void unregister_fair_sched_group(struct task_group *tg)
 {
-	struct rq *rq = cpu_rq(cpu);
 	unsigned long flags;
+	struct rq *rq;
+	int cpu;
 
-	/*
-	* Only empty task groups can be destroyed; so we can speculatively
-	* check on_list without danger of it being re-added.
-	*/
-	if (!tg->cfs_rq[cpu]->on_list)
-		return;
+	for_each_possible_cpu(cpu) {
+		if (tg->se[cpu])
+			remove_entity_load_avg(tg->se[cpu]);
 
-	raw_spin_lock_irqsave(&rq->lock, flags);
-	list_del_leaf_cfs_rq(tg->cfs_rq[cpu]);
-	raw_spin_unlock_irqrestore(&rq->lock, flags);
+		/*
+		 * Only empty task groups can be destroyed; so we can speculatively
+		 * check on_list without danger of it being re-added.
+		 */
+		if (!tg->cfs_rq[cpu]->on_list)
+			continue;
+
+		rq = cpu_rq(cpu);
+
+		raw_spin_lock_irqsave(&rq->lock, flags);
+		list_del_leaf_cfs_rq(tg->cfs_rq[cpu]);
+		raw_spin_unlock_irqrestore(&rq->lock, flags);
+	}
 }
 
 void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
@@ -8382,7 +8447,7 @@
 	return 1;
 }
 
-void unregister_fair_sched_group(struct task_group *tg, int cpu) { }
+void unregister_fair_sched_group(struct task_group *tg) { }
 
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 

diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 544a713..bd12c6c 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c

@@ -4,6 +4,7 @@
 #include <linux/sched.h>
 #include <linux/cpu.h>
 #include <linux/cpuidle.h>
+#include <linux/cpuhotplug.h>
 #include <linux/tick.h>
 #include <linux/mm.h>
 #include <linux/stackprotector.h>
@@ -193,8 +194,6 @@
 	rcu_idle_exit();
 }
 
-DEFINE_PER_CPU(bool, cpu_dead_idle);
-
 /*
  * Generic idle loop implementation
  *
@@ -221,10 +220,7 @@
 			rmb();
 
 			if (cpu_is_offline(smp_processor_id())) {
-				rcu_cpu_notify(NULL, CPU_DYING_IDLE,
-					       (void *)(long)smp_processor_id());
-				smp_mb(); /* all activity before dead. */
-				this_cpu_write(cpu_dead_idle, true);
+				cpuhp_report_idle_dead();
 				arch_cpu_idle_dead();
 			}
 
@@ -291,5 +287,6 @@
 	boot_init_stack_canary();
 #endif
 	arch_cpu_idle_prepare();
+	cpuhp_online_idle(state);
 	cpu_idle_loop();
 }

diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 8ec86ab..5624713 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c

@@ -58,7 +58,15 @@
 	raw_spin_lock(&rt_b->rt_runtime_lock);
 	if (!rt_b->rt_period_active) {
 		rt_b->rt_period_active = 1;
-		hrtimer_forward_now(&rt_b->rt_period_timer, rt_b->rt_period);
+		/*
+		 * SCHED_DEADLINE updates the bandwidth, as a run away
+		 * RT task with a DL task could hog a CPU. But DL does
+		 * not reset the period. If a deadline task was running
+		 * without an RT task running, it can cause RT tasks to
+		 * throttle when they start up. Kick the timer right away
+		 * to update the period.
+		 */
+		hrtimer_forward_now(&rt_b->rt_period_timer, ns_to_ktime(0));
 		hrtimer_start_expires(&rt_b->rt_period_timer, HRTIMER_MODE_ABS_PINNED);
 	}
 	raw_spin_unlock(&rt_b->rt_runtime_lock);
@@ -436,7 +444,7 @@
 
 static inline int on_rt_rq(struct sched_rt_entity *rt_se)
 {
-	return !list_empty(&rt_se->run_list);
+	return rt_se->on_rq;
 }
 
 #ifdef CONFIG_RT_GROUP_SCHED
@@ -482,8 +490,8 @@
 	return rt_se->my_q;
 }
 
-static void enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head);
-static void dequeue_rt_entity(struct sched_rt_entity *rt_se);
+static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags);
+static void dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags);
 
 static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
 {
@@ -499,7 +507,7 @@
 		if (!rt_se)
 			enqueue_top_rt_rq(rt_rq);
 		else if (!on_rt_rq(rt_se))
-			enqueue_rt_entity(rt_se, false);
+			enqueue_rt_entity(rt_se, 0);
 
 		if (rt_rq->highest_prio.curr < curr->prio)
 			resched_curr(rq);
@@ -516,7 +524,7 @@
 	if (!rt_se)
 		dequeue_top_rt_rq(rt_rq);
 	else if (on_rt_rq(rt_se))
-		dequeue_rt_entity(rt_se);
+		dequeue_rt_entity(rt_se, 0);
 }
 
 static inline int rt_rq_throttled(struct rt_rq *rt_rq)
@@ -1142,12 +1150,27 @@
 }
 
 static inline
+unsigned int rt_se_rr_nr_running(struct sched_rt_entity *rt_se)
+{
+	struct rt_rq *group_rq = group_rt_rq(rt_se);
+	struct task_struct *tsk;
+
+	if (group_rq)
+		return group_rq->rr_nr_running;
+
+	tsk = rt_task_of(rt_se);
+
+	return (tsk->policy == SCHED_RR) ? 1 : 0;
+}
+
+static inline
 void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 {
 	int prio = rt_se_prio(rt_se);
 
 	WARN_ON(!rt_prio(prio));
 	rt_rq->rt_nr_running += rt_se_nr_running(rt_se);
+	rt_rq->rr_nr_running += rt_se_rr_nr_running(rt_se);
 
 	inc_rt_prio(rt_rq, prio);
 	inc_rt_migration(rt_se, rt_rq);
@@ -1160,13 +1183,37 @@
 	WARN_ON(!rt_prio(rt_se_prio(rt_se)));
 	WARN_ON(!rt_rq->rt_nr_running);
 	rt_rq->rt_nr_running -= rt_se_nr_running(rt_se);
+	rt_rq->rr_nr_running -= rt_se_rr_nr_running(rt_se);
 
 	dec_rt_prio(rt_rq, rt_se_prio(rt_se));
 	dec_rt_migration(rt_se, rt_rq);
 	dec_rt_group(rt_se, rt_rq);
 }
 
-static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head)
+/*
+ * Change rt_se->run_list location unless SAVE && !MOVE
+ *
+ * assumes ENQUEUE/DEQUEUE flags match
+ */
+static inline bool move_entity(unsigned int flags)
+{
+	if ((flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)) == DEQUEUE_SAVE)
+		return false;
+
+	return true;
+}
+
+static void __delist_rt_entity(struct sched_rt_entity *rt_se, struct rt_prio_array *array)
+{
+	list_del_init(&rt_se->run_list);
+
+	if (list_empty(array->queue + rt_se_prio(rt_se)))
+		__clear_bit(rt_se_prio(rt_se), array->bitmap);
+
+	rt_se->on_list = 0;
+}
+
+static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
 {
 	struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
 	struct rt_prio_array *array = &rt_rq->active;
@@ -1179,26 +1226,37 @@
 	 * get throttled and the current group doesn't have any other
 	 * active members.
 	 */
-	if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running))
+	if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running)) {
+		if (rt_se->on_list)
+			__delist_rt_entity(rt_se, array);
 		return;
+	}
 
-	if (head)
-		list_add(&rt_se->run_list, queue);
-	else
-		list_add_tail(&rt_se->run_list, queue);
-	__set_bit(rt_se_prio(rt_se), array->bitmap);
+	if (move_entity(flags)) {
+		WARN_ON_ONCE(rt_se->on_list);
+		if (flags & ENQUEUE_HEAD)
+			list_add(&rt_se->run_list, queue);
+		else
+			list_add_tail(&rt_se->run_list, queue);
+
+		__set_bit(rt_se_prio(rt_se), array->bitmap);
+		rt_se->on_list = 1;
+	}
+	rt_se->on_rq = 1;
 
 	inc_rt_tasks(rt_se, rt_rq);
 }
 
-static void __dequeue_rt_entity(struct sched_rt_entity *rt_se)
+static void __dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
 {
 	struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
 	struct rt_prio_array *array = &rt_rq->active;
 
-	list_del_init(&rt_se->run_list);
-	if (list_empty(array->queue + rt_se_prio(rt_se)))
-		__clear_bit(rt_se_prio(rt_se), array->bitmap);
+	if (move_entity(flags)) {
+		WARN_ON_ONCE(!rt_se->on_list);
+		__delist_rt_entity(rt_se, array);
+	}
+	rt_se->on_rq = 0;
 
 	dec_rt_tasks(rt_se, rt_rq);
 }
@@ -1207,7 +1265,7 @@
  * Because the prio of an upper entry depends on the lower
  * entries, we must remove entries top - down.
  */
-static void dequeue_rt_stack(struct sched_rt_entity *rt_se)
+static void dequeue_rt_stack(struct sched_rt_entity *rt_se, unsigned int flags)
 {
 	struct sched_rt_entity *back = NULL;
 
@@ -1220,31 +1278,31 @@
 
 	for (rt_se = back; rt_se; rt_se = rt_se->back) {
 		if (on_rt_rq(rt_se))
-			__dequeue_rt_entity(rt_se);
+			__dequeue_rt_entity(rt_se, flags);
 	}
 }
 
-static void enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head)
+static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
 {
 	struct rq *rq = rq_of_rt_se(rt_se);
 
-	dequeue_rt_stack(rt_se);
+	dequeue_rt_stack(rt_se, flags);
 	for_each_sched_rt_entity(rt_se)
-		__enqueue_rt_entity(rt_se, head);
+		__enqueue_rt_entity(rt_se, flags);
 	enqueue_top_rt_rq(&rq->rt);
 }
 
-static void dequeue_rt_entity(struct sched_rt_entity *rt_se)
+static void dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
 {
 	struct rq *rq = rq_of_rt_se(rt_se);
 
-	dequeue_rt_stack(rt_se);
+	dequeue_rt_stack(rt_se, flags);
 
 	for_each_sched_rt_entity(rt_se) {
 		struct rt_rq *rt_rq = group_rt_rq(rt_se);
 
 		if (rt_rq && rt_rq->rt_nr_running)
-			__enqueue_rt_entity(rt_se, false);
+			__enqueue_rt_entity(rt_se, flags);
 	}
 	enqueue_top_rt_rq(&rq->rt);
 }
@@ -1260,7 +1318,7 @@
 	if (flags & ENQUEUE_WAKEUP)
 		rt_se->timeout = 0;
 
-	enqueue_rt_entity(rt_se, flags & ENQUEUE_HEAD);
+	enqueue_rt_entity(rt_se, flags);
 
 	if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
 		enqueue_pushable_task(rq, p);
@@ -1271,7 +1329,7 @@
 	struct sched_rt_entity *rt_se = &p->rt;
 
 	update_curr_rt(rq);
-	dequeue_rt_entity(rt_se);
+	dequeue_rt_entity(rt_se, flags);
 
 	dequeue_pushable_task(rq, p);
 }

diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 10f1637..b2ff5a2 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h

@@ -3,6 +3,7 @@
 #include <linux/sched/sysctl.h>
 #include <linux/sched/rt.h>
 #include <linux/sched/deadline.h>
+#include <linux/binfmts.h>
 #include <linux/mutex.h>
 #include <linux/spinlock.h>
 #include <linux/stop_machine.h>
@@ -313,12 +314,11 @@
 
 extern void free_fair_sched_group(struct task_group *tg);
 extern int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent);
-extern void unregister_fair_sched_group(struct task_group *tg, int cpu);
+extern void unregister_fair_sched_group(struct task_group *tg);
 extern void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
 			struct sched_entity *se, int cpu,
 			struct sched_entity *parent);
 extern void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
-extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
 
 extern void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b);
 extern void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
@@ -450,6 +450,7 @@
 struct rt_rq {
 	struct rt_prio_array active;
 	unsigned int rt_nr_running;
+	unsigned int rr_nr_running;
 #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
 	struct {
 		int curr; /* highest queued rt task prio */
@@ -909,6 +910,18 @@
 
 extern int group_balance_cpu(struct sched_group *sg);
 
+#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
+void register_sched_domain_sysctl(void);
+void unregister_sched_domain_sysctl(void);
+#else
+static inline void register_sched_domain_sysctl(void)
+{
+}
+static inline void unregister_sched_domain_sysctl(void)
+{
+}
+#endif
+
 #else
 
 static inline void sched_ttwu_pending(void) { }
@@ -1022,6 +1035,7 @@
 #endif /* SCHED_DEBUG && HAVE_JUMP_LABEL */
 
 extern struct static_key_false sched_numa_balancing;
+extern struct static_key_false sched_schedstats;
 
 static inline u64 global_rt_period(void)
 {
@@ -1130,18 +1144,40 @@
 extern const int sched_prio_to_weight[40];
 extern const u32 sched_prio_to_wmult[40];
 
+/*
+ * {de,en}queue flags:
+ *
+ * DEQUEUE_SLEEP  - task is no longer runnable
+ * ENQUEUE_WAKEUP - task just became runnable
+ *
+ * SAVE/RESTORE - an otherwise spurious dequeue/enqueue, done to ensure tasks
+ *                are in a known state which allows modification. Such pairs
+ *                should preserve as much state as possible.
+ *
+ * MOVE - paired with SAVE/RESTORE, explicitly does not preserve the location
+ *        in the runqueue.
+ *
+ * ENQUEUE_HEAD      - place at front of runqueue (tail if not specified)
+ * ENQUEUE_REPLENISH - CBS (replenish runtime and postpone deadline)
+ * ENQUEUE_WAKING    - sched_class::task_waking was called
+ *
+ */
+
+#define DEQUEUE_SLEEP		0x01
+#define DEQUEUE_SAVE		0x02 /* matches ENQUEUE_RESTORE */
+#define DEQUEUE_MOVE		0x04 /* matches ENQUEUE_MOVE */
+
 #define ENQUEUE_WAKEUP		0x01
-#define ENQUEUE_HEAD		0x02
+#define ENQUEUE_RESTORE		0x02
+#define ENQUEUE_MOVE		0x04
+
+#define ENQUEUE_HEAD		0x08
+#define ENQUEUE_REPLENISH	0x10
 #ifdef CONFIG_SMP
-#define ENQUEUE_WAKING		0x04	/* sched_class::task_waking was called */
+#define ENQUEUE_WAKING		0x20
 #else
 #define ENQUEUE_WAKING		0x00
 #endif
-#define ENQUEUE_REPLENISH	0x08
-#define ENQUEUE_RESTORE	0x10
-
-#define DEQUEUE_SLEEP		0x01
-#define DEQUEUE_SAVE		0x02
 
 #define RETRY_TASK		((void *)-1UL)
 
@@ -1278,6 +1314,35 @@
 
 extern void init_entity_runnable_average(struct sched_entity *se);
 
+#ifdef CONFIG_NO_HZ_FULL
+extern bool sched_can_stop_tick(struct rq *rq);
+
+/*
+ * Tick may be needed by tasks in the runqueue depending on their policy and
+ * requirements. If tick is needed, lets send the target an IPI to kick it out of
+ * nohz mode if necessary.
+ */
+static inline void sched_update_tick_dependency(struct rq *rq)
+{
+	int cpu;
+
+	if (!tick_nohz_full_enabled())
+		return;
+
+	cpu = cpu_of(rq);
+
+	if (!tick_nohz_full_cpu(cpu))
+		return;
+
+	if (sched_can_stop_tick(rq))
+		tick_nohz_dep_clear_cpu(cpu, TICK_DEP_BIT_SCHED);
+	else
+		tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED);
+}
+#else
+static inline void sched_update_tick_dependency(struct rq *rq) { }
+#endif
+
 static inline void add_nr_running(struct rq *rq, unsigned count)
 {
 	unsigned prev_nr = rq->nr_running;
@@ -1289,26 +1354,16 @@
 		if (!rq->rd->overload)
 			rq->rd->overload = true;
 #endif
-
-#ifdef CONFIG_NO_HZ_FULL
-		if (tick_nohz_full_cpu(rq->cpu)) {
-			/*
-			 * Tick is needed if more than one task runs on a CPU.
-			 * Send the target an IPI to kick it out of nohz mode.
-			 *
-			 * We assume that IPI implies full memory barrier and the
-			 * new value of rq->nr_running is visible on reception
-			 * from the target.
-			 */
-			tick_nohz_full_kick_cpu(rq->cpu);
-		}
-#endif
 	}
+
+	sched_update_tick_dependency(rq);
 }
 
 static inline void sub_nr_running(struct rq *rq, unsigned count)
 {
 	rq->nr_running -= count;
+	/* Check if we still need preemption */
+	sched_update_tick_dependency(rq);
 }
 
 static inline void rq_last_tick_reset(struct rq *rq)

diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h
index b0fbc76..70b3b6a 100644
--- a/kernel/sched/stats.h
+++ b/kernel/sched/stats.h

@@ -29,9 +29,10 @@
 	if (rq)
 		rq->rq_sched_info.run_delay += delta;
 }
-# define schedstat_inc(rq, field)	do { (rq)->field++; } while (0)
-# define schedstat_add(rq, field, amt)	do { (rq)->field += (amt); } while (0)
-# define schedstat_set(var, val)	do { var = (val); } while (0)
+# define schedstat_enabled()		static_branch_unlikely(&sched_schedstats)
+# define schedstat_inc(rq, field)	do { if (schedstat_enabled()) { (rq)->field++; } } while (0)
+# define schedstat_add(rq, field, amt)	do { if (schedstat_enabled()) { (rq)->field += (amt); } } while (0)
+# define schedstat_set(var, val)	do { if (schedstat_enabled()) { var = (val); } } while (0)
 #else /* !CONFIG_SCHEDSTATS */
 static inline void
 rq_sched_info_arrive(struct rq *rq, unsigned long long delta)
@@ -42,6 +43,7 @@
 static inline void
 rq_sched_info_depart(struct rq *rq, unsigned long long delta)
 {}
+# define schedstat_enabled()		0
 # define schedstat_inc(rq, field)	do { } while (0)
 # define schedstat_add(rq, field, amt)	do { } while (0)
 # define schedstat_set(var, val)	do { } while (0)

diff --git a/kernel/sched/swait.c b/kernel/sched/swait.c
new file mode 100644
index 0000000..82f0dff
--- /dev/null
+++ b/kernel/sched/swait.c

@@ -0,0 +1,123 @@
+#include <linux/sched.h>
+#include <linux/swait.h>
+
+void __init_swait_queue_head(struct swait_queue_head *q, const char *name,
+			     struct lock_class_key *key)
+{
+	raw_spin_lock_init(&q->lock);
+	lockdep_set_class_and_name(&q->lock, key, name);
+	INIT_LIST_HEAD(&q->task_list);
+}
+EXPORT_SYMBOL(__init_swait_queue_head);
+
+/*
+ * The thing about the wake_up_state() return value; I think we can ignore it.
+ *
+ * If for some reason it would return 0, that means the previously waiting
+ * task is already running, so it will observe condition true (or has already).
+ */
+void swake_up_locked(struct swait_queue_head *q)
+{
+	struct swait_queue *curr;
+
+	if (list_empty(&q->task_list))
+		return;
+
+	curr = list_first_entry(&q->task_list, typeof(*curr), task_list);
+	wake_up_process(curr->task);
+	list_del_init(&curr->task_list);
+}
+EXPORT_SYMBOL(swake_up_locked);
+
+void swake_up(struct swait_queue_head *q)
+{
+	unsigned long flags;
+
+	if (!swait_active(q))
+		return;
+
+	raw_spin_lock_irqsave(&q->lock, flags);
+	swake_up_locked(q);
+	raw_spin_unlock_irqrestore(&q->lock, flags);
+}
+EXPORT_SYMBOL(swake_up);
+
+/*
+ * Does not allow usage from IRQ disabled, since we must be able to
+ * release IRQs to guarantee bounded hold time.
+ */
+void swake_up_all(struct swait_queue_head *q)
+{
+	struct swait_queue *curr;
+	LIST_HEAD(tmp);
+
+	if (!swait_active(q))
+		return;
+
+	raw_spin_lock_irq(&q->lock);
+	list_splice_init(&q->task_list, &tmp);
+	while (!list_empty(&tmp)) {
+		curr = list_first_entry(&tmp, typeof(*curr), task_list);
+
+		wake_up_state(curr->task, TASK_NORMAL);
+		list_del_init(&curr->task_list);
+
+		if (list_empty(&tmp))
+			break;
+
+		raw_spin_unlock_irq(&q->lock);
+		raw_spin_lock_irq(&q->lock);
+	}
+	raw_spin_unlock_irq(&q->lock);
+}
+EXPORT_SYMBOL(swake_up_all);
+
+void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait)
+{
+	wait->task = current;
+	if (list_empty(&wait->task_list))
+		list_add(&wait->task_list, &q->task_list);
+}
+
+void prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait, int state)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&q->lock, flags);
+	__prepare_to_swait(q, wait);
+	set_current_state(state);
+	raw_spin_unlock_irqrestore(&q->lock, flags);
+}
+EXPORT_SYMBOL(prepare_to_swait);
+
+long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queue *wait, int state)
+{
+	if (signal_pending_state(state, current))
+		return -ERESTARTSYS;
+
+	prepare_to_swait(q, wait, state);
+
+	return 0;
+}
+EXPORT_SYMBOL(prepare_to_swait_event);
+
+void __finish_swait(struct swait_queue_head *q, struct swait_queue *wait)
+{
+	__set_current_state(TASK_RUNNING);
+	if (!list_empty(&wait->task_list))
+		list_del_init(&wait->task_list);
+}
+
+void finish_swait(struct swait_queue_head *q, struct swait_queue *wait)
+{
+	unsigned long flags;
+
+	__set_current_state(TASK_RUNNING);
+
+	if (!list_empty_careful(&wait->task_list)) {
+		raw_spin_lock_irqsave(&q->lock, flags);
+		list_del_init(&wait->task_list);
+		raw_spin_unlock_irqrestore(&q->lock, flags);
+	}
+}
+EXPORT_SYMBOL(finish_swait);

diff --git a/kernel/smp.c b/kernel/smp.c
index d903c02..7416544 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c

@@ -105,13 +105,12 @@
  * previous function call. For multi-cpu calls its even more interesting
  * as we'll have to ensure no other cpu is observing our csd.
  */
-static void csd_lock_wait(struct call_single_data *csd)
+static __always_inline void csd_lock_wait(struct call_single_data *csd)
 {
-	while (smp_load_acquire(&csd->flags) & CSD_FLAG_LOCK)
-		cpu_relax();
+	smp_cond_acquire(!(csd->flags & CSD_FLAG_LOCK));
 }
 
-static void csd_lock(struct call_single_data *csd)
+static __always_inline void csd_lock(struct call_single_data *csd)
 {
 	csd_lock_wait(csd);
 	csd->flags |= CSD_FLAG_LOCK;
@@ -124,7 +123,7 @@
 	smp_wmb();
 }
 
-static void csd_unlock(struct call_single_data *csd)
+static __always_inline void csd_unlock(struct call_single_data *csd)
 {
 	WARN_ON(!(csd->flags & CSD_FLAG_LOCK));
 
@@ -569,6 +568,7 @@
 	unsigned int cpu;
 
 	idle_threads_init();
+	cpuhp_threads_init();
 
 	/* FIXME: This should be done in userspace --RR */
 	for_each_present_cpu(cpu) {

diff --git a/kernel/smpboot.c b/kernel/smpboot.c
index d264f59..13bc43d 100644
--- a/kernel/smpboot.c
+++ b/kernel/smpboot.c

@@ -226,7 +226,7 @@
 		kthread_unpark(tsk);
 }
 
-void smpboot_unpark_threads(unsigned int cpu)
+int smpboot_unpark_threads(unsigned int cpu)
 {
 	struct smp_hotplug_thread *cur;
 
@@ -235,6 +235,7 @@
 		if (cpumask_test_cpu(cpu, cur->cpumask))
 			smpboot_unpark_thread(cur, cpu);
 	mutex_unlock(&smpboot_threads_lock);
+	return 0;
 }
 
 static void smpboot_park_thread(struct smp_hotplug_thread *ht, unsigned int cpu)
@@ -245,7 +246,7 @@
 		kthread_park(tsk);
 }
 
-void smpboot_park_threads(unsigned int cpu)
+int smpboot_park_threads(unsigned int cpu)
 {
 	struct smp_hotplug_thread *cur;
 
@@ -253,6 +254,7 @@
 	list_for_each_entry_reverse(cur, &hotplug_threads, list)
 		smpboot_park_thread(cur, cpu);
 	mutex_unlock(&smpboot_threads_lock);
+	return 0;
 }
 
 static void smpboot_destroy_threads(struct smp_hotplug_thread *ht)

diff --git a/kernel/smpboot.h b/kernel/smpboot.h
index 72415a0..485b81c 100644
--- a/kernel/smpboot.h
+++ b/kernel/smpboot.h

@@ -14,7 +14,9 @@
 #endif
 
 int smpboot_create_threads(unsigned int cpu);
-void smpboot_park_threads(unsigned int cpu);
-void smpboot_unpark_threads(unsigned int cpu);
+int smpboot_park_threads(unsigned int cpu);
+int smpboot_unpark_threads(unsigned int cpu);
+
+void __init cpuhp_threads_init(void);
 
 #endif

diff --git a/kernel/softirq.c b/kernel/softirq.c
index 479e443..8aae49d 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c

@@ -116,9 +116,9 @@
 
 	if (preempt_count() == cnt) {
 #ifdef CONFIG_DEBUG_PREEMPT
-		current->preempt_disable_ip = get_parent_ip(CALLER_ADDR1);
+		current->preempt_disable_ip = get_lock_parent_ip();
 #endif
-		trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
+		trace_preempt_off(CALLER_ADDR0, get_lock_parent_ip());
 	}
 }
 EXPORT_SYMBOL(__local_bh_disable_ip);

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 97715fd..f5102fab 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c

@@ -350,6 +350,17 @@
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
+#ifdef CONFIG_SCHEDSTATS
+	{
+		.procname	= "sched_schedstats",
+		.data		= NULL,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= sysctl_schedstats,
+		.extra1		= &zero,
+		.extra2		= &one,
+	},
+#endif /* CONFIG_SCHEDSTATS */
 #endif /* CONFIG_SMP */
 #ifdef CONFIG_NUMA_BALANCING
 	{
@@ -505,7 +516,7 @@
 		.data		= &latencytop_enabled,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= sysctl_latencytop,
 	},
 #endif
 #ifdef CONFIG_BLK_DEV_INITRD

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 664de53..56ece14 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c

@@ -323,13 +323,42 @@
 		/* cs is a watchdog. */
 		if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
 			cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
-		/* Pick the best watchdog. */
-		if (!watchdog || cs->rating > watchdog->rating) {
-			watchdog = cs;
-			/* Reset watchdog cycles */
-			clocksource_reset_watchdog();
-		}
 	}
+	spin_unlock_irqrestore(&watchdog_lock, flags);
+}
+
+static void clocksource_select_watchdog(bool fallback)
+{
+	struct clocksource *cs, *old_wd;
+	unsigned long flags;
+
+	spin_lock_irqsave(&watchdog_lock, flags);
+	/* save current watchdog */
+	old_wd = watchdog;
+	if (fallback)
+		watchdog = NULL;
+
+	list_for_each_entry(cs, &clocksource_list, list) {
+		/* cs is a clocksource to be watched. */
+		if (cs->flags & CLOCK_SOURCE_MUST_VERIFY)
+			continue;
+
+		/* Skip current if we were requested for a fallback. */
+		if (fallback && cs == old_wd)
+			continue;
+
+		/* Pick the best watchdog. */
+		if (!watchdog || cs->rating > watchdog->rating)
+			watchdog = cs;
+	}
+	/* If we failed to find a fallback restore the old one. */
+	if (!watchdog)
+		watchdog = old_wd;
+
+	/* If we changed the watchdog we need to reset cycles. */
+	if (watchdog != old_wd)
+		clocksource_reset_watchdog();
+
 	/* Check if the watchdog timer needs to be started. */
 	clocksource_start_watchdog();
 	spin_unlock_irqrestore(&watchdog_lock, flags);
@@ -404,6 +433,7 @@
 		cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
 }
 
+static void clocksource_select_watchdog(bool fallback) { }
 static inline void clocksource_dequeue_watchdog(struct clocksource *cs) { }
 static inline void clocksource_resume_watchdog(void) { }
 static inline int __clocksource_watchdog_kthread(void) { return 0; }
@@ -736,6 +766,7 @@
 	clocksource_enqueue(cs);
 	clocksource_enqueue_watchdog(cs);
 	clocksource_select();
+	clocksource_select_watchdog(false);
 	mutex_unlock(&clocksource_mutex);
 	return 0;
 }
@@ -758,6 +789,7 @@
 	mutex_lock(&clocksource_mutex);
 	__clocksource_change_rating(cs, rating);
 	clocksource_select();
+	clocksource_select_watchdog(false);
 	mutex_unlock(&clocksource_mutex);
 }
 EXPORT_SYMBOL(clocksource_change_rating);
@@ -767,12 +799,12 @@
  */
 static int clocksource_unbind(struct clocksource *cs)
 {
-	/*
-	 * I really can't convince myself to support this on hardware
-	 * designed by lobotomized monkeys.
-	 */
-	if (clocksource_is_watchdog(cs))
-		return -EBUSY;
+	if (clocksource_is_watchdog(cs)) {
+		/* Select and try to install a replacement watchdog. */
+		clocksource_select_watchdog(true);
+		if (clocksource_is_watchdog(cs))
+			return -EBUSY;
+	}
 
 	if (cs == curr_clocksource) {
 		/* Select and try to install a replacement clock source */

diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index 347fecf..555e21f 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c

@@ -68,7 +68,7 @@
 	.name		= "jiffies",
 	.rating		= 1, /* lowest valid rating*/
 	.read		= jiffies_read,
-	.mask		= 0xffffffff, /*32bits*/
+	.mask		= CLOCKSOURCE_MASK(32),
 	.mult		= NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */
 	.shift		= JIFFIES_SHIFT,
 	.max_cycles	= 10,

diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index f5e86d2..1cafba8 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c

@@ -333,7 +333,6 @@
 	return err;
 }
 
-
 /*
  * Validate the clockid_t for a new CPU-clock timer, and initialize the timer.
  * This is called from sys_timer_create() and do_cpu_nanosleep() with the
@@ -517,6 +516,10 @@
 				cputime_expires->sched_exp = exp;
 			break;
 		}
+		if (CPUCLOCK_PERTHREAD(timer->it_clock))
+			tick_dep_set_task(p, TICK_DEP_BIT_POSIX_TIMER);
+		else
+			tick_dep_set_signal(p->signal, TICK_DEP_BIT_POSIX_TIMER);
 	}
 }
 
@@ -582,39 +585,6 @@
 	return 0;
 }
 
-#ifdef CONFIG_NO_HZ_FULL
-static void nohz_kick_work_fn(struct work_struct *work)
-{
-	tick_nohz_full_kick_all();
-}
-
-static DECLARE_WORK(nohz_kick_work, nohz_kick_work_fn);
-
-/*
- * We need the IPIs to be sent from sane process context.
- * The posix cpu timers are always set with irqs disabled.
- */
-static void posix_cpu_timer_kick_nohz(void)
-{
-	if (context_tracking_is_enabled())
-		schedule_work(&nohz_kick_work);
-}
-
-bool posix_cpu_timers_can_stop_tick(struct task_struct *tsk)
-{
-	if (!task_cputime_zero(&tsk->cputime_expires))
-		return false;
-
-	/* Check if cputimer is running. This is accessed without locking. */
-	if (READ_ONCE(tsk->signal->cputimer.running))
-		return false;
-
-	return true;
-}
-#else
-static inline void posix_cpu_timer_kick_nohz(void) { }
-#endif
-
 /*
  * Guts of sys_timer_settime for CPU timers.
  * This is called with the timer locked and interrupts disabled.
@@ -761,8 +731,7 @@
 		sample_to_timespec(timer->it_clock,
 				   old_incr, &old->it_interval);
 	}
-	if (!ret)
-		posix_cpu_timer_kick_nohz();
+
 	return ret;
 }
 
@@ -911,6 +880,8 @@
 			__group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
 		}
 	}
+	if (task_cputime_zero(tsk_expires))
+		tick_dep_clear_task(tsk, TICK_DEP_BIT_POSIX_TIMER);
 }
 
 static inline void stop_process_timers(struct signal_struct *sig)
@@ -919,6 +890,7 @@
 
 	/* Turn off cputimer->running. This is done without locking. */
 	WRITE_ONCE(cputimer->running, false);
+	tick_dep_clear_signal(sig, TICK_DEP_BIT_POSIX_TIMER);
 }
 
 static u32 onecputick;
@@ -1095,8 +1067,6 @@
 	arm_timer(timer);
 	unlock_task_sighand(p, &flags);
 
-	/* Kick full dynticks CPUs in case they need to tick on the new timer */
-	posix_cpu_timer_kick_nohz();
 out:
 	timer->it_overrun_last = timer->it_overrun;
 	timer->it_overrun = -1;
@@ -1270,7 +1240,7 @@
 		}
 
 		if (!*newval)
-			goto out;
+			return;
 		*newval += now;
 	}
 
@@ -1288,8 +1258,8 @@
 			tsk->signal->cputime_expires.virt_exp = *newval;
 		break;
 	}
-out:
-	posix_cpu_timer_kick_nohz();
+
+	tick_dep_set_signal(tsk->signal, TICK_DEP_BIT_POSIX_TIMER);
 }
 
 static int do_cpu_nanosleep(const clockid_t which_clock, int flags,

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 0b17424..969e670 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c

@@ -22,7 +22,6 @@
 #include <linux/module.h>
 #include <linux/irq_work.h>
 #include <linux/posix-timers.h>
-#include <linux/perf_event.h>
 #include <linux/context_tracking.h>
 
 #include <asm/irq_regs.h>
@@ -158,54 +157,63 @@
 cpumask_var_t tick_nohz_full_mask;
 cpumask_var_t housekeeping_mask;
 bool tick_nohz_full_running;
+static unsigned long tick_dep_mask;
 
-static bool can_stop_full_tick(void)
+static void trace_tick_dependency(unsigned long dep)
+{
+	if (dep & TICK_DEP_MASK_POSIX_TIMER) {
+		trace_tick_stop(0, TICK_DEP_MASK_POSIX_TIMER);
+		return;
+	}
+
+	if (dep & TICK_DEP_MASK_PERF_EVENTS) {
+		trace_tick_stop(0, TICK_DEP_MASK_PERF_EVENTS);
+		return;
+	}
+
+	if (dep & TICK_DEP_MASK_SCHED) {
+		trace_tick_stop(0, TICK_DEP_MASK_SCHED);
+		return;
+	}
+
+	if (dep & TICK_DEP_MASK_CLOCK_UNSTABLE)
+		trace_tick_stop(0, TICK_DEP_MASK_CLOCK_UNSTABLE);
+}
+
+static bool can_stop_full_tick(struct tick_sched *ts)
 {
 	WARN_ON_ONCE(!irqs_disabled());
 
-	if (!sched_can_stop_tick()) {
-		trace_tick_stop(0, "more than 1 task in runqueue\n");
+	if (tick_dep_mask) {
+		trace_tick_dependency(tick_dep_mask);
 		return false;
 	}
 
-	if (!posix_cpu_timers_can_stop_tick(current)) {
-		trace_tick_stop(0, "posix timers running\n");
+	if (ts->tick_dep_mask) {
+		trace_tick_dependency(ts->tick_dep_mask);
 		return false;
 	}
 
-	if (!perf_event_can_stop_tick()) {
-		trace_tick_stop(0, "perf events running\n");
+	if (current->tick_dep_mask) {
+		trace_tick_dependency(current->tick_dep_mask);
 		return false;
 	}
 
-	/* sched_clock_tick() needs us? */
-#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
-	/*
-	 * TODO: kick full dynticks CPUs when
-	 * sched_clock_stable is set.
-	 */
-	if (!sched_clock_stable()) {
-		trace_tick_stop(0, "unstable sched clock\n");
-		/*
-		 * Don't allow the user to think they can get
-		 * full NO_HZ with this machine.
-		 */
-		WARN_ONCE(tick_nohz_full_running,
-			  "NO_HZ FULL will not work with unstable sched clock");
+	if (current->signal->tick_dep_mask) {
+		trace_tick_dependency(current->signal->tick_dep_mask);
 		return false;
 	}
-#endif
 
 	return true;
 }
 
-static void nohz_full_kick_work_func(struct irq_work *work)
+static void nohz_full_kick_func(struct irq_work *work)
 {
 	/* Empty, the tick restart happens on tick_nohz_irq_exit() */
 }
 
 static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
-	.func = nohz_full_kick_work_func,
+	.func = nohz_full_kick_func,
 };
 
 /*
@@ -214,7 +222,7 @@
  * This kick, unlike tick_nohz_full_kick_cpu() and tick_nohz_full_kick_all(),
  * is NMI safe.
  */
-void tick_nohz_full_kick(void)
+static void tick_nohz_full_kick(void)
 {
 	if (!tick_nohz_full_cpu(smp_processor_id()))
 		return;
@@ -234,27 +242,112 @@
 	irq_work_queue_on(&per_cpu(nohz_full_kick_work, cpu), cpu);
 }
 
-static void nohz_full_kick_ipi(void *info)
-{
-	/* Empty, the tick restart happens on tick_nohz_irq_exit() */
-}
-
 /*
  * Kick all full dynticks CPUs in order to force these to re-evaluate
  * their dependency on the tick and restart it if necessary.
  */
-void tick_nohz_full_kick_all(void)
+static void tick_nohz_full_kick_all(void)
 {
+	int cpu;
+
 	if (!tick_nohz_full_running)
 		return;
 
 	preempt_disable();
-	smp_call_function_many(tick_nohz_full_mask,
-			       nohz_full_kick_ipi, NULL, false);
-	tick_nohz_full_kick();
+	for_each_cpu_and(cpu, tick_nohz_full_mask, cpu_online_mask)
+		tick_nohz_full_kick_cpu(cpu);
 	preempt_enable();
 }
 
+static void tick_nohz_dep_set_all(unsigned long *dep,
+				  enum tick_dep_bits bit)
+{
+	unsigned long prev;
+
+	prev = fetch_or(dep, BIT_MASK(bit));
+	if (!prev)
+		tick_nohz_full_kick_all();
+}
+
+/*
+ * Set a global tick dependency. Used by perf events that rely on freq and
+ * by unstable clock.
+ */
+void tick_nohz_dep_set(enum tick_dep_bits bit)
+{
+	tick_nohz_dep_set_all(&tick_dep_mask, bit);
+}
+
+void tick_nohz_dep_clear(enum tick_dep_bits bit)
+{
+	clear_bit(bit, &tick_dep_mask);
+}
+
+/*
+ * Set per-CPU tick dependency. Used by scheduler and perf events in order to
+ * manage events throttling.
+ */
+void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit)
+{
+	unsigned long prev;
+	struct tick_sched *ts;
+
+	ts = per_cpu_ptr(&tick_cpu_sched, cpu);
+
+	prev = fetch_or(&ts->tick_dep_mask, BIT_MASK(bit));
+	if (!prev) {
+		preempt_disable();
+		/* Perf needs local kick that is NMI safe */
+		if (cpu == smp_processor_id()) {
+			tick_nohz_full_kick();
+		} else {
+			/* Remote irq work not NMI-safe */
+			if (!WARN_ON_ONCE(in_nmi()))
+				tick_nohz_full_kick_cpu(cpu);
+		}
+		preempt_enable();
+	}
+}
+
+void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit)
+{
+	struct tick_sched *ts = per_cpu_ptr(&tick_cpu_sched, cpu);
+
+	clear_bit(bit, &ts->tick_dep_mask);
+}
+
+/*
+ * Set a per-task tick dependency. Posix CPU timers need this in order to elapse
+ * per task timers.
+ */
+void tick_nohz_dep_set_task(struct task_struct *tsk, enum tick_dep_bits bit)
+{
+	/*
+	 * We could optimize this with just kicking the target running the task
+	 * if that noise matters for nohz full users.
+	 */
+	tick_nohz_dep_set_all(&tsk->tick_dep_mask, bit);
+}
+
+void tick_nohz_dep_clear_task(struct task_struct *tsk, enum tick_dep_bits bit)
+{
+	clear_bit(bit, &tsk->tick_dep_mask);
+}
+
+/*
+ * Set a per-taskgroup tick dependency. Posix CPU timers need this in order to elapse
+ * per process timers.
+ */
+void tick_nohz_dep_set_signal(struct signal_struct *sig, enum tick_dep_bits bit)
+{
+	tick_nohz_dep_set_all(&sig->tick_dep_mask, bit);
+}
+
+void tick_nohz_dep_clear_signal(struct signal_struct *sig, enum tick_dep_bits bit)
+{
+	clear_bit(bit, &sig->tick_dep_mask);
+}
+
 /*
  * Re-evaluate the need for the tick as we switch the current task.
  * It might need the tick due to per task/process properties:
@@ -263,15 +356,19 @@
 void __tick_nohz_task_switch(void)
 {
 	unsigned long flags;
+	struct tick_sched *ts;
 
 	local_irq_save(flags);
 
 	if (!tick_nohz_full_cpu(smp_processor_id()))
 		goto out;
 
-	if (tick_nohz_tick_stopped() && !can_stop_full_tick())
-		tick_nohz_full_kick();
+	ts = this_cpu_ptr(&tick_cpu_sched);
 
+	if (ts->tick_stopped) {
+		if (current->tick_dep_mask || current->signal->tick_dep_mask)
+			tick_nohz_full_kick();
+	}
 out:
 	local_irq_restore(flags);
 }
@@ -689,7 +786,7 @@
 
 		ts->last_tick = hrtimer_get_expires(&ts->sched_timer);
 		ts->tick_stopped = 1;
-		trace_tick_stop(1, " ");
+		trace_tick_stop(1, TICK_DEP_MASK_NONE);
 	}
 
 	/*
@@ -740,7 +837,7 @@
 	if (!ts->tick_stopped && ts->nohz_mode == NOHZ_MODE_INACTIVE)
 		return;
 
-	if (can_stop_full_tick())
+	if (can_stop_full_tick(ts))
 		tick_nohz_stop_sched_tick(ts, ktime_get(), cpu);
 	else if (ts->tick_stopped)
 		tick_nohz_restart_sched_tick(ts, ktime_get(), 1);

diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h
index a4a8d4e..eb4e325 100644
--- a/kernel/time/tick-sched.h
+++ b/kernel/time/tick-sched.h

@@ -60,6 +60,7 @@
 	u64				next_timer;
 	ktime_t				idle_expires;
 	int				do_timer_last;
+	unsigned long			tick_dep_mask;
 };
 
 extern struct tick_sched *tick_get_tick_sched(int cpu);

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 34b4ced..9c629bb 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c

@@ -233,6 +233,7 @@
 	u64 tmp, ntpinterval;
 	struct clocksource *old_clock;
 
+	++tk->cs_was_changed_seq;
 	old_clock = tk->tkr_mono.clock;
 	tk->tkr_mono.clock = clock;
 	tk->tkr_mono.read = clock->read;
@@ -298,19 +299,36 @@
 static inline u32 arch_gettimeoffset(void) { return 0; }
 #endif
 
-static inline s64 timekeeping_get_ns(struct tk_read_base *tkr)
+static inline s64 timekeeping_delta_to_ns(struct tk_read_base *tkr,
+					  cycle_t delta)
 {
-	cycle_t delta;
 	s64 nsec;
 
-	delta = timekeeping_get_delta(tkr);
-
-	nsec = (delta * tkr->mult + tkr->xtime_nsec) >> tkr->shift;
+	nsec = delta * tkr->mult + tkr->xtime_nsec;
+	nsec >>= tkr->shift;
 
 	/* If arch requires, add in get_arch_timeoffset() */
 	return nsec + arch_gettimeoffset();
 }
 
+static inline s64 timekeeping_get_ns(struct tk_read_base *tkr)
+{
+	cycle_t delta;
+
+	delta = timekeeping_get_delta(tkr);
+	return timekeeping_delta_to_ns(tkr, delta);
+}
+
+static inline s64 timekeeping_cycles_to_ns(struct tk_read_base *tkr,
+					    cycle_t cycles)
+{
+	cycle_t delta;
+
+	/* calculate the delta since the last update_wall_time */
+	delta = clocksource_delta(cycles, tkr->cycle_last, tkr->mask);
+	return timekeeping_delta_to_ns(tkr, delta);
+}
+
 /**
  * update_fast_timekeeper - Update the fast and NMI safe monotonic timekeeper.
  * @tkr: Timekeeping readout base from which we take the update
@@ -857,44 +875,262 @@
 	return tk->xtime_sec;
 }
 
-
-#ifdef CONFIG_NTP_PPS
-
 /**
- * ktime_get_raw_and_real_ts64 - get day and raw monotonic time in timespec format
- * @ts_raw:	pointer to the timespec to be set to raw monotonic time
- * @ts_real:	pointer to the timespec to be set to the time of day
- *
- * This function reads both the time of day and raw monotonic time at the
- * same time atomically and stores the resulting timestamps in timespec
- * format.
+ * ktime_get_snapshot - snapshots the realtime/monotonic raw clocks with counter
+ * @systime_snapshot:	pointer to struct receiving the system time snapshot
  */
-void ktime_get_raw_and_real_ts64(struct timespec64 *ts_raw, struct timespec64 *ts_real)
+void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot)
 {
 	struct timekeeper *tk = &tk_core.timekeeper;
 	unsigned long seq;
-	s64 nsecs_raw, nsecs_real;
+	ktime_t base_raw;
+	ktime_t base_real;
+	s64 nsec_raw;
+	s64 nsec_real;
+	cycle_t now;
 
 	WARN_ON_ONCE(timekeeping_suspended);
 
 	do {
 		seq = read_seqcount_begin(&tk_core.seq);
 
-		*ts_raw = tk->raw_time;
-		ts_real->tv_sec = tk->xtime_sec;
-		ts_real->tv_nsec = 0;
-
-		nsecs_raw  = timekeeping_get_ns(&tk->tkr_raw);
-		nsecs_real = timekeeping_get_ns(&tk->tkr_mono);
-
+		now = tk->tkr_mono.read(tk->tkr_mono.clock);
+		systime_snapshot->cs_was_changed_seq = tk->cs_was_changed_seq;
+		systime_snapshot->clock_was_set_seq = tk->clock_was_set_seq;
+		base_real = ktime_add(tk->tkr_mono.base,
+				      tk_core.timekeeper.offs_real);
+		base_raw = tk->tkr_raw.base;
+		nsec_real = timekeeping_cycles_to_ns(&tk->tkr_mono, now);
+		nsec_raw  = timekeeping_cycles_to_ns(&tk->tkr_raw, now);
 	} while (read_seqcount_retry(&tk_core.seq, seq));
 
-	timespec64_add_ns(ts_raw, nsecs_raw);
-	timespec64_add_ns(ts_real, nsecs_real);
+	systime_snapshot->cycles = now;
+	systime_snapshot->real = ktime_add_ns(base_real, nsec_real);
+	systime_snapshot->raw = ktime_add_ns(base_raw, nsec_raw);
 }
-EXPORT_SYMBOL(ktime_get_raw_and_real_ts64);
+EXPORT_SYMBOL_GPL(ktime_get_snapshot);
 
-#endif /* CONFIG_NTP_PPS */
+/* Scale base by mult/div checking for overflow */
+static int scale64_check_overflow(u64 mult, u64 div, u64 *base)
+{
+	u64 tmp, rem;
+
+	tmp = div64_u64_rem(*base, div, &rem);
+
+	if (((int)sizeof(u64)*8 - fls64(mult) < fls64(tmp)) ||
+	    ((int)sizeof(u64)*8 - fls64(mult) < fls64(rem)))
+		return -EOVERFLOW;
+	tmp *= mult;
+	rem *= mult;
+
+	do_div(rem, div);
+	*base = tmp + rem;
+	return 0;
+}
+
+/**
+ * adjust_historical_crosststamp - adjust crosstimestamp previous to current interval
+ * @history:			Snapshot representing start of history
+ * @partial_history_cycles:	Cycle offset into history (fractional part)
+ * @total_history_cycles:	Total history length in cycles
+ * @discontinuity:		True indicates clock was set on history period
+ * @ts:				Cross timestamp that should be adjusted using
+ *	partial/total ratio
+ *
+ * Helper function used by get_device_system_crosststamp() to correct the
+ * crosstimestamp corresponding to the start of the current interval to the
+ * system counter value (timestamp point) provided by the driver. The
+ * total_history_* quantities are the total history starting at the provided
+ * reference point and ending at the start of the current interval. The cycle
+ * count between the driver timestamp point and the start of the current
+ * interval is partial_history_cycles.
+ */
+static int adjust_historical_crosststamp(struct system_time_snapshot *history,
+					 cycle_t partial_history_cycles,
+					 cycle_t total_history_cycles,
+					 bool discontinuity,
+					 struct system_device_crosststamp *ts)
+{
+	struct timekeeper *tk = &tk_core.timekeeper;
+	u64 corr_raw, corr_real;
+	bool interp_forward;
+	int ret;
+
+	if (total_history_cycles == 0 || partial_history_cycles == 0)
+		return 0;
+
+	/* Interpolate shortest distance from beginning or end of history */
+	interp_forward = partial_history_cycles > total_history_cycles/2 ?
+		true : false;
+	partial_history_cycles = interp_forward ?
+		total_history_cycles - partial_history_cycles :
+		partial_history_cycles;
+
+	/*
+	 * Scale the monotonic raw time delta by:
+	 *	partial_history_cycles / total_history_cycles
+	 */
+	corr_raw = (u64)ktime_to_ns(
+		ktime_sub(ts->sys_monoraw, history->raw));
+	ret = scale64_check_overflow(partial_history_cycles,
+				     total_history_cycles, &corr_raw);
+	if (ret)
+		return ret;
+
+	/*
+	 * If there is a discontinuity in the history, scale monotonic raw
+	 *	correction by:
+	 *	mult(real)/mult(raw) yielding the realtime correction
+	 * Otherwise, calculate the realtime correction similar to monotonic
+	 *	raw calculation
+	 */
+	if (discontinuity) {
+		corr_real = mul_u64_u32_div
+			(corr_raw, tk->tkr_mono.mult, tk->tkr_raw.mult);
+	} else {
+		corr_real = (u64)ktime_to_ns(
+			ktime_sub(ts->sys_realtime, history->real));
+		ret = scale64_check_overflow(partial_history_cycles,
+					     total_history_cycles, &corr_real);
+		if (ret)
+			return ret;
+	}
+
+	/* Fixup monotonic raw and real time time values */
+	if (interp_forward) {
+		ts->sys_monoraw = ktime_add_ns(history->raw, corr_raw);
+		ts->sys_realtime = ktime_add_ns(history->real, corr_real);
+	} else {
+		ts->sys_monoraw = ktime_sub_ns(ts->sys_monoraw, corr_raw);
+		ts->sys_realtime = ktime_sub_ns(ts->sys_realtime, corr_real);
+	}
+
+	return 0;
+}
+
+/*
+ * cycle_between - true if test occurs chronologically between before and after
+ */
+static bool cycle_between(cycle_t before, cycle_t test, cycle_t after)
+{
+	if (test > before && test < after)
+		return true;
+	if (test < before && before > after)
+		return true;
+	return false;
+}
+
+/**
+ * get_device_system_crosststamp - Synchronously capture system/device timestamp
+ * @get_time_fn:	Callback to get simultaneous device time and
+ *	system counter from the device driver
+ * @ctx:		Context passed to get_time_fn()
+ * @history_begin:	Historical reference point used to interpolate system
+ *	time when counter provided by the driver is before the current interval
+ * @xtstamp:		Receives simultaneously captured system and device time
+ *
+ * Reads a timestamp from a device and correlates it to system time
+ */
+int get_device_system_crosststamp(int (*get_time_fn)
+				  (ktime_t *device_time,
+				   struct system_counterval_t *sys_counterval,
+				   void *ctx),
+				  void *ctx,
+				  struct system_time_snapshot *history_begin,
+				  struct system_device_crosststamp *xtstamp)
+{
+	struct system_counterval_t system_counterval;
+	struct timekeeper *tk = &tk_core.timekeeper;
+	cycle_t cycles, now, interval_start;
+	unsigned int clock_was_set_seq = 0;
+	ktime_t base_real, base_raw;
+	s64 nsec_real, nsec_raw;
+	u8 cs_was_changed_seq;
+	unsigned long seq;
+	bool do_interp;
+	int ret;
+
+	do {
+		seq = read_seqcount_begin(&tk_core.seq);
+		/*
+		 * Try to synchronously capture device time and a system
+		 * counter value calling back into the device driver
+		 */
+		ret = get_time_fn(&xtstamp->device, &system_counterval, ctx);
+		if (ret)
+			return ret;
+
+		/*
+		 * Verify that the clocksource associated with the captured
+		 * system counter value is the same as the currently installed
+		 * timekeeper clocksource
+		 */
+		if (tk->tkr_mono.clock != system_counterval.cs)
+			return -ENODEV;
+		cycles = system_counterval.cycles;
+
+		/*
+		 * Check whether the system counter value provided by the
+		 * device driver is on the current timekeeping interval.
+		 */
+		now = tk->tkr_mono.read(tk->tkr_mono.clock);
+		interval_start = tk->tkr_mono.cycle_last;
+		if (!cycle_between(interval_start, cycles, now)) {
+			clock_was_set_seq = tk->clock_was_set_seq;
+			cs_was_changed_seq = tk->cs_was_changed_seq;
+			cycles = interval_start;
+			do_interp = true;
+		} else {
+			do_interp = false;
+		}
+
+		base_real = ktime_add(tk->tkr_mono.base,
+				      tk_core.timekeeper.offs_real);
+		base_raw = tk->tkr_raw.base;
+
+		nsec_real = timekeeping_cycles_to_ns(&tk->tkr_mono,
+						     system_counterval.cycles);
+		nsec_raw = timekeeping_cycles_to_ns(&tk->tkr_raw,
+						    system_counterval.cycles);
+	} while (read_seqcount_retry(&tk_core.seq, seq));
+
+	xtstamp->sys_realtime = ktime_add_ns(base_real, nsec_real);
+	xtstamp->sys_monoraw = ktime_add_ns(base_raw, nsec_raw);
+
+	/*
+	 * Interpolate if necessary, adjusting back from the start of the
+	 * current interval
+	 */
+	if (do_interp) {
+		cycle_t partial_history_cycles, total_history_cycles;
+		bool discontinuity;
+
+		/*
+		 * Check that the counter value occurs after the provided
+		 * history reference and that the history doesn't cross a
+		 * clocksource change
+		 */
+		if (!history_begin ||
+		    !cycle_between(history_begin->cycles,
+				   system_counterval.cycles, cycles) ||
+		    history_begin->cs_was_changed_seq != cs_was_changed_seq)
+			return -EINVAL;
+		partial_history_cycles = cycles - system_counterval.cycles;
+		total_history_cycles = cycles - history_begin->cycles;
+		discontinuity =
+			history_begin->clock_was_set_seq != clock_was_set_seq;
+
+		ret = adjust_historical_crosststamp(history_begin,
+						    partial_history_cycles,
+						    total_history_cycles,
+						    discontinuity, xtstamp);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(get_device_system_crosststamp);
 
 /**
  * do_gettimeofday - Returns the time of day in a timeval

diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index f333e57..05ddc08 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c

@@ -97,16 +97,16 @@
 	struct ftrace_event_field *field;
 	struct list_head *head;
 
+	head = trace_get_fields(call);
+	field = __find_event_field(head, name);
+	if (field)
+		return field;
+
 	field = __find_event_field(&ftrace_generic_fields, name);
 	if (field)
 		return field;
 
-	field = __find_event_field(&ftrace_common_fields, name);
-	if (field)
-		return field;
-
-	head = trace_get_fields(call);
-	return __find_event_field(head, name);
+	return __find_event_field(&ftrace_common_fields, name);
 }
 
 static int __trace_define_field(struct list_head *head, const char *type,
@@ -171,8 +171,10 @@
 {
 	int ret;
 
-	__generic_field(int, cpu, FILTER_OTHER);
-	__generic_field(char *, comm, FILTER_PTR_STRING);
+	__generic_field(int, CPU, FILTER_CPU);
+	__generic_field(int, cpu, FILTER_CPU);
+	__generic_field(char *, COMM, FILTER_COMM);
+	__generic_field(char *, comm, FILTER_COMM);
 
 	return ret;
 }
@@ -869,7 +871,8 @@
 		 * The ftrace subsystem is for showing formats only.
 		 * They can not be enabled or disabled via the event files.
 		 */
-		if (call->class && call->class->reg)
+		if (call->class && call->class->reg &&
+		    !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE))
 			return file;
 	}
 

diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index f93a219..6816302 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c

@@ -1043,13 +1043,14 @@
 		return -EINVAL;
 	}
 
-	if (is_string_field(field)) {
+	if (field->filter_type == FILTER_COMM) {
+		filter_build_regex(pred);
+		fn = filter_pred_comm;
+		pred->regex.field_len = TASK_COMM_LEN;
+	} else if (is_string_field(field)) {
 		filter_build_regex(pred);
 
-		if (!strcmp(field->name, "comm")) {
-			fn = filter_pred_comm;
-			pred->regex.field_len = TASK_COMM_LEN;
-		} else if (field->filter_type == FILTER_STATIC_STRING) {
+		if (field->filter_type == FILTER_STATIC_STRING) {
 			fn = filter_pred_string;
 			pred->regex.field_len = field->size;
 		} else if (field->filter_type == FILTER_DYN_STRING)
@@ -1072,7 +1073,7 @@
 		}
 		pred->val = val;
 
-		if (!strcmp(field->name, "cpu"))
+		if (field->filter_type == FILTER_CPU)
 			fn = filter_pred_cpu;
 		else
 			fn = select_comparison_fn(pred->op, field->size,

diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index c995644..21b81a4 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c

@@ -30,7 +30,7 @@
 struct trace_kprobe {
 	struct list_head	list;
 	struct kretprobe	rp;	/* Use rp.kp for kprobe use */
-	unsigned long 		nhit;
+	unsigned long __percpu *nhit;
 	const char		*symbol;	/* symbol name */
 	struct trace_probe	tp;
 };
@@ -274,6 +274,10 @@
 	if (!tk)
 		return ERR_PTR(ret);
 
+	tk->nhit = alloc_percpu(unsigned long);
+	if (!tk->nhit)
+		goto error;
+
 	if (symbol) {
 		tk->symbol = kstrdup(symbol, GFP_KERNEL);
 		if (!tk->symbol)
@@ -313,6 +317,7 @@
 error:
 	kfree(tk->tp.call.name);
 	kfree(tk->symbol);
+	free_percpu(tk->nhit);
 	kfree(tk);
 	return ERR_PTR(ret);
 }
@@ -327,6 +332,7 @@
 	kfree(tk->tp.call.class->system);
 	kfree(tk->tp.call.name);
 	kfree(tk->symbol);
+	free_percpu(tk->nhit);
 	kfree(tk);
 }
 
@@ -874,9 +880,14 @@
 static int probes_profile_seq_show(struct seq_file *m, void *v)
 {
 	struct trace_kprobe *tk = v;
+	unsigned long nhit = 0;
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		nhit += *per_cpu_ptr(tk->nhit, cpu);
 
 	seq_printf(m, "  %-44s %15lu %15lu\n",
-		   trace_event_name(&tk->tp.call), tk->nhit,
+		   trace_event_name(&tk->tp.call), nhit,
 		   tk->rp.kp.nmissed);
 
 	return 0;
@@ -1225,7 +1236,7 @@
 {
 	struct trace_kprobe *tk = container_of(kp, struct trace_kprobe, rp.kp);
 
-	tk->nhit++;
+	raw_cpu_inc(*tk->nhit);
 
 	if (tk->tp.flags & TP_FLAG_TRACE)
 		kprobe_trace_func(tk, regs);
@@ -1242,7 +1253,7 @@
 {
 	struct trace_kprobe *tk = container_of(ri->rp, struct trace_kprobe, rp);
 
-	tk->nhit++;
+	raw_cpu_inc(*tk->nhit);
 
 	if (tk->tp.flags & TP_FLAG_TRACE)
 		kretprobe_trace_func(tk, ri, regs);

diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 202df6c..2a1abba 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c

@@ -156,7 +156,11 @@
 		for (; p < top && i < stack_trace_max.nr_entries; p++) {
 			if (stack_dump_trace[i] == ULONG_MAX)
 				break;
-			if (*p == stack_dump_trace[i]) {
+			/*
+			 * The READ_ONCE_NOCHECK is used to let KASAN know that
+			 * this is not a stack-out-of-bounds error.
+			 */
+			if ((READ_ONCE_NOCHECK(*p)) == stack_dump_trace[i]) {
 				stack_dump_trace[x] = stack_dump_trace[i++];
 				this_size = stack_trace_index[x++] =
 					(top - p) * sizeof(unsigned long);

diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 0655afb..d166308 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c

@@ -186,11 +186,11 @@
 
 extern char *__bad_type_size(void);
 
-#define SYSCALL_FIELD(type, name)					\
-	sizeof(type) != sizeof(trace.name) ?				\
+#define SYSCALL_FIELD(type, field, name)				\
+	sizeof(type) != sizeof(trace.field) ?				\
 		__bad_type_size() :					\
-		#type, #name, offsetof(typeof(trace), name),		\
-		sizeof(trace.name), is_signed_type(type)
+		#type, #name, offsetof(typeof(trace), field),		\
+		sizeof(trace.field), is_signed_type(type)
 
 static int __init
 __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len)
@@ -261,7 +261,8 @@
 	int i;
 	int offset = offsetof(typeof(trace), args);
 
-	ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
+	ret = trace_define_field(call, SYSCALL_FIELD(int, nr, __syscall_nr),
+				 FILTER_OTHER);
 	if (ret)
 		return ret;
 
@@ -281,11 +282,12 @@
 	struct syscall_trace_exit trace;
 	int ret;
 
-	ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
+	ret = trace_define_field(call, SYSCALL_FIELD(int, nr, __syscall_nr),
+				 FILTER_OTHER);
 	if (ret)
 		return ret;
 
-	ret = trace_define_field(call, SYSCALL_FIELD(long, ret),
+	ret = trace_define_field(call, SYSCALL_FIELD(long, ret, ret),
 				 FILTER_OTHER);
 
 	return ret;

diff --git a/kernel/tsacct.c b/kernel/tsacct.c
index 975cb49..f8e26ab 100644
--- a/kernel/tsacct.c
+++ b/kernel/tsacct.c

@@ -93,9 +93,11 @@
 {
 	struct mm_struct *mm;
 
-	/* convert pages-usec to Mbyte-usec */
-	stats->coremem = p->acct_rss_mem1 * PAGE_SIZE / MB;
-	stats->virtmem = p->acct_vm_mem1 * PAGE_SIZE / MB;
+	/* convert pages-nsec/1024 to Mbyte-usec, see __acct_update_integrals */
+	stats->coremem = p->acct_rss_mem1 * PAGE_SIZE;
+	do_div(stats->coremem, 1000 * KB);
+	stats->virtmem = p->acct_vm_mem1 * PAGE_SIZE;
+	do_div(stats->virtmem, 1000 * KB);
 	mm = get_task_mm(p);
 	if (mm) {
 		/* adjust to KB unit */
@@ -123,27 +125,28 @@
 static void __acct_update_integrals(struct task_struct *tsk,
 				    cputime_t utime, cputime_t stime)
 {
-	if (likely(tsk->mm)) {
-		cputime_t time, dtime;
-		struct timeval value;
-		unsigned long flags;
-		u64 delta;
+	cputime_t time, dtime;
+	u64 delta;
 
-		local_irq_save(flags);
-		time = stime + utime;
-		dtime = time - tsk->acct_timexpd;
-		jiffies_to_timeval(cputime_to_jiffies(dtime), &value);
-		delta = value.tv_sec;
-		delta = delta * USEC_PER_SEC + value.tv_usec;
+	if (!likely(tsk->mm))
+		return;
 
-		if (delta == 0)
-			goto out;
-		tsk->acct_timexpd = time;
-		tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm);
-		tsk->acct_vm_mem1 += delta * tsk->mm->total_vm;
-	out:
-		local_irq_restore(flags);
-	}
+	time = stime + utime;
+	dtime = time - tsk->acct_timexpd;
+	/* Avoid division: cputime_t is often in nanoseconds already. */
+	delta = cputime_to_nsecs(dtime);
+
+	if (delta < TICK_NSEC)
+		return;
+
+	tsk->acct_timexpd = time;
+	/*
+	 * Divide by 1024 to avoid overflow, and to avoid division.
+	 * The final unit reported to userspace is Mbyte-usecs,
+	 * the rest of the math is done in xacct_add_tsk.
+	 */
+	tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm) >> 10;
+	tsk->acct_vm_mem1 += delta * tsk->mm->total_vm >> 10;
 }
 
 /**
@@ -153,9 +156,12 @@
 void acct_update_integrals(struct task_struct *tsk)
 {
 	cputime_t utime, stime;
+	unsigned long flags;
 
+	local_irq_save(flags);
 	task_cputime(tsk, &utime, &stime);
 	__acct_update_integrals(tsk, utime, stime);
+	local_irq_restore(flags);
 }
 
 /**

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 8bfd1ac..f28f7fa 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug

@@ -1442,6 +1442,19 @@
 
 	  Say N if you are unsure.
 
+config CPU_HOTPLUG_STATE_CONTROL
+	bool "Enable CPU hotplug state control"
+	depends on DEBUG_KERNEL
+	depends on HOTPLUG_CPU
+	default n
+	help
+	  Allows to write steps between "offline" and "online" to the CPUs
+	  sysfs target file so states can be stepped granular. This is a debug
+	  option for now as the hotplug machinery cannot be stopped and
+	  restarted at arbitrary points yet.
+
+	  Say N if your are unsure.
+
 config NOTIFIER_ERROR_INJECTION
 	tristate "Notifier error injection"
 	depends on DEBUG_KERNEL

diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c
index d62de8b..1234818 100644
--- a/lib/atomic64_test.c
+++ b/lib/atomic64_test.c

@@ -17,7 +17,7 @@
 #include <linux/atomic.h>
 
 #ifdef CONFIG_X86
-#include <asm/processor.h>	/* for boot_cpu_has below */
+#include <asm/cpufeature.h>	/* for boot_cpu_has below */
 #endif
 
 #define TEST(bit, op, c_op, val)				\

diff --git a/lib/cpumask.c b/lib/cpumask.c
index 5a70f61..81dedaa 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c

@@ -41,6 +41,7 @@
 			break;
 	return i;
 }
+EXPORT_SYMBOL(cpumask_any_but);
 
 /* These are not inline because of header tangles. */
 #ifdef CONFIG_CPUMASK_OFFSTACK

diff --git a/lib/list_debug.c b/lib/list_debug.c
index 3345a08..3859bf6 100644
--- a/lib/list_debug.c
+++ b/lib/list_debug.c

@@ -12,13 +12,6 @@
 #include <linux/kernel.h>
 #include <linux/rculist.h>
 
-static struct list_head force_poison;
-void list_force_poison(struct list_head *entry)
-{
-	entry->next = &force_poison;
-	entry->prev = &force_poison;
-}
-
 /*
  * Insert a new entry between two known consecutive entries.
  *
@@ -30,8 +23,6 @@
 			      struct list_head *prev,
 			      struct list_head *next)
 {
-	WARN(new->next == &force_poison || new->prev == &force_poison,
-		"list_add attempted on force-poisoned entry\n");
 	WARN(next->prev != prev,
 		"list_add corruption. next->prev should be "
 		"prev (%p), but was %p. (next=%p).\n",

diff --git a/lib/test_static_keys.c b/lib/test_static_keys.c
index c61b299..915d75d 100644
--- a/lib/test_static_keys.c
+++ b/lib/test_static_keys.c

@@ -46,8 +46,11 @@
 	bool			(*test_key)(void);
 };
 
-#define test_key_func(key, branch) \
-	({bool func(void) { return branch(key); } func;	})
+#define test_key_func(key, branch)	\
+static bool key ## _ ## branch(void)	\
+{					\
+	return branch(&key);		\
+}
 
 static void invert_key(struct static_key *key)
 {
@@ -92,6 +95,25 @@
 	return 0;
 }
 
+test_key_func(old_true_key, static_key_true)
+test_key_func(old_false_key, static_key_false)
+test_key_func(true_key, static_branch_likely)
+test_key_func(true_key, static_branch_unlikely)
+test_key_func(false_key, static_branch_likely)
+test_key_func(false_key, static_branch_unlikely)
+test_key_func(base_old_true_key, static_key_true)
+test_key_func(base_inv_old_true_key, static_key_true)
+test_key_func(base_old_false_key, static_key_false)
+test_key_func(base_inv_old_false_key, static_key_false)
+test_key_func(base_true_key, static_branch_likely)
+test_key_func(base_true_key, static_branch_unlikely)
+test_key_func(base_inv_true_key, static_branch_likely)
+test_key_func(base_inv_true_key, static_branch_unlikely)
+test_key_func(base_false_key, static_branch_likely)
+test_key_func(base_false_key, static_branch_unlikely)
+test_key_func(base_inv_false_key, static_branch_likely)
+test_key_func(base_inv_false_key, static_branch_unlikely)
+
 static int __init test_static_key_init(void)
 {
 	int ret;
@@ -102,95 +124,95 @@
 		{
 			.init_state	= true,
 			.key		= &old_true_key,
-			.test_key	= test_key_func(&old_true_key, static_key_true),
+			.test_key	= &old_true_key_static_key_true,
 		},
 		{
 			.init_state	= false,
 			.key		= &old_false_key,
-			.test_key	= test_key_func(&old_false_key, static_key_false),
+			.test_key	= &old_false_key_static_key_false,
 		},
 		/* internal keys - new keys */
 		{
 			.init_state	= true,
 			.key		= &true_key.key,
-			.test_key	= test_key_func(&true_key, static_branch_likely),
+			.test_key	= &true_key_static_branch_likely,
 		},
 		{
 			.init_state	= true,
 			.key		= &true_key.key,
-			.test_key	= test_key_func(&true_key, static_branch_unlikely),
+			.test_key	= &true_key_static_branch_unlikely,
 		},
 		{
 			.init_state	= false,
 			.key		= &false_key.key,
-			.test_key	= test_key_func(&false_key, static_branch_likely),
+			.test_key	= &false_key_static_branch_likely,
 		},
 		{
 			.init_state	= false,
 			.key		= &false_key.key,
-			.test_key	= test_key_func(&false_key, static_branch_unlikely),
+			.test_key	= &false_key_static_branch_unlikely,
 		},
 		/* external keys - old keys */
 		{
 			.init_state	= true,
 			.key		= &base_old_true_key,
-			.test_key	= test_key_func(&base_old_true_key, static_key_true),
+			.test_key	= &base_old_true_key_static_key_true,
 		},
 		{
 			.init_state	= false,
 			.key		= &base_inv_old_true_key,
-			.test_key	= test_key_func(&base_inv_old_true_key, static_key_true),
+			.test_key	= &base_inv_old_true_key_static_key_true,
 		},
 		{
 			.init_state	= false,
 			.key		= &base_old_false_key,
-			.test_key	= test_key_func(&base_old_false_key, static_key_false),
+			.test_key	= &base_old_false_key_static_key_false,
 		},
 		{
 			.init_state	= true,
 			.key		= &base_inv_old_false_key,
-			.test_key	= test_key_func(&base_inv_old_false_key, static_key_false),
+			.test_key	= &base_inv_old_false_key_static_key_false,
 		},
 		/* external keys - new keys */
 		{
 			.init_state	= true,
 			.key		= &base_true_key.key,
-			.test_key	= test_key_func(&base_true_key, static_branch_likely),
+			.test_key	= &base_true_key_static_branch_likely,
 		},
 		{
 			.init_state	= true,
 			.key		= &base_true_key.key,
-			.test_key	= test_key_func(&base_true_key, static_branch_unlikely),
+			.test_key	= &base_true_key_static_branch_unlikely,
 		},
 		{
 			.init_state	= false,
 			.key		= &base_inv_true_key.key,
-			.test_key	= test_key_func(&base_inv_true_key, static_branch_likely),
+			.test_key	= &base_inv_true_key_static_branch_likely,
 		},
 		{
 			.init_state	= false,
 			.key		= &base_inv_true_key.key,
-			.test_key	= test_key_func(&base_inv_true_key, static_branch_unlikely),
+			.test_key	= &base_inv_true_key_static_branch_unlikely,
 		},
 		{
 			.init_state	= false,
 			.key		= &base_false_key.key,
-			.test_key	= test_key_func(&base_false_key, static_branch_likely),
+			.test_key	= &base_false_key_static_branch_likely,
 		},
 		{
 			.init_state	= false,
 			.key		= &base_false_key.key,
-			.test_key	= test_key_func(&base_false_key, static_branch_unlikely),
+			.test_key	= &base_false_key_static_branch_unlikely,
 		},
 		{
 			.init_state	= true,
 			.key		= &base_inv_false_key.key,
-			.test_key	= test_key_func(&base_inv_false_key, static_branch_likely),
+			.test_key	= &base_inv_false_key_static_branch_likely,
 		},
 		{
 			.init_state	= true,
 			.key		= &base_inv_false_key.key,
-			.test_key	= test_key_func(&base_inv_false_key, static_branch_unlikely),
+			.test_key	= &base_inv_false_key_static_branch_unlikely,
 		},
 	};
 

diff --git a/mm/filemap.c b/mm/filemap.c
index 23edcce..da7a35d 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c

@@ -195,6 +195,30 @@
 	else
 		cleancache_invalidate_page(mapping, page);
 
+	VM_BUG_ON_PAGE(page_mapped(page), page);
+	if (!IS_ENABLED(CONFIG_DEBUG_VM) && unlikely(page_mapped(page))) {
+		int mapcount;
+
+		pr_alert("BUG: Bad page cache in process %s  pfn:%05lx\n",
+			 current->comm, page_to_pfn(page));
+		dump_page(page, "still mapped when deleted");
+		dump_stack();
+		add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
+
+		mapcount = page_mapcount(page);
+		if (mapping_exiting(mapping) &&
+		    page_count(page) >= mapcount + 2) {
+			/*
+			 * All vmas have already been torn down, so it's
+			 * a good bet that actually the page is unmapped,
+			 * and we'd prefer not to leak it: if we're wrong,
+			 * some other bad page check should catch it later.
+			 */
+			page_mapcount_reset(page);
+			atomic_sub(mapcount, &page->_count);
+		}
+	}
+
 	page_cache_tree_delete(mapping, page, shadow);
 
 	page->mapping = NULL;
@@ -205,7 +229,6 @@
 		__dec_zone_page_state(page, NR_FILE_PAGES);
 	if (PageSwapBacked(page))
 		__dec_zone_page_state(page, NR_SHMEM);
-	VM_BUG_ON_PAGE(page_mapped(page), page);
 
 	/*
 	 * At this point page must be either written or cleaned by truncate.
@@ -446,7 +469,8 @@
 {
 	int err = 0;
 
-	if (mapping->nrpages) {
+	if ((!dax_mapping(mapping) && mapping->nrpages) ||
+	    (dax_mapping(mapping) && mapping->nrexceptional)) {
 		err = filemap_fdatawrite(mapping);
 		/*
 		 * Even if the above returned error, the pages may be
@@ -482,13 +506,8 @@
 {
 	int err = 0;
 
-	if (dax_mapping(mapping) && mapping->nrexceptional) {
-		err = dax_writeback_mapping_range(mapping, lstart, lend);
-		if (err)
-			return err;
-	}
-
-	if (mapping->nrpages) {
+	if ((!dax_mapping(mapping) && mapping->nrpages) ||
+	    (dax_mapping(mapping) && mapping->nrexceptional)) {
 		err = __filemap_fdatawrite_range(mapping, lstart, lend,
 						 WB_SYNC_ALL);
 		/* See comment of filemap_write_and_wait() */

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 1c317b8..e10a4fe 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c

@@ -2836,6 +2836,7 @@
 	pgtable_t pgtable;
 	pmd_t _pmd;
 	bool young, write, dirty;
+	unsigned long addr;
 	int i;
 
 	VM_BUG_ON(haddr & ~HPAGE_PMD_MASK);
@@ -2865,7 +2866,7 @@
 	pgtable = pgtable_trans_huge_withdraw(mm, pmd);
 	pmd_populate(mm, &_pmd, pgtable);
 
-	for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) {
+	for (i = 0, addr = haddr; i < HPAGE_PMD_NR; i++, addr += PAGE_SIZE) {
 		pte_t entry, *pte;
 		/*
 		 * Note that NUMA hinting access restrictions are not
@@ -2886,9 +2887,9 @@
 		}
 		if (dirty)
 			SetPageDirty(page + i);
-		pte = pte_offset_map(&_pmd, haddr);
+		pte = pte_offset_map(&_pmd, addr);
 		BUG_ON(!pte_none(*pte));
-		set_pte_at(mm, haddr, pte, entry);
+		set_pte_at(mm, addr, pte, entry);
 		atomic_inc(&page[i]._mapcount);
 		pte_unmap(pte);
 	}
@@ -2938,7 +2939,7 @@
 	pmd_populate(mm, pmd, pgtable);
 
 	if (freeze) {
-		for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) {
+		for (i = 0; i < HPAGE_PMD_NR; i++) {
 			page_remove_rmap(page + i, false);
 			put_page(page + i);
 		}

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 01f2b48..aefba5a 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c

@@ -2751,7 +2751,7 @@
 	int ret;
 
 	if (!hugepages_supported())
-		return -ENOTSUPP;
+		return -EOPNOTSUPP;
 
 	table->data = &tmp;
 	table->maxlen = sizeof(unsigned long);
@@ -2792,7 +2792,7 @@
 	int ret;
 
 	if (!hugepages_supported())
-		return -ENOTSUPP;
+		return -EOPNOTSUPP;
 
 	tmp = h->nr_overcommit_huge_pages;
 
@@ -3502,7 +3502,7 @@
 	 * COW. Warn that such a situation has occurred as it may not be obvious
 	 */
 	if (is_vma_resv_set(vma, HPAGE_RESV_UNMAPPED)) {
-		pr_warning("PID %d killed due to inadequate hugepage pool\n",
+		pr_warn_ratelimited("PID %d killed due to inadequate hugepage pool\n",
 			   current->pid);
 		return ret;
 	}

diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
index bc0a8d8..1ad20ad 100644
--- a/mm/kasan/kasan.c
+++ b/mm/kasan/kasan.c

@@ -20,6 +20,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/kmemleak.h>
+#include <linux/linkage.h>
 #include <linux/memblock.h>
 #include <linux/memory.h>
 #include <linux/mm.h>
@@ -60,6 +61,25 @@
 	}
 }
 
+static void __kasan_unpoison_stack(struct task_struct *task, void *sp)
+{
+	void *base = task_stack_page(task);
+	size_t size = sp - base;
+
+	kasan_unpoison_shadow(base, size);
+}
+
+/* Unpoison the entire stack for a task. */
+void kasan_unpoison_task_stack(struct task_struct *task)
+{
+	__kasan_unpoison_stack(task, task_stack_page(task) + THREAD_SIZE);
+}
+
+/* Unpoison the stack for the current task beyond a watermark sp value. */
+asmlinkage void kasan_unpoison_remaining_stack(void *sp)
+{
+	__kasan_unpoison_stack(current, sp);
+}
 
 /*
  * All functions below always inlined so compiler could

diff --git a/mm/memory.c b/mm/memory.c
index 635451a..906d8e3 100644
--- a/mm/memory.c
+++ b/mm/memory.c

@@ -1551,8 +1551,29 @@
 int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
 			unsigned long pfn)
 {
+	return vm_insert_pfn_prot(vma, addr, pfn, vma->vm_page_prot);
+}
+EXPORT_SYMBOL(vm_insert_pfn);
+
+/**
+ * vm_insert_pfn_prot - insert single pfn into user vma with specified pgprot
+ * @vma: user vma to map to
+ * @addr: target user address of this page
+ * @pfn: source kernel pfn
+ * @pgprot: pgprot flags for the inserted page
+ *
+ * This is exactly like vm_insert_pfn, except that it allows drivers to
+ * to override pgprot on a per-page basis.
+ *
+ * This only makes sense for IO mappings, and it makes no sense for
+ * cow mappings.  In general, using multiple vmas is preferable;
+ * vm_insert_pfn_prot should only be used if using multiple VMAs is
+ * impractical.
+ */
+int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
+			unsigned long pfn, pgprot_t pgprot)
+{
 	int ret;
-	pgprot_t pgprot = vma->vm_page_prot;
 	/*
 	 * Technically, architectures with pte_special can avoid all these
 	 * restrictions (same for remap_pfn_range).  However we would like
@@ -1574,7 +1595,7 @@
 
 	return ret;
 }
-EXPORT_SYMBOL(vm_insert_pfn);
+EXPORT_SYMBOL(vm_insert_pfn_prot);
 
 int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
 			pfn_t pfn)
@@ -3404,8 +3425,18 @@
 	if (unlikely(pmd_none(*pmd)) &&
 	    unlikely(__pte_alloc(mm, vma, pmd, address)))
 		return VM_FAULT_OOM;
-	/* if an huge pmd materialized from under us just retry later */
-	if (unlikely(pmd_trans_huge(*pmd) || pmd_devmap(*pmd)))
+	/*
+	 * If a huge pmd materialized under us just retry later.  Use
+	 * pmd_trans_unstable() instead of pmd_trans_huge() to ensure the pmd
+	 * didn't become pmd_trans_huge under us and then back to pmd_none, as
+	 * a result of MADV_DONTNEED running immediately after a huge pmd fault
+	 * in a different thread of this mm, in turn leading to a misleading
+	 * pmd_trans_huge() retval.  All we have to ensure is that it is a
+	 * regular pmd that we can walk with pte_offset_map() and we can do that
+	 * through an atomic read in C, which is what pmd_trans_unstable()
+	 * provides.
+	 */
+	if (unlikely(pmd_trans_unstable(pmd) || pmd_devmap(*pmd)))
 		return 0;
 	/*
 	 * A regular pmd is established and it can't morph into a huge pmd

diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 4af58a3..979b18c 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c

@@ -138,7 +138,7 @@
 	res->name = "System RAM";
 	res->start = start;
 	res->end = start + size - 1;
-	res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+	res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
 	if (request_resource(&iomem_resource, res) < 0) {
 		pr_debug("System RAM resource %pR cannot be added\n", res);
 		kfree(res);

diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 4c4187c..9a3f6b9 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c

@@ -532,7 +532,7 @@
 		nid = page_to_nid(page);
 		if (node_isset(nid, *qp->nmask) == !!(flags & MPOL_MF_INVERT))
 			continue;
-		if (PageTail(page) && PageAnon(page)) {
+		if (PageTransCompound(page) && PageAnon(page)) {
 			get_page(page);
 			pte_unmap_unlock(pte, ptl);
 			lock_page(page);

diff --git a/mm/mempool.c b/mm/mempool.c
index 004d42b..7924f4f 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c

@@ -135,8 +135,8 @@
 	void *element = pool->elements[--pool->curr_nr];
 
 	BUG_ON(pool->curr_nr < 0);
-	check_element(pool, element);
 	kasan_unpoison_element(pool, element);
+	check_element(pool, element);
 	return element;
 }
 

diff --git a/mm/migrate.c b/mm/migrate.c
index b1034f9..3ad0fea 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c

@@ -1582,7 +1582,7 @@
 					 (GFP_HIGHUSER_MOVABLE |
 					  __GFP_THISNODE | __GFP_NOMEMALLOC |
 					  __GFP_NORETRY | __GFP_NOWARN) &
-					 ~(__GFP_IO | __GFP_FS), 0);
+					 ~__GFP_RECLAIM, 0);
 
 	return newpage;
 }

diff --git a/mm/mmap.c b/mm/mmap.c
index 76d1ec2..90e3b86 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c

@@ -3066,11 +3066,16 @@
 	pgoff_t pgoff;
 	struct page **pages;
 
-	if (vma->vm_ops == &legacy_special_mapping_vmops)
+	if (vma->vm_ops == &legacy_special_mapping_vmops) {
 		pages = vma->vm_private_data;
-	else
-		pages = ((struct vm_special_mapping *)vma->vm_private_data)->
-			pages;
+	} else {
+		struct vm_special_mapping *sm = vma->vm_private_data;
+
+		if (sm->fault)
+			return sm->fault(sm, vma, vmf);
+
+		pages = sm->pages;
+	}
 
 	for (pgoff = vmf->pgoff; pgoff && *pages; ++pages)
 		pgoff--;

diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index d5871ac..f066781 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c

@@ -1625,7 +1625,7 @@
 
 		rt = atrtr_find(&at_hint);
 	}
-	err = ENETUNREACH;
+	err = -ENETUNREACH;
 	if (!rt)
 		goto out;
 

diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index e6c8382..ccf70be 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c

@@ -527,11 +527,12 @@
 		 * gets dereferenced.
 		 */
 		spin_lock_bh(&bat_priv->gw.list_lock);
-		hlist_del_init_rcu(&gw_node->list);
+		if (!hlist_unhashed(&gw_node->list)) {
+			hlist_del_init_rcu(&gw_node->list);
+			batadv_gw_node_free_ref(gw_node);
+		}
 		spin_unlock_bh(&bat_priv->gw.list_lock);
 
-		batadv_gw_node_free_ref(gw_node);
-
 		curr_gw = batadv_gw_get_selected_gw_node(bat_priv);
 		if (gw_node == curr_gw)
 			batadv_gw_reselect(bat_priv);

diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 01acccc..57f71071 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c

@@ -76,6 +76,28 @@
 }
 
 /**
+ * batadv_mutual_parents - check if two devices are each others parent
+ * @dev1: 1st net_device
+ * @dev2: 2nd net_device
+ *
+ * veth devices come in pairs and each is the parent of the other!
+ *
+ * Return: true if the devices are each others parent, otherwise false
+ */
+static bool batadv_mutual_parents(const struct net_device *dev1,
+				  const struct net_device *dev2)
+{
+	int dev1_parent_iflink = dev_get_iflink(dev1);
+	int dev2_parent_iflink = dev_get_iflink(dev2);
+
+	if (!dev1_parent_iflink || !dev2_parent_iflink)
+		return false;
+
+	return (dev1_parent_iflink == dev2->ifindex) &&
+	       (dev2_parent_iflink == dev1->ifindex);
+}
+
+/**
  * batadv_is_on_batman_iface - check if a device is a batman iface descendant
  * @net_dev: the device to check
  *
@@ -108,6 +130,9 @@
 	if (WARN(!parent_dev, "Cannot find parent device"))
 		return false;
 
+	if (batadv_mutual_parents(net_dev, parent_dev))
+		return false;
+
 	ret = batadv_is_on_batman_iface(parent_dev);
 
 	return ret;

diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index cdfc85f..0e80fd1 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c

@@ -303,9 +303,11 @@
 
 	if (atomic_add_return(v, &vlan->tt.num_entries) == 0) {
 		spin_lock_bh(&orig_node->vlan_list_lock);
-		hlist_del_init_rcu(&vlan->list);
+		if (!hlist_unhashed(&vlan->list)) {
+			hlist_del_init_rcu(&vlan->list);
+			batadv_orig_node_vlan_free_ref(vlan);
+		}
 		spin_unlock_bh(&orig_node->vlan_list_lock);
-		batadv_orig_node_vlan_free_ref(vlan);
 	}
 
 	batadv_orig_node_vlan_free_ref(vlan);

diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 47bcef754..883c821 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c

@@ -4112,8 +4112,10 @@
 			break;
 		}
 
-		*req_complete = bt_cb(skb)->hci.req_complete;
-		*req_complete_skb = bt_cb(skb)->hci.req_complete_skb;
+		if (bt_cb(skb)->hci.req_flags & HCI_REQ_SKB)
+			*req_complete_skb = bt_cb(skb)->hci.req_complete_skb;
+		else
+			*req_complete = bt_cb(skb)->hci.req_complete;
 		kfree_skb(skb);
 	}
 	spin_unlock_irqrestore(&hdev->cmd_q.lock, flags);

diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 82e3e97..dcea4f4 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c

@@ -723,6 +723,8 @@
 		struct net_bridge_fdb_entry *f;
 
 		hlist_for_each_entry_rcu(f, &br->hash[i], hlist) {
+			int err;
+
 			if (idx < cb->args[0])
 				goto skip;
 
@@ -741,12 +743,15 @@
 			if (!filter_dev && f->dst)
 				goto skip;
 
-			if (fdb_fill_info(skb, br, f,
-					  NETLINK_CB(cb->skb).portid,
-					  cb->nlh->nlmsg_seq,
-					  RTM_NEWNEIGH,
-					  NLM_F_MULTI) < 0)
+			err = fdb_fill_info(skb, br, f,
+					    NETLINK_CB(cb->skb).portid,
+					    cb->nlh->nlmsg_seq,
+					    RTM_NEWNEIGH,
+					    NLM_F_MULTI);
+			if (err < 0) {
+				cb->args[1] = err;
 				break;
+			}
 skip:
 			++idx;
 		}

diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 30e105f..74c278e 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c

@@ -425,8 +425,8 @@
 	mp = br_mdb_ip_get(mdb, group);
 	if (!mp) {
 		mp = br_multicast_new_group(br, port, group);
-		err = PTR_ERR(mp);
-		if (IS_ERR(mp))
+		err = PTR_ERR_OR_ZERO(mp);
+		if (err)
 			return err;
 	}
 

diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c
index 61d7617..b82440e 100644
--- a/net/caif/cfrfml.c
+++ b/net/caif/cfrfml.c

@@ -159,7 +159,7 @@
 		tmppkt = NULL;
 
 		/* Verify that length is correct */
-		err = EPROTO;
+		err = -EPROTO;
 		if (rfml->pdu_size != cfpkt_getlen(pkt) - RFM_HEAD_SIZE + 1)
 			goto out;
 	}

diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 9cfedf5..9382619 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c

@@ -1197,6 +1197,13 @@
 	return new_piece;
 }
 
+static size_t sizeof_footer(struct ceph_connection *con)
+{
+	return (con->peer_features & CEPH_FEATURE_MSG_AUTH) ?
+	    sizeof(struct ceph_msg_footer) :
+	    sizeof(struct ceph_msg_footer_old);
+}
+
 static void prepare_message_data(struct ceph_msg *msg, u32 data_len)
 {
 	BUG_ON(!msg);
@@ -2335,9 +2342,9 @@
 			ceph_pr_addr(&con->peer_addr.in_addr),
 			seq, con->in_seq + 1);
 		con->in_base_pos = -front_len - middle_len - data_len -
-			sizeof(m->footer);
+			sizeof_footer(con);
 		con->in_tag = CEPH_MSGR_TAG_READY;
-		return 0;
+		return 1;
 	} else if ((s64)seq - (s64)con->in_seq > 1) {
 		pr_err("read_partial_message bad seq %lld expected %lld\n",
 		       seq, con->in_seq + 1);
@@ -2360,10 +2367,10 @@
 			/* skip this message */
 			dout("alloc_msg said skip message\n");
 			con->in_base_pos = -front_len - middle_len - data_len -
-				sizeof(m->footer);
+				sizeof_footer(con);
 			con->in_tag = CEPH_MSGR_TAG_READY;
 			con->in_seq++;
-			return 0;
+			return 1;
 		}
 
 		BUG_ON(!con->in_msg);

diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 3534e12..5bc0537 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c

@@ -2853,8 +2853,8 @@
 	mutex_lock(&osdc->request_mutex);
 	req = __lookup_request(osdc, tid);
 	if (!req) {
-		pr_warn("%s osd%d tid %llu unknown, skipping\n",
-			__func__, osd->o_osd, tid);
+		dout("%s osd%d tid %llu unknown, skipping\n", __func__,
+		     osd->o_osd, tid);
 		m = NULL;
 		*skip = 1;
 		goto out;

diff --git a/net/core/dev.c b/net/core/dev.c
index 8cba3d8..0ef061b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c

@@ -5379,12 +5379,12 @@
 {
 	struct netdev_adjacent *lower;
 
-	lower = list_entry((*iter)->next, struct netdev_adjacent, list);
+	lower = list_entry(*iter, struct netdev_adjacent, list);
 
 	if (&lower->list == &dev->adj_list.lower)
 		return NULL;
 
-	*iter = &lower->list;
+	*iter = lower->list.next;
 
 	return lower->dev;
 }
@@ -7422,8 +7422,10 @@
 	dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
 	setup(dev);
 
-	if (!dev->tx_queue_len)
+	if (!dev->tx_queue_len) {
 		dev->priv_flags |= IFF_NO_QUEUE;
+		dev->tx_queue_len = 1;
+	}
 
 	dev->num_tx_queues = txqs;
 	dev->real_num_tx_queues = txqs;

diff --git a/net/core/filter.c b/net/core/filter.c
index 94d2620..bba502f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c

@@ -1752,7 +1752,7 @@
 	u8 compat[sizeof(struct bpf_tunnel_key)];
 	struct ip_tunnel_info *info;
 
-	if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6)))
+	if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX)))
 		return -EINVAL;
 	if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
 		switch (size) {
@@ -1776,7 +1776,7 @@
 	info = &md->u.tun_info;
 	info->mode = IP_TUNNEL_INFO_TX;
 
-	info->key.tun_flags = TUNNEL_KEY;
+	info->key.tun_flags = TUNNEL_KEY | TUNNEL_CSUM;
 	info->key.tun_id = cpu_to_be64(from->tunnel_id);
 	info->key.tos = from->tunnel_tos;
 	info->key.ttl = from->tunnel_ttl;
@@ -1787,6 +1787,8 @@
 		       sizeof(from->remote_ipv6));
 	} else {
 		info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4);
+		if (flags & BPF_F_ZERO_CSUM_TX)
+			info->key.tun_flags &= ~TUNNEL_CSUM;
 	}
 
 	return 0;

diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index eab81bc..12e7003 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c

@@ -399,6 +399,13 @@
 				goto out_bad;
 			proto = eth->h_proto;
 			nhoff += sizeof(*eth);
+
+			/* Cap headers that we access via pointers at the
+			 * end of the Ethernet header as our maximum alignment
+			 * at that point is only 2 bytes.
+			 */
+			if (NET_IP_ALIGN)
+				hlen = nhoff;
 		}
 
 		key_control->flags |= FLOW_DIS_ENCAPSULATION;

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index d735e85..8261d95 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c

@@ -2911,6 +2911,7 @@
 	nlmsg_populate_fdb(skb, cb, dev, &idx, &dev->mc);
 out:
 	netif_addr_unlock_bh(dev);
+	cb->args[1] = err;
 	return idx;
 }
 EXPORT_SYMBOL(ndo_dflt_fdb_dump);
@@ -2944,6 +2945,7 @@
 		ops = br_dev->netdev_ops;
 	}
 
+	cb->args[1] = 0;
 	for_each_netdev(net, dev) {
 		if (brport_idx && (dev->ifindex != brport_idx))
 			continue;
@@ -2971,12 +2973,16 @@
 				idx = cops->ndo_fdb_dump(skb, cb, br_dev, dev,
 							 idx);
 		}
+		if (cb->args[1] == -EMSGSIZE)
+			break;
 
 		if (dev->netdev_ops->ndo_fdb_dump)
 			idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, dev, NULL,
 							    idx);
 		else
 			idx = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx);
+		if (cb->args[1] == -EMSGSIZE)
+			break;
 
 		cops = NULL;
 	}

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 5bf88f5..8616d11 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c

@@ -2948,6 +2948,24 @@
 EXPORT_SYMBOL_GPL(skb_append_pagefrags);
 
 /**
+ *	skb_push_rcsum - push skb and update receive checksum
+ *	@skb: buffer to update
+ *	@len: length of data pulled
+ *
+ *	This function performs an skb_push on the packet and updates
+ *	the CHECKSUM_COMPLETE checksum.  It should be used on
+ *	receive path processing instead of skb_push unless you know
+ *	that the checksum difference is zero (e.g., a valid IP header)
+ *	or you are setting ip_summed to CHECKSUM_NONE.
+ */
+static unsigned char *skb_push_rcsum(struct sk_buff *skb, unsigned len)
+{
+	skb_push(skb, len);
+	skb_postpush_rcsum(skb, skb->data, len);
+	return skb->data;
+}
+
+/**
  *	skb_pull_rcsum - pull skb and update receive checksum
  *	@skb: buffer to update
  *	@len: length of data pulled
@@ -4084,9 +4102,9 @@
 	if (!pskb_may_pull(skb_chk, offset))
 		goto err;
 
-	__skb_pull(skb_chk, offset);
+	skb_pull_rcsum(skb_chk, offset);
 	ret = skb_chkf(skb_chk);
-	__skb_push(skb_chk, offset);
+	skb_push_rcsum(skb_chk, offset);
 
 	if (ret)
 		goto err;

diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 5684e14..902d606 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c

@@ -824,26 +824,26 @@
 
 	if (sk->sk_state == DCCP_NEW_SYN_RECV) {
 		struct request_sock *req = inet_reqsk(sk);
-		struct sock *nsk = NULL;
+		struct sock *nsk;
 
 		sk = req->rsk_listener;
-		if (likely(sk->sk_state == DCCP_LISTEN)) {
-			nsk = dccp_check_req(sk, skb, req);
-		} else {
+		if (unlikely(sk->sk_state != DCCP_LISTEN)) {
 			inet_csk_reqsk_queue_drop_and_put(sk, req);
 			goto lookup;
 		}
+		sock_hold(sk);
+		nsk = dccp_check_req(sk, skb, req);
 		if (!nsk) {
 			reqsk_put(req);
-			goto discard_it;
+			goto discard_and_relse;
 		}
 		if (nsk == sk) {
-			sock_hold(sk);
 			reqsk_put(req);
 		} else if (dccp_child_process(sk, nsk, skb)) {
 			dccp_v4_ctl_send_reset(sk, skb);
-			goto discard_it;
+			goto discard_and_relse;
 		} else {
+			sock_put(sk);
 			return 0;
 		}
 	}

diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 9c6d050..b8608b7 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c

@@ -691,26 +691,26 @@
 
 	if (sk->sk_state == DCCP_NEW_SYN_RECV) {
 		struct request_sock *req = inet_reqsk(sk);
-		struct sock *nsk = NULL;
+		struct sock *nsk;
 
 		sk = req->rsk_listener;
-		if (likely(sk->sk_state == DCCP_LISTEN)) {
-			nsk = dccp_check_req(sk, skb, req);
-		} else {
+		if (unlikely(sk->sk_state != DCCP_LISTEN)) {
 			inet_csk_reqsk_queue_drop_and_put(sk, req);
 			goto lookup;
 		}
+		sock_hold(sk);
+		nsk = dccp_check_req(sk, skb, req);
 		if (!nsk) {
 			reqsk_put(req);
-			goto discard_it;
+			goto discard_and_relse;
 		}
 		if (nsk == sk) {
-			sock_hold(sk);
 			reqsk_put(req);
 		} else if (dccp_child_process(sk, nsk, skb)) {
 			dccp_v6_ctl_send_reset(sk, skb);
-			goto discard_it;
+			goto discard_and_relse;
 		} else {
+			sock_put(sk);
 			return 0;
 		}
 	}

diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 40b9ca7..ab24521 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c

@@ -1194,7 +1194,6 @@
 	if (ret) {
 		netdev_err(master, "error %d registering interface %s\n",
 			   ret, slave_dev->name);
-		phy_disconnect(p->phy);
 		ds->ports[port] = NULL;
 		free_netdev(slave_dev);
 		return ret;
@@ -1205,6 +1204,7 @@
 	ret = dsa_slave_phy_setup(p, slave_dev);
 	if (ret) {
 		netdev_err(master, "error %d setting up slave phy\n", ret);
+		unregister_netdev(slave_dev);
 		free_netdev(slave_dev);
 		return ret;
 	}

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index cebd9d3..f6303b1 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c

@@ -1847,7 +1847,7 @@
 	if (err < 0)
 		goto errout;
 
-	err = EINVAL;
+	err = -EINVAL;
 	if (!tb[NETCONFA_IFINDEX])
 		goto errout;
 

diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 05e4cba..b3086cf 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c

@@ -356,9 +356,8 @@
 	skb_dst_set(skb, &rt->dst);
 	skb->dev = dev;
 
-	skb->reserved_tailroom = skb_end_offset(skb) -
-				 min(mtu, skb_end_offset(skb));
 	skb_reserve(skb, hlen);
+	skb_tailroom_reserve(skb, mtu, tlen);
 
 	skb_reset_network_header(skb);
 	pip = ip_hdr(skb);

diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 46b9c88..6414891 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c

@@ -789,14 +789,16 @@
 	reqsk_put(req);
 }
 
-void inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req,
-			      struct sock *child)
+struct sock *inet_csk_reqsk_queue_add(struct sock *sk,
+				      struct request_sock *req,
+				      struct sock *child)
 {
 	struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
 
 	spin_lock(&queue->rskq_lock);
 	if (unlikely(sk->sk_state != TCP_LISTEN)) {
 		inet_child_forget(sk, req, child);
+		child = NULL;
 	} else {
 		req->sk = child;
 		req->dl_next = NULL;
@@ -808,6 +810,7 @@
 		sk_acceptq_added(sk);
 	}
 	spin_unlock(&queue->rskq_lock);
+	return child;
 }
 EXPORT_SYMBOL(inet_csk_reqsk_queue_add);
 
@@ -817,11 +820,8 @@
 	if (own_req) {
 		inet_csk_reqsk_queue_drop(sk, req);
 		reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
-		inet_csk_reqsk_queue_add(sk, req, child);
-		/* Warning: caller must not call reqsk_put(req);
-		 * child stole last reference on it.
-		 */
-		return child;
+		if (inet_csk_reqsk_queue_add(sk, req, child))
+			return child;
 	}
 	/* Too bad, another child took ownership of the request, undo. */
 	bh_unlock_sock(child);

diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 56fdf4e0d..41ba68d 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c

@@ -1054,8 +1054,9 @@
 static void ipgre_tap_setup(struct net_device *dev)
 {
 	ether_setup(dev);
-	dev->netdev_ops		= &gre_tap_netdev_ops;
-	dev->priv_flags 	|= IFF_LIVE_ADDR_CHANGE;
+	dev->netdev_ops	= &gre_tap_netdev_ops;
+	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+	dev->priv_flags	|= IFF_LIVE_ADDR_CHANGE;
 	ip_tunnel_setup(dev, gre_tap_net_id);
 }
 

diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 64878ef..565bf64 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c

@@ -1236,13 +1236,16 @@
 	if (!skb)
 		return -EINVAL;
 
-	cork->length += size;
 	if ((size + skb->len > mtu) &&
 	    (sk->sk_protocol == IPPROTO_UDP) &&
 	    (rt->dst.dev->features & NETIF_F_UFO)) {
+		if (skb->ip_summed != CHECKSUM_PARTIAL)
+			return -EOPNOTSUPP;
+
 		skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
 		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
 	}
+	cork->length += size;
 
 	while (size > 0) {
 		if (skb_is_gso(skb)) {

diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 5f73a7c..a501242 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c

@@ -249,6 +249,8 @@
 		switch (cmsg->cmsg_type) {
 		case IP_RETOPTS:
 			err = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr));
+
+			/* Our caller is responsible for freeing ipc->opt */
 			err = ip_options_get(net, &ipc->opt, CMSG_DATA(cmsg),
 					     err < 40 ? err : 40);
 			if (err)

diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 89e8861..336e689 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c

@@ -661,6 +661,8 @@
 	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
 	connected = (tunnel->parms.iph.daddr != 0);
 
+	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+
 	dst = tnl_params->daddr;
 	if (dst == 0) {
 		/* NBMA tunnel */
@@ -758,7 +760,6 @@
 				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
 			tunnel->err_count--;
 
-			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
 			dst_link_failure(skb);
 		} else
 			tunnel->err_count = 0;

diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index c117b21..d3a2716 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c

@@ -746,8 +746,10 @@
 
 	if (msg->msg_controllen) {
 		err = ip_cmsg_send(sock_net(sk), msg, &ipc, false);
-		if (err)
+		if (unlikely(err)) {
+			kfree(ipc.opt);
 			return err;
+		}
 		if (ipc.opt)
 			free = 1;
 	}

diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index bc35f18..7113bae 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c

@@ -547,8 +547,10 @@
 
 	if (msg->msg_controllen) {
 		err = ip_cmsg_send(net, msg, &ipc, false);
-		if (err)
+		if (unlikely(err)) {
+			kfree(ipc.opt);
 			goto out;
+		}
 		if (ipc.opt)
 			free = 1;
 	}

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 85f184e..02c6229 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c

@@ -129,6 +129,7 @@
 static int ip_rt_min_pmtu __read_mostly		= 512 + 20 + 20;
 static int ip_rt_min_advmss __read_mostly	= 256;
 
+static int ip_rt_gc_timeout __read_mostly	= RT_GC_TIMEOUT;
 /*
  *	Interface to generic destination cache.
  */
@@ -755,7 +756,7 @@
 				struct fib_nh *nh = &FIB_RES_NH(res);
 
 				update_or_create_fnhe(nh, fl4->daddr, new_gw,
-						      0, 0);
+						0, jiffies + ip_rt_gc_timeout);
 			}
 			if (kill_route)
 				rt->dst.obsolete = DST_OBSOLETE_KILL;
@@ -1556,6 +1557,36 @@
 #endif
 }
 
+static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
+{
+	struct fnhe_hash_bucket *hash;
+	struct fib_nh_exception *fnhe, __rcu **fnhe_p;
+	u32 hval = fnhe_hashfun(daddr);
+
+	spin_lock_bh(&fnhe_lock);
+
+	hash = rcu_dereference_protected(nh->nh_exceptions,
+					 lockdep_is_held(&fnhe_lock));
+	hash += hval;
+
+	fnhe_p = &hash->chain;
+	fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock));
+	while (fnhe) {
+		if (fnhe->fnhe_daddr == daddr) {
+			rcu_assign_pointer(*fnhe_p, rcu_dereference_protected(
+				fnhe->fnhe_next, lockdep_is_held(&fnhe_lock)));
+			fnhe_flush_routes(fnhe);
+			kfree_rcu(fnhe, rcu);
+			break;
+		}
+		fnhe_p = &fnhe->fnhe_next;
+		fnhe = rcu_dereference_protected(fnhe->fnhe_next,
+						 lockdep_is_held(&fnhe_lock));
+	}
+
+	spin_unlock_bh(&fnhe_lock);
+}
+
 /* called in rcu_read_lock() section */
 static int __mkroute_input(struct sk_buff *skb,
 			   const struct fib_result *res,
@@ -1609,11 +1640,20 @@
 
 	fnhe = find_exception(&FIB_RES_NH(*res), daddr);
 	if (do_cache) {
-		if (fnhe)
+		if (fnhe) {
 			rth = rcu_dereference(fnhe->fnhe_rth_input);
-		else
-			rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
+			if (rth && rth->dst.expires &&
+			    time_after(jiffies, rth->dst.expires)) {
+				ip_del_fnhe(&FIB_RES_NH(*res), daddr);
+				fnhe = NULL;
+			} else {
+				goto rt_cache;
+			}
+		}
 
+		rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
+
+rt_cache:
 		if (rt_cache_valid(rth)) {
 			skb_dst_set_noref(skb, &rth->dst);
 			goto out;
@@ -2014,19 +2054,29 @@
 		struct fib_nh *nh = &FIB_RES_NH(*res);
 
 		fnhe = find_exception(nh, fl4->daddr);
-		if (fnhe)
+		if (fnhe) {
 			prth = &fnhe->fnhe_rth_output;
-		else {
-			if (unlikely(fl4->flowi4_flags &
-				     FLOWI_FLAG_KNOWN_NH &&
-				     !(nh->nh_gw &&
-				       nh->nh_scope == RT_SCOPE_LINK))) {
-				do_cache = false;
-				goto add;
+			rth = rcu_dereference(*prth);
+			if (rth && rth->dst.expires &&
+			    time_after(jiffies, rth->dst.expires)) {
+				ip_del_fnhe(nh, fl4->daddr);
+				fnhe = NULL;
+			} else {
+				goto rt_cache;
 			}
-			prth = raw_cpu_ptr(nh->nh_pcpu_rth_output);
 		}
+
+		if (unlikely(fl4->flowi4_flags &
+			     FLOWI_FLAG_KNOWN_NH &&
+			     !(nh->nh_gw &&
+			       nh->nh_scope == RT_SCOPE_LINK))) {
+			do_cache = false;
+			goto add;
+		}
+		prth = raw_cpu_ptr(nh->nh_pcpu_rth_output);
 		rth = rcu_dereference(*prth);
+
+rt_cache:
 		if (rt_cache_valid(rth)) {
 			dst_hold(&rth->dst);
 			return rth;
@@ -2569,7 +2619,6 @@
 }
 
 #ifdef CONFIG_SYSCTL
-static int ip_rt_gc_timeout __read_mostly	= RT_GC_TIMEOUT;
 static int ip_rt_gc_interval __read_mostly  = 60 * HZ;
 static int ip_rt_gc_min_interval __read_mostly	= HZ / 2;
 static int ip_rt_gc_elasticity __read_mostly	= 8;

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 0c36ef4..483ffdf 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c

@@ -2950,7 +2950,7 @@
 			struct crypto_hash *hash;
 
 			hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
-			if (IS_ERR_OR_NULL(hash))
+			if (IS_ERR(hash))
 				return;
 			per_cpu(tcp_md5sig_pool, cpu).md5_desc.tfm = hash;
 		}

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 1c2a734..3b2c8e9 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c

@@ -2896,7 +2896,10 @@
 {
 	const u32 now = tcp_time_stamp, wlen = sysctl_tcp_min_rtt_wlen * HZ;
 	struct rtt_meas *m = tcp_sk(sk)->rtt_min;
-	struct rtt_meas rttm = { .rtt = (rtt_us ? : 1), .ts = now };
+	struct rtt_meas rttm = {
+		.rtt = likely(rtt_us) ? rtt_us : jiffies_to_usecs(1),
+		.ts = now,
+	};
 	u32 elapsed;
 
 	/* Check if the new measurement updates the 1st, 2nd, or 3rd choices */

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 7f6ff03..487ac67 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c

@@ -1597,28 +1597,30 @@
 
 	if (sk->sk_state == TCP_NEW_SYN_RECV) {
 		struct request_sock *req = inet_reqsk(sk);
-		struct sock *nsk = NULL;
+		struct sock *nsk;
 
 		sk = req->rsk_listener;
-		if (tcp_v4_inbound_md5_hash(sk, skb))
-			goto discard_and_relse;
-		if (likely(sk->sk_state == TCP_LISTEN)) {
-			nsk = tcp_check_req(sk, skb, req, false);
-		} else {
+		if (unlikely(tcp_v4_inbound_md5_hash(sk, skb))) {
+			reqsk_put(req);
+			goto discard_it;
+		}
+		if (unlikely(sk->sk_state != TCP_LISTEN)) {
 			inet_csk_reqsk_queue_drop_and_put(sk, req);
 			goto lookup;
 		}
+		sock_hold(sk);
+		nsk = tcp_check_req(sk, skb, req, false);
 		if (!nsk) {
 			reqsk_put(req);
-			goto discard_it;
+			goto discard_and_relse;
 		}
 		if (nsk == sk) {
-			sock_hold(sk);
 			reqsk_put(req);
 		} else if (tcp_child_process(sk, nsk, skb)) {
 			tcp_v4_send_reset(nsk, skb);
-			goto discard_it;
+			goto discard_and_relse;
 		} else {
+			sock_put(sk);
 			return 0;
 		}
 	}

diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index c8cbc2b..a726d78 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c

@@ -550,7 +550,7 @@
 	 */
 	if (crtt > tp->srtt_us) {
 		/* Set RTO like tcp_rtt_estimator(), but from cached RTT. */
-		crtt /= 8 * USEC_PER_MSEC;
+		crtt /= 8 * USEC_PER_SEC / HZ;
 		inet_csk(sk)->icsk_rto = crtt + max(2 * crtt, tcp_rto_min(sk));
 	} else if (tp->srtt_us == 0) {
 		/* RFC6298: 5.7 We've failed to get a valid RTT sample from

diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 75632a9..9b02af2 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c

@@ -455,7 +455,7 @@
 
 		newtp->rcv_wup = newtp->copied_seq =
 		newtp->rcv_nxt = treq->rcv_isn + 1;
-		newtp->segs_in = 0;
+		newtp->segs_in = 1;
 
 		newtp->snd_sml = newtp->snd_una =
 		newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1;
@@ -815,6 +815,7 @@
 	int ret = 0;
 	int state = child->sk_state;
 
+	tcp_sk(child)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs);
 	if (!sock_owned_by_user(child)) {
 		ret = tcp_rcv_state_process(child, skb);
 		/* Wakeup parent, send SIGIO */

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index be0b218..95d2f19 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c

@@ -1048,8 +1048,10 @@
 	if (msg->msg_controllen) {
 		err = ip_cmsg_send(sock_net(sk), msg, &ipc,
 				   sk->sk_family == AF_INET6);
-		if (err)
+		if (unlikely(err)) {
+			kfree(ipc.opt);
 			return err;
+		}
 		if (ipc.opt)
 			free = 1;
 		connected = 0;

diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c
index 0ec0881..96599d1 100644
--- a/net/ipv4/udp_tunnel.c
+++ b/net/ipv4/udp_tunnel.c

@@ -89,6 +89,8 @@
 	uh->source = src_port;
 	uh->len = htons(skb->len);
 
+	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+
 	udp_set_csum(nocheck, skb, src, dst, skb->len);
 
 	iptunnel_xmit(sk, rt, skb, src, dst, IPPROTO_UDP, tos, ttl, df, xnet);

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 9efd9ff..bdd7eac 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c

@@ -583,7 +583,7 @@
 	if (err < 0)
 		goto errout;
 
-	err = EINVAL;
+	err = -EINVAL;
 	if (!tb[NETCONFA_IFINDEX])
 		goto errout;
 

diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index 5c5d23e..9508a20 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c

@@ -257,7 +257,11 @@
 						*fragoff = _frag_off;
 					return hp->nexthdr;
 				}
-				return -ENOENT;
+				if (!found)
+					return -ENOENT;
+				if (fragoff)
+					*fragoff = _frag_off;
+				break;
 			}
 			hdrlen = 8;
 		} else if (nexthdr == NEXTHDR_AUTH) {

diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index f37f18b..c0d4dc1 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c

@@ -777,6 +777,8 @@
 	__u32 mtu;
 	int err;
 
+	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+
 	if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
 		encap_limit = t->parms.encap_limit;
 
@@ -1512,6 +1514,7 @@
 	dev->destructor = ip6gre_dev_free;
 
 	dev->features |= NETIF_F_NETNS_LOCAL;
+	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
 }
 
 static int ip6gre_newlink(struct net *src_net, struct net_device *dev,

diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 137fca4..6c5dfec 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c

@@ -1180,6 +1180,8 @@
 	u8 tproto;
 	int err;
 
+	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+
 	tproto = ACCESS_ONCE(t->parms.proto);
 	if (tproto != IPPROTO_IPIP && tproto != 0)
 		return -1;

diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 5ee56d0..d64ee7e 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c

@@ -1574,9 +1574,8 @@
 		return NULL;
 
 	skb->priority = TC_PRIO_CONTROL;
-	skb->reserved_tailroom = skb_end_offset(skb) -
-				 min(mtu, skb_end_offset(skb));
 	skb_reserve(skb, hlen);
+	skb_tailroom_reserve(skb, mtu, tlen);
 
 	if (__ipv6_get_lladdr(idev, &addr_buf, IFA_F_TENTATIVE)) {
 		/* <draft-ietf-magma-mld-source-05.txt>:

diff --git a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
index 31ba7ca..051b6a6 100644
--- a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c

@@ -21,6 +21,10 @@
 #include <net/ipv6.h>
 #include <net/netfilter/ipv6/nf_nat_masquerade.h>
 
+#define MAX_WORK_COUNT	16
+
+static atomic_t v6_worker_count;
+
 unsigned int
 nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range *range,
 		       const struct net_device *out)
@@ -78,14 +82,78 @@
 	.notifier_call	= masq_device_event,
 };
 
+struct masq_dev_work {
+	struct work_struct work;
+	struct net *net;
+	int ifindex;
+};
+
+static void iterate_cleanup_work(struct work_struct *work)
+{
+	struct masq_dev_work *w;
+	long index;
+
+	w = container_of(work, struct masq_dev_work, work);
+
+	index = w->ifindex;
+	nf_ct_iterate_cleanup(w->net, device_cmp, (void *)index, 0, 0);
+
+	put_net(w->net);
+	kfree(w);
+	atomic_dec(&v6_worker_count);
+	module_put(THIS_MODULE);
+}
+
+/* ipv6 inet notifier is an atomic notifier, i.e. we cannot
+ * schedule.
+ *
+ * Unfortunately, nf_ct_iterate_cleanup can run for a long
+ * time if there are lots of conntracks and the system
+ * handles high softirq load, so it frequently calls cond_resched
+ * while iterating the conntrack table.
+ *
+ * So we defer nf_ct_iterate_cleanup walk to the system workqueue.
+ *
+ * As we can have 'a lot' of inet_events (depending on amount
+ * of ipv6 addresses being deleted), we also need to add an upper
+ * limit to the number of queued work items.
+ */
 static int masq_inet_event(struct notifier_block *this,
 			   unsigned long event, void *ptr)
 {
 	struct inet6_ifaddr *ifa = ptr;
-	struct netdev_notifier_info info;
+	const struct net_device *dev;
+	struct masq_dev_work *w;
+	struct net *net;
 
-	netdev_notifier_info_init(&info, ifa->idev->dev);
-	return masq_device_event(this, event, &info);
+	if (event != NETDEV_DOWN ||
+	    atomic_read(&v6_worker_count) >= MAX_WORK_COUNT)
+		return NOTIFY_DONE;
+
+	dev = ifa->idev->dev;
+	net = maybe_get_net(dev_net(dev));
+	if (!net)
+		return NOTIFY_DONE;
+
+	if (!try_module_get(THIS_MODULE))
+		goto err_module;
+
+	w = kmalloc(sizeof(*w), GFP_ATOMIC);
+	if (w) {
+		atomic_inc(&v6_worker_count);
+
+		INIT_WORK(&w->work, iterate_cleanup_work);
+		w->ifindex = dev->ifindex;
+		w->net = net;
+		schedule_work(&w->work);
+
+		return NOTIFY_DONE;
+	}
+
+	module_put(THIS_MODULE);
+ err_module:
+	put_net(net);
+	return NOTIFY_DONE;
 }
 
 static struct notifier_block masq_inet_notifier = {

diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 1a5a70f..5c8c842 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c

@@ -1387,7 +1387,7 @@
 
 	if (sk->sk_state == TCP_NEW_SYN_RECV) {
 		struct request_sock *req = inet_reqsk(sk);
-		struct sock *nsk = NULL;
+		struct sock *nsk;
 
 		sk = req->rsk_listener;
 		tcp_v6_fill_cb(skb, hdr, th);
@@ -1395,24 +1395,24 @@
 			reqsk_put(req);
 			goto discard_it;
 		}
-		if (likely(sk->sk_state == TCP_LISTEN)) {
-			nsk = tcp_check_req(sk, skb, req, false);
-		} else {
+		if (unlikely(sk->sk_state != TCP_LISTEN)) {
 			inet_csk_reqsk_queue_drop_and_put(sk, req);
 			goto lookup;
 		}
+		sock_hold(sk);
+		nsk = tcp_check_req(sk, skb, req, false);
 		if (!nsk) {
 			reqsk_put(req);
-			goto discard_it;
+			goto discard_and_relse;
 		}
 		if (nsk == sk) {
-			sock_hold(sk);
 			reqsk_put(req);
 			tcp_v6_restore_cb(skb);
 		} else if (tcp_child_process(sk, nsk, skb)) {
 			tcp_v6_send_reset(nsk, skb);
-			goto discard_it;
+			goto discard_and_relse;
 		} else {
+			sock_put(sk);
 			return 0;
 		}
 	}

diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 22e28a4..422dd01 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c

@@ -962,11 +962,9 @@
 		ret = udpv6_queue_rcv_skb(sk, skb);
 		sock_put(sk);
 
-		/* a return value > 0 means to resubmit the input, but
-		 * it wants the return to be -protocol, or 0
-		 */
+		/* a return value > 0 means to resubmit the input */
 		if (ret > 0)
-			return -ret;
+			return ret;
 
 		return 0;
 	}

diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index f93c5be..2caaa84 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c

@@ -124,8 +124,13 @@
 	ret = l2tp_nl_tunnel_send(msg, info->snd_portid, info->snd_seq,
 				  NLM_F_ACK, tunnel, cmd);
 
-	if (ret >= 0)
-		return genlmsg_multicast_allns(family, msg, 0,	0, GFP_ATOMIC);
+	if (ret >= 0) {
+		ret = genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC);
+		/* We don't care if no one is listening */
+		if (ret == -ESRCH)
+			ret = 0;
+		return ret;
+	}
 
 	nlmsg_free(msg);
 
@@ -147,8 +152,13 @@
 	ret = l2tp_nl_session_send(msg, info->snd_portid, info->snd_seq,
 				   NLM_F_ACK, session, cmd);
 
-	if (ret >= 0)
-		return genlmsg_multicast_allns(family, msg, 0,	0, GFP_ATOMIC);
+	if (ret >= 0) {
+		ret = genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC);
+		/* We don't care if no one is listening */
+		if (ret == -ESRCH)
+			ret = 0;
+		return ret;
+	}
 
 	nlmsg_free(msg);
 

diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 10ad4ac..367784b 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c

@@ -291,7 +291,7 @@
 	}
 
 	/* prepare A-MPDU MLME for Rx aggregation */
-	tid_agg_rx = kmalloc(sizeof(struct tid_ampdu_rx), GFP_KERNEL);
+	tid_agg_rx = kzalloc(sizeof(*tid_agg_rx), GFP_KERNEL);
 	if (!tid_agg_rx)
 		goto end;
 

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index b84f6aa..f006f4a 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h

@@ -92,7 +92,7 @@
 	u16 extra_len;
 	u16 last_frag;
 	u8 rx_queue;
-	bool ccmp; /* Whether fragments were encrypted with CCMP */
+	bool check_sequential_pn; /* needed for CCMP/GCMP */
 	u8 last_pn[6]; /* PN of the last fragment if CCMP was used */
 };
 

diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c
index 3ece7d1..b54f398 100644
--- a/net/mac80211/rc80211_minstrel.c
+++ b/net/mac80211/rc80211_minstrel.c

@@ -711,7 +711,7 @@
 	 * computing cur_tp
 	 */
 	tmp_mrs = &mi->r[idx].stats;
-	tmp_cur_tp = minstrel_get_tp_avg(&mi->r[idx], tmp_mrs->prob_ewma);
+	tmp_cur_tp = minstrel_get_tp_avg(&mi->r[idx], tmp_mrs->prob_ewma) * 10;
 	tmp_cur_tp = tmp_cur_tp * 1200 * 8 / 1024;
 
 	return tmp_cur_tp;

diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index 3928dbd..370d677 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c

@@ -414,15 +414,16 @@
 	    (max_tp_group != MINSTREL_CCK_GROUP))
 		return;
 
+	max_gpr_group = mg->max_group_prob_rate / MCS_GROUP_RATES;
+	max_gpr_idx = mg->max_group_prob_rate % MCS_GROUP_RATES;
+	max_gpr_prob = mi->groups[max_gpr_group].rates[max_gpr_idx].prob_ewma;
+
 	if (mrs->prob_ewma > MINSTREL_FRAC(75, 100)) {
 		cur_tp_avg = minstrel_ht_get_tp_avg(mi, cur_group, cur_idx,
 						    mrs->prob_ewma);
 		if (cur_tp_avg > tmp_tp_avg)
 			mi->max_prob_rate = index;
 
-		max_gpr_group = mg->max_group_prob_rate / MCS_GROUP_RATES;
-		max_gpr_idx = mg->max_group_prob_rate %	MCS_GROUP_RATES;
-		max_gpr_prob = mi->groups[max_gpr_group].rates[max_gpr_idx].prob_ewma;
 		max_gpr_tp_avg = minstrel_ht_get_tp_avg(mi, max_gpr_group,
 							max_gpr_idx,
 							max_gpr_prob);
@@ -431,7 +432,7 @@
 	} else {
 		if (mrs->prob_ewma > tmp_prob)
 			mi->max_prob_rate = index;
-		if (mrs->prob_ewma > mg->rates[mg->max_group_prob_rate].prob_ewma)
+		if (mrs->prob_ewma > max_gpr_prob)
 			mg->max_group_prob_rate = index;
 	}
 }
@@ -691,7 +692,7 @@
 	if (likely(sta->ampdu_mlme.tid_tx[tid]))
 		return;
 
-	ieee80211_start_tx_ba_session(pubsta, tid, 5000);
+	ieee80211_start_tx_ba_session(pubsta, tid, 0);
 }
 
 static void
@@ -871,7 +872,7 @@
 	 *  - if station is in dynamic SMPS (and streams > 1)
 	 *  - for fallback rates, to increase chances of getting through
 	 */
-	if (offset > 0 &&
+	if (offset > 0 ||
 	    (mi->sta->smps_mode == IEEE80211_SMPS_DYNAMIC &&
 	     group->streams > 1)) {
 		ratetbl->rate[offset].count = ratetbl->rate[offset].count_rts;
@@ -1334,7 +1335,8 @@
 	prob = mi->groups[i].rates[j].prob_ewma;
 
 	/* convert tp_avg from pkt per second in kbps */
-	tp_avg = minstrel_ht_get_tp_avg(mi, i, j, prob) * AVG_PKT_SIZE * 8 / 1024;
+	tp_avg = minstrel_ht_get_tp_avg(mi, i, j, prob) * 10;
+	tp_avg = tp_avg * AVG_PKT_SIZE * 8 / 1024;
 
 	return tp_avg;
 }

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index bc08185..60d093f 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c

@@ -1753,7 +1753,7 @@
 	entry->seq = seq;
 	entry->rx_queue = rx_queue;
 	entry->last_frag = frag;
-	entry->ccmp = 0;
+	entry->check_sequential_pn = false;
 	entry->extra_len = 0;
 
 	return entry;
@@ -1849,15 +1849,27 @@
 						 rx->seqno_idx, &(rx->skb));
 		if (rx->key &&
 		    (rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP ||
-		     rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP_256) &&
+		     rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP_256 ||
+		     rx->key->conf.cipher == WLAN_CIPHER_SUITE_GCMP ||
+		     rx->key->conf.cipher == WLAN_CIPHER_SUITE_GCMP_256) &&
 		    ieee80211_has_protected(fc)) {
 			int queue = rx->security_idx;
-			/* Store CCMP PN so that we can verify that the next
-			 * fragment has a sequential PN value. */
-			entry->ccmp = 1;
+
+			/* Store CCMP/GCMP PN so that we can verify that the
+			 * next fragment has a sequential PN value.
+			 */
+			entry->check_sequential_pn = true;
 			memcpy(entry->last_pn,
 			       rx->key->u.ccmp.rx_pn[queue],
 			       IEEE80211_CCMP_PN_LEN);
+			BUILD_BUG_ON(offsetof(struct ieee80211_key,
+					      u.ccmp.rx_pn) !=
+				     offsetof(struct ieee80211_key,
+					      u.gcmp.rx_pn));
+			BUILD_BUG_ON(sizeof(rx->key->u.ccmp.rx_pn[queue]) !=
+				     sizeof(rx->key->u.gcmp.rx_pn[queue]));
+			BUILD_BUG_ON(IEEE80211_CCMP_PN_LEN !=
+				     IEEE80211_GCMP_PN_LEN);
 		}
 		return RX_QUEUED;
 	}
@@ -1872,15 +1884,21 @@
 		return RX_DROP_MONITOR;
 	}
 
-	/* Verify that MPDUs within one MSDU have sequential PN values.
-	 * (IEEE 802.11i, 8.3.3.4.5) */
-	if (entry->ccmp) {
+	/* "The receiver shall discard MSDUs and MMPDUs whose constituent
+	 *  MPDU PN values are not incrementing in steps of 1."
+	 * see IEEE P802.11-REVmc/D5.0, 12.5.3.4.4, item d (for CCMP)
+	 * and IEEE P802.11-REVmc/D5.0, 12.5.5.4.4, item d (for GCMP)
+	 */
+	if (entry->check_sequential_pn) {
 		int i;
 		u8 pn[IEEE80211_CCMP_PN_LEN], *rpn;
 		int queue;
+
 		if (!rx->key ||
 		    (rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP &&
-		     rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP_256))
+		     rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP_256 &&
+		     rx->key->conf.cipher != WLAN_CIPHER_SUITE_GCMP &&
+		     rx->key->conf.cipher != WLAN_CIPHER_SUITE_GCMP_256))
 			return RX_DROP_UNUSABLE;
 		memcpy(pn, entry->last_pn, IEEE80211_CCMP_PN_LEN);
 		for (i = IEEE80211_CCMP_PN_LEN - 1; i >= 0; i--) {
@@ -3366,6 +3384,7 @@
 				return false;
 			/* ignore action frames to TDLS-peers */
 			if (ieee80211_is_action(hdr->frame_control) &&
+			    !is_broadcast_ether_addr(bssid) &&
 			    !ether_addr_equal(bssid, hdr->addr1))
 				return false;
 		}

diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 8c067e6..95e757c 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig

@@ -891,7 +891,7 @@
 	depends on IPV6 || IPV6=n
 	depends on !NF_CONNTRACK || NF_CONNTRACK
 	select NF_DUP_IPV4
-	select NF_DUP_IPV6 if IP6_NF_IPTABLES != n
+	select NF_DUP_IPV6 if IPV6
 	---help---
 	This option adds a "TEE" target with which a packet can be cloned and
 	this clone be rerouted to another nexthop.

diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 58882de..f60b4fd 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c

@@ -1412,6 +1412,7 @@
 		}
 		spin_unlock(lockp);
 		local_bh_enable();
+		cond_resched();
 	}
 
 	for_each_possible_cpu(cpu) {
@@ -1424,6 +1425,7 @@
 				set_bit(IPS_DYING_BIT, &ct->status);
 		}
 		spin_unlock_bh(&pcpu->lock);
+		cond_resched();
 	}
 	return NULL;
 found:
@@ -1440,6 +1442,8 @@
 	struct nf_conn *ct;
 	unsigned int bucket = 0;
 
+	might_sleep();
+
 	while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) {
 		/* Time to push up daises... */
 		if (del_timer(&ct->timeout))
@@ -1448,6 +1452,7 @@
 		/* ... else the timer will get him soon. */
 
 		nf_ct_put(ct);
+		cond_resched();
 	}
 }
 EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup);

diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index a7ba233..857ae89 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c

@@ -311,14 +311,14 @@
 #endif
 		{
 			nfnl_unlock(subsys_id);
-			netlink_ack(skb, nlh, -EOPNOTSUPP);
+			netlink_ack(oskb, nlh, -EOPNOTSUPP);
 			return kfree_skb(skb);
 		}
 	}
 
 	if (!ss->commit || !ss->abort) {
 		nfnl_unlock(subsys_id);
-		netlink_ack(skb, nlh, -EOPNOTSUPP);
+		netlink_ack(oskb, nlh, -EOPNOTSUPP);
 		return kfree_skb(skb);
 	}
 
@@ -328,10 +328,12 @@
 		nlh = nlmsg_hdr(skb);
 		err = 0;
 
-		if (nlmsg_len(nlh) < sizeof(struct nfgenmsg) ||
-		    skb->len < nlh->nlmsg_len) {
-			err = -EINVAL;
-			goto ack;
+		if (nlh->nlmsg_len < NLMSG_HDRLEN ||
+		    skb->len < nlh->nlmsg_len ||
+		    nlmsg_len(nlh) < sizeof(struct nfgenmsg)) {
+			nfnl_err_reset(&err_list);
+			status |= NFNL_BATCH_FAILURE;
+			goto done;
 		}
 
 		/* Only requests are handled by the kernel */
@@ -406,7 +408,7 @@
 				 * pointing to the batch header.
 				 */
 				nfnl_err_reset(&err_list);
-				netlink_ack(skb, nlmsg_hdr(oskb), -ENOMEM);
+				netlink_ack(oskb, nlmsg_hdr(oskb), -ENOMEM);
 				status |= NFNL_BATCH_FAILURE;
 				goto done;
 			}

diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 94837d2..2671b9d 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c

@@ -312,7 +312,7 @@
 			hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode)
 				untimeout(h, timeout);
 		}
-		nf_conntrack_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
+		spin_unlock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
 	}
 	local_bh_enable();
 }

diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
index c7808fc..c9743f7 100644
--- a/net/netfilter/nft_counter.c
+++ b/net/netfilter/nft_counter.c

@@ -100,7 +100,7 @@
 
 	cpu_stats = netdev_alloc_pcpu_stats(struct nft_counter_percpu);
 	if (cpu_stats == NULL)
-		return ENOMEM;
+		return -ENOMEM;
 
 	preempt_disable();
 	this_cpu = this_cpu_ptr(cpu_stats);
@@ -138,7 +138,7 @@
 	cpu_stats = __netdev_alloc_pcpu_stats(struct nft_counter_percpu,
 					      GFP_ATOMIC);
 	if (cpu_stats == NULL)
-		return ENOMEM;
+		return -ENOMEM;
 
 	preempt_disable();
 	this_cpu = this_cpu_ptr(cpu_stats);

diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
index 3eff7b6..6e57a39 100644
--- a/net/netfilter/xt_TEE.c
+++ b/net/netfilter/xt_TEE.c

@@ -38,7 +38,7 @@
 	return XT_CONTINUE;
 }
 
-#if IS_ENABLED(CONFIG_NF_DUP_IPV6)
+#if IS_ENABLED(CONFIG_IPV6)
 static unsigned int
 tee_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 {
@@ -131,7 +131,7 @@
 		.destroy    = tee_tg_destroy,
 		.me         = THIS_MODULE,
 	},
-#if IS_ENABLED(CONFIG_NF_DUP_IPV6)
+#if IS_ENABLED(CONFIG_IPV6)
 	{
 		.name       = "TEE",
 		.revision   = 1,

diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
index de9cb19..5eb7694 100644
--- a/net/openvswitch/vport-vxlan.c
+++ b/net/openvswitch/vport-vxlan.c

@@ -90,7 +90,7 @@
 	int err;
 	struct vxlan_config conf = {
 		.no_share = true,
-		.flags = VXLAN_F_COLLECT_METADATA,
+		.flags = VXLAN_F_COLLECT_METADATA | VXLAN_F_UDP_ZERO_CSUM6_RX,
 		/* Don't restrict the packets that can be sent by MTU */
 		.mtu = IP_MAX_MTU,
 	};

diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index d058696..6b70399 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c

@@ -62,6 +62,7 @@
 	struct xt_tgdtor_param par = {
 		.target   = t->u.kernel.target,
 		.targinfo = t->data,
+		.family   = NFPROTO_IPV4,
 	};
 	if (par.target->destroy != NULL)
 		par.target->destroy(&par);
@@ -195,6 +196,7 @@
 	par.hooknum  = ipt->tcfi_hook;
 	par.target   = ipt->tcfi_t->u.kernel.target;
 	par.targinfo = ipt->tcfi_t->data;
+	par.family   = NFPROTO_IPV4;
 	ret = par.target->target(skb, &par);
 
 	switch (ret) {

diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index b5c2cf2..af1acf0 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c

@@ -1852,6 +1852,7 @@
 	}
 
 	tp = old_tp;
+	protocol = tc_skb_protocol(skb);
 	goto reclassify;
 #endif
 }

diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index ec52912..ce46f1c 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c

@@ -526,6 +526,8 @@
 		}
 		return 0;
 	}
+	if (addr1->v6.sin6_port != addr2->v6.sin6_port)
+		return 0;
 	if (!ipv6_addr_equal(&addr1->v6.sin6_addr, &addr2->v6.sin6_addr))
 		return 0;
 	/* If this is a linklocal address, compare the scope_id. */

diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index ded7d93..963dffc 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c

@@ -482,7 +482,7 @@
 static int sctp_remaddr_seq_show(struct seq_file *seq, void *v)
 {
 	struct sctp_association *assoc;
-	struct sctp_transport *tsp;
+	struct sctp_transport *transport, *tsp;
 
 	if (v == SEQ_START_TOKEN) {
 		seq_printf(seq, "ADDR ASSOC_ID HB_ACT RTO MAX_PATH_RTX "
@@ -490,10 +490,10 @@
 		return 0;
 	}
 
-	tsp = (struct sctp_transport *)v;
-	if (!sctp_transport_hold(tsp))
+	transport = (struct sctp_transport *)v;
+	if (!sctp_transport_hold(transport))
 		return 0;
-	assoc = tsp->asoc;
+	assoc = transport->asoc;
 
 	list_for_each_entry_rcu(tsp, &assoc->peer.transport_addr_list,
 				transports) {
@@ -546,7 +546,7 @@
 		seq_printf(seq, "\n");
 	}
 
-	sctp_transport_put(tsp);
+	sctp_transport_put(transport);
 
 	return 0;
 }

diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index ab0d538..1099e99 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c

@@ -60,6 +60,8 @@
 #include <net/inet_common.h>
 #include <net/inet_ecn.h>
 
+#define MAX_SCTP_PORT_HASH_ENTRIES (64 * 1024)
+
 /* Global data structures. */
 struct sctp_globals sctp_globals __read_mostly;
 
@@ -1355,6 +1357,8 @@
 	unsigned long limit;
 	int max_share;
 	int order;
+	int num_entries;
+	int max_entry_order;
 
 	sock_skb_cb_check_size(sizeof(struct sctp_ulpevent));
 
@@ -1407,14 +1411,24 @@
 
 	/* Size and allocate the association hash table.
 	 * The methodology is similar to that of the tcp hash tables.
+	 * Though not identical.  Start by getting a goal size
 	 */
 	if (totalram_pages >= (128 * 1024))
 		goal = totalram_pages >> (22 - PAGE_SHIFT);
 	else
 		goal = totalram_pages >> (24 - PAGE_SHIFT);
 
-	for (order = 0; (1UL << order) < goal; order++)
-		;
+	/* Then compute the page order for said goal */
+	order = get_order(goal);
+
+	/* Now compute the required page order for the maximum sized table we
+	 * want to create
+	 */
+	max_entry_order = get_order(MAX_SCTP_PORT_HASH_ENTRIES *
+				    sizeof(struct sctp_bind_hashbucket));
+
+	/* Limit the page order by that maximum hash table size */
+	order = min(order, max_entry_order);
 
 	/* Allocate and initialize the endpoint hash table.  */
 	sctp_ep_hashsize = 64;
@@ -1430,20 +1444,35 @@
 		INIT_HLIST_HEAD(&sctp_ep_hashtable[i].chain);
 	}
 
-	/* Allocate and initialize the SCTP port hash table.  */
+	/* Allocate and initialize the SCTP port hash table.
+	 * Note that order is initalized to start at the max sized
+	 * table we want to support.  If we can't get that many pages
+	 * reduce the order and try again
+	 */
 	do {
-		sctp_port_hashsize = (1UL << order) * PAGE_SIZE /
-					sizeof(struct sctp_bind_hashbucket);
-		if ((sctp_port_hashsize > (64 * 1024)) && order > 0)
-			continue;
 		sctp_port_hashtable = (struct sctp_bind_hashbucket *)
 			__get_free_pages(GFP_KERNEL | __GFP_NOWARN, order);
 	} while (!sctp_port_hashtable && --order > 0);
+
 	if (!sctp_port_hashtable) {
 		pr_err("Failed bind hash alloc\n");
 		status = -ENOMEM;
 		goto err_bhash_alloc;
 	}
+
+	/* Now compute the number of entries that will fit in the
+	 * port hash space we allocated
+	 */
+	num_entries = (1UL << order) * PAGE_SIZE /
+		      sizeof(struct sctp_bind_hashbucket);
+
+	/* And finish by rounding it down to the nearest power of two
+	 * this wastes some memory of course, but its needed because
+	 * the hash function operates based on the assumption that
+	 * that the number of entries is a power of two
+	 */
+	sctp_port_hashsize = rounddown_pow_of_two(num_entries);
+
 	for (i = 0; i < sctp_port_hashsize; i++) {
 		spin_lock_init(&sctp_port_hashtable[i].lock);
 		INIT_HLIST_HEAD(&sctp_port_hashtable[i].chain);
@@ -1452,7 +1481,8 @@
 	if (sctp_transport_hashtable_init())
 		goto err_thash_alloc;
 
-	pr_info("Hash tables configured (bind %d)\n", sctp_port_hashsize);
+	pr_info("Hash tables configured (bind %d/%d)\n", sctp_port_hashsize,
+		num_entries);
 
 	sctp_sysctl_register();
 

diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 799e65b..cabf586 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c

@@ -740,7 +740,7 @@
 		default:
 			printk(KERN_CRIT "%s: bad return from "
 				"gss_fill_context: %zd\n", __func__, err);
-			BUG();
+			gss_msg->msg.errno = -EIO;
 		}
 		goto err_release_msg;
 	}

diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 2b32fd6..273bc3a 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c

@@ -1225,7 +1225,7 @@
 	if (bp[0] == '\\' && bp[1] == 'x') {
 		/* HEX STRING */
 		bp += 2;
-		while (len < bufsize) {
+		while (len < bufsize - 1) {
 			int h, l;
 
 			h = hex_to_bin(bp[0]);

diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index cc1251d..2dcd764 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c

@@ -341,6 +341,8 @@
 	rqst->rq_reply_bytes_recvd = 0;
 	rqst->rq_bytes_sent = 0;
 	rqst->rq_xid = headerp->rm_xid;
+
+	rqst->rq_private_buf.len = size;
 	set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
 
 	buf = &rqst->rq_rcv_buf;

diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 47f7da5..8b5833c 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c

@@ -1093,8 +1093,11 @@
 		.cb = cb,
 		.idx = idx,
 	};
+	int err;
 
-	switchdev_port_obj_dump(dev, &dump.fdb.obj, switchdev_port_fdb_dump_cb);
+	err = switchdev_port_obj_dump(dev, &dump.fdb.obj,
+				      switchdev_port_fdb_dump_cb);
+	cb->args[1] = err;
 	return dump.idx;
 }
 EXPORT_SYMBOL_GPL(switchdev_port_fdb_dump);

diff --git a/net/tipc/link.c b/net/tipc/link.c
index 0c2944f..347cdc9 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c

@@ -1973,8 +1973,10 @@
 
 	hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family,
 			  NLM_F_MULTI, TIPC_NL_LINK_GET);
-	if (!hdr)
+	if (!hdr) {
+		tipc_bcast_unlock(net);
 		return -EMSGSIZE;
+	}
 
 	attrs = nla_nest_start(msg->skb, TIPC_NLA_LINK);
 	if (!attrs)

diff --git a/net/tipc/node.c b/net/tipc/node.c
index fa97d96..9d7a16f 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c

@@ -346,12 +346,6 @@
 	skb_queue_head_init(&n->bc_entry.inputq2);
 	for (i = 0; i < MAX_BEARERS; i++)
 		spin_lock_init(&n->links[i].lock);
-	hlist_add_head_rcu(&n->hash, &tn->node_htable[tipc_hashfn(addr)]);
-	list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
-		if (n->addr < temp_node->addr)
-			break;
-	}
-	list_add_tail_rcu(&n->list, &temp_node->list);
 	n->state = SELF_DOWN_PEER_LEAVING;
 	n->signature = INVALID_NODE_SIG;
 	n->active_links[0] = INVALID_BEARER_ID;
@@ -372,6 +366,12 @@
 	tipc_node_get(n);
 	setup_timer(&n->timer, tipc_node_timeout, (unsigned long)n);
 	n->keepalive_intv = U32_MAX;
+	hlist_add_head_rcu(&n->hash, &tn->node_htable[tipc_hashfn(addr)]);
+	list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
+		if (n->addr < temp_node->addr)
+			break;
+	}
+	list_add_tail_rcu(&n->list, &temp_node->list);
 exit:
 	spin_unlock_bh(&tn->node_list_lock);
 	return n;

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 69c2905..4d420bb 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c

@@ -673,7 +673,7 @@
 	struct tipc_sock *tsk = tipc_sk(sk);
 	struct net *net = sock_net(sk);
 	struct tipc_msg *mhdr = &tsk->phdr;
-	struct sk_buff_head *pktchain = &sk->sk_write_queue;
+	struct sk_buff_head pktchain;
 	struct iov_iter save = msg->msg_iter;
 	uint mtu;
 	int rc;
@@ -687,14 +687,16 @@
 	msg_set_nameupper(mhdr, seq->upper);
 	msg_set_hdr_sz(mhdr, MCAST_H_SIZE);
 
+	skb_queue_head_init(&pktchain);
+
 new_mtu:
 	mtu = tipc_bcast_get_mtu(net);
-	rc = tipc_msg_build(mhdr, msg, 0, dsz, mtu, pktchain);
+	rc = tipc_msg_build(mhdr, msg, 0, dsz, mtu, &pktchain);
 	if (unlikely(rc < 0))
 		return rc;
 
 	do {
-		rc = tipc_bcast_xmit(net, pktchain);
+		rc = tipc_bcast_xmit(net, &pktchain);
 		if (likely(!rc))
 			return dsz;
 
@@ -704,7 +706,7 @@
 			if (!rc)
 				continue;
 		}
-		__skb_queue_purge(pktchain);
+		__skb_queue_purge(&pktchain);
 		if (rc == -EMSGSIZE) {
 			msg->msg_iter = save;
 			goto new_mtu;
@@ -863,7 +865,7 @@
 	struct net *net = sock_net(sk);
 	struct tipc_msg *mhdr = &tsk->phdr;
 	u32 dnode, dport;
-	struct sk_buff_head *pktchain = &sk->sk_write_queue;
+	struct sk_buff_head pktchain;
 	struct sk_buff *skb;
 	struct tipc_name_seq *seq;
 	struct iov_iter save;
@@ -924,17 +926,18 @@
 		msg_set_hdr_sz(mhdr, BASIC_H_SIZE);
 	}
 
+	skb_queue_head_init(&pktchain);
 	save = m->msg_iter;
 new_mtu:
 	mtu = tipc_node_get_mtu(net, dnode, tsk->portid);
-	rc = tipc_msg_build(mhdr, m, 0, dsz, mtu, pktchain);
+	rc = tipc_msg_build(mhdr, m, 0, dsz, mtu, &pktchain);
 	if (rc < 0)
 		return rc;
 
 	do {
-		skb = skb_peek(pktchain);
+		skb = skb_peek(&pktchain);
 		TIPC_SKB_CB(skb)->wakeup_pending = tsk->link_cong;
-		rc = tipc_node_xmit(net, pktchain, dnode, tsk->portid);
+		rc = tipc_node_xmit(net, &pktchain, dnode, tsk->portid);
 		if (likely(!rc)) {
 			if (sock->state != SS_READY)
 				sock->state = SS_CONNECTING;
@@ -946,7 +949,7 @@
 			if (!rc)
 				continue;
 		}
-		__skb_queue_purge(pktchain);
+		__skb_queue_purge(&pktchain);
 		if (rc == -EMSGSIZE) {
 			m->msg_iter = save;
 			goto new_mtu;
@@ -1016,7 +1019,7 @@
 	struct net *net = sock_net(sk);
 	struct tipc_sock *tsk = tipc_sk(sk);
 	struct tipc_msg *mhdr = &tsk->phdr;
-	struct sk_buff_head *pktchain = &sk->sk_write_queue;
+	struct sk_buff_head pktchain;
 	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
 	u32 portid = tsk->portid;
 	int rc = -EINVAL;
@@ -1044,17 +1047,19 @@
 
 	timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
 	dnode = tsk_peer_node(tsk);
+	skb_queue_head_init(&pktchain);
 
 next:
 	save = m->msg_iter;
 	mtu = tsk->max_pkt;
 	send = min_t(uint, dsz - sent, TIPC_MAX_USER_MSG_SIZE);
-	rc = tipc_msg_build(mhdr, m, sent, send, mtu, pktchain);
+	rc = tipc_msg_build(mhdr, m, sent, send, mtu, &pktchain);
 	if (unlikely(rc < 0))
 		return rc;
+
 	do {
 		if (likely(!tsk_conn_cong(tsk))) {
-			rc = tipc_node_xmit(net, pktchain, dnode, portid);
+			rc = tipc_node_xmit(net, &pktchain, dnode, portid);
 			if (likely(!rc)) {
 				tsk->sent_unacked++;
 				sent += send;
@@ -1063,7 +1068,7 @@
 				goto next;
 			}
 			if (rc == -EMSGSIZE) {
-				__skb_queue_purge(pktchain);
+				__skb_queue_purge(&pktchain);
 				tsk->max_pkt = tipc_node_get_mtu(net, dnode,
 								 portid);
 				m->msg_iter = save;
@@ -1077,7 +1082,7 @@
 		rc = tipc_wait_for_sndpkt(sock, &timeo);
 	} while (!rc);
 
-	__skb_queue_purge(pktchain);
+	__skb_queue_purge(&pktchain);
 	return sent ? sent : rc;
 }
 

diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 69ee2ee..f9ff73a 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c

@@ -296,7 +296,8 @@
 	if (tipc_subscrp_create(net, (struct tipc_subscr *)buf, subscrb, &sub))
 		return tipc_conn_terminate(tn->topsrv, subscrb->conid);
 
-	tipc_nametbl_subscribe(sub);
+	if (sub)
+		tipc_nametbl_subscribe(sub);
 }
 
 /* Handle one request to establish a new subscriber */

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 29be035..f75f847 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c

@@ -1781,7 +1781,12 @@
 			goto out_unlock;
 	}
 
-	if (unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
+	/* other == sk && unix_peer(other) != sk if
+	 * - unix_peer(sk) == NULL, destination address bound to sk
+	 * - unix_peer(sk) == sk by time of get but disconnected before lock
+	 */
+	if (other != sk &&
+	    unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
 		if (timeo) {
 			timeo = unix_wait_for_peer(other, timeo);
 
@@ -2277,13 +2282,15 @@
 	size_t size = state->size;
 	unsigned int last_len;
 
-	err = -EINVAL;
-	if (sk->sk_state != TCP_ESTABLISHED)
+	if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
+		err = -EINVAL;
 		goto out;
+	}
 
-	err = -EOPNOTSUPP;
-	if (flags & MSG_OOB)
+	if (unlikely(flags & MSG_OOB)) {
+		err = -EOPNOTSUPP;
 		goto out;
+	}
 
 	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
 	timeo = sock_rcvtimeo(sk, noblock);
@@ -2305,6 +2312,7 @@
 		bool drop_skb;
 		struct sk_buff *skb, *last;
 
+redo:
 		unix_state_lock(sk);
 		if (sock_flag(sk, SOCK_DEAD)) {
 			err = -ECONNRESET;
@@ -2329,9 +2337,11 @@
 				goto unlock;
 
 			unix_state_unlock(sk);
-			err = -EAGAIN;
-			if (!timeo)
+			if (!timeo) {
+				err = -EAGAIN;
 				break;
+			}
+
 			mutex_unlock(&u->readlock);
 
 			timeo = unix_stream_data_wait(sk, timeo, last,
@@ -2344,7 +2354,7 @@
 			}
 
 			mutex_lock(&u->readlock);
-			continue;
+			goto redo;
 unlock:
 			unix_state_unlock(sk);
 			break;

diff --git a/net/unix/diag.c b/net/unix/diag.c
index c512f64..4d96797 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c

@@ -220,7 +220,7 @@
 	return skb->len;
 }
 
-static struct sock *unix_lookup_by_ino(int ino)
+static struct sock *unix_lookup_by_ino(unsigned int ino)
 {
 	int i;
 	struct sock *sk;

diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 7fd1220..bbe65dc 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c

@@ -1557,8 +1557,6 @@
 	if (err < 0)
 		goto out;
 
-	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
-
 	while (total_written < len) {
 		ssize_t written;
 
@@ -1578,7 +1576,9 @@
 				goto out_wait;
 
 			release_sock(sk);
+			prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 			timeout = schedule_timeout(timeout);
+			finish_wait(sk_sleep(sk), &wait);
 			lock_sock(sk);
 			if (signal_pending(current)) {
 				err = sock_intr_errno(timeout);
@@ -1588,8 +1588,6 @@
 				goto out_wait;
 			}
 
-			prepare_to_wait(sk_sleep(sk), &wait,
-					TASK_INTERRUPTIBLE);
 		}
 
 		/* These checks occur both as part of and after the loop
@@ -1635,7 +1633,6 @@
 out_wait:
 	if (total_written > 0)
 		err = total_written;
-	finish_wait(sk_sleep(sk), &wait);
 out:
 	release_sock(sk);
 	return err;
@@ -1716,7 +1713,6 @@
 	if (err < 0)
 		goto out;
 
-	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 
 	while (1) {
 		s64 ready = vsock_stream_has_data(vsk);
@@ -1727,7 +1723,7 @@
 			 */
 
 			err = -ENOMEM;
-			goto out_wait;
+			goto out;
 		} else if (ready > 0) {
 			ssize_t read;
 
@@ -1750,7 +1746,7 @@
 					vsk, target, read,
 					!(flags & MSG_PEEK), &recv_data);
 			if (err < 0)
-				goto out_wait;
+				goto out;
 
 			if (read >= target || flags & MSG_PEEK)
 				break;
@@ -1773,7 +1769,9 @@
 				break;
 
 			release_sock(sk);
+			prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 			timeout = schedule_timeout(timeout);
+			finish_wait(sk_sleep(sk), &wait);
 			lock_sock(sk);
 
 			if (signal_pending(current)) {
@@ -1783,9 +1781,6 @@
 				err = -EAGAIN;
 				break;
 			}
-
-			prepare_to_wait(sk_sleep(sk), &wait,
-					TASK_INTERRUPTIBLE);
 		}
 	}
 
@@ -1816,8 +1811,6 @@
 		err = copied;
 	}
 
-out_wait:
-	finish_wait(sk_sleep(sk), &wait);
 out:
 	release_sock(sk);
 	return err;

diff --git a/net/wireless/core.c b/net/wireless/core.c
index b091551..8f0bac7 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c

@@ -1147,6 +1147,8 @@
 		return NOTIFY_DONE;
 	}
 
+	wireless_nlevent_flush();
+
 	return NOTIFY_OK;
 }
 

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index d4786f2..711cb7a 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c

@@ -7547,7 +7547,7 @@
 
 		if ((ibss.chandef.width != NL80211_CHAN_WIDTH_20_NOHT) &&
 		    no_ht) {
-			kfree(connkeys);
+			kzfree(connkeys);
 			return -EINVAL;
 		}
 	}

diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index 8020b5b..d49ed76 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c

@@ -917,6 +917,12 @@
 
 	nl80211_send_disconnected(rdev, dev, reason, ie, ie_len, from_ap);
 
+	/* stop critical protocol if supported */
+	if (rdev->ops->crit_proto_stop && rdev->crit_proto_nlportid) {
+		rdev->crit_proto_nlportid = 0;
+		rdev_crit_proto_stop(rdev, wdev);
+	}
+
 	/*
 	 * Delete all the keys ... pairwise keys can't really
 	 * exist any more anyway, but default keys might.

diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c
index c8717c1..b50ee5d 100644
--- a/net/wireless/wext-core.c
+++ b/net/wireless/wext-core.c

@@ -342,6 +342,40 @@
 
 /* IW event code */
 
+void wireless_nlevent_flush(void)
+{
+	struct sk_buff *skb;
+	struct net *net;
+
+	ASSERT_RTNL();
+
+	for_each_net(net) {
+		while ((skb = skb_dequeue(&net->wext_nlevents)))
+			rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL,
+				    GFP_KERNEL);
+	}
+}
+EXPORT_SYMBOL_GPL(wireless_nlevent_flush);
+
+static int wext_netdev_notifier_call(struct notifier_block *nb,
+				     unsigned long state, void *ptr)
+{
+	/*
+	 * When a netdev changes state in any way, flush all pending messages
+	 * to avoid them going out in a strange order, e.g. RTM_NEWLINK after
+	 * RTM_DELLINK, or with IFF_UP after without IFF_UP during dev_close()
+	 * or similar - all of which could otherwise happen due to delays from
+	 * schedule_work().
+	 */
+	wireless_nlevent_flush();
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block wext_netdev_notifier = {
+	.notifier_call = wext_netdev_notifier_call,
+};
+
 static int __net_init wext_pernet_init(struct net *net)
 {
 	skb_queue_head_init(&net->wext_nlevents);
@@ -360,7 +394,12 @@
 
 static int __init wireless_nlevent_init(void)
 {
-	return register_pernet_subsys(&wext_pernet_ops);
+	int err = register_pernet_subsys(&wext_pernet_ops);
+
+	if (err)
+		return err;
+
+	return register_netdevice_notifier(&wext_netdev_notifier);
 }
 
 subsys_initcall(wireless_nlevent_init);
@@ -368,17 +407,8 @@
 /* Process events generated by the wireless layer or the driver. */
 static void wireless_nlevent_process(struct work_struct *work)
 {
-	struct sk_buff *skb;
-	struct net *net;
-
 	rtnl_lock();
-
-	for_each_net(net) {
-		while ((skb = skb_dequeue(&net->wext_nlevents)))
-			rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL,
-				    GFP_KERNEL);
-	}
-
+	wireless_nlevent_flush();
 	rtnl_unlock();
 }
 

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 0147c91..874132b 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl

@@ -269,7 +269,8 @@
 			__init_refok|
 			__kprobes|
 			__ref|
-			__rcu
+			__rcu|
+			__private
 		}x;
 our $InitAttributePrefix = qr{__(?:mem|cpu|dev|net_|)};
 our $InitAttributeData = qr{$InitAttributePrefix(?:initdata\b)};

diff --git a/scripts/ld-version.sh b/scripts/ld-version.sh
index d154f08..7bfe9fa 100755
--- a/scripts/ld-version.sh
+++ b/scripts/ld-version.sh

@@ -1,7 +1,7 @@
 #!/usr/bin/awk -f
 # extract linker version number from stdin and turn into single number
 	{
-	gsub(".*)", "");
+	gsub(".*\\)", "");
 	gsub(".*version ", "");
 	gsub("-.*", "");
 	split($1,a, ".");

diff --git a/scripts/sortextable.c b/scripts/sortextable.c
index c2423d9..7b29fb1 100644
--- a/scripts/sortextable.c
+++ b/scripts/sortextable.c

@@ -209,6 +209,35 @@
 	return 0;
 }
 
+static void x86_sort_relative_table(char *extab_image, int image_size)
+{
+	int i;
+
+	i = 0;
+	while (i < image_size) {
+		uint32_t *loc = (uint32_t *)(extab_image + i);
+
+		w(r(loc) + i, loc);
+		w(r(loc + 1) + i + 4, loc + 1);
+		w(r(loc + 2) + i + 8, loc + 2);
+
+		i += sizeof(uint32_t) * 3;
+	}
+
+	qsort(extab_image, image_size / 12, 12, compare_relative_table);
+
+	i = 0;
+	while (i < image_size) {
+		uint32_t *loc = (uint32_t *)(extab_image + i);
+
+		w(r(loc) - i, loc);
+		w(r(loc + 1) - (i + 4), loc + 1);
+		w(r(loc + 2) - (i + 8), loc + 2);
+
+		i += sizeof(uint32_t) * 3;
+	}
+}
+
 static void sort_relative_table(char *extab_image, int image_size)
 {
 	int i;
@@ -281,6 +310,9 @@
 		break;
 	case EM_386:
 	case EM_X86_64:
+		custom_sort = x86_sort_relative_table;
+		break;
+
 	case EM_S390:
 		custom_sort = sort_relative_table;
 		break;

diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index f8110cf..f1ab715 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c

@@ -3249,7 +3249,7 @@
 
 static void selinux_inode_getsecid(struct inode *inode, u32 *secid)
 {
-	struct inode_security_struct *isec = inode_security(inode);
+	struct inode_security_struct *isec = inode_security_novalidate(inode);
 	*secid = isec->sid;
 }
 

diff --git a/sound/arm/pxa2xx-pcm-lib.c b/sound/arm/pxa2xx-pcm-lib.c
index e9b98af..e8da3b8 100644
--- a/sound/arm/pxa2xx-pcm-lib.c
+++ b/sound/arm/pxa2xx-pcm-lib.c

@@ -141,10 +141,8 @@
 	struct vm_area_struct *vma)
 {
 	struct snd_pcm_runtime *runtime = substream->runtime;
-	return dma_mmap_writecombine(substream->pcm->card->dev, vma,
-				     runtime->dma_area,
-				     runtime->dma_addr,
-				     runtime->dma_bytes);
+	return dma_mmap_wc(substream->pcm->card->dev, vma, runtime->dma_area,
+			   runtime->dma_addr, runtime->dma_bytes);
 }
 EXPORT_SYMBOL(pxa2xx_pcm_mmap);
 
@@ -156,8 +154,7 @@
 	buf->dev.type = SNDRV_DMA_TYPE_DEV;
 	buf->dev.dev = pcm->card->dev;
 	buf->private_data = NULL;
-	buf->area = dma_alloc_writecombine(pcm->card->dev, size,
-					   &buf->addr, GFP_KERNEL);
+	buf->area = dma_alloc_wc(pcm->card->dev, size, &buf->addr, GFP_KERNEL);
 	if (!buf->area)
 		return -ENOMEM;
 	buf->bytes = size;
@@ -178,8 +175,7 @@
 		buf = &substream->dma_buffer;
 		if (!buf->area)
 			continue;
-		dma_free_writecombine(pcm->card->dev, buf->bytes,
-				      buf->area, buf->addr);
+		dma_free_wc(pcm->card->dev, buf->bytes, buf->area, buf->addr);
 		buf->area = NULL;
 	}
 }

diff --git a/sound/core/control_compat.c b/sound/core/control_compat.c
index b9c0910..0608f21 100644
--- a/sound/core/control_compat.c
+++ b/sound/core/control_compat.c

@@ -170,6 +170,19 @@
         unsigned char reserved[128];
 };
 
+#ifdef CONFIG_X86_X32
+/* x32 has a different alignment for 64bit values from ia32 */
+struct snd_ctl_elem_value_x32 {
+	struct snd_ctl_elem_id id;
+	unsigned int indirect;	/* bit-field causes misalignment */
+	union {
+		s32 integer[128];
+		unsigned char data[512];
+		s64 integer64[64];
+	} value;
+	unsigned char reserved[128];
+};
+#endif /* CONFIG_X86_X32 */
 
 /* get the value type and count of the control */
 static int get_ctl_type(struct snd_card *card, struct snd_ctl_elem_id *id,
@@ -219,9 +232,11 @@
 
 static int copy_ctl_value_from_user(struct snd_card *card,
 				    struct snd_ctl_elem_value *data,
-				    struct snd_ctl_elem_value32 __user *data32,
+				    void __user *userdata,
+				    void __user *valuep,
 				    int *typep, int *countp)
 {
+	struct snd_ctl_elem_value32 __user *data32 = userdata;
 	int i, type, size;
 	int uninitialized_var(count);
 	unsigned int indirect;
@@ -239,8 +254,9 @@
 	if (type == SNDRV_CTL_ELEM_TYPE_BOOLEAN ||
 	    type == SNDRV_CTL_ELEM_TYPE_INTEGER) {
 		for (i = 0; i < count; i++) {
+			s32 __user *intp = valuep;
 			int val;
-			if (get_user(val, &data32->value.integer[i]))
+			if (get_user(val, &intp[i]))
 				return -EFAULT;
 			data->value.integer.value[i] = val;
 		}
@@ -250,8 +266,7 @@
 			dev_err(card->dev, "snd_ioctl32_ctl_elem_value: unknown type %d\n", type);
 			return -EINVAL;
 		}
-		if (copy_from_user(data->value.bytes.data,
-				   data32->value.data, size))
+		if (copy_from_user(data->value.bytes.data, valuep, size))
 			return -EFAULT;
 	}
 
@@ -261,7 +276,8 @@
 }
 
 /* restore the value to 32bit */
-static int copy_ctl_value_to_user(struct snd_ctl_elem_value32 __user *data32,
+static int copy_ctl_value_to_user(void __user *userdata,
+				  void __user *valuep,
 				  struct snd_ctl_elem_value *data,
 				  int type, int count)
 {
@@ -270,22 +286,22 @@
 	if (type == SNDRV_CTL_ELEM_TYPE_BOOLEAN ||
 	    type == SNDRV_CTL_ELEM_TYPE_INTEGER) {
 		for (i = 0; i < count; i++) {
+			s32 __user *intp = valuep;
 			int val;
 			val = data->value.integer.value[i];
-			if (put_user(val, &data32->value.integer[i]))
+			if (put_user(val, &intp[i]))
 				return -EFAULT;
 		}
 	} else {
 		size = get_elem_size(type, count);
-		if (copy_to_user(data32->value.data,
-				 data->value.bytes.data, size))
+		if (copy_to_user(valuep, data->value.bytes.data, size))
 			return -EFAULT;
 	}
 	return 0;
 }
 
-static int snd_ctl_elem_read_user_compat(struct snd_card *card, 
-					 struct snd_ctl_elem_value32 __user *data32)
+static int ctl_elem_read_user(struct snd_card *card,
+			      void __user *userdata, void __user *valuep)
 {
 	struct snd_ctl_elem_value *data;
 	int err, type, count;
@@ -294,7 +310,9 @@
 	if (data == NULL)
 		return -ENOMEM;
 
-	if ((err = copy_ctl_value_from_user(card, data, data32, &type, &count)) < 0)
+	err = copy_ctl_value_from_user(card, data, userdata, valuep,
+				       &type, &count);
+	if (err < 0)
 		goto error;
 
 	snd_power_lock(card);
@@ -303,14 +321,15 @@
 		err = snd_ctl_elem_read(card, data);
 	snd_power_unlock(card);
 	if (err >= 0)
-		err = copy_ctl_value_to_user(data32, data, type, count);
+		err = copy_ctl_value_to_user(userdata, valuep, data,
+					     type, count);
  error:
 	kfree(data);
 	return err;
 }
 
-static int snd_ctl_elem_write_user_compat(struct snd_ctl_file *file,
-					  struct snd_ctl_elem_value32 __user *data32)
+static int ctl_elem_write_user(struct snd_ctl_file *file,
+			       void __user *userdata, void __user *valuep)
 {
 	struct snd_ctl_elem_value *data;
 	struct snd_card *card = file->card;
@@ -320,7 +339,9 @@
 	if (data == NULL)
 		return -ENOMEM;
 
-	if ((err = copy_ctl_value_from_user(card, data, data32, &type, &count)) < 0)
+	err = copy_ctl_value_from_user(card, data, userdata, valuep,
+				       &type, &count);
+	if (err < 0)
 		goto error;
 
 	snd_power_lock(card);
@@ -329,12 +350,39 @@
 		err = snd_ctl_elem_write(card, file, data);
 	snd_power_unlock(card);
 	if (err >= 0)
-		err = copy_ctl_value_to_user(data32, data, type, count);
+		err = copy_ctl_value_to_user(userdata, valuep, data,
+					     type, count);
  error:
 	kfree(data);
 	return err;
 }
 
+static int snd_ctl_elem_read_user_compat(struct snd_card *card,
+					 struct snd_ctl_elem_value32 __user *data32)
+{
+	return ctl_elem_read_user(card, data32, &data32->value);
+}
+
+static int snd_ctl_elem_write_user_compat(struct snd_ctl_file *file,
+					  struct snd_ctl_elem_value32 __user *data32)
+{
+	return ctl_elem_write_user(file, data32, &data32->value);
+}
+
+#ifdef CONFIG_X86_X32
+static int snd_ctl_elem_read_user_x32(struct snd_card *card,
+				      struct snd_ctl_elem_value_x32 __user *data32)
+{
+	return ctl_elem_read_user(card, data32, &data32->value);
+}
+
+static int snd_ctl_elem_write_user_x32(struct snd_ctl_file *file,
+				       struct snd_ctl_elem_value_x32 __user *data32)
+{
+	return ctl_elem_write_user(file, data32, &data32->value);
+}
+#endif /* CONFIG_X86_X32 */
+
 /* add or replace a user control */
 static int snd_ctl_elem_add_compat(struct snd_ctl_file *file,
 				   struct snd_ctl_elem_info32 __user *data32,
@@ -393,6 +441,10 @@
 	SNDRV_CTL_IOCTL_ELEM_WRITE32 = _IOWR('U', 0x13, struct snd_ctl_elem_value32),
 	SNDRV_CTL_IOCTL_ELEM_ADD32 = _IOWR('U', 0x17, struct snd_ctl_elem_info32),
 	SNDRV_CTL_IOCTL_ELEM_REPLACE32 = _IOWR('U', 0x18, struct snd_ctl_elem_info32),
+#ifdef CONFIG_X86_X32
+	SNDRV_CTL_IOCTL_ELEM_READ_X32 = _IOWR('U', 0x12, struct snd_ctl_elem_value_x32),
+	SNDRV_CTL_IOCTL_ELEM_WRITE_X32 = _IOWR('U', 0x13, struct snd_ctl_elem_value_x32),
+#endif /* CONFIG_X86_X32 */
 };
 
 static inline long snd_ctl_ioctl_compat(struct file *file, unsigned int cmd, unsigned long arg)
@@ -431,6 +483,12 @@
 		return snd_ctl_elem_add_compat(ctl, argp, 0);
 	case SNDRV_CTL_IOCTL_ELEM_REPLACE32:
 		return snd_ctl_elem_add_compat(ctl, argp, 1);
+#ifdef CONFIG_X86_X32
+	case SNDRV_CTL_IOCTL_ELEM_READ_X32:
+		return snd_ctl_elem_read_user_x32(ctl->card, argp);
+	case SNDRV_CTL_IOCTL_ELEM_WRITE_X32:
+		return snd_ctl_elem_write_user_x32(ctl, argp);
+#endif /* CONFIG_X86_X32 */
 	}
 
 	down_read(&snd_ioctl_rwsem);

diff --git a/sound/core/pcm_compat.c b/sound/core/pcm_compat.c
index 9630e9f..1f64ab0 100644
--- a/sound/core/pcm_compat.c
+++ b/sound/core/pcm_compat.c

@@ -183,6 +183,14 @@
 	return err;
 }
 
+#ifdef CONFIG_X86_X32
+/* X32 ABI has the same struct as x86-64 for snd_pcm_channel_info */
+static int snd_pcm_channel_info_user(struct snd_pcm_substream *substream,
+				     struct snd_pcm_channel_info __user *src);
+#define snd_pcm_ioctl_channel_info_x32(s, p)	\
+	snd_pcm_channel_info_user(s, p)
+#endif /* CONFIG_X86_X32 */
+
 struct snd_pcm_status32 {
 	s32 state;
 	struct compat_timespec trigger_tstamp;
@@ -243,6 +251,71 @@
 	return err;
 }
 
+#ifdef CONFIG_X86_X32
+/* X32 ABI has 64bit timespec and 64bit alignment */
+struct snd_pcm_status_x32 {
+	s32 state;
+	u32 rsvd; /* alignment */
+	struct timespec trigger_tstamp;
+	struct timespec tstamp;
+	u32 appl_ptr;
+	u32 hw_ptr;
+	s32 delay;
+	u32 avail;
+	u32 avail_max;
+	u32 overrange;
+	s32 suspended_state;
+	u32 audio_tstamp_data;
+	struct timespec audio_tstamp;
+	struct timespec driver_tstamp;
+	u32 audio_tstamp_accuracy;
+	unsigned char reserved[52-2*sizeof(struct timespec)];
+} __packed;
+
+#define put_timespec(src, dst) copy_to_user(dst, src, sizeof(*dst))
+
+static int snd_pcm_status_user_x32(struct snd_pcm_substream *substream,
+				   struct snd_pcm_status_x32 __user *src,
+				   bool ext)
+{
+	struct snd_pcm_status status;
+	int err;
+
+	memset(&status, 0, sizeof(status));
+	/*
+	 * with extension, parameters are read/write,
+	 * get audio_tstamp_data from user,
+	 * ignore rest of status structure
+	 */
+	if (ext && get_user(status.audio_tstamp_data,
+				(u32 __user *)(&src->audio_tstamp_data)))
+		return -EFAULT;
+	err = snd_pcm_status(substream, &status);
+	if (err < 0)
+		return err;
+
+	if (clear_user(src, sizeof(*src)))
+		return -EFAULT;
+	if (put_user(status.state, &src->state) ||
+	    put_timespec(&status.trigger_tstamp, &src->trigger_tstamp) ||
+	    put_timespec(&status.tstamp, &src->tstamp) ||
+	    put_user(status.appl_ptr, &src->appl_ptr) ||
+	    put_user(status.hw_ptr, &src->hw_ptr) ||
+	    put_user(status.delay, &src->delay) ||
+	    put_user(status.avail, &src->avail) ||
+	    put_user(status.avail_max, &src->avail_max) ||
+	    put_user(status.overrange, &src->overrange) ||
+	    put_user(status.suspended_state, &src->suspended_state) ||
+	    put_user(status.audio_tstamp_data, &src->audio_tstamp_data) ||
+	    put_timespec(&status.audio_tstamp, &src->audio_tstamp) ||
+	    put_timespec(&status.driver_tstamp, &src->driver_tstamp) ||
+	    put_user(status.audio_tstamp_accuracy, &src->audio_tstamp_accuracy))
+		return -EFAULT;
+
+	return err;
+}
+#endif /* CONFIG_X86_X32 */
+
 /* both for HW_PARAMS and HW_REFINE */
 static int snd_pcm_ioctl_hw_params_compat(struct snd_pcm_substream *substream,
 					  int refine, 
@@ -469,6 +542,93 @@
 	return 0;
 }
 
+#ifdef CONFIG_X86_X32
+/* X32 ABI has 64bit timespec and 64bit alignment */
+struct snd_pcm_mmap_status_x32 {
+	s32 state;
+	s32 pad1;
+	u32 hw_ptr;
+	u32 pad2; /* alignment */
+	struct timespec tstamp;
+	s32 suspended_state;
+	struct timespec audio_tstamp;
+} __packed;
+
+struct snd_pcm_mmap_control_x32 {
+	u32 appl_ptr;
+	u32 avail_min;
+};
+
+struct snd_pcm_sync_ptr_x32 {
+	u32 flags;
+	u32 rsvd; /* alignment */
+	union {
+		struct snd_pcm_mmap_status_x32 status;
+		unsigned char reserved[64];
+	} s;
+	union {
+		struct snd_pcm_mmap_control_x32 control;
+		unsigned char reserved[64];
+	} c;
+} __packed;
+
+static int snd_pcm_ioctl_sync_ptr_x32(struct snd_pcm_substream *substream,
+				      struct snd_pcm_sync_ptr_x32 __user *src)
+{
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	volatile struct snd_pcm_mmap_status *status;
+	volatile struct snd_pcm_mmap_control *control;
+	u32 sflags;
+	struct snd_pcm_mmap_control scontrol;
+	struct snd_pcm_mmap_status sstatus;
+	snd_pcm_uframes_t boundary;
+	int err;
+
+	if (snd_BUG_ON(!runtime))
+		return -EINVAL;
+
+	if (get_user(sflags, &src->flags) ||
+	    get_user(scontrol.appl_ptr, &src->c.control.appl_ptr) ||
+	    get_user(scontrol.avail_min, &src->c.control.avail_min))
+		return -EFAULT;
+	if (sflags & SNDRV_PCM_SYNC_PTR_HWSYNC) {
+		err = snd_pcm_hwsync(substream);
+		if (err < 0)
+			return err;
+	}
+	status = runtime->status;
+	control = runtime->control;
+	boundary = recalculate_boundary(runtime);
+	if (!boundary)
+		boundary = 0x7fffffff;
+	snd_pcm_stream_lock_irq(substream);
+	/* FIXME: we should consider the boundary for the sync from app */
+	if (!(sflags & SNDRV_PCM_SYNC_PTR_APPL))
+		control->appl_ptr = scontrol.appl_ptr;
+	else
+		scontrol.appl_ptr = control->appl_ptr % boundary;
+	if (!(sflags & SNDRV_PCM_SYNC_PTR_AVAIL_MIN))
+		control->avail_min = scontrol.avail_min;
+	else
+		scontrol.avail_min = control->avail_min;
+	sstatus.state = status->state;
+	sstatus.hw_ptr = status->hw_ptr % boundary;
+	sstatus.tstamp = status->tstamp;
+	sstatus.suspended_state = status->suspended_state;
+	sstatus.audio_tstamp = status->audio_tstamp;
+	snd_pcm_stream_unlock_irq(substream);
+	if (put_user(sstatus.state, &src->s.status.state) ||
+	    put_user(sstatus.hw_ptr, &src->s.status.hw_ptr) ||
+	    put_timespec(&sstatus.tstamp, &src->s.status.tstamp) ||
+	    put_user(sstatus.suspended_state, &src->s.status.suspended_state) ||
+	    put_timespec(&sstatus.audio_tstamp, &src->s.status.audio_tstamp) ||
+	    put_user(scontrol.appl_ptr, &src->c.control.appl_ptr) ||
+	    put_user(scontrol.avail_min, &src->c.control.avail_min))
+		return -EFAULT;
+
+	return 0;
+}
+#endif /* CONFIG_X86_X32 */
 
 /*
  */
@@ -487,7 +647,12 @@
 	SNDRV_PCM_IOCTL_WRITEN_FRAMES32 = _IOW('A', 0x52, struct snd_xfern32),
 	SNDRV_PCM_IOCTL_READN_FRAMES32 = _IOR('A', 0x53, struct snd_xfern32),
 	SNDRV_PCM_IOCTL_SYNC_PTR32 = _IOWR('A', 0x23, struct snd_pcm_sync_ptr32),
-
+#ifdef CONFIG_X86_X32
+	SNDRV_PCM_IOCTL_CHANNEL_INFO_X32 = _IOR('A', 0x32, struct snd_pcm_channel_info),
+	SNDRV_PCM_IOCTL_STATUS_X32 = _IOR('A', 0x20, struct snd_pcm_status_x32),
+	SNDRV_PCM_IOCTL_STATUS_EXT_X32 = _IOWR('A', 0x24, struct snd_pcm_status_x32),
+	SNDRV_PCM_IOCTL_SYNC_PTR_X32 = _IOWR('A', 0x23, struct snd_pcm_sync_ptr_x32),
+#endif /* CONFIG_X86_X32 */
 };
 
 static long snd_pcm_ioctl_compat(struct file *file, unsigned int cmd, unsigned long arg)
@@ -559,6 +724,16 @@
 		return snd_pcm_ioctl_rewind_compat(substream, argp);
 	case SNDRV_PCM_IOCTL_FORWARD32:
 		return snd_pcm_ioctl_forward_compat(substream, argp);
+#ifdef CONFIG_X86_X32
+	case SNDRV_PCM_IOCTL_STATUS_X32:
+		return snd_pcm_status_user_x32(substream, argp, false);
+	case SNDRV_PCM_IOCTL_STATUS_EXT_X32:
+		return snd_pcm_status_user_x32(substream, argp, true);
+	case SNDRV_PCM_IOCTL_SYNC_PTR_X32:
+		return snd_pcm_ioctl_sync_ptr_x32(substream, argp);
+	case SNDRV_PCM_IOCTL_CHANNEL_INFO_X32:
+		return snd_pcm_ioctl_channel_info_x32(substream, argp);
+#endif /* CONFIG_X86_X32 */
 	}
 
 	return -ENOIOCTLCMD;

diff --git a/sound/core/rawmidi_compat.c b/sound/core/rawmidi_compat.c
index 5268c1f..f69764d 100644
--- a/sound/core/rawmidi_compat.c
+++ b/sound/core/rawmidi_compat.c

@@ -85,8 +85,7 @@
 	if (err < 0)
 		return err;
 
-	if (put_user(status.tstamp.tv_sec, &src->tstamp.tv_sec) ||
-	    put_user(status.tstamp.tv_nsec, &src->tstamp.tv_nsec) ||
+	if (compat_put_timespec(&status.tstamp, &src->tstamp) ||
 	    put_user(status.avail, &src->avail) ||
 	    put_user(status.xruns, &src->xruns))
 		return -EFAULT;
@@ -94,9 +93,58 @@
 	return 0;
 }
 
+#ifdef CONFIG_X86_X32
+/* X32 ABI has 64bit timespec and 64bit alignment */
+struct snd_rawmidi_status_x32 {
+	s32 stream;
+	u32 rsvd; /* alignment */
+	struct timespec tstamp;
+	u32 avail;
+	u32 xruns;
+	unsigned char reserved[16];
+} __attribute__((packed));
+
+#define put_timespec(src, dst) copy_to_user(dst, src, sizeof(*dst))
+
+static int snd_rawmidi_ioctl_status_x32(struct snd_rawmidi_file *rfile,
+					struct snd_rawmidi_status_x32 __user *src)
+{
+	int err;
+	struct snd_rawmidi_status status;
+
+	if (rfile->output == NULL)
+		return -EINVAL;
+	if (get_user(status.stream, &src->stream))
+		return -EFAULT;
+
+	switch (status.stream) {
+	case SNDRV_RAWMIDI_STREAM_OUTPUT:
+		err = snd_rawmidi_output_status(rfile->output, &status);
+		break;
+	case SNDRV_RAWMIDI_STREAM_INPUT:
+		err = snd_rawmidi_input_status(rfile->input, &status);
+		break;
+	default:
+		return -EINVAL;
+	}
+	if (err < 0)
+		return err;
+
+	if (put_timespec(&status.tstamp, &src->tstamp) ||
+	    put_user(status.avail, &src->avail) ||
+	    put_user(status.xruns, &src->xruns))
+		return -EFAULT;
+
+	return 0;
+}
+#endif /* CONFIG_X86_X32 */
+
 enum {
 	SNDRV_RAWMIDI_IOCTL_PARAMS32 = _IOWR('W', 0x10, struct snd_rawmidi_params32),
 	SNDRV_RAWMIDI_IOCTL_STATUS32 = _IOWR('W', 0x20, struct snd_rawmidi_status32),
+#ifdef CONFIG_X86_X32
+	SNDRV_RAWMIDI_IOCTL_STATUS_X32 = _IOWR('W', 0x20, struct snd_rawmidi_status_x32),
+#endif /* CONFIG_X86_X32 */
 };
 
 static long snd_rawmidi_ioctl_compat(struct file *file, unsigned int cmd, unsigned long arg)
@@ -115,6 +163,10 @@
 		return snd_rawmidi_ioctl_params_compat(rfile, argp);
 	case SNDRV_RAWMIDI_IOCTL_STATUS32:
 		return snd_rawmidi_ioctl_status_compat(rfile, argp);
+#ifdef CONFIG_X86_X32
+	case SNDRV_RAWMIDI_IOCTL_STATUS_X32:
+		return snd_rawmidi_ioctl_status_x32(rfile, argp);
+#endif /* CONFIG_X86_X32 */
 	}
 	return -ENOIOCTLCMD;
 }

diff --git a/sound/core/seq/oss/seq_oss.c b/sound/core/seq/oss/seq_oss.c
index 8db156b..8cdf489 100644
--- a/sound/core/seq/oss/seq_oss.c
+++ b/sound/core/seq/oss/seq_oss.c

@@ -149,8 +149,6 @@
 	if ((dp = file->private_data) == NULL)
 		return 0;
 
-	snd_seq_oss_drain_write(dp);
-
 	mutex_lock(&register_mutex);
 	snd_seq_oss_release(dp);
 	mutex_unlock(&register_mutex);

diff --git a/sound/core/seq/oss/seq_oss_device.h b/sound/core/seq/oss/seq_oss_device.h
index b439243..d7b4d01 100644
--- a/sound/core/seq/oss/seq_oss_device.h
+++ b/sound/core/seq/oss/seq_oss_device.h

@@ -127,7 +127,6 @@
 unsigned int snd_seq_oss_poll(struct seq_oss_devinfo *dp, struct file *file, poll_table * wait);
 
 void snd_seq_oss_reset(struct seq_oss_devinfo *dp);
-void snd_seq_oss_drain_write(struct seq_oss_devinfo *dp);
 
 /* */
 void snd_seq_oss_process_queue(struct seq_oss_devinfo *dp, abstime_t time);

diff --git a/sound/core/seq/oss/seq_oss_init.c b/sound/core/seq/oss/seq_oss_init.c
index 6779e82b..92c96a9 100644
--- a/sound/core/seq/oss/seq_oss_init.c
+++ b/sound/core/seq/oss/seq_oss_init.c

@@ -436,22 +436,6 @@
 
 
 /*
- * Wait until the queue is empty (if we don't have nonblock)
- */
-void
-snd_seq_oss_drain_write(struct seq_oss_devinfo *dp)
-{
-	if (! dp->timer->running)
-		return;
-	if (is_write_mode(dp->file_mode) && !is_nonblock_mode(dp->file_mode) &&
-	    dp->writeq) {
-		while (snd_seq_oss_writeq_sync(dp->writeq))
-			;
-	}
-}
-
-
-/*
  * reset sequencer devices
  */
 void

diff --git a/sound/core/timer_compat.c b/sound/core/timer_compat.c
index e05802a..2e90822 100644
--- a/sound/core/timer_compat.c
+++ b/sound/core/timer_compat.c

@@ -70,13 +70,14 @@
 					struct snd_timer_status32 __user *_status)
 {
 	struct snd_timer_user *tu;
-	struct snd_timer_status status;
+	struct snd_timer_status32 status;
 	
 	tu = file->private_data;
 	if (snd_BUG_ON(!tu->timeri))
 		return -ENXIO;
 	memset(&status, 0, sizeof(status));
-	status.tstamp = tu->tstamp;
+	status.tstamp.tv_sec = tu->tstamp.tv_sec;
+	status.tstamp.tv_nsec = tu->tstamp.tv_nsec;
 	status.resolution = snd_timer_resolution(tu->timeri);
 	status.lost = tu->timeri->lost;
 	status.overrun = tu->overrun;
@@ -88,12 +89,21 @@
 	return 0;
 }
 
+#ifdef CONFIG_X86_X32
+/* X32 ABI has the same struct as x86-64 */
+#define snd_timer_user_status_x32(file, s) \
+	snd_timer_user_status(file, s)
+#endif /* CONFIG_X86_X32 */
+
 /*
  */
 
 enum {
 	SNDRV_TIMER_IOCTL_INFO32 = _IOR('T', 0x11, struct snd_timer_info32),
 	SNDRV_TIMER_IOCTL_STATUS32 = _IOW('T', 0x14, struct snd_timer_status32),
+#ifdef CONFIG_X86_X32
+	SNDRV_TIMER_IOCTL_STATUS_X32 = _IOW('T', 0x14, struct snd_timer_status),
+#endif /* CONFIG_X86_X32 */
 };
 
 static long snd_timer_user_ioctl_compat(struct file *file, unsigned int cmd, unsigned long arg)
@@ -122,6 +132,10 @@
 		return snd_timer_user_info_compat(file, argp);
 	case SNDRV_TIMER_IOCTL_STATUS32:
 		return snd_timer_user_status_compat(file, argp);
+#ifdef CONFIG_X86_X32
+	case SNDRV_TIMER_IOCTL_STATUS_X32:
+		return snd_timer_user_status_x32(file, argp);
+#endif /* CONFIG_X86_X32 */
 	}
 	return -ENOIOCTLCMD;
 }

diff --git a/sound/hda/hdac_controller.c b/sound/hda/hdac_controller.c
index b5a17cb..8c48623 100644
--- a/sound/hda/hdac_controller.c
+++ b/sound/hda/hdac_controller.c

@@ -426,18 +426,22 @@
  * @bus: HD-audio core bus
  * @status: INTSTS register value
  * @ask: callback to be called for woken streams
+ *
+ * Returns the bits of handled streams, or zero if no stream is handled.
  */
-void snd_hdac_bus_handle_stream_irq(struct hdac_bus *bus, unsigned int status,
+int snd_hdac_bus_handle_stream_irq(struct hdac_bus *bus, unsigned int status,
 				    void (*ack)(struct hdac_bus *,
 						struct hdac_stream *))
 {
 	struct hdac_stream *azx_dev;
 	u8 sd_status;
+	int handled = 0;
 
 	list_for_each_entry(azx_dev, &bus->stream_list, list) {
 		if (status & azx_dev->sd_int_sta_mask) {
 			sd_status = snd_hdac_stream_readb(azx_dev, SD_STS);
 			snd_hdac_stream_writeb(azx_dev, SD_STS, SD_INT_MASK);
+			handled |= 1 << azx_dev->index;
 			if (!azx_dev->substream || !azx_dev->running ||
 			    !(sd_status & SD_INT_COMPLETE))
 				continue;
@@ -445,6 +449,7 @@
 				ack(bus, azx_dev);
 		}
 	}
+	return handled;
 }
 EXPORT_SYMBOL_GPL(snd_hdac_bus_handle_stream_irq);
 

diff --git a/sound/pci/hda/hda_controller.c b/sound/pci/hda/hda_controller.c
index 37cf9ce..27de801 100644
--- a/sound/pci/hda/hda_controller.c
+++ b/sound/pci/hda/hda_controller.c

@@ -930,6 +930,8 @@
 	struct azx *chip = dev_id;
 	struct hdac_bus *bus = azx_bus(chip);
 	u32 status;
+	bool active, handled = false;
+	int repeat = 0; /* count for avoiding endless loop */
 
 #ifdef CONFIG_PM
 	if (azx_has_pm_runtime(chip))
@@ -939,33 +941,36 @@
 
 	spin_lock(&bus->reg_lock);
 
-	if (chip->disabled) {
-		spin_unlock(&bus->reg_lock);
-		return IRQ_NONE;
-	}
+	if (chip->disabled)
+		goto unlock;
 
-	status = azx_readl(chip, INTSTS);
-	if (status == 0 || status == 0xffffffff) {
-		spin_unlock(&bus->reg_lock);
-		return IRQ_NONE;
-	}
+	do {
+		status = azx_readl(chip, INTSTS);
+		if (status == 0 || status == 0xffffffff)
+			break;
 
-	snd_hdac_bus_handle_stream_irq(bus, status, stream_update);
+		handled = true;
+		active = false;
+		if (snd_hdac_bus_handle_stream_irq(bus, status, stream_update))
+			active = true;
 
-	/* clear rirb int */
-	status = azx_readb(chip, RIRBSTS);
-	if (status & RIRB_INT_MASK) {
-		if (status & RIRB_INT_RESPONSE) {
-			if (chip->driver_caps & AZX_DCAPS_CTX_WORKAROUND)
-				udelay(80);
-			snd_hdac_bus_update_rirb(bus);
+		/* clear rirb int */
+		status = azx_readb(chip, RIRBSTS);
+		if (status & RIRB_INT_MASK) {
+			active = true;
+			if (status & RIRB_INT_RESPONSE) {
+				if (chip->driver_caps & AZX_DCAPS_CTX_WORKAROUND)
+					udelay(80);
+				snd_hdac_bus_update_rirb(bus);
+			}
+			azx_writeb(chip, RIRBSTS, RIRB_INT_MASK);
 		}
-		azx_writeb(chip, RIRBSTS, RIRB_INT_MASK);
-	}
+	} while (active && ++repeat < 10);
 
+ unlock:
 	spin_unlock(&bus->reg_lock);
 
-	return IRQ_HANDLED;
+	return IRQ_RETVAL(handled);
 }
 EXPORT_SYMBOL_GPL(azx_interrupt);
 

diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index ce6b97f..e5240cb 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c

@@ -363,7 +363,10 @@
 					((pci)->device == 0x0d0c) || \
 					((pci)->device == 0x160c))
 
-#define IS_BROXTON(pci)	((pci)->device == 0x5a98)
+#define IS_SKL(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa170)
+#define IS_SKL_LP(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x9d70)
+#define IS_BXT(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x5a98)
+#define IS_SKL_PLUS(pci) (IS_SKL(pci) || IS_SKL_LP(pci) || IS_BXT(pci))
 
 static char *driver_short_names[] = {
 	[AZX_DRIVER_ICH] = "HDA Intel",
@@ -540,13 +543,13 @@
 
 	if (chip->driver_caps & AZX_DCAPS_I915_POWERWELL)
 		snd_hdac_set_codec_wakeup(bus, true);
-	if (IS_BROXTON(pci)) {
+	if (IS_SKL_PLUS(pci)) {
 		pci_read_config_dword(pci, INTEL_HDA_CGCTL, &val);
 		val = val & ~INTEL_HDA_CGCTL_MISCBDCGE;
 		pci_write_config_dword(pci, INTEL_HDA_CGCTL, val);
 	}
 	azx_init_chip(chip, full_reset);
-	if (IS_BROXTON(pci)) {
+	if (IS_SKL_PLUS(pci)) {
 		pci_read_config_dword(pci, INTEL_HDA_CGCTL, &val);
 		val = val | INTEL_HDA_CGCTL_MISCBDCGE;
 		pci_write_config_dword(pci, INTEL_HDA_CGCTL, val);
@@ -555,7 +558,7 @@
 		snd_hdac_set_codec_wakeup(bus, false);
 
 	/* reduce dma latency to avoid noise */
-	if (IS_BROXTON(pci))
+	if (IS_BXT(pci))
 		bxt_reduce_dma_latency(chip);
 }
 
@@ -977,11 +980,6 @@
 /* put codec down to D3 at hibernation for Intel SKL+;
  * otherwise BIOS may still access the codec and screw up the driver
  */
-#define IS_SKL(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa170)
-#define IS_SKL_LP(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x9d70)
-#define IS_BXT(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x5a98)
-#define IS_SKL_PLUS(pci) (IS_SKL(pci) || IS_SKL_LP(pci) || IS_BXT(pci))
-
 static int azx_freeze_noirq(struct device *dev)
 {
 	struct pci_dev *pci = to_pci_dev(dev);

diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c
index 8ee78db..bcbc4ee 100644
--- a/sound/pci/hda/patch_hdmi.c
+++ b/sound/pci/hda/patch_hdmi.c

@@ -2477,13 +2477,6 @@
 			is_broxton(codec))
 		codec->core.link_power_control = 1;
 
-	if (codec_has_acomp(codec)) {
-		codec->depop_delay = 0;
-		spec->i915_audio_ops.audio_ptr = codec;
-		spec->i915_audio_ops.pin_eld_notify = intel_pin_eld_notify;
-		snd_hdac_i915_register_notifier(&spec->i915_audio_ops);
-	}
-
 	if (hdmi_parse_codec(codec) < 0) {
 		if (spec->i915_bound)
 			snd_hdac_i915_exit(&codec->bus->core);
@@ -2505,6 +2498,18 @@
 
 	init_channel_allocations();
 
+	if (codec_has_acomp(codec)) {
+		codec->depop_delay = 0;
+		spec->i915_audio_ops.audio_ptr = codec;
+		/* intel_audio_codec_enable() or intel_audio_codec_disable()
+		 * will call pin_eld_notify with using audio_ptr pointer
+		 * We need make sure audio_ptr is really setup
+		 */
+		wmb();
+		spec->i915_audio_ops.pin_eld_notify = intel_pin_eld_notify;
+		snd_hdac_i915_register_notifier(&spec->i915_audio_ops);
+	}
+
 	return 0;
 }
 

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index efd4980..93d2156 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c

@@ -3801,6 +3801,10 @@
 
 static void alc_headset_mode_default(struct hda_codec *codec)
 {
+	static struct coef_fw coef0225[] = {
+		UPDATE_COEF(0x45, 0x3f<<10, 0x34<<10),
+		{}
+	};
 	static struct coef_fw coef0255[] = {
 		WRITE_COEF(0x45, 0xc089),
 		WRITE_COEF(0x45, 0xc489),
@@ -3842,6 +3846,9 @@
 	};
 
 	switch (codec->core.vendor_id) {
+	case 0x10ec0225:
+		alc_process_coef_fw(codec, coef0225);
+		break;
 	case 0x10ec0255:
 	case 0x10ec0256:
 		alc_process_coef_fw(codec, coef0255);
@@ -4749,6 +4756,9 @@
 	ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE,
 	ALC293_FIXUP_LENOVO_SPK_NOISE,
 	ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY,
+	ALC255_FIXUP_DELL_SPK_NOISE,
+	ALC225_FIXUP_DELL1_MIC_NO_PRESENCE,
+	ALC280_FIXUP_HP_HEADSET_MIC,
 };
 
 static const struct hda_fixup alc269_fixups[] = {
@@ -5368,6 +5378,29 @@
 		.type = HDA_FIXUP_FUNC,
 		.v.func = alc233_fixup_lenovo_line2_mic_hotkey,
 	},
+	[ALC255_FIXUP_DELL_SPK_NOISE] = {
+		.type = HDA_FIXUP_FUNC,
+		.v.func = alc_fixup_disable_aamix,
+		.chained = true,
+		.chain_id = ALC255_FIXUP_DELL1_MIC_NO_PRESENCE
+	},
+	[ALC225_FIXUP_DELL1_MIC_NO_PRESENCE] = {
+		.type = HDA_FIXUP_VERBS,
+		.v.verbs = (const struct hda_verb[]) {
+			/* Disable pass-through path for FRONT 14h */
+			{ 0x20, AC_VERB_SET_COEF_INDEX, 0x36 },
+			{ 0x20, AC_VERB_SET_PROC_COEF, 0x57d7 },
+			{}
+		},
+		.chained = true,
+		.chain_id = ALC269_FIXUP_DELL1_MIC_NO_PRESENCE
+	},
+	[ALC280_FIXUP_HP_HEADSET_MIC] = {
+		.type = HDA_FIXUP_FUNC,
+		.v.func = alc_fixup_disable_aamix,
+		.chained = true,
+		.chain_id = ALC269_FIXUP_HEADSET_MIC,
+	},
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -5379,6 +5412,7 @@
 	SND_PCI_QUIRK(0x1025, 0x080d, "Acer Aspire V5-122P", ALC269_FIXUP_ASPIRE_HEADSET_MIC),
 	SND_PCI_QUIRK(0x1025, 0x0740, "Acer AO725", ALC271_FIXUP_HP_GATE_MIC_JACK),
 	SND_PCI_QUIRK(0x1025, 0x0742, "Acer AO756", ALC271_FIXUP_HP_GATE_MIC_JACK),
+	SND_PCI_QUIRK(0x1025, 0x0762, "Acer Aspire E1-472", ALC271_FIXUP_HP_GATE_MIC_JACK_E1_572),
 	SND_PCI_QUIRK(0x1025, 0x0775, "Acer Aspire E1-572", ALC271_FIXUP_HP_GATE_MIC_JACK_E1_572),
 	SND_PCI_QUIRK(0x1025, 0x079b, "Acer Aspire V5-573G", ALC282_FIXUP_ASPIRE_V5_PINS),
 	SND_PCI_QUIRK(0x1025, 0x106d, "Acer Cloudbook 14", ALC283_FIXUP_CHROME_BOOK),
@@ -5410,6 +5444,7 @@
 	SND_PCI_QUIRK(0x1028, 0x06df, "Dell", ALC293_FIXUP_DISABLE_AAMIX_MULTIJACK),
 	SND_PCI_QUIRK(0x1028, 0x06e0, "Dell", ALC293_FIXUP_DISABLE_AAMIX_MULTIJACK),
 	SND_PCI_QUIRK(0x1028, 0x0704, "Dell XPS 13", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE),
+	SND_PCI_QUIRK(0x1028, 0x0725, "Dell Inspiron 3162", ALC255_FIXUP_DELL_SPK_NOISE),
 	SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2),
@@ -5470,6 +5505,7 @@
 	SND_PCI_QUIRK(0x103c, 0x2335, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
 	SND_PCI_QUIRK(0x103c, 0x2336, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
 	SND_PCI_QUIRK(0x103c, 0x2337, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
+	SND_PCI_QUIRK(0x103c, 0x221c, "HP EliteBook 755 G2", ALC280_FIXUP_HP_HEADSET_MIC),
 	SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300),
 	SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
 	SND_PCI_QUIRK(0x1043, 0x115d, "Asus 1015E", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
@@ -5638,10 +5674,10 @@
 	{0x21, 0x03211020}
 
 static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
-	SND_HDA_PIN_QUIRK(0x10ec0225, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE,
+	SND_HDA_PIN_QUIRK(0x10ec0225, 0x1028, "Dell", ALC225_FIXUP_DELL1_MIC_NO_PRESENCE,
 		ALC225_STANDARD_PINS,
 		{0x14, 0x901701a0}),
-	SND_HDA_PIN_QUIRK(0x10ec0225, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE,
+	SND_HDA_PIN_QUIRK(0x10ec0225, 0x1028, "Dell", ALC225_FIXUP_DELL1_MIC_NO_PRESENCE,
 		ALC225_STANDARD_PINS,
 		{0x14, 0x901701b0}),
 	SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL2_MIC_NO_PRESENCE,

diff --git a/sound/pci/rme9652/hdsp.c b/sound/pci/rme9652/hdsp.c
index 2875b4f..7c8941b 100644
--- a/sound/pci/rme9652/hdsp.c
+++ b/sound/pci/rme9652/hdsp.c

@@ -2879,7 +2879,7 @@
 {
 	struct hdsp *hdsp = snd_kcontrol_chip(kcontrol);
 
-	ucontrol->value.enumerated.item[0] = hdsp_dds_offset(hdsp);
+	ucontrol->value.integer.value[0] = hdsp_dds_offset(hdsp);
 	return 0;
 }
 
@@ -2891,7 +2891,7 @@
 
 	if (!snd_hdsp_use_is_exclusive(hdsp))
 		return -EBUSY;
-	val = ucontrol->value.enumerated.item[0];
+	val = ucontrol->value.integer.value[0];
 	spin_lock_irq(&hdsp->lock);
 	if (val != hdsp_dds_offset(hdsp))
 		change = (hdsp_set_dds_offset(hdsp, val) == 0) ? 1 : 0;

diff --git a/sound/pci/rme9652/hdspm.c b/sound/pci/rme9652/hdspm.c
index 8bc8016..a4a999a 100644
--- a/sound/pci/rme9652/hdspm.c
+++ b/sound/pci/rme9652/hdspm.c

@@ -1601,6 +1601,9 @@
 {
 	u64 n;
 
+	if (snd_BUG_ON(rate <= 0))
+		return;
+
 	if (rate >= 112000)
 		rate /= 4;
 	else if (rate >= 56000)
@@ -2215,6 +2218,8 @@
 		} else {
 			/* slave mode, return external sample rate */
 			rate = hdspm_external_sample_rate(hdspm);
+			if (!rate)
+				rate = hdspm->system_sample_rate;
 		}
 	}
 
@@ -2260,8 +2265,11 @@
 					    ucontrol)
 {
 	struct hdspm *hdspm = snd_kcontrol_chip(kcontrol);
+	int rate = ucontrol->value.integer.value[0];
 
-	hdspm_set_dds_value(hdspm, ucontrol->value.enumerated.item[0]);
+	if (rate < 27000 || rate > 207000)
+		return -EINVAL;
+	hdspm_set_dds_value(hdspm, ucontrol->value.integer.value[0]);
 	return 0;
 }
 
@@ -4449,7 +4457,7 @@
 {
 	struct hdspm *hdspm = snd_kcontrol_chip(kcontrol);
 
-	ucontrol->value.enumerated.item[0] = hdspm->tco->term;
+	ucontrol->value.integer.value[0] = hdspm->tco->term;
 
 	return 0;
 }
@@ -4460,8 +4468,8 @@
 {
 	struct hdspm *hdspm = snd_kcontrol_chip(kcontrol);
 
-	if (hdspm->tco->term != ucontrol->value.enumerated.item[0]) {
-		hdspm->tco->term = ucontrol->value.enumerated.item[0];
+	if (hdspm->tco->term != ucontrol->value.integer.value[0]) {
+		hdspm->tco->term = ucontrol->value.integer.value[0];
 
 		hdspm_tco_write(hdspm);
 

diff --git a/sound/soc/codecs/ab8500-codec.c b/sound/soc/codecs/ab8500-codec.c
index affb192..faae693 100644
--- a/sound/soc/codecs/ab8500-codec.c
+++ b/sound/soc/codecs/ab8500-codec.c

@@ -1130,7 +1130,7 @@
 	struct ab8500_codec_drvdata *drvdata = dev_get_drvdata(codec->dev);
 
 	mutex_lock(&drvdata->ctrl_lock);
-	ucontrol->value.integer.value[0] = drvdata->sid_status;
+	ucontrol->value.enumerated.item[0] = drvdata->sid_status;
 	mutex_unlock(&drvdata->ctrl_lock);
 
 	return 0;
@@ -1147,7 +1147,7 @@
 
 	dev_dbg(codec->dev, "%s: Enter\n", __func__);
 
-	if (ucontrol->value.integer.value[0] != SID_APPLY_FIR) {
+	if (ucontrol->value.enumerated.item[0] != SID_APPLY_FIR) {
 		dev_err(codec->dev,
 			"%s: ERROR: This control supports '%s' only!\n",
 			__func__, enum_sid_state[SID_APPLY_FIR]);
@@ -1199,7 +1199,7 @@
 	struct ab8500_codec_drvdata *drvdata = dev_get_drvdata(codec->dev);
 
 	mutex_lock(&drvdata->ctrl_lock);
-	ucontrol->value.integer.value[0] = drvdata->anc_status;
+	ucontrol->value.enumerated.item[0] = drvdata->anc_status;
 	mutex_unlock(&drvdata->ctrl_lock);
 
 	return 0;
@@ -1220,7 +1220,7 @@
 
 	mutex_lock(&drvdata->ctrl_lock);
 
-	req = ucontrol->value.integer.value[0];
+	req = ucontrol->value.enumerated.item[0];
 	if (req >= ARRAY_SIZE(enum_anc_state)) {
 		status = -EINVAL;
 		goto cleanup;

diff --git a/sound/soc/codecs/adau17x1.h b/sound/soc/codecs/adau17x1.h
index e13583e..5ae87a0 100644
--- a/sound/soc/codecs/adau17x1.h
+++ b/sound/soc/codecs/adau17x1.h

@@ -103,9 +103,9 @@
 #define ADAU17X1_CLOCK_CONTROL_CORECLK_SRC_PLL	BIT(3)
 #define ADAU17X1_CLOCK_CONTROL_SYSCLK_EN	BIT(0)
 
-#define ADAU17X1_SERIAL_PORT1_BCLK32		(0x0 << 5)
-#define ADAU17X1_SERIAL_PORT1_BCLK48		(0x1 << 5)
-#define ADAU17X1_SERIAL_PORT1_BCLK64		(0x2 << 5)
+#define ADAU17X1_SERIAL_PORT1_BCLK64		(0x0 << 5)
+#define ADAU17X1_SERIAL_PORT1_BCLK32		(0x1 << 5)
+#define ADAU17X1_SERIAL_PORT1_BCLK48		(0x2 << 5)
 #define ADAU17X1_SERIAL_PORT1_BCLK128		(0x3 << 5)
 #define ADAU17X1_SERIAL_PORT1_BCLK256		(0x4 << 5)
 #define ADAU17X1_SERIAL_PORT1_BCLK_MASK		(0x7 << 5)

diff --git a/sound/soc/codecs/cs42l51.c b/sound/soc/codecs/cs42l51.c
index b395152..35488f1 100644
--- a/sound/soc/codecs/cs42l51.c
+++ b/sound/soc/codecs/cs42l51.c

@@ -60,15 +60,15 @@
 	switch (value) {
 	default:
 	case 0:
-		ucontrol->value.integer.value[0] = 0;
+		ucontrol->value.enumerated.item[0] = 0;
 		break;
 	/* same value : (L+R)/2 and (R+L)/2 */
 	case 1:
 	case 2:
-		ucontrol->value.integer.value[0] = 1;
+		ucontrol->value.enumerated.item[0] = 1;
 		break;
 	case 3:
-		ucontrol->value.integer.value[0] = 2;
+		ucontrol->value.enumerated.item[0] = 2;
 		break;
 	}
 
@@ -85,7 +85,7 @@
 	struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
 	unsigned char val;
 
-	switch (ucontrol->value.integer.value[0]) {
+	switch (ucontrol->value.enumerated.item[0]) {
 	default:
 	case 0:
 		val = CHAN_MIX_NORMAL;

diff --git a/sound/soc/codecs/da732x.c b/sound/soc/codecs/da732x.c
index 1d5a89c..461506a 100644
--- a/sound/soc/codecs/da732x.c
+++ b/sound/soc/codecs/da732x.c

@@ -334,7 +334,7 @@
 	struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
 	struct soc_enum *enum_ctrl = (struct soc_enum *)kcontrol->private_value;
 	unsigned int reg = enum_ctrl->reg;
-	unsigned int sel = ucontrol->value.integer.value[0];
+	unsigned int sel = ucontrol->value.enumerated.item[0];
 	unsigned int bits;
 
 	switch (sel) {
@@ -368,13 +368,13 @@
 
 	switch (val) {
 	case DA732X_HPF_VOICE_EN:
-		ucontrol->value.integer.value[0] = DA732X_HPF_VOICE;
+		ucontrol->value.enumerated.item[0] = DA732X_HPF_VOICE;
 		break;
 	case DA732X_HPF_MUSIC_EN:
-		ucontrol->value.integer.value[0] = DA732X_HPF_MUSIC;
+		ucontrol->value.enumerated.item[0] = DA732X_HPF_MUSIC;
 		break;
 	default:
-		ucontrol->value.integer.value[0] = DA732X_HPF_DISABLED;
+		ucontrol->value.enumerated.item[0] = DA732X_HPF_DISABLED;
 		break;
 	}
 

diff --git a/sound/soc/codecs/max98088.c b/sound/soc/codecs/max98088.c
index 20dcc49..fc22804 100644
--- a/sound/soc/codecs/max98088.c
+++ b/sound/soc/codecs/max98088.c

@@ -1496,7 +1496,7 @@
        struct max98088_pdata *pdata = max98088->pdata;
        int channel = max98088_get_channel(codec, kcontrol->id.name);
        struct max98088_cdata *cdata;
-       int sel = ucontrol->value.integer.value[0];
+	int sel = ucontrol->value.enumerated.item[0];
 
        if (channel < 0)
 	       return channel;

diff --git a/sound/soc/codecs/max98095.c b/sound/soc/codecs/max98095.c
index 1fedac5..3577003 100644
--- a/sound/soc/codecs/max98095.c
+++ b/sound/soc/codecs/max98095.c

@@ -1499,7 +1499,7 @@
 	struct max98095_pdata *pdata = max98095->pdata;
 	int channel = max98095_get_eq_channel(kcontrol->id.name);
 	struct max98095_cdata *cdata;
-	unsigned int sel = ucontrol->value.integer.value[0];
+	unsigned int sel = ucontrol->value.enumerated.item[0];
 	struct max98095_eq_cfg *coef_set;
 	int fs, best, best_val, i;
 	int regmask, regsave;
@@ -1653,7 +1653,7 @@
 	struct max98095_pdata *pdata = max98095->pdata;
 	int channel = max98095_get_bq_channel(codec, kcontrol->id.name);
 	struct max98095_cdata *cdata;
-	unsigned int sel = ucontrol->value.integer.value[0];
+	unsigned int sel = ucontrol->value.enumerated.item[0];
 	struct max98095_biquad_cfg *coef_set;
 	int fs, best, best_val, i;
 	int regmask, regsave;

diff --git a/sound/soc/codecs/tlv320dac33.c b/sound/soc/codecs/tlv320dac33.c
index 781398fb..f7a6ce7 100644
--- a/sound/soc/codecs/tlv320dac33.c
+++ b/sound/soc/codecs/tlv320dac33.c

@@ -446,7 +446,7 @@
 	struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
 	struct tlv320dac33_priv *dac33 = snd_soc_codec_get_drvdata(codec);
 
-	ucontrol->value.integer.value[0] = dac33->fifo_mode;
+	ucontrol->value.enumerated.item[0] = dac33->fifo_mode;
 
 	return 0;
 }
@@ -458,17 +458,16 @@
 	struct tlv320dac33_priv *dac33 = snd_soc_codec_get_drvdata(codec);
 	int ret = 0;
 
-	if (dac33->fifo_mode == ucontrol->value.integer.value[0])
+	if (dac33->fifo_mode == ucontrol->value.enumerated.item[0])
 		return 0;
 	/* Do not allow changes while stream is running*/
 	if (snd_soc_codec_is_active(codec))
 		return -EPERM;
 
-	if (ucontrol->value.integer.value[0] < 0 ||
-	    ucontrol->value.integer.value[0] >= DAC33_FIFO_LAST_MODE)
+	if (ucontrol->value.enumerated.item[0] >= DAC33_FIFO_LAST_MODE)
 		ret = -EINVAL;
 	else
-		dac33->fifo_mode = ucontrol->value.integer.value[0];
+		dac33->fifo_mode = ucontrol->value.enumerated.item[0];
 
 	return ret;
 }

diff --git a/sound/soc/codecs/wl1273.c b/sound/soc/codecs/wl1273.c
index 7693c11..1b79778 100644
--- a/sound/soc/codecs/wl1273.c
+++ b/sound/soc/codecs/wl1273.c

@@ -175,7 +175,7 @@
 	struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
 	struct wl1273_priv *wl1273 = snd_soc_codec_get_drvdata(codec);
 
-	ucontrol->value.integer.value[0] = wl1273->mode;
+	ucontrol->value.enumerated.item[0] = wl1273->mode;
 
 	return 0;
 }
@@ -193,18 +193,17 @@
 	struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
 	struct wl1273_priv *wl1273 = snd_soc_codec_get_drvdata(codec);
 
-	if (wl1273->mode == ucontrol->value.integer.value[0])
+	if (wl1273->mode == ucontrol->value.enumerated.item[0])
 		return 0;
 
 	/* Do not allow changes while stream is running */
 	if (snd_soc_codec_is_active(codec))
 		return -EPERM;
 
-	if (ucontrol->value.integer.value[0] < 0 ||
-	    ucontrol->value.integer.value[0] >=  ARRAY_SIZE(wl1273_audio_route))
+	if (ucontrol->value.enumerated.item[0] >=  ARRAY_SIZE(wl1273_audio_route))
 		return -EINVAL;
 
-	wl1273->mode = ucontrol->value.integer.value[0];
+	wl1273->mode = ucontrol->value.enumerated.item[0];
 
 	return 1;
 }
@@ -219,7 +218,7 @@
 
 	dev_dbg(codec->dev, "%s: enter.\n", __func__);
 
-	ucontrol->value.integer.value[0] = wl1273->core->audio_mode;
+	ucontrol->value.enumerated.item[0] = wl1273->core->audio_mode;
 
 	return 0;
 }
@@ -233,7 +232,7 @@
 
 	dev_dbg(codec->dev, "%s: enter.\n", __func__);
 
-	val = ucontrol->value.integer.value[0];
+	val = ucontrol->value.enumerated.item[0];
 	if (wl1273->core->audio_mode == val)
 		return 0;
 

diff --git a/sound/soc/codecs/wm8753.c b/sound/soc/codecs/wm8753.c
index 61299ca..6f1024f 100644
--- a/sound/soc/codecs/wm8753.c
+++ b/sound/soc/codecs/wm8753.c

@@ -233,7 +233,7 @@
 	struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
 	struct wm8753_priv *wm8753 = snd_soc_codec_get_drvdata(codec);
 
-	ucontrol->value.integer.value[0] = wm8753->dai_func;
+	ucontrol->value.enumerated.item[0] = wm8753->dai_func;
 	return 0;
 }
 
@@ -244,7 +244,7 @@
 	struct wm8753_priv *wm8753 = snd_soc_codec_get_drvdata(codec);
 	u16 ioctl;
 
-	if (wm8753->dai_func == ucontrol->value.integer.value[0])
+	if (wm8753->dai_func == ucontrol->value.enumerated.item[0])
 		return 0;
 
 	if (snd_soc_codec_is_active(codec))
@@ -252,7 +252,7 @@
 
 	ioctl = snd_soc_read(codec, WM8753_IOCTL);
 
-	wm8753->dai_func = ucontrol->value.integer.value[0];
+	wm8753->dai_func = ucontrol->value.enumerated.item[0];
 
 	if (((ioctl >> 2) & 0x3) == wm8753->dai_func)
 		return 1;

diff --git a/sound/soc/codecs/wm8904.c b/sound/soc/codecs/wm8904.c
index 8172e49..edd7a77 100644
--- a/sound/soc/codecs/wm8904.c
+++ b/sound/soc/codecs/wm8904.c

@@ -396,7 +396,7 @@
 	struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
 	struct wm8904_priv *wm8904 = snd_soc_codec_get_drvdata(codec);
 	struct wm8904_pdata *pdata = wm8904->pdata;
-	int value = ucontrol->value.integer.value[0];
+	int value = ucontrol->value.enumerated.item[0];
 
 	if (value >= pdata->num_drc_cfgs)
 		return -EINVAL;
@@ -467,7 +467,7 @@
 	struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
 	struct wm8904_priv *wm8904 = snd_soc_codec_get_drvdata(codec);
 	struct wm8904_pdata *pdata = wm8904->pdata;
-	int value = ucontrol->value.integer.value[0];
+	int value = ucontrol->value.enumerated.item[0];
 
 	if (value >= pdata->num_retune_mobile_cfgs)
 		return -EINVAL;

diff --git a/sound/soc/codecs/wm8958-dsp2.c b/sound/soc/codecs/wm8958-dsp2.c
index c799cca..6b864c0 100644
--- a/sound/soc/codecs/wm8958-dsp2.c
+++ b/sound/soc/codecs/wm8958-dsp2.c

@@ -459,7 +459,7 @@
 	struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
 	struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec);
 	struct wm8994 *control = wm8994->wm8994;
-	int value = ucontrol->value.integer.value[0];
+	int value = ucontrol->value.enumerated.item[0];
 	int reg;
 
 	/* Don't allow on the fly reconfiguration */
@@ -549,7 +549,7 @@
 	struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
 	struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec);
 	struct wm8994 *control = wm8994->wm8994;
-	int value = ucontrol->value.integer.value[0];
+	int value = ucontrol->value.enumerated.item[0];
 	int reg;
 
 	/* Don't allow on the fly reconfiguration */
@@ -582,7 +582,7 @@
 	struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
 	struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec);
 	struct wm8994 *control = wm8994->wm8994;
-	int value = ucontrol->value.integer.value[0];
+	int value = ucontrol->value.enumerated.item[0];
 	int reg;
 
 	/* Don't allow on the fly reconfiguration */
@@ -749,7 +749,7 @@
 	struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
 	struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec);
 	struct wm8994 *control = wm8994->wm8994;
-	int value = ucontrol->value.integer.value[0];
+	int value = ucontrol->value.enumerated.item[0];
 	int reg;
 
 	/* Don't allow on the fly reconfiguration */

diff --git a/sound/soc/codecs/wm8983.c b/sound/soc/codecs/wm8983.c
index 7350ff6..0c002a5 100644
--- a/sound/soc/codecs/wm8983.c
+++ b/sound/soc/codecs/wm8983.c

@@ -497,9 +497,9 @@
 
 	reg = snd_soc_read(codec, WM8983_EQ1_LOW_SHELF);
 	if (reg & WM8983_EQ3DMODE)
-		ucontrol->value.integer.value[0] = 1;
+		ucontrol->value.enumerated.item[0] = 1;
 	else
-		ucontrol->value.integer.value[0] = 0;
+		ucontrol->value.enumerated.item[0] = 0;
 
 	return 0;
 }
@@ -511,18 +511,18 @@
 	unsigned int regpwr2, regpwr3;
 	unsigned int reg_eq;
 
-	if (ucontrol->value.integer.value[0] != 0
-	    && ucontrol->value.integer.value[0] != 1)
+	if (ucontrol->value.enumerated.item[0] != 0
+	    && ucontrol->value.enumerated.item[0] != 1)
 		return -EINVAL;
 
 	reg_eq = snd_soc_read(codec, WM8983_EQ1_LOW_SHELF);
 	switch ((reg_eq & WM8983_EQ3DMODE) >> WM8983_EQ3DMODE_SHIFT) {
 	case 0:
-		if (!ucontrol->value.integer.value[0])
+		if (!ucontrol->value.enumerated.item[0])
 			return 0;
 		break;
 	case 1:
-		if (ucontrol->value.integer.value[0])
+		if (ucontrol->value.enumerated.item[0])
 			return 0;
 		break;
 	}
@@ -537,7 +537,7 @@
 	/* set the desired eqmode */
 	snd_soc_update_bits(codec, WM8983_EQ1_LOW_SHELF,
 			    WM8983_EQ3DMODE_MASK,
-			    ucontrol->value.integer.value[0]
+			    ucontrol->value.enumerated.item[0]
 			    << WM8983_EQ3DMODE_SHIFT);
 	/* restore DAC/ADC configuration */
 	snd_soc_write(codec, WM8983_POWER_MANAGEMENT_2, regpwr2);

diff --git a/sound/soc/codecs/wm8985.c b/sound/soc/codecs/wm8985.c
index 9918152..6ac76fe 100644
--- a/sound/soc/codecs/wm8985.c
+++ b/sound/soc/codecs/wm8985.c

@@ -531,9 +531,9 @@
 
 	reg = snd_soc_read(codec, WM8985_EQ1_LOW_SHELF);
 	if (reg & WM8985_EQ3DMODE)
-		ucontrol->value.integer.value[0] = 1;
+		ucontrol->value.enumerated.item[0] = 1;
 	else
-		ucontrol->value.integer.value[0] = 0;
+		ucontrol->value.enumerated.item[0] = 0;
 
 	return 0;
 }
@@ -545,18 +545,18 @@
 	unsigned int regpwr2, regpwr3;
 	unsigned int reg_eq;
 
-	if (ucontrol->value.integer.value[0] != 0
-			&& ucontrol->value.integer.value[0] != 1)
+	if (ucontrol->value.enumerated.item[0] != 0
+			&& ucontrol->value.enumerated.item[0] != 1)
 		return -EINVAL;
 
 	reg_eq = snd_soc_read(codec, WM8985_EQ1_LOW_SHELF);
 	switch ((reg_eq & WM8985_EQ3DMODE) >> WM8985_EQ3DMODE_SHIFT) {
 	case 0:
-		if (!ucontrol->value.integer.value[0])
+		if (!ucontrol->value.enumerated.item[0])
 			return 0;
 		break;
 	case 1:
-		if (ucontrol->value.integer.value[0])
+		if (ucontrol->value.enumerated.item[0])
 			return 0;
 		break;
 	}
@@ -573,7 +573,7 @@
 	/* set the desired eqmode */
 	snd_soc_update_bits(codec, WM8985_EQ1_LOW_SHELF,
 			    WM8985_EQ3DMODE_MASK,
-			    ucontrol->value.integer.value[0]
+			    ucontrol->value.enumerated.item[0]
 			    << WM8985_EQ3DMODE_SHIFT);
 	/* restore DAC/ADC configuration */
 	snd_soc_write(codec, WM8985_POWER_MANAGEMENT_2, regpwr2);

diff --git a/sound/soc/codecs/wm8994.c b/sound/soc/codecs/wm8994.c
index 2ccbb32..a18aecb 100644
--- a/sound/soc/codecs/wm8994.c
+++ b/sound/soc/codecs/wm8994.c

@@ -362,7 +362,7 @@
 	struct wm8994 *control = wm8994->wm8994;
 	struct wm8994_pdata *pdata = &control->pdata;
 	int drc = wm8994_get_drc(kcontrol->id.name);
-	int value = ucontrol->value.integer.value[0];
+	int value = ucontrol->value.enumerated.item[0];
 
 	if (drc < 0)
 		return drc;
@@ -469,7 +469,7 @@
 	struct wm8994 *control = wm8994->wm8994;
 	struct wm8994_pdata *pdata = &control->pdata;
 	int block = wm8994_get_retune_mobile_block(kcontrol->id.name);
-	int value = ucontrol->value.integer.value[0];
+	int value = ucontrol->value.enumerated.item[0];
 
 	if (block < 0)
 		return block;

diff --git a/sound/soc/codecs/wm8996.c b/sound/soc/codecs/wm8996.c
index 8d7d6c0..f99b34f 100644
--- a/sound/soc/codecs/wm8996.c
+++ b/sound/soc/codecs/wm8996.c

@@ -416,7 +416,7 @@
 	struct wm8996_priv *wm8996 = snd_soc_codec_get_drvdata(codec);
 	struct wm8996_pdata *pdata = &wm8996->pdata;
 	int block = wm8996_get_retune_mobile_block(kcontrol->id.name);
-	int value = ucontrol->value.integer.value[0];
+	int value = ucontrol->value.enumerated.item[0];
 
 	if (block < 0)
 		return block;

diff --git a/sound/soc/codecs/wm9081.c b/sound/soc/codecs/wm9081.c
index ccb3b15..363b3b6 100644
--- a/sound/soc/codecs/wm9081.c
+++ b/sound/soc/codecs/wm9081.c

@@ -344,9 +344,9 @@
 
 	reg = snd_soc_read(codec, WM9081_ANALOGUE_SPEAKER_2);
 	if (reg & WM9081_SPK_MODE)
-		ucontrol->value.integer.value[0] = 1;
+		ucontrol->value.enumerated.item[0] = 1;
 	else
-		ucontrol->value.integer.value[0] = 0;
+		ucontrol->value.enumerated.item[0] = 0;
 
 	return 0;
 }
@@ -365,7 +365,7 @@
 	unsigned int reg2 = snd_soc_read(codec, WM9081_ANALOGUE_SPEAKER_2);
 
 	/* Are we changing anything? */
-	if (ucontrol->value.integer.value[0] ==
+	if (ucontrol->value.enumerated.item[0] ==
 	    ((reg2 & WM9081_SPK_MODE) != 0))
 		return 0;
 
@@ -373,7 +373,7 @@
 	if (reg_pwr & WM9081_SPK_ENA)
 		return -EINVAL;
 
-	if (ucontrol->value.integer.value[0]) {
+	if (ucontrol->value.enumerated.item[0]) {
 		/* Class AB */
 		reg2 &= ~(WM9081_SPK_INV_MUTE | WM9081_OUT_SPK_CTRL);
 		reg2 |= WM9081_SPK_MODE;

diff --git a/sound/soc/codecs/wm9713.c b/sound/soc/codecs/wm9713.c
index 79e1436..9849643 100644
--- a/sound/soc/codecs/wm9713.c
+++ b/sound/soc/codecs/wm9713.c

@@ -1212,7 +1212,7 @@
 	if (IS_ERR(wm9713->ac97))
 		return PTR_ERR(wm9713->ac97);
 
-	regmap = devm_regmap_init_ac97(wm9713->ac97, &wm9713_regmap_config);
+	regmap = regmap_init_ac97(wm9713->ac97, &wm9713_regmap_config);
 	if (IS_ERR(regmap)) {
 		snd_soc_free_ac97_codec(wm9713->ac97);
 		return PTR_ERR(regmap);

diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c
index 33806d4..b9195b9 100644
--- a/sound/soc/codecs/wm_adsp.c
+++ b/sound/soc/codecs/wm_adsp.c

@@ -586,7 +586,7 @@
 	struct soc_enum *e = (struct soc_enum *)kcontrol->private_value;
 	struct wm_adsp *dsp = snd_soc_codec_get_drvdata(codec);
 
-	ucontrol->value.integer.value[0] = dsp[e->shift_l].fw;
+	ucontrol->value.enumerated.item[0] = dsp[e->shift_l].fw;
 
 	return 0;
 }
@@ -599,10 +599,10 @@
 	struct wm_adsp *dsp = snd_soc_codec_get_drvdata(codec);
 	int ret = 0;
 
-	if (ucontrol->value.integer.value[0] == dsp[e->shift_l].fw)
+	if (ucontrol->value.enumerated.item[0] == dsp[e->shift_l].fw)
 		return 0;
 
-	if (ucontrol->value.integer.value[0] >= WM_ADSP_NUM_FW)
+	if (ucontrol->value.enumerated.item[0] >= WM_ADSP_NUM_FW)
 		return -EINVAL;
 
 	mutex_lock(&dsp[e->shift_l].pwr_lock);
@@ -610,7 +610,7 @@
 	if (dsp[e->shift_l].running || dsp[e->shift_l].compr)
 		ret = -EBUSY;
 	else
-		dsp[e->shift_l].fw = ucontrol->value.integer.value[0];
+		dsp[e->shift_l].fw = ucontrol->value.enumerated.item[0];
 
 	mutex_unlock(&dsp[e->shift_l].pwr_lock);
 

diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c
index ed8de10..40dfd8a 100644
--- a/sound/soc/fsl/fsl_ssi.c
+++ b/sound/soc/fsl/fsl_ssi.c

@@ -112,6 +112,20 @@
 	struct fsl_ssi_reg_val tx;
 };
 
+static const struct reg_default fsl_ssi_reg_defaults[] = {
+	{CCSR_SSI_SCR,     0x00000000},
+	{CCSR_SSI_SIER,    0x00003003},
+	{CCSR_SSI_STCR,    0x00000200},
+	{CCSR_SSI_SRCR,    0x00000200},
+	{CCSR_SSI_STCCR,   0x00040000},
+	{CCSR_SSI_SRCCR,   0x00040000},
+	{CCSR_SSI_SACNT,   0x00000000},
+	{CCSR_SSI_STMSK,   0x00000000},
+	{CCSR_SSI_SRMSK,   0x00000000},
+	{CCSR_SSI_SACCEN,  0x00000000},
+	{CCSR_SSI_SACCDIS, 0x00000000},
+};
+
 static bool fsl_ssi_readable_reg(struct device *dev, unsigned int reg)
 {
 	switch (reg) {
@@ -176,7 +190,8 @@
 	.val_bits = 32,
 	.reg_stride = 4,
 	.val_format_endian = REGMAP_ENDIAN_NATIVE,
-	.num_reg_defaults_raw = CCSR_SSI_SACCDIS / sizeof(uint32_t) + 1,
+	.reg_defaults = fsl_ssi_reg_defaults,
+	.num_reg_defaults = ARRAY_SIZE(fsl_ssi_reg_defaults),
 	.readable_reg = fsl_ssi_readable_reg,
 	.volatile_reg = fsl_ssi_volatile_reg,
 	.precious_reg = fsl_ssi_precious_reg,
@@ -186,7 +201,6 @@
 
 struct fsl_ssi_soc_data {
 	bool imx;
-	bool imx21regs; /* imx21-class SSI - no SACC{ST,EN,DIS} regs */
 	bool offline_config;
 	u32 sisr_write_mask;
 };
@@ -289,7 +303,6 @@
 
 static struct fsl_ssi_soc_data fsl_ssi_imx21 = {
 	.imx = true,
-	.imx21regs = true,
 	.offline_config = true,
 	.sisr_write_mask = 0,
 };
@@ -573,12 +586,8 @@
 	 */
 	regmap_write(regs, CCSR_SSI_SACNT,
 			CCSR_SSI_SACNT_AC97EN | CCSR_SSI_SACNT_FV);
-
-	/* no SACC{ST,EN,DIS} regs on imx21-class SSI */
-	if (!ssi_private->soc->imx21regs) {
-		regmap_write(regs, CCSR_SSI_SACCDIS, 0xff);
-		regmap_write(regs, CCSR_SSI_SACCEN, 0x300);
-	}
+	regmap_write(regs, CCSR_SSI_SACCDIS, 0xff);
+	regmap_write(regs, CCSR_SSI_SACCEN, 0x300);
 
 	/*
 	 * Enable SSI, Transmit and Receive. AC97 has to communicate with the
@@ -1388,7 +1397,6 @@
 	struct resource *res;
 	void __iomem *iomem;
 	char name[64];
-	struct regmap_config regconfig = fsl_ssi_regconfig;
 
 	of_id = of_match_device(fsl_ssi_ids, &pdev->dev);
 	if (!of_id || !of_id->data)
@@ -1436,25 +1444,15 @@
 		return PTR_ERR(iomem);
 	ssi_private->ssi_phys = res->start;
 
-	if (ssi_private->soc->imx21regs) {
-		/*
-		 * According to datasheet imx21-class SSI
-		 * don't have SACC{ST,EN,DIS} regs.
-		 */
-		regconfig.max_register = CCSR_SSI_SRMSK;
-		regconfig.num_reg_defaults_raw =
-			CCSR_SSI_SRMSK / sizeof(uint32_t) + 1;
-	}
-
 	ret = of_property_match_string(np, "clock-names", "ipg");
 	if (ret < 0) {
 		ssi_private->has_ipg_clk_name = false;
 		ssi_private->regs = devm_regmap_init_mmio(&pdev->dev, iomem,
-			&regconfig);
+			&fsl_ssi_regconfig);
 	} else {
 		ssi_private->has_ipg_clk_name = true;
 		ssi_private->regs = devm_regmap_init_mmio_clk(&pdev->dev,
-			"ipg", iomem, &regconfig);
+			"ipg", iomem, &fsl_ssi_regconfig);
 	}
 	if (IS_ERR(ssi_private->regs)) {
 		dev_err(&pdev->dev, "Failed to init register map\n");

diff --git a/sound/soc/fsl/imx-pcm-fiq.c b/sound/soc/fsl/imx-pcm-fiq.c
index 49d7513..e63cd5e 100644
--- a/sound/soc/fsl/imx-pcm-fiq.c
+++ b/sound/soc/fsl/imx-pcm-fiq.c

@@ -217,8 +217,8 @@
 	struct snd_pcm_runtime *runtime = substream->runtime;
 	int ret;
 
-	ret = dma_mmap_writecombine(substream->pcm->card->dev, vma,
-		runtime->dma_area, runtime->dma_addr, runtime->dma_bytes);
+	ret = dma_mmap_wc(substream->pcm->card->dev, vma, runtime->dma_area,
+			  runtime->dma_addr, runtime->dma_bytes);
 
 	pr_debug("%s: ret: %d %p %pad 0x%08x\n", __func__, ret,
 			runtime->dma_area,
@@ -247,8 +247,7 @@
 	buf->dev.type = SNDRV_DMA_TYPE_DEV;
 	buf->dev.dev = pcm->card->dev;
 	buf->private_data = NULL;
-	buf->area = dma_alloc_writecombine(pcm->card->dev, size,
-					   &buf->addr, GFP_KERNEL);
+	buf->area = dma_alloc_wc(pcm->card->dev, size, &buf->addr, GFP_KERNEL);
 	if (!buf->area)
 		return -ENOMEM;
 	buf->bytes = size;
@@ -330,8 +329,7 @@
 		if (!buf->area)
 			continue;
 
-		dma_free_writecombine(pcm->card->dev, buf->bytes,
-				      buf->area, buf->addr);
+		dma_free_wc(pcm->card->dev, buf->bytes, buf->area, buf->addr);
 		buf->area = NULL;
 	}
 }

diff --git a/sound/soc/intel/boards/cht_bsw_rt5645.c b/sound/soc/intel/boards/cht_bsw_rt5645.c
index 2d3afdd..a7b96a9 100644
--- a/sound/soc/intel/boards/cht_bsw_rt5645.c
+++ b/sound/soc/intel/boards/cht_bsw_rt5645.c

@@ -367,8 +367,12 @@
 	}
 	card->dev = &pdev->dev;
 	sprintf(codec_name, "i2c-%s:00", drv->acpi_card->codec_id);
+
 	/* set correct codec name */
-	strcpy((char *)card->dai_link[2].codec_name, codec_name);
+	for (i = 0; i < ARRAY_SIZE(cht_dailink); i++)
+		if (!strcmp(card->dai_link[i].codec_name, "i2c-10EC5645:00"))
+			card->dai_link[i].codec_name = kstrdup(codec_name, GFP_KERNEL);
+
 	snd_soc_card_set_drvdata(card, drv);
 	ret_val = devm_snd_soc_register_card(&pdev->dev, card);
 	if (ret_val) {

diff --git a/sound/soc/intel/boards/mfld_machine.c b/sound/soc/intel/boards/mfld_machine.c
index 49c09a0..34f46c7 100644
--- a/sound/soc/intel/boards/mfld_machine.c
+++ b/sound/soc/intel/boards/mfld_machine.c

@@ -94,7 +94,7 @@
 static int headset_get_switch(struct snd_kcontrol *kcontrol,
 	struct snd_ctl_elem_value *ucontrol)
 {
-	ucontrol->value.integer.value[0] = hs_switch;
+	ucontrol->value.enumerated.item[0] = hs_switch;
 	return 0;
 }
 
@@ -104,12 +104,12 @@
 	struct snd_soc_card *card = snd_kcontrol_chip(kcontrol);
 	struct snd_soc_dapm_context *dapm = &card->dapm;
 
-	if (ucontrol->value.integer.value[0] == hs_switch)
+	if (ucontrol->value.enumerated.item[0] == hs_switch)
 		return 0;
 
 	snd_soc_dapm_mutex_lock(dapm);
 
-	if (ucontrol->value.integer.value[0]) {
+	if (ucontrol->value.enumerated.item[0]) {
 		pr_debug("hs_set HS path\n");
 		snd_soc_dapm_enable_pin_unlocked(dapm, "Headphones");
 		snd_soc_dapm_disable_pin_unlocked(dapm, "EPOUT");
@@ -123,7 +123,7 @@
 
 	snd_soc_dapm_mutex_unlock(dapm);
 
-	hs_switch = ucontrol->value.integer.value[0];
+	hs_switch = ucontrol->value.enumerated.item[0];
 
 	return 0;
 }
@@ -148,7 +148,7 @@
 static int lo_get_switch(struct snd_kcontrol *kcontrol,
 	struct snd_ctl_elem_value *ucontrol)
 {
-	ucontrol->value.integer.value[0] = lo_dac;
+	ucontrol->value.enumerated.item[0] = lo_dac;
 	return 0;
 }
 
@@ -158,7 +158,7 @@
 	struct snd_soc_card *card = snd_kcontrol_chip(kcontrol);
 	struct snd_soc_dapm_context *dapm = &card->dapm;
 
-	if (ucontrol->value.integer.value[0] == lo_dac)
+	if (ucontrol->value.enumerated.item[0] == lo_dac)
 		return 0;
 
 	snd_soc_dapm_mutex_lock(dapm);
@@ -168,7 +168,7 @@
 	 */
 	lo_enable_out_pins(dapm);
 
-	switch (ucontrol->value.integer.value[0]) {
+	switch (ucontrol->value.enumerated.item[0]) {
 	case 0:
 		pr_debug("set vibra path\n");
 		snd_soc_dapm_disable_pin_unlocked(dapm, "VIB1OUT");
@@ -202,7 +202,7 @@
 
 	snd_soc_dapm_mutex_unlock(dapm);
 
-	lo_dac = ucontrol->value.integer.value[0];
+	lo_dac = ucontrol->value.enumerated.item[0];
 	return 0;
 }
 

diff --git a/sound/soc/intel/skylake/skl-topology.c b/sound/soc/intel/skylake/skl-topology.c
index a294fee..5a4837d 100644
--- a/sound/soc/intel/skylake/skl-topology.c
+++ b/sound/soc/intel/skylake/skl-topology.c

@@ -978,7 +978,7 @@
 				return -EFAULT;
 		} else {
 			if (copy_from_user(ac->params,
-					   data + 2 * sizeof(u32), size))
+					   data + 2, size))
 				return -EFAULT;
 		}
 

diff --git a/sound/soc/nuc900/nuc900-pcm.c b/sound/soc/nuc900/nuc900-pcm.c
index e093261..2cca055 100644
--- a/sound/soc/nuc900/nuc900-pcm.c
+++ b/sound/soc/nuc900/nuc900-pcm.c

@@ -267,10 +267,8 @@
 {
 	struct snd_pcm_runtime *runtime = substream->runtime;
 
-	return dma_mmap_writecombine(substream->pcm->card->dev, vma,
-					runtime->dma_area,
-					runtime->dma_addr,
-					runtime->dma_bytes);
+	return dma_mmap_wc(substream->pcm->card->dev, vma, runtime->dma_area,
+			   runtime->dma_addr, runtime->dma_bytes);
 }
 
 static struct snd_pcm_ops nuc900_dma_ops = {

diff --git a/sound/soc/omap/n810.c b/sound/soc/omap/n810.c
index 190f868..fdecb70 100644
--- a/sound/soc/omap/n810.c
+++ b/sound/soc/omap/n810.c

@@ -133,7 +133,7 @@
 static int n810_get_spk(struct snd_kcontrol *kcontrol,
 			struct snd_ctl_elem_value *ucontrol)
 {
-	ucontrol->value.integer.value[0] = n810_spk_func;
+	ucontrol->value.enumerated.item[0] = n810_spk_func;
 
 	return 0;
 }
@@ -143,10 +143,10 @@
 {
 	struct snd_soc_card *card =  snd_kcontrol_chip(kcontrol);
 
-	if (n810_spk_func == ucontrol->value.integer.value[0])
+	if (n810_spk_func == ucontrol->value.enumerated.item[0])
 		return 0;
 
-	n810_spk_func = ucontrol->value.integer.value[0];
+	n810_spk_func = ucontrol->value.enumerated.item[0];
 	n810_ext_control(&card->dapm);
 
 	return 1;
@@ -155,7 +155,7 @@
 static int n810_get_jack(struct snd_kcontrol *kcontrol,
 			 struct snd_ctl_elem_value *ucontrol)
 {
-	ucontrol->value.integer.value[0] = n810_jack_func;
+	ucontrol->value.enumerated.item[0] = n810_jack_func;
 
 	return 0;
 }
@@ -165,10 +165,10 @@
 {
 	struct snd_soc_card *card =  snd_kcontrol_chip(kcontrol);
 
-	if (n810_jack_func == ucontrol->value.integer.value[0])
+	if (n810_jack_func == ucontrol->value.enumerated.item[0])
 		return 0;
 
-	n810_jack_func = ucontrol->value.integer.value[0];
+	n810_jack_func = ucontrol->value.enumerated.item[0];
 	n810_ext_control(&card->dapm);
 
 	return 1;
@@ -177,7 +177,7 @@
 static int n810_get_input(struct snd_kcontrol *kcontrol,
 			  struct snd_ctl_elem_value *ucontrol)
 {
-	ucontrol->value.integer.value[0] = n810_dmic_func;
+	ucontrol->value.enumerated.item[0] = n810_dmic_func;
 
 	return 0;
 }
@@ -187,10 +187,10 @@
 {
 	struct snd_soc_card *card =  snd_kcontrol_chip(kcontrol);
 
-	if (n810_dmic_func == ucontrol->value.integer.value[0])
+	if (n810_dmic_func == ucontrol->value.enumerated.item[0])
 		return 0;
 
-	n810_dmic_func = ucontrol->value.integer.value[0];
+	n810_dmic_func = ucontrol->value.enumerated.item[0];
 	n810_ext_control(&card->dapm);
 
 	return 1;

diff --git a/sound/soc/omap/omap-pcm.c b/sound/soc/omap/omap-pcm.c
index 6bb623a..99381a2 100644
--- a/sound/soc/omap/omap-pcm.c
+++ b/sound/soc/omap/omap-pcm.c

@@ -156,10 +156,8 @@
 {
 	struct snd_pcm_runtime *runtime = substream->runtime;
 
-	return dma_mmap_writecombine(substream->pcm->card->dev, vma,
-				     runtime->dma_area,
-				     runtime->dma_addr,
-				     runtime->dma_bytes);
+	return dma_mmap_wc(substream->pcm->card->dev, vma, runtime->dma_area,
+			   runtime->dma_addr, runtime->dma_bytes);
 }
 
 static struct snd_pcm_ops omap_pcm_ops = {
@@ -183,8 +181,7 @@
 	buf->dev.type = SNDRV_DMA_TYPE_DEV;
 	buf->dev.dev = pcm->card->dev;
 	buf->private_data = NULL;
-	buf->area = dma_alloc_writecombine(pcm->card->dev, size,
-					   &buf->addr, GFP_KERNEL);
+	buf->area = dma_alloc_wc(pcm->card->dev, size, &buf->addr, GFP_KERNEL);
 	if (!buf->area)
 		return -ENOMEM;
 
@@ -207,8 +204,7 @@
 		if (!buf->area)
 			continue;
 
-		dma_free_writecombine(pcm->card->dev, buf->bytes,
-				      buf->area, buf->addr);
+		dma_free_wc(pcm->card->dev, buf->bytes, buf->area, buf->addr);
 		buf->area = NULL;
 	}
 }

diff --git a/sound/soc/omap/rx51.c b/sound/soc/omap/rx51.c
index 5e21f08..5494924 100644
--- a/sound/soc/omap/rx51.c
+++ b/sound/soc/omap/rx51.c

@@ -132,7 +132,7 @@
 static int rx51_get_spk(struct snd_kcontrol *kcontrol,
 			struct snd_ctl_elem_value *ucontrol)
 {
-	ucontrol->value.integer.value[0] = rx51_spk_func;
+	ucontrol->value.enumerated.item[0] = rx51_spk_func;
 
 	return 0;
 }
@@ -142,10 +142,10 @@
 {
 	struct snd_soc_card *card = snd_kcontrol_chip(kcontrol);
 
-	if (rx51_spk_func == ucontrol->value.integer.value[0])
+	if (rx51_spk_func == ucontrol->value.enumerated.item[0])
 		return 0;
 
-	rx51_spk_func = ucontrol->value.integer.value[0];
+	rx51_spk_func = ucontrol->value.enumerated.item[0];
 	rx51_ext_control(&card->dapm);
 
 	return 1;
@@ -180,7 +180,7 @@
 static int rx51_get_input(struct snd_kcontrol *kcontrol,
 			  struct snd_ctl_elem_value *ucontrol)
 {
-	ucontrol->value.integer.value[0] = rx51_dmic_func;
+	ucontrol->value.enumerated.item[0] = rx51_dmic_func;
 
 	return 0;
 }
@@ -190,10 +190,10 @@
 {
 	struct snd_soc_card *card = snd_kcontrol_chip(kcontrol);
 
-	if (rx51_dmic_func == ucontrol->value.integer.value[0])
+	if (rx51_dmic_func == ucontrol->value.enumerated.item[0])
 		return 0;
 
-	rx51_dmic_func = ucontrol->value.integer.value[0];
+	rx51_dmic_func = ucontrol->value.enumerated.item[0];
 	rx51_ext_control(&card->dapm);
 
 	return 1;
@@ -202,7 +202,7 @@
 static int rx51_get_jack(struct snd_kcontrol *kcontrol,
 			 struct snd_ctl_elem_value *ucontrol)
 {
-	ucontrol->value.integer.value[0] = rx51_jack_func;
+	ucontrol->value.enumerated.item[0] = rx51_jack_func;
 
 	return 0;
 }
@@ -212,10 +212,10 @@
 {
 	struct snd_soc_card *card = snd_kcontrol_chip(kcontrol);
 
-	if (rx51_jack_func == ucontrol->value.integer.value[0])
+	if (rx51_jack_func == ucontrol->value.enumerated.item[0])
 		return 0;
 
-	rx51_jack_func = ucontrol->value.integer.value[0];
+	rx51_jack_func = ucontrol->value.enumerated.item[0];
 	rx51_ext_control(&card->dapm);
 
 	return 1;

diff --git a/sound/soc/pxa/corgi.c b/sound/soc/pxa/corgi.c
index c97dc13..dcbb7aa 100644
--- a/sound/soc/pxa/corgi.c
+++ b/sound/soc/pxa/corgi.c

@@ -163,7 +163,7 @@
 static int corgi_get_jack(struct snd_kcontrol *kcontrol,
 	struct snd_ctl_elem_value *ucontrol)
 {
-	ucontrol->value.integer.value[0] = corgi_jack_func;
+	ucontrol->value.enumerated.item[0] = corgi_jack_func;
 	return 0;
 }
 
@@ -172,10 +172,10 @@
 {
 	struct snd_soc_card *card = snd_kcontrol_chip(kcontrol);
 
-	if (corgi_jack_func == ucontrol->value.integer.value[0])
+	if (corgi_jack_func == ucontrol->value.enumerated.item[0])
 		return 0;
 
-	corgi_jack_func = ucontrol->value.integer.value[0];
+	corgi_jack_func = ucontrol->value.enumerated.item[0];
 	corgi_ext_control(&card->dapm);
 	return 1;
 }
@@ -183,7 +183,7 @@
 static int corgi_get_spk(struct snd_kcontrol *kcontrol,
 	struct snd_ctl_elem_value *ucontrol)
 {
-	ucontrol->value.integer.value[0] = corgi_spk_func;
+	ucontrol->value.enumerated.item[0] = corgi_spk_func;
 	return 0;
 }
 
@@ -192,10 +192,10 @@
 {
 	struct snd_soc_card *card =  snd_kcontrol_chip(kcontrol);
 
-	if (corgi_spk_func == ucontrol->value.integer.value[0])
+	if (corgi_spk_func == ucontrol->value.enumerated.item[0])
 		return 0;
 
-	corgi_spk_func = ucontrol->value.integer.value[0];
+	corgi_spk_func = ucontrol->value.enumerated.item[0];
 	corgi_ext_control(&card->dapm);
 	return 1;
 }

diff --git a/sound/soc/pxa/magician.c b/sound/soc/pxa/magician.c
index 241d0be..62b8377 100644
--- a/sound/soc/pxa/magician.c
+++ b/sound/soc/pxa/magician.c

@@ -308,17 +308,17 @@
 static int magician_get_input(struct snd_kcontrol *kcontrol,
 			      struct snd_ctl_elem_value *ucontrol)
 {
-	ucontrol->value.integer.value[0] = magician_in_sel;
+	ucontrol->value.enumerated.item[0] = magician_in_sel;
 	return 0;
 }
 
 static int magician_set_input(struct snd_kcontrol *kcontrol,
 			      struct snd_ctl_elem_value *ucontrol)
 {
-	if (magician_in_sel == ucontrol->value.integer.value[0])
+	if (magician_in_sel == ucontrol->value.enumerated.item[0])
 		return 0;
 
-	magician_in_sel = ucontrol->value.integer.value[0];
+	magician_in_sel = ucontrol->value.enumerated.item[0];
 
 	switch (magician_in_sel) {
 	case MAGICIAN_MIC:

diff --git a/sound/soc/pxa/poodle.c b/sound/soc/pxa/poodle.c
index 84d0e2e..4b3b714 100644
--- a/sound/soc/pxa/poodle.c
+++ b/sound/soc/pxa/poodle.c

@@ -138,7 +138,7 @@
 static int poodle_get_jack(struct snd_kcontrol *kcontrol,
 	struct snd_ctl_elem_value *ucontrol)
 {
-	ucontrol->value.integer.value[0] = poodle_jack_func;
+	ucontrol->value.enumerated.item[0] = poodle_jack_func;
 	return 0;
 }
 
@@ -147,10 +147,10 @@
 {
 	struct snd_soc_card *card =  snd_kcontrol_chip(kcontrol);
 
-	if (poodle_jack_func == ucontrol->value.integer.value[0])
+	if (poodle_jack_func == ucontrol->value.enumerated.item[0])
 		return 0;
 
-	poodle_jack_func = ucontrol->value.integer.value[0];
+	poodle_jack_func = ucontrol->value.enumerated.item[0];
 	poodle_ext_control(&card->dapm);
 	return 1;
 }
@@ -158,7 +158,7 @@
 static int poodle_get_spk(struct snd_kcontrol *kcontrol,
 	struct snd_ctl_elem_value *ucontrol)
 {
-	ucontrol->value.integer.value[0] = poodle_spk_func;
+	ucontrol->value.enumerated.item[0] = poodle_spk_func;
 	return 0;
 }
 
@@ -167,10 +167,10 @@
 {
 	struct snd_soc_card *card =  snd_kcontrol_chip(kcontrol);
 
-	if (poodle_spk_func == ucontrol->value.integer.value[0])
+	if (poodle_spk_func == ucontrol->value.enumerated.item[0])
 		return 0;
 
-	poodle_spk_func = ucontrol->value.integer.value[0];
+	poodle_spk_func = ucontrol->value.enumerated.item[0];
 	poodle_ext_control(&card->dapm);
 	return 1;
 }

diff --git a/sound/soc/pxa/spitz.c b/sound/soc/pxa/spitz.c
index b002226..0e02634 100644
--- a/sound/soc/pxa/spitz.c
+++ b/sound/soc/pxa/spitz.c

@@ -164,7 +164,7 @@
 static int spitz_get_jack(struct snd_kcontrol *kcontrol,
 	struct snd_ctl_elem_value *ucontrol)
 {
-	ucontrol->value.integer.value[0] = spitz_jack_func;
+	ucontrol->value.enumerated.item[0] = spitz_jack_func;
 	return 0;
 }
 
@@ -173,10 +173,10 @@
 {
 	struct snd_soc_card *card = snd_kcontrol_chip(kcontrol);
 
-	if (spitz_jack_func == ucontrol->value.integer.value[0])
+	if (spitz_jack_func == ucontrol->value.enumerated.item[0])
 		return 0;
 
-	spitz_jack_func = ucontrol->value.integer.value[0];
+	spitz_jack_func = ucontrol->value.enumerated.item[0];
 	spitz_ext_control(&card->dapm);
 	return 1;
 }
@@ -184,7 +184,7 @@
 static int spitz_get_spk(struct snd_kcontrol *kcontrol,
 	struct snd_ctl_elem_value *ucontrol)
 {
-	ucontrol->value.integer.value[0] = spitz_spk_func;
+	ucontrol->value.enumerated.item[0] = spitz_spk_func;
 	return 0;
 }
 
@@ -193,10 +193,10 @@
 {
 	struct snd_soc_card *card = snd_kcontrol_chip(kcontrol);
 
-	if (spitz_spk_func == ucontrol->value.integer.value[0])
+	if (spitz_spk_func == ucontrol->value.enumerated.item[0])
 		return 0;
 
-	spitz_spk_func = ucontrol->value.integer.value[0];
+	spitz_spk_func = ucontrol->value.enumerated.item[0];
 	spitz_ext_control(&card->dapm);
 	return 1;
 }

diff --git a/sound/soc/pxa/tosa.c b/sound/soc/pxa/tosa.c
index 49518dd..c508f02 100644
--- a/sound/soc/pxa/tosa.c
+++ b/sound/soc/pxa/tosa.c

@@ -95,7 +95,7 @@
 static int tosa_get_jack(struct snd_kcontrol *kcontrol,
 	struct snd_ctl_elem_value *ucontrol)
 {
-	ucontrol->value.integer.value[0] = tosa_jack_func;
+	ucontrol->value.enumerated.item[0] = tosa_jack_func;
 	return 0;
 }
 
@@ -104,10 +104,10 @@
 {
 	struct snd_soc_card *card = snd_kcontrol_chip(kcontrol);
 
-	if (tosa_jack_func == ucontrol->value.integer.value[0])
+	if (tosa_jack_func == ucontrol->value.enumerated.item[0])
 		return 0;
 
-	tosa_jack_func = ucontrol->value.integer.value[0];
+	tosa_jack_func = ucontrol->value.enumerated.item[0];
 	tosa_ext_control(&card->dapm);
 	return 1;
 }
@@ -115,7 +115,7 @@
 static int tosa_get_spk(struct snd_kcontrol *kcontrol,
 	struct snd_ctl_elem_value *ucontrol)
 {
-	ucontrol->value.integer.value[0] = tosa_spk_func;
+	ucontrol->value.enumerated.item[0] = tosa_spk_func;
 	return 0;
 }
 
@@ -124,10 +124,10 @@
 {
 	struct snd_soc_card *card = snd_kcontrol_chip(kcontrol);
 
-	if (tosa_spk_func == ucontrol->value.integer.value[0])
+	if (tosa_spk_func == ucontrol->value.enumerated.item[0])
 		return 0;
 
-	tosa_spk_func = ucontrol->value.integer.value[0];
+	tosa_spk_func = ucontrol->value.enumerated.item[0];
 	tosa_ext_control(&card->dapm);
 	return 1;
 }

diff --git a/sound/soc/qcom/lpass-cpu.c b/sound/soc/qcom/lpass-cpu.c
index 00b6c9d..e5101e0 100644
--- a/sound/soc/qcom/lpass-cpu.c
+++ b/sound/soc/qcom/lpass-cpu.c

@@ -355,7 +355,6 @@
 	.readable_reg = lpass_cpu_regmap_readable,
 	.volatile_reg = lpass_cpu_regmap_volatile,
 	.cache_type = REGCACHE_FLAT,
-	.val_format_endian = REGMAP_ENDIAN_LITTLE,
 };
 
 int asoc_qcom_lpass_cpu_platform_probe(struct platform_device *pdev)

diff --git a/sound/soc/samsung/i2s.c b/sound/soc/samsung/i2s.c
index 84d9e77..70a2559 100644
--- a/sound/soc/samsung/i2s.c
+++ b/sound/soc/samsung/i2s.c

@@ -481,10 +481,11 @@
 	unsigned int cdcon_mask = 1 << i2s_regs->cdclkcon_off;
 	unsigned int rsrc_mask = 1 << i2s_regs->rclksrc_off;
 	u32 mod, mask, val = 0;
+	unsigned long flags;
 
-	spin_lock(i2s->lock);
+	spin_lock_irqsave(i2s->lock, flags);
 	mod = readl(i2s->addr + I2SMOD);
-	spin_unlock(i2s->lock);
+	spin_unlock_irqrestore(i2s->lock, flags);
 
 	switch (clk_id) {
 	case SAMSUNG_I2S_OPCLK:
@@ -575,11 +576,11 @@
 		return -EINVAL;
 	}
 
-	spin_lock(i2s->lock);
+	spin_lock_irqsave(i2s->lock, flags);
 	mod = readl(i2s->addr + I2SMOD);
 	mod = (mod & ~mask) | val;
 	writel(mod, i2s->addr + I2SMOD);
-	spin_unlock(i2s->lock);
+	spin_unlock_irqrestore(i2s->lock, flags);
 
 	return 0;
 }
@@ -590,6 +591,7 @@
 	struct i2s_dai *i2s = to_info(dai);
 	int lrp_shift, sdf_shift, sdf_mask, lrp_rlow, mod_slave;
 	u32 mod, tmp = 0;
+	unsigned long flags;
 
 	lrp_shift = i2s->variant_regs->lrp_off;
 	sdf_shift = i2s->variant_regs->sdf_off;
@@ -649,7 +651,7 @@
 		return -EINVAL;
 	}
 
-	spin_lock(i2s->lock);
+	spin_lock_irqsave(i2s->lock, flags);
 	mod = readl(i2s->addr + I2SMOD);
 	/*
 	 * Don't change the I2S mode if any controller is active on this
@@ -657,7 +659,7 @@
 	 */
 	if (any_active(i2s) &&
 		((mod & (sdf_mask | lrp_rlow | mod_slave)) != tmp)) {
-		spin_unlock(i2s->lock);
+		spin_unlock_irqrestore(i2s->lock, flags);
 		dev_err(&i2s->pdev->dev,
 				"%s:%d Other DAI busy\n", __func__, __LINE__);
 		return -EAGAIN;
@@ -666,7 +668,7 @@
 	mod &= ~(sdf_mask | lrp_rlow | mod_slave);
 	mod |= tmp;
 	writel(mod, i2s->addr + I2SMOD);
-	spin_unlock(i2s->lock);
+	spin_unlock_irqrestore(i2s->lock, flags);
 
 	return 0;
 }
@@ -676,6 +678,7 @@
 {
 	struct i2s_dai *i2s = to_info(dai);
 	u32 mod, mask = 0, val = 0;
+	unsigned long flags;
 
 	if (!is_secondary(i2s))
 		mask |= (MOD_DC2_EN | MOD_DC1_EN);
@@ -744,11 +747,11 @@
 		return -EINVAL;
 	}
 
-	spin_lock(i2s->lock);
+	spin_lock_irqsave(i2s->lock, flags);
 	mod = readl(i2s->addr + I2SMOD);
 	mod = (mod & ~mask) | val;
 	writel(mod, i2s->addr + I2SMOD);
-	spin_unlock(i2s->lock);
+	spin_unlock_irqrestore(i2s->lock, flags);
 
 	samsung_asoc_init_dma_data(dai, &i2s->dma_playback, &i2s->dma_capture);
 

diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index 0d37079..581175a 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c

@@ -3573,7 +3573,7 @@
 {
 	struct snd_soc_dapm_widget *w = snd_kcontrol_chip(kcontrol);
 
-	ucontrol->value.integer.value[0] = w->params_select;
+	ucontrol->value.enumerated.item[0] = w->params_select;
 
 	return 0;
 }
@@ -3587,13 +3587,13 @@
 	if (w->power)
 		return -EBUSY;
 
-	if (ucontrol->value.integer.value[0] == w->params_select)
+	if (ucontrol->value.enumerated.item[0] == w->params_select)
 		return 0;
 
-	if (ucontrol->value.integer.value[0] >= w->num_params)
+	if (ucontrol->value.enumerated.item[0] >= w->num_params)
 		return -EINVAL;
 
-	w->params_select = ucontrol->value.integer.value[0];
+	w->params_select = ucontrol->value.enumerated.item[0];
 
 	return 0;
 }

diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index 4f6ce1c..c458d60 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c

@@ -1124,6 +1124,7 @@
 	case USB_ID(0x045E, 0x076F): /* MS Lifecam HD-6000 */
 	case USB_ID(0x045E, 0x0772): /* MS Lifecam Studio */
 	case USB_ID(0x045E, 0x0779): /* MS Lifecam HD-3000 */
+	case USB_ID(0x047F, 0xAA05): /* Plantronics DA45 */
 	case USB_ID(0x04D8, 0xFEEA): /* Benchmark DAC1 Pre */
 	case USB_ID(0x074D, 0x3553): /* Outlaw RR2150 (Micronas UAC3553B) */
 	case USB_ID(0x21B4, 0x0081): /* AudioQuest DragonFly */

diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build
index 4a96473..ee566e8 100644
--- a/tools/build/Makefile.build
+++ b/tools/build/Makefile.build

@@ -85,7 +85,7 @@
 	$(call rule_mkdir)
 	$(call if_changed_dep,cc_i_c)
 
-$(OUTPUT)%.i: %.S FORCE
+$(OUTPUT)%.s: %.S FORCE
 	$(call rule_mkdir)
 	$(call if_changed_dep,cc_i_c)
 

diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature
index 02db3cd..6b77072 100644
--- a/tools/build/Makefile.feature
+++ b/tools/build/Makefile.feature

@@ -27,7 +27,7 @@
 #   the rule that uses them - an example for that is the 'bionic'
 #   feature check. ]
 #
-FEATURE_TESTS ?=			\
+FEATURE_TESTS_BASIC :=			\
 	backtrace			\
 	dwarf				\
 	fortify-source			\
@@ -46,6 +46,7 @@
 	libpython			\
 	libpython-version		\
 	libslang			\
+	libcrypto			\
 	libunwind			\
 	pthread-attr-setaffinity-np	\
 	stackprotector-all		\
@@ -56,6 +57,25 @@
 	get_cpuid			\
 	bpf
 
+# FEATURE_TESTS_BASIC + FEATURE_TESTS_EXTRA is the complete list
+# of all feature tests
+FEATURE_TESTS_EXTRA :=			\
+	bionic				\
+	compile-32			\
+	compile-x32			\
+	cplus-demangle			\
+	hello				\
+	libbabeltrace			\
+	liberty				\
+	liberty-z			\
+	libunwind-debug-frame
+
+FEATURE_TESTS ?= $(FEATURE_TESTS_BASIC)
+
+ifeq ($(FEATURE_TESTS),all)
+  FEATURE_TESTS := $(FEATURE_TESTS_BASIC) $(FEATURE_TESTS_EXTRA)
+endif
+
 FEATURE_DISPLAY ?=			\
 	dwarf				\
 	glibc				\
@@ -68,6 +88,7 @@
 	libperl				\
 	libpython			\
 	libslang			\
+	libcrypto			\
 	libunwind			\
 	libdw-dwarf-unwind		\
 	zlib				\
@@ -100,6 +121,14 @@
   # test-all.c passed - just set all the core feature flags to 1:
   #
   $(foreach feat,$(FEATURE_TESTS),$(call feature_set,$(feat)))
+  #
+  # test-all.c does not comprise these tests, so we need to
+  # for this case to get features proper values
+  #
+  $(call feature_check,compile-32)
+  $(call feature_check,compile-x32)
+  $(call feature_check,bionic)
+  $(call feature_check,libbabeltrace)
 else
   $(foreach feat,$(FEATURE_TESTS),$(call feature_check,$(feat)))
 endif

diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index bf8f035..c5f4c41 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile

@@ -23,6 +23,7 @@
 	test-libpython.bin		\
 	test-libpython-version.bin	\
 	test-libslang.bin		\
+	test-libcrypto.bin		\
 	test-libunwind.bin		\
 	test-libunwind-debug-frame.bin	\
 	test-pthread-attr-setaffinity-np.bin	\
@@ -105,6 +106,9 @@
 $(OUTPUT)test-libslang.bin:
 	$(BUILD) -I/usr/include/slang -lslang
 
+$(OUTPUT)test-libcrypto.bin:
+	$(BUILD) -lcrypto
+
 $(OUTPUT)test-gtk2.bin:
 	$(BUILD) $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null)
 

diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c
index 81025ca..e499a36 100644
--- a/tools/build/feature/test-all.c
+++ b/tools/build/feature/test-all.c

@@ -129,6 +129,10 @@
 # include "test-bpf.c"
 #undef main
 
+#define main main_test_libcrypto
+# include "test-libcrypto.c"
+#undef main
+
 int main(int argc, char *argv[])
 {
 	main_test_libpython();
@@ -158,6 +162,7 @@
 	main_test_lzma();
 	main_test_get_cpuid();
 	main_test_bpf();
+	main_test_libcrypto();
 
 	return 0;
 }

diff --git a/tools/build/feature/test-compile.c b/tools/build/feature/test-compile.c
index 31dbf45..c54e655 100644
--- a/tools/build/feature/test-compile.c
+++ b/tools/build/feature/test-compile.c

@@ -1,4 +1,6 @@
+#include <stdio.h>
 int main(void)
 {
+	printf("Hello World!\n");
 	return 0;
 }

diff --git a/tools/build/feature/test-libcrypto.c b/tools/build/feature/test-libcrypto.c
new file mode 100644
index 0000000..bd79dc7
--- /dev/null
+++ b/tools/build/feature/test-libcrypto.c

@@ -0,0 +1,17 @@
+#include <openssl/sha.h>
+#include <openssl/md5.h>
+
+int main(void)
+{
+	MD5_CTX context;
+	unsigned char md[MD5_DIGEST_LENGTH + SHA_DIGEST_LENGTH];
+	unsigned char dat[] = "12345";
+
+	MD5_Init(&context);
+	MD5_Update(&context, &dat[0], sizeof(dat));
+	MD5_Final(&md[0], &context);
+
+	SHA1(&dat[0], sizeof(dat), &md[0]);
+
+	return 0;
+}

diff --git a/tools/lib/api/Build b/tools/lib/api/Build
index e8b8a23..954c644 100644
--- a/tools/lib/api/Build
+++ b/tools/lib/api/Build

@@ -1,3 +1,4 @@
 libapi-y += fd/
 libapi-y += fs/
 libapi-y += cpu.o
+libapi-y += debug.o

diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile
index d85904d..bbc82c6 100644
--- a/tools/lib/api/Makefile
+++ b/tools/lib/api/Makefile

@@ -18,6 +18,7 @@
 CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS)
 CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -Werror -O6 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 -fPIC
 CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64
+CFLAGS += -I$(srctree)/tools/lib/api
 
 RM = rm -f
 

diff --git a/tools/lib/api/debug-internal.h b/tools/lib/api/debug-internal.h
new file mode 100644
index 0000000..188f7880
--- /dev/null
+++ b/tools/lib/api/debug-internal.h

@@ -0,0 +1,20 @@
+#ifndef __API_DEBUG_INTERNAL_H__
+#define __API_DEBUG_INTERNAL_H__
+
+#include "debug.h"
+
+#define __pr(func, fmt, ...)	\
+do {				\
+	if ((func))		\
+		(func)("libapi: " fmt, ##__VA_ARGS__); \
+} while (0)
+
+extern libapi_print_fn_t __pr_warning;
+extern libapi_print_fn_t __pr_info;
+extern libapi_print_fn_t __pr_debug;
+
+#define pr_warning(fmt, ...)	__pr(__pr_warning, fmt, ##__VA_ARGS__)
+#define pr_info(fmt, ...)	__pr(__pr_info, fmt, ##__VA_ARGS__)
+#define pr_debug(fmt, ...)	__pr(__pr_debug, fmt, ##__VA_ARGS__)
+
+#endif /* __API_DEBUG_INTERNAL_H__ */

diff --git a/tools/lib/api/debug.c b/tools/lib/api/debug.c
new file mode 100644
index 0000000..5fa5cf5
--- /dev/null
+++ b/tools/lib/api/debug.c

@@ -0,0 +1,28 @@
+#include <stdio.h>
+#include <stdarg.h>
+#include "debug.h"
+#include "debug-internal.h"
+
+static int __base_pr(const char *format, ...)
+{
+	va_list args;
+	int err;
+
+	va_start(args, format);
+	err = vfprintf(stderr, format, args);
+	va_end(args);
+	return err;
+}
+
+libapi_print_fn_t __pr_warning = __base_pr;
+libapi_print_fn_t __pr_info    = __base_pr;
+libapi_print_fn_t __pr_debug;
+
+void libapi_set_print(libapi_print_fn_t warn,
+		      libapi_print_fn_t info,
+		      libapi_print_fn_t debug)
+{
+	__pr_warning = warn;
+	__pr_info    = info;
+	__pr_debug   = debug;
+}

diff --git a/tools/lib/api/debug.h b/tools/lib/api/debug.h
new file mode 100644
index 0000000..a0872f6
--- /dev/null
+++ b/tools/lib/api/debug.h

@@ -0,0 +1,10 @@
+#ifndef __API_DEBUG_H__
+#define __API_DEBUG_H__
+
+typedef int (*libapi_print_fn_t)(const char *, ...);
+
+void libapi_set_print(libapi_print_fn_t warn,
+		      libapi_print_fn_t info,
+		      libapi_print_fn_t debug);
+
+#endif /* __API_DEBUG_H__ */

diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c
index 459599d..ef78c22 100644
--- a/tools/lib/api/fs/fs.c
+++ b/tools/lib/api/fs/fs.c

@@ -13,6 +13,7 @@
 #include <sys/mount.h>
 
 #include "fs.h"
+#include "debug-internal.h"
 
 #define _STR(x) #x
 #define STR(x) _STR(x)
@@ -300,6 +301,56 @@
 	return err;
 }
 
+#define STRERR_BUFSIZE  128     /* For the buffer size of strerror_r */
+
+int filename__read_str(const char *filename, char **buf, size_t *sizep)
+{
+	size_t size = 0, alloc_size = 0;
+	void *bf = NULL, *nbf;
+	int fd, n, err = 0;
+	char sbuf[STRERR_BUFSIZE];
+
+	fd = open(filename, O_RDONLY);
+	if (fd < 0)
+		return -errno;
+
+	do {
+		if (size == alloc_size) {
+			alloc_size += BUFSIZ;
+			nbf = realloc(bf, alloc_size);
+			if (!nbf) {
+				err = -ENOMEM;
+				break;
+			}
+
+			bf = nbf;
+		}
+
+		n = read(fd, bf + size, alloc_size - size);
+		if (n < 0) {
+			if (size) {
+				pr_warning("read failed %d: %s\n", errno,
+					 strerror_r(errno, sbuf, sizeof(sbuf)));
+				err = 0;
+			} else
+				err = -errno;
+
+			break;
+		}
+
+		size += n;
+	} while (n > 0);
+
+	if (!err) {
+		*sizep = size;
+		*buf   = bf;
+	} else
+		free(bf);
+
+	close(fd);
+	return err;
+}
+
 int sysfs__read_ull(const char *entry, unsigned long long *value)
 {
 	char path[PATH_MAX];
@@ -326,6 +377,19 @@
 	return filename__read_int(path, value);
 }
 
+int sysfs__read_str(const char *entry, char **buf, size_t *sizep)
+{
+	char path[PATH_MAX];
+	const char *sysfs = sysfs__mountpoint();
+
+	if (!sysfs)
+		return -1;
+
+	snprintf(path, sizeof(path), "%s/%s", sysfs, entry);
+
+	return filename__read_str(path, buf, sizep);
+}
+
 int sysctl__read_int(const char *sysctl, int *value)
 {
 	char path[PATH_MAX];

diff --git a/tools/lib/api/fs/fs.h b/tools/lib/api/fs/fs.h
index d024a7f..9f65980 100644
--- a/tools/lib/api/fs/fs.h
+++ b/tools/lib/api/fs/fs.h

@@ -2,6 +2,7 @@
 #define __API_FS__
 
 #include <stdbool.h>
+#include <unistd.h>
 
 /*
  * On most systems <limits.h> would have given us this, but  not on some systems
@@ -26,8 +27,10 @@
 
 int filename__read_int(const char *filename, int *value);
 int filename__read_ull(const char *filename, unsigned long long *value);
+int filename__read_str(const char *filename, char **buf, size_t *sizep);
 
 int sysctl__read_int(const char *sysctl, int *value);
 int sysfs__read_int(const char *entry, int *value);
 int sysfs__read_ull(const char *entry, unsigned long long *value);
+int sysfs__read_str(const char *entry, char **buf, size_t *sizep);
 #endif /* __API_FS__ */

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 8334a5a..7e543c3 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c

@@ -201,6 +201,7 @@
 			Elf_Data *data;
 		} *reloc;
 		int nr_reloc;
+		int maps_shndx;
 	} efile;
 	/*
 	 * All loaded bpf_object is linked in a list, which is
@@ -350,6 +351,7 @@
 	 */
 	obj->efile.obj_buf = obj_buf;
 	obj->efile.obj_buf_sz = obj_buf_sz;
+	obj->efile.maps_shndx = -1;
 
 	obj->loaded = false;
 
@@ -529,12 +531,12 @@
 }
 
 static int
-bpf_object__init_maps_name(struct bpf_object *obj, int maps_shndx)
+bpf_object__init_maps_name(struct bpf_object *obj)
 {
 	int i;
 	Elf_Data *symbols = obj->efile.symbols;
 
-	if (!symbols || maps_shndx < 0)
+	if (!symbols || obj->efile.maps_shndx < 0)
 		return -EINVAL;
 
 	for (i = 0; i < symbols->d_size / sizeof(GElf_Sym); i++) {
@@ -544,7 +546,7 @@
 
 		if (!gelf_getsym(symbols, i, &sym))
 			continue;
-		if (sym.st_shndx != maps_shndx)
+		if (sym.st_shndx != obj->efile.maps_shndx)
 			continue;
 
 		map_name = elf_strptr(obj->efile.elf,
@@ -572,7 +574,7 @@
 	Elf *elf = obj->efile.elf;
 	GElf_Ehdr *ep = &obj->efile.ehdr;
 	Elf_Scn *scn = NULL;
-	int idx = 0, err = 0, maps_shndx = -1;
+	int idx = 0, err = 0;
 
 	/* Elf is corrupted/truncated, avoid calling elf_strptr. */
 	if (!elf_rawdata(elf_getscn(elf, ep->e_shstrndx), NULL)) {
@@ -625,7 +627,7 @@
 		else if (strcmp(name, "maps") == 0) {
 			err = bpf_object__init_maps(obj, data->d_buf,
 						    data->d_size);
-			maps_shndx = idx;
+			obj->efile.maps_shndx = idx;
 		} else if (sh.sh_type == SHT_SYMTAB) {
 			if (obj->efile.symbols) {
 				pr_warning("bpf: multiple SYMTAB in %s\n",
@@ -674,8 +676,8 @@
 		pr_warning("Corrupted ELF file: index of strtab invalid\n");
 		return LIBBPF_ERRNO__FORMAT;
 	}
-	if (maps_shndx >= 0)
-		err = bpf_object__init_maps_name(obj, maps_shndx);
+	if (obj->efile.maps_shndx >= 0)
+		err = bpf_object__init_maps_name(obj);
 out:
 	return err;
 }
@@ -697,7 +699,8 @@
 static int
 bpf_program__collect_reloc(struct bpf_program *prog,
 			   size_t nr_maps, GElf_Shdr *shdr,
-			   Elf_Data *data, Elf_Data *symbols)
+			   Elf_Data *data, Elf_Data *symbols,
+			   int maps_shndx)
 {
 	int i, nrels;
 
@@ -724,9 +727,6 @@
 			return -LIBBPF_ERRNO__FORMAT;
 		}
 
-		insn_idx = rel.r_offset / sizeof(struct bpf_insn);
-		pr_debug("relocation: insn_idx=%u\n", insn_idx);
-
 		if (!gelf_getsym(symbols,
 				 GELF_R_SYM(rel.r_info),
 				 &sym)) {
@@ -735,6 +735,15 @@
 			return -LIBBPF_ERRNO__FORMAT;
 		}
 
+		if (sym.st_shndx != maps_shndx) {
+			pr_warning("Program '%s' contains non-map related relo data pointing to section %u\n",
+				   prog->section_name, sym.st_shndx);
+			return -LIBBPF_ERRNO__RELOC;
+		}
+
+		insn_idx = rel.r_offset / sizeof(struct bpf_insn);
+		pr_debug("relocation: insn_idx=%u\n", insn_idx);
+
 		if (insns[insn_idx].code != (BPF_LD | BPF_IMM | BPF_DW)) {
 			pr_warning("bpf: relocation: invalid relo for insns[%d].code 0x%x\n",
 				   insn_idx, insns[insn_idx].code);
@@ -863,7 +872,8 @@
 
 		err = bpf_program__collect_reloc(prog, nr_maps,
 						 shdr, data,
-						 obj->efile.symbols);
+						 obj->efile.symbols,
+						 obj->efile.maps_shndx);
 		if (err)
 			return err;
 	}

diff --git a/tools/lib/lockdep/Makefile b/tools/lib/lockdep/Makefile
index 90d2bae..1d57af5 100644
--- a/tools/lib/lockdep/Makefile
+++ b/tools/lib/lockdep/Makefile

@@ -100,7 +100,7 @@
 
 do_compile_shared_library =			\
 	($(print_shared_lib_compile)		\
-	$(CC) --shared $^ -o $@ -lpthread -ldl -Wl,-soname='"$@"';$(shell ln -s $@ liblockdep.so))
+	$(CC) --shared $^ -o $@ -lpthread -ldl -Wl,-soname='"$@"';$(shell ln -sf $@ liblockdep.so))
 
 do_build_static_lib =				\
 	($(print_static_lib_build)		\

diff --git a/tools/lib/lockdep/common.c b/tools/lib/lockdep/common.c
index 9be6633..d1c89cc 100644
--- a/tools/lib/lockdep/common.c
+++ b/tools/lib/lockdep/common.c

@@ -11,11 +11,6 @@
 bool debug_locks = true;
 bool debug_locks_silent;
 
-__attribute__((constructor)) static void liblockdep_init(void)
-{
-	lockdep_init();
-}
-
 __attribute__((destructor)) static void liblockdep_exit(void)
 {
 	debug_check_no_locks_held();

diff --git a/tools/lib/lockdep/include/liblockdep/common.h b/tools/lib/lockdep/include/liblockdep/common.h
index a60c14b..6e66277 100644
--- a/tools/lib/lockdep/include/liblockdep/common.h
+++ b/tools/lib/lockdep/include/liblockdep/common.h

@@ -44,7 +44,6 @@
 void lock_release(struct lockdep_map *lock, int nested,
 			unsigned long ip);
 extern void debug_check_no_locks_freed(const void *from, unsigned long len);
-extern void lockdep_init(void);
 
 #define STATIC_LOCKDEP_MAP_INIT(_name, _key) \
 	{ .name = (_name), .key = (void *)(_key), }

diff --git a/tools/lib/lockdep/lockdep.c b/tools/lib/lockdep/lockdep.c
index f42b7e9..a0a2e3a 100644
--- a/tools/lib/lockdep/lockdep.c
+++ b/tools/lib/lockdep/lockdep.c

@@ -1,2 +1,8 @@
 #include <linux/lockdep.h>
+
+/* Trivial API wrappers, we don't (yet) have RCU in user-space: */
+#define hlist_for_each_entry_rcu	hlist_for_each_entry
+#define hlist_add_head_rcu		hlist_add_head
+#define hlist_del_rcu			hlist_del
+
 #include "../../../kernel/locking/lockdep.c"

diff --git a/tools/lib/lockdep/preload.c b/tools/lib/lockdep/preload.c
index 21cdf86..5284484 100644
--- a/tools/lib/lockdep/preload.c
+++ b/tools/lib/lockdep/preload.c

@@ -439,7 +439,5 @@
 	ll_pthread_rwlock_unlock = dlsym(RTLD_NEXT, "pthread_rwlock_unlock");
 #endif
 
-	lockdep_init();
-
 	__init_state = done;
 }

diff --git a/tools/lib/lockdep/tests/AA.c b/tools/lib/lockdep/tests/AA.c
index 0f782ff..18211a5 100644
--- a/tools/lib/lockdep/tests/AA.c
+++ b/tools/lib/lockdep/tests/AA.c

@@ -1,13 +1,13 @@
 #include <liblockdep/mutex.h>
 
-void main(void)
+int main(void)
 {
-	pthread_mutex_t a, b;
+	pthread_mutex_t a;
 
 	pthread_mutex_init(&a, NULL);
-	pthread_mutex_init(&b, NULL);
 
 	pthread_mutex_lock(&a);
-	pthread_mutex_lock(&b);
 	pthread_mutex_lock(&a);
+
+	return 0;
 }

diff --git a/tools/lib/lockdep/tests/ABA.c b/tools/lib/lockdep/tests/ABA.c
new file mode 100644
index 0000000..0f782ff
--- /dev/null
+++ b/tools/lib/lockdep/tests/ABA.c

@@ -0,0 +1,13 @@
+#include <liblockdep/mutex.h>
+
+void main(void)
+{
+	pthread_mutex_t a, b;
+
+	pthread_mutex_init(&a, NULL);
+	pthread_mutex_init(&b, NULL);
+
+	pthread_mutex_lock(&a);
+	pthread_mutex_lock(&b);
+	pthread_mutex_lock(&a);
+}

diff --git a/tools/lib/lockdep/tests/ABBA_2threads.c b/tools/lib/lockdep/tests/ABBA_2threads.c
new file mode 100644
index 0000000..cd807d7
--- /dev/null
+++ b/tools/lib/lockdep/tests/ABBA_2threads.c

@@ -0,0 +1,46 @@
+#include <stdio.h>
+#include <pthread.h>
+
+pthread_mutex_t a = PTHREAD_MUTEX_INITIALIZER;
+pthread_mutex_t b = PTHREAD_MUTEX_INITIALIZER;
+pthread_barrier_t bar;
+
+void *ba_lock(void *arg)
+{
+	int ret, i;
+
+	pthread_mutex_lock(&b);
+
+	if (pthread_barrier_wait(&bar) == PTHREAD_BARRIER_SERIAL_THREAD)
+		pthread_barrier_destroy(&bar);
+
+	pthread_mutex_lock(&a);
+
+	pthread_mutex_unlock(&a);
+	pthread_mutex_unlock(&b);
+}
+
+int main(void)
+{
+	pthread_t t;
+
+	pthread_barrier_init(&bar, NULL, 2);
+
+	if (pthread_create(&t, NULL, ba_lock, NULL)) {
+		fprintf(stderr, "pthread_create() failed\n");
+		return 1;
+	}
+	pthread_mutex_lock(&a);
+
+	if (pthread_barrier_wait(&bar) == PTHREAD_BARRIER_SERIAL_THREAD)
+		pthread_barrier_destroy(&bar);
+
+	pthread_mutex_lock(&b);
+
+	pthread_mutex_unlock(&b);
+	pthread_mutex_unlock(&a);
+
+	pthread_join(t, NULL);
+
+	return 0;
+}

diff --git a/tools/lib/lockdep/uinclude/linux/compiler.h b/tools/lib/lockdep/uinclude/linux/compiler.h
index 6386dc3..fd3e56a 100644
--- a/tools/lib/lockdep/uinclude/linux/compiler.h
+++ b/tools/lib/lockdep/uinclude/linux/compiler.h

@@ -3,6 +3,7 @@
 
 #define __used		__attribute__((__unused__))
 #define unlikely
+#define READ_ONCE(x) (x)
 #define WRITE_ONCE(x, val) x=(val)
 #define RCU_INIT_POINTER(p, v) p=(v)
 

diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index c3bd294..190cc88 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c

@@ -1951,6 +1951,7 @@
 		   strcmp(token, "*") == 0 ||
 		   strcmp(token, "^") == 0 ||
 		   strcmp(token, "/") == 0 ||
+		   strcmp(token, "%") == 0 ||
 		   strcmp(token, "<") == 0 ||
 		   strcmp(token, ">") == 0 ||
 		   strcmp(token, "<=") == 0 ||
@@ -2397,6 +2398,12 @@
 				break;
 			*val = left + right;
 			break;
+		case '~':
+			ret = arg_num_eval(arg->op.right, &right);
+			if (!ret)
+				break;
+			*val = ~right;
+			break;
 		default:
 			do_warning("unknown op '%s'", arg->op.op);
 			ret = 0;
@@ -2634,6 +2641,7 @@
 
 free_field:
 	free_arg(arg->hex.field);
+	arg->hex.field = NULL;
 out:
 	*tok = NULL;
 	return EVENT_ERROR;
@@ -2658,8 +2666,10 @@
 
 free_size:
 	free_arg(arg->int_array.count);
+	arg->int_array.count = NULL;
 free_field:
 	free_arg(arg->int_array.field);
+	arg->int_array.field = NULL;
 out:
 	*tok = NULL;
 	return EVENT_ERROR;
@@ -3689,6 +3699,9 @@
 		case '/':
 			val = left / right;
 			break;
+		case '%':
+			val = left % right;
+			break;
 		case '*':
 			val = left * right;
 			break;
@@ -4971,7 +4984,7 @@
 						break;
 					}
 				}
-				if (pevent->long_size == 8 && ls &&
+				if (pevent->long_size == 8 && ls == 1 &&
 				    sizeof(long) != 8) {
 					char *p;
 
@@ -5335,19 +5348,74 @@
 	return false;
 }
 
-void pevent_print_event(struct pevent *pevent, struct trace_seq *s,
-			struct pevent_record *record, bool use_trace_clock)
+/**
+ * pevent_find_event_by_record - return the event from a given record
+ * @pevent: a handle to the pevent
+ * @record: The record to get the event from
+ *
+ * Returns the associated event for a given record, or NULL if non is
+ * is found.
+ */
+struct event_format *
+pevent_find_event_by_record(struct pevent *pevent, struct pevent_record *record)
 {
-	static const char *spaces = "                    "; /* 20 spaces */
-	struct event_format *event;
+	int type;
+
+	if (record->size < 0) {
+		do_warning("ug! negative record size %d", record->size);
+		return NULL;
+	}
+
+	type = trace_parse_common_type(pevent, record->data);
+
+	return pevent_find_event(pevent, type);
+}
+
+/**
+ * pevent_print_event_task - Write the event task comm, pid and CPU
+ * @pevent: a handle to the pevent
+ * @s: the trace_seq to write to
+ * @event: the handle to the record's event
+ * @record: The record to get the event from
+ *
+ * Writes the tasks comm, pid and CPU to @s.
+ */
+void pevent_print_event_task(struct pevent *pevent, struct trace_seq *s,
+			     struct event_format *event,
+			     struct pevent_record *record)
+{
+	void *data = record->data;
+	const char *comm;
+	int pid;
+
+	pid = parse_common_pid(pevent, data);
+	comm = find_cmdline(pevent, pid);
+
+	if (pevent->latency_format) {
+		trace_seq_printf(s, "%8.8s-%-5d %3d",
+		       comm, pid, record->cpu);
+	} else
+		trace_seq_printf(s, "%16s-%-5d [%03d]", comm, pid, record->cpu);
+}
+
+/**
+ * pevent_print_event_time - Write the event timestamp
+ * @pevent: a handle to the pevent
+ * @s: the trace_seq to write to
+ * @event: the handle to the record's event
+ * @record: The record to get the event from
+ * @use_trace_clock: Set to parse according to the @pevent->trace_clock
+ *
+ * Writes the timestamp of the record into @s.
+ */
+void pevent_print_event_time(struct pevent *pevent, struct trace_seq *s,
+			     struct event_format *event,
+			     struct pevent_record *record,
+			     bool use_trace_clock)
+{
 	unsigned long secs;
 	unsigned long usecs;
 	unsigned long nsecs;
-	const char *comm;
-	void *data = record->data;
-	int type;
-	int pid;
-	int len;
 	int p;
 	bool use_usec_format;
 
@@ -5358,28 +5426,11 @@
 		nsecs = record->ts - secs * NSECS_PER_SEC;
 	}
 
-	if (record->size < 0) {
-		do_warning("ug! negative record size %d", record->size);
-		return;
-	}
-
-	type = trace_parse_common_type(pevent, data);
-
-	event = pevent_find_event(pevent, type);
-	if (!event) {
-		do_warning("ug! no event found for type %d", type);
-		return;
-	}
-
-	pid = parse_common_pid(pevent, data);
-	comm = find_cmdline(pevent, pid);
-
 	if (pevent->latency_format) {
-		trace_seq_printf(s, "%8.8s-%-5d %3d",
-		       comm, pid, record->cpu);
+		trace_seq_printf(s, " %3d", record->cpu);
 		pevent_data_lat_fmt(pevent, s, record);
 	} else
-		trace_seq_printf(s, "%16s-%-5d [%03d]", comm, pid, record->cpu);
+		trace_seq_printf(s, " [%03d]", record->cpu);
 
 	if (use_usec_format) {
 		if (pevent->flags & PEVENT_NSEC_OUTPUT) {
@@ -5387,14 +5438,36 @@
 			p = 9;
 		} else {
 			usecs = (nsecs + 500) / NSECS_PER_USEC;
+			/* To avoid usecs larger than 1 sec */
+			if (usecs >= 1000000) {
+				usecs -= 1000000;
+				secs++;
+			}
 			p = 6;
 		}
 
-		trace_seq_printf(s, " %5lu.%0*lu: %s: ",
-					secs, p, usecs, event->name);
+		trace_seq_printf(s, " %5lu.%0*lu:", secs, p, usecs);
 	} else
-		trace_seq_printf(s, " %12llu: %s: ",
-					record->ts, event->name);
+		trace_seq_printf(s, " %12llu:", record->ts);
+}
+
+/**
+ * pevent_print_event_data - Write the event data section
+ * @pevent: a handle to the pevent
+ * @s: the trace_seq to write to
+ * @event: the handle to the record's event
+ * @record: The record to get the event from
+ *
+ * Writes the parsing of the record's data to @s.
+ */
+void pevent_print_event_data(struct pevent *pevent, struct trace_seq *s,
+			     struct event_format *event,
+			     struct pevent_record *record)
+{
+	static const char *spaces = "                    "; /* 20 spaces */
+	int len;
+
+	trace_seq_printf(s, " %s: ", event->name);
 
 	/* Space out the event names evenly. */
 	len = strlen(event->name);
@@ -5404,6 +5477,23 @@
 	pevent_event_info(s, event, record);
 }
 
+void pevent_print_event(struct pevent *pevent, struct trace_seq *s,
+			struct pevent_record *record, bool use_trace_clock)
+{
+	struct event_format *event;
+
+	event = pevent_find_event_by_record(pevent, record);
+	if (!event) {
+		do_warning("ug! no event found for type %d",
+			   trace_parse_common_type(pevent, record->data));
+		return;
+	}
+
+	pevent_print_event_task(pevent, s, event, record);
+	pevent_print_event_time(pevent, s, event, record, use_trace_clock);
+	pevent_print_event_data(pevent, s, event, record);
+}
+
 static int events_id_cmp(const void *a, const void *b)
 {
 	struct event_format * const * ea = a;

diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h
index 706d9bc..9ffde37 100644
--- a/tools/lib/traceevent/event-parse.h
+++ b/tools/lib/traceevent/event-parse.h

@@ -628,6 +628,16 @@
 				 unsigned long long addr);
 int pevent_pid_is_registered(struct pevent *pevent, int pid);
 
+void pevent_print_event_task(struct pevent *pevent, struct trace_seq *s,
+			     struct event_format *event,
+			     struct pevent_record *record);
+void pevent_print_event_time(struct pevent *pevent, struct trace_seq *s,
+			     struct event_format *event,
+			     struct pevent_record *record,
+			     bool use_trace_clock);
+void pevent_print_event_data(struct pevent *pevent, struct trace_seq *s,
+			     struct event_format *event,
+			     struct pevent_record *record);
 void pevent_print_event(struct pevent *pevent, struct trace_seq *s,
 			struct pevent_record *record, bool use_trace_clock);
 
@@ -694,6 +704,9 @@
 struct event_format *
 pevent_find_event_by_name(struct pevent *pevent, const char *sys, const char *name);
 
+struct event_format *
+pevent_find_event_by_record(struct pevent *pevent, struct pevent_record *record);
+
 void pevent_data_lat_fmt(struct pevent *pevent,
 			 struct trace_seq *s, struct pevent_record *record);
 int pevent_data_type(struct pevent *pevent, struct pevent_record *rec);

diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt
index b9ca1e3..15949e2 100644
--- a/tools/perf/Documentation/perf-config.txt
+++ b/tools/perf/Documentation/perf-config.txt

@@ -8,7 +8,7 @@
 SYNOPSIS
 --------
 [verse]
-'perf config' -l | --list
+'perf config' [<file-option>] -l | --list
 
 DESCRIPTION
 -----------
@@ -21,6 +21,14 @@
 --list::
 	Show current config variables, name and value, for all sections.
 
+--user::
+	For writing and reading options: write to user
+	'$HOME/.perfconfig' file or read it.
+
+--system::
+	For writing and reading options: write to system-wide
+	'$(sysconfdir)/perfconfig' or read it.
+
 CONFIGURATION FILE
 ------------------
 
@@ -30,6 +38,10 @@
 The file '$(sysconfdir)/perfconfig' can be used to
 store a system-wide default configuration.
 
+When reading or writing, the values are read from the system and user
+configuration files by default, and options '--system' and '--user'
+can be used to tell the command to read from or write to only that location.
+
 Syntax
 ~~~~~~
 
@@ -62,7 +74,7 @@
 		medium = green, default
 		normal = lightgray, default
 		selected = white, lightgray
-		code = blue, default
+		jump_arrows = blue, default
 		addr = magenta, default
 		root = white, blue
 
@@ -98,6 +110,347 @@
 		order = caller
 		sort-key = function
 
+Variables
+~~~~~~~~~
+
+colors.*::
+	The variables for customizing the colors used in the output for the
+	'report', 'top' and 'annotate' in the TUI. They should specify the
+	foreground and background colors, separated by a comma, for example:
+
+		medium = green, lightgray
+
+	If you want to use the color configured for you terminal, just leave it
+	as 'default', for example:
+
+		medium = default, lightgray
+
+	Available colors:
+	red, yellow, green, cyan, gray, black, blue,
+	white, default, magenta, lightgray
+
+	colors.top::
+		'top' means a overhead percentage which is more than 5%.
+		And values of this variable specify percentage colors.
+		Basic key values are foreground-color 'red' and
+		background-color 'default'.
+	colors.medium::
+		'medium' means a overhead percentage which has more than 0.5%.
+		Default values are 'green' and 'default'.
+	colors.normal::
+		'normal' means the rest of overhead percentages
+		except 'top', 'medium', 'selected'.
+		Default values are 'lightgray' and 'default'.
+	colors.selected::
+		This selects the colors for the current entry in a list of entries
+		from sub-commands (top, report, annotate).
+		Default values are 'black' and 'lightgray'.
+	colors.jump_arrows::
+		Colors for jump arrows on assembly code listings
+		such as 'jns', 'jmp', 'jane', etc.
+		Default values are 'blue', 'default'.
+	colors.addr::
+		This selects colors for addresses from 'annotate'.
+		Default values are 'magenta', 'default'.
+	colors.root::
+		Colors for headers in the output of a sub-commands (top, report).
+		Default values are 'white', 'blue'.
+
+tui.*, gtk.*::
+	Subcommands that can be configured here are 'top', 'report' and 'annotate'.
+	These values are booleans, for example:
+
+	[tui]
+		top = true
+
+	will make the TUI be the default for the 'top' subcommand. Those will be
+	available if the required libs were detected at tool build time.
+
+buildid.*::
+	buildid.dir::
+		Each executable and shared library in modern distributions comes with a
+		content based identifier that, if available, will be inserted in a
+		'perf.data' file header to, at analysis time find what is needed to do
+		symbol resolution, code annotation, etc.
+
+		The recording tools also stores a hard link or copy in a per-user
+		directory, $HOME/.debug/, of binaries, shared libraries, /proc/kallsyms
+		and /proc/kcore files to be used at analysis time.
+
+		The buildid.dir variable can be used to either change this directory
+		cache location, or to disable it altogether. If you want to disable it,
+		set buildid.dir to /dev/null. The default is $HOME/.debug
+
+annotate.*::
+	These options work only for TUI.
+	These are in control of addresses, jump function, source code
+	in lines of assembly code from a specific program.
+
+	annotate.hide_src_code::
+		If a program which is analyzed has source code,
+		this option lets 'annotate' print a list of assembly code with the source code.
+		For example, let's see a part of a program. There're four lines.
+		If this option is 'true', they can be printed
+		without source code from a program as below.
+
+		│        push   %rbp
+		│        mov    %rsp,%rbp
+		│        sub    $0x10,%rsp
+		│        mov    (%rdi),%rdx
+
+		But if this option is 'false', source code of the part
+		can be also printed as below. Default is 'false'.
+
+		│      struct rb_node *rb_next(const struct rb_node *node)
+		│      {
+		│        push   %rbp
+		│        mov    %rsp,%rbp
+		│        sub    $0x10,%rsp
+		│              struct rb_node *parent;
+		│
+		│              if (RB_EMPTY_NODE(node))
+		│        mov    (%rdi),%rdx
+		│              return n;
+
+        annotate.use_offset::
+		Basing on a first address of a loaded function, offset can be used.
+		Instead of using original addresses of assembly code,
+		addresses subtracted from a base address can be printed.
+		Let's illustrate an example.
+		If a base address is 0XFFFFFFFF81624d50 as below,
+
+		ffffffff81624d50 <load0>
+
+		an address on assembly code has a specific absolute address as below
+
+		ffffffff816250b8:│  mov    0x8(%r14),%rdi
+
+		but if use_offset is 'true', an address subtracted from a base address is printed.
+		Default is true. This option is only applied to TUI.
+
+		             368:│  mov    0x8(%r14),%rdi
+
+	annotate.jump_arrows::
+		There can be jump instruction among assembly code.
+		Depending on a boolean value of jump_arrows,
+		arrows can be printed or not which represent
+		where do the instruction jump into as below.
+
+		│     ┌──jmp    1333
+		│     │  xchg   %ax,%ax
+		│1330:│  mov    %r15,%r10
+		│1333:└─→cmp    %r15,%r14
+
+		If jump_arrow is 'false', the arrows isn't printed as below.
+		Default is 'false'.
+
+		│      ↓ jmp    1333
+		│        xchg   %ax,%ax
+		│1330:   mov    %r15,%r10
+		│1333:   cmp    %r15,%r14
+
+        annotate.show_linenr::
+		When showing source code if this option is 'true',
+		line numbers are printed as below.
+
+		│1628         if (type & PERF_SAMPLE_IDENTIFIER) {
+		│     ↓ jne    508
+		│1628                 data->id = *array;
+		│1629                 array++;
+		│1630         }
+
+		However if this option is 'false', they aren't printed as below.
+		Default is 'false'.
+
+		│             if (type & PERF_SAMPLE_IDENTIFIER) {
+		│     ↓ jne    508
+		│                     data->id = *array;
+		│                     array++;
+		│             }
+
+        annotate.show_nr_jumps::
+		Let's see a part of assembly code.
+
+		│1382:   movb   $0x1,-0x270(%rbp)
+
+		If use this, the number of branches jumping to that address can be printed as below.
+		Default is 'false'.
+
+		│1 1382:   movb   $0x1,-0x270(%rbp)
+
+        annotate.show_total_period::
+		To compare two records on an instruction base, with this option
+		provided, display total number of samples that belong to a line
+		in assembly code. If this option is 'true', total periods are printed
+		instead of percent values as below.
+
+		  302 │      mov    %eax,%eax
+
+		But if this option is 'false', percent values for overhead are printed i.e.
+		Default is 'false'.
+
+		99.93 │      mov    %eax,%eax
+
+hist.*::
+	hist.percentage::
+		This option control the way to calculate overhead of filtered entries -
+		that means the value of this option is effective only if there's a
+		filter (by comm, dso or symbol name). Suppose a following example:
+
+		       Overhead  Symbols
+		       ........  .......
+		        33.33%     foo
+		        33.33%     bar
+		        33.33%     baz
+
+	       This is an original overhead and we'll filter out the first 'foo'
+	       entry. The value of 'relative' would increase the overhead of 'bar'
+	       and 'baz' to 50.00% for each, while 'absolute' would show their
+	       current overhead (33.33%).
+
+ui.*::
+	ui.show-headers::
+		This option controls display of column headers (like 'Overhead' and 'Symbol')
+		in 'report' and 'top'. If this option is false, they are hidden.
+		This option is only applied to TUI.
+
+call-graph.*::
+	When sub-commands 'top' and 'report' work with -g/—-children
+	there're options in control of call-graph.
+
+	call-graph.record-mode::
+		The record-mode can be 'fp' (frame pointer), 'dwarf' and 'lbr'.
+		The value of 'dwarf' is effective only if perf detect needed library
+		(libunwind or a recent version of libdw).
+		'lbr' only work for cpus that support it.
+
+	call-graph.dump-size::
+		The size of stack to dump in order to do post-unwinding. Default is 8192 (byte).
+		When using dwarf into record-mode, the default size will be used if omitted.
+
+	call-graph.print-type::
+		The print-types can be graph (graph absolute), fractal (graph relative),
+		flat and folded. This option controls a way to show overhead for each callchain
+		entry. Suppose a following example.
+
+                Overhead  Symbols
+                ........  .......
+                  40.00%  foo
+                          |
+                          ---foo
+                             |
+                             |--50.00%--bar
+                             |          main
+                             |
+                              --50.00%--baz
+                                        main
+
+		This output is a 'fractal' format. The 'foo' came from 'bar' and 'baz' exactly
+		half and half so 'fractal' shows 50.00% for each
+		(meaning that it assumes 100% total overhead of 'foo').
+
+		The 'graph' uses absolute overhead value of 'foo' as total so each of
+		'bar' and 'baz' callchain will have 20.00% of overhead.
+		If 'flat' is used, single column and linear exposure of call chains.
+		'folded' mean call chains are displayed in a line, separated by semicolons.
+
+	call-graph.order::
+		This option controls print order of callchains. The default is
+		'callee' which means callee is printed at top and then followed by its
+		caller and so on. The 'caller' prints it in reverse order.
+
+		If this option is not set and report.children or top.children is
+		set to true (or the equivalent command line option is given),
+		the default value of this option is changed to 'caller' for the
+		execution of 'perf report' or 'perf top'. Other commands will
+		still default to 'callee'.
+
+	call-graph.sort-key::
+		The callchains are merged if they contain same information.
+		The sort-key option determines a way to compare the callchains.
+		A value of 'sort-key' can be 'function' or 'address'.
+		The default is 'function'.
+
+	call-graph.threshold::
+		When there're many callchains it'd print tons of lines. So perf omits
+		small callchains under a certain overhead (threshold) and this option
+		control the threshold. Default is 0.5 (%). The overhead is calculated
+		by value depends on call-graph.print-type.
+
+	call-graph.print-limit::
+		This is a maximum number of lines of callchain printed for a single
+		histogram entry. Default is 0 which means no limitation.
+
+report.*::
+	report.percent-limit::
+		This one is mostly the same as call-graph.threshold but works for
+		histogram entries. Entries having an overhead lower than this
+		percentage will not be printed. Default is '0'. If percent-limit
+		is '10', only entries which have more than 10% of overhead will be
+		printed.
+
+	report.queue-size::
+		This option sets up the maximum allocation size of the internal
+		event queue for ordering events. Default is 0, meaning no limit.
+
+	report.children::
+		'Children' means functions called from another function.
+		If this option is true, 'perf report' cumulates callchains of children
+		and show (accumulated) total overhead as well as 'Self' overhead.
+		Please refer to the 'perf report' manual. The default is 'true'.
+
+	report.group::
+		This option is to show event group information together.
+		Example output with this turned on, notice that there is one column
+		per event in the group, ref-cycles and cycles:
+
+		# group: {ref-cycles,cycles}
+		# ========
+		#
+		# Samples: 7K of event 'anon group { ref-cycles, cycles }'
+		# Event count (approx.): 6876107743
+		#
+		#         Overhead  Command      Shared Object               Symbol
+		# ................  .......  .................  ...................
+		#
+		    99.84%  99.76%  noploop  noploop            [.] main
+		     0.07%   0.00%  noploop  ld-2.15.so         [.] strcmp
+		     0.03%   0.00%  noploop  [kernel.kallsyms]  [k] timerqueue_del
+
+top.*::
+	top.children::
+		Same as 'report.children'. So if it is enabled, the output of 'top'
+		command will have 'Children' overhead column as well as 'Self' overhead
+		column by default.
+		The default is 'true'.
+
+man.*::
+	man.viewer::
+		This option can assign a tool to view manual pages when 'help'
+		subcommand was invoked. Supported tools are 'man', 'woman'
+		(with emacs client) and 'konqueror'. Default is 'man'.
+
+		New man viewer tool can be also added using 'man.<tool>.cmd'
+		or use different path using 'man.<tool>.path' config option.
+
+pager.*::
+	pager.<subcommand>::
+		When the subcommand is run on stdio, determine whether it uses
+		pager or not based on this value. Default is 'unspecified'.
+
+kmem.*::
+	kmem.default::
+		This option decides which allocator is to be analyzed if neither
+		'--slab' nor '--page' option is used. Default is 'slab'.
+
+record.*::
+	record.build-id::
+		This option can be 'cache', 'no-cache' or 'skip'.
+		'cache' is to post-process data and save/update the binaries into
+		the build-id cache (in ~/.debug). This is the default.
+		But if this option is 'no-cache', it will not update the build-id cache.
+		'skip' skips post-processing and does not update the cache.
+
 SEE ALSO
 --------
 linkperf:perf[1]

diff --git a/tools/perf/Documentation/perf-inject.txt b/tools/perf/Documentation/perf-inject.txt
index 0b1cede..87b2588 100644
--- a/tools/perf/Documentation/perf-inject.txt
+++ b/tools/perf/Documentation/perf-inject.txt

@@ -53,6 +53,13 @@
 --strip::
 	Use with --itrace to strip out non-synthesized events.
 
+-j::
+--jit::
+	Process jitdump files by injecting the mmap records corresponding to jitted
+	functions. This option also generates the ELF images for each jitted function
+	found in the jitdumps files captured in the input perf.data file. Use this option
+	if you are monitoring environment using JIT runtimes, such as Java, DART or V8.
+
 SEE ALSO
 --------
 linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-archive[1]

diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index fbceb63..19aa175 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt

@@ -341,6 +341,12 @@
 --buildid-all::
 Record build-id of all DSOs regardless whether it's actually hit or not.
 
+--all-kernel::
+Configure all used events to run in kernel space.
+
+--all-user::
+Configure all used events to run in user space.
+
 SEE ALSO
 --------
 linkperf:perf-stat[1], linkperf:perf-list[1]

diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 8a301f6..1211399 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt

@@ -117,6 +117,22 @@
 	And default sort keys are changed to comm, dso_from, symbol_from, dso_to
 	and symbol_to, see '--branch-stack'.
 
+	If the --mem-mode option is used, the following sort keys are also available
+	(incompatible with --branch-stack):
+	symbol_daddr, dso_daddr, locked, tlb, mem, snoop, dcacheline.
+
+	- symbol_daddr: name of data symbol being executed on at the time of sample
+	- dso_daddr: name of library or module containing the data being executed
+	on at the time of the sample
+	- locked: whether the bus was locked at the time of the sample
+	- tlb: type of tlb access for the data at the time of the sample
+	- mem: type of memory access for the data at the time of the sample
+	- snoop: type of snoop (if any) for the data at the time of the sample
+	- dcacheline: the cacheline the data address is on at the time of the sample
+
+	And the default sort keys are changed to local_weight, mem, sym, dso,
+	symbol_daddr, dso_daddr, snoop, tlb, locked, see '--mem-mode'.
+
 	If the data file has tracepoint event(s), following (dynamic) sort keys
 	are also available:
 	trace, trace_fields, [<event>.]<field>[/raw]
@@ -151,22 +167,6 @@
 	By default, every sort keys not specified in -F will be appended
 	automatically.
 
-	If --mem-mode option is used, following sort keys are also available
-	(incompatible with --branch-stack):
-	symbol_daddr, dso_daddr, locked, tlb, mem, snoop, dcacheline.
-
-	- symbol_daddr: name of data symbol being executed on at the time of sample
-	- dso_daddr: name of library or module containing the data being executed
-	on at the time of sample
-	- locked: whether the bus was locked at the time of sample
-	- tlb: type of tlb access for the data at the time of sample
-	- mem: type of memory access for the data at the time of sample
-	- snoop: type of snoop (if any) for the data at the time of sample
-	- dcacheline: the cacheline the data address is on at the time of sample
-
-	And default sort keys are changed to local_weight, mem, sym, dso,
-	symbol_daddr, dso_daddr, snoop, tlb, locked, see '--mem-mode'.
-
 -p::
 --parent=<regex>::
         A regex filter to identify parent. The parent is a caller of this
@@ -351,7 +351,10 @@
 
 --percent-limit::
 	Do not show entries which have an overhead under that percent.
-	(Default: 0).
+	(Default: 0).  Note that this option also sets the percent limit (threshold)
+	of callchains.  However the default value of callchain threshold is
+	different than the default value of hist entries.  Please see the
+	--call-graph option for details.
 
 --percentage::
 	Determine how to display the overhead percentage of filtered entries.
@@ -398,6 +401,9 @@
 --raw-trace::
 	When displaying traceevent output, do not use print fmt or plugins.
 
+--hierarchy::
+	Enable hierarchical output.
+
 include::callchain-overhead-calculation.txt[]
 
 SEE ALSO

diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 52ef7a9..04f23b4 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt

@@ -69,6 +69,14 @@
 --scale::
 	scale/normalize counter values
 
+-d::
+--detailed::
+	print more detailed statistics, can be specified up to 3 times
+
+	   -d:          detailed events, L1 and LLC data cache
+        -d -d:     more detailed events, dTLB and iTLB events
+     -d -d -d:     very detailed events, adding prefetch events
+
 -r::
 --repeat=<n>::
 	repeat command and print average + stddev (max: 100). 0 means forever.
@@ -139,6 +147,10 @@
 The overhead percentage could be high in some cases, for instance with small, sub 100ms intervals.  Use with caution.
 	example: 'perf stat -I 1000 -e cycles -a sleep 5'
 
+--metric-only::
+Only print computed metrics. Print them in a single line.
+Don't show any raw values. Not supported with --per-thread.
+
 --per-socket::
 Aggregate counts per processor socket for system-wide mode measurements.  This
 is a useful mode to detect imbalance between sockets.  To enable this mode,
@@ -211,6 +223,29 @@
 
  Wall-clock time elapsed:   719.554352 msecs
 
+CSV FORMAT
+----------
+
+With -x, perf stat is able to output a not-quite-CSV format output
+Commas in the output are not put into "". To make it easy to parse
+it is recommended to use a different character like -x \;
+
+The fields are in this order:
+
+	- optional usec time stamp in fractions of second (with -I xxx)
+	- optional CPU, core, or socket identifier
+	- optional number of logical CPUs aggregated
+	- counter value
+	- unit of the counter value or empty
+	- event name
+	- run time of counter
+	- percentage of measurement time the counter was running
+	- optional variance if multiple values are collected with -r
+	- optional metric value
+	- optional unit of metric
+
+Additional metrics may be printed with all earlier fields being empty.
+
 SEE ALSO
 --------
 linkperf:perf-top[1], linkperf:perf-list[1]

diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index b0e60e1..19f046f 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt

@@ -233,6 +233,9 @@
 --raw-trace::
 	When displaying traceevent output, do not use print fmt or plugins.
 
+--hierarchy::
+	Enable hierarchy output.
+
 INTERACTIVE PROMPTING KEYS
 --------------------------
 

diff --git a/tools/perf/Documentation/perfconfig.example b/tools/perf/Documentation/perfconfig.example
index 767ea24..1d8d5bc 100644
--- a/tools/perf/Documentation/perfconfig.example
+++ b/tools/perf/Documentation/perfconfig.example

@@ -5,7 +5,7 @@
 	medium = green, lightgray
 	normal = black, lightgray
 	selected = lightgray, magenta
-	code = blue, lightgray
+	jump_arrows = blue, lightgray
 	addr = magenta, lightgray
 
 [tui]

diff --git a/tools/perf/Documentation/tips.txt b/tools/perf/Documentation/tips.txt
index e0ce957..5950b5a 100644
--- a/tools/perf/Documentation/tips.txt
+++ b/tools/perf/Documentation/tips.txt

@@ -27,3 +27,4 @@
 To change sampling frequency to 100 Hz: perf record -F 100
 See assembly instructions with percentage: perf annotate <symbol>
 If you prefer Intel style assembly, try: perf annotate -M intel
+For hierarchical output, try: perf report --hierarchy

diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index dcd9a70..32a64e6 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile

@@ -68,6 +68,20 @@
 	$(print_msg)
 	$(make)
 
+ifdef MAKECMDGOALS
+has_clean := 0
+ifneq ($(filter clean,$(MAKECMDGOALS)),)
+  has_clean := 1
+endif # clean
+
+ifeq ($(has_clean),1)
+  rest := $(filter-out clean,$(MAKECMDGOALS))
+  ifneq ($(rest),)
+$(rest): clean
+  endif # rest
+endif # has_clean
+endif # MAKECMDGOALS
+
 #
 # The clean target is not really parallel, don't print the jobs info:
 #
@@ -75,10 +89,17 @@
 	$(make)
 
 #
-# The build-test target is not really parallel, don't print the jobs info:
+# The build-test target is not really parallel, don't print the jobs info,
+# it also uses only the tests/make targets that don't pollute the source
+# repository, i.e. that uses O= or builds the tarpkg outside the source
+# repo directories.
+#
+# For a full test, use:
+#
+# make -C tools/perf -f tests/make
 #
 build-test:
-	@$(MAKE) SHUF=1 -f tests/make --no-print-directory
+	@$(MAKE) SHUF=1 -f tests/make REUSE_FEATURES_DUMP=1 MK=Makefile SET_PARALLEL=1 --no-print-directory tarpkg out
 
 #
 # All other targets get passed through:

diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 5d34815..4a4fad4 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf

@@ -58,6 +58,9 @@
 #
 # Define NO_LIBBIONIC if you do not want bionic support
 #
+# Define NO_LIBCRYPTO if you do not want libcrypto (openssl) support
+# used for generating build-ids for ELFs generated by jitdump.
+#
 # Define NO_LIBDW_DWARF_UNWIND if you do not want libdw support
 # for dwarf backtrace post unwind.
 #
@@ -136,6 +139,8 @@
 $(call allow-override,AR,$(CROSS_COMPILE)ar)
 $(call allow-override,LD,$(CROSS_COMPILE)ld)
 
+LD += $(EXTRA_LDFLAGS)
+
 PKG_CONFIG = $(CROSS_COMPILE)pkg-config
 
 RM      = rm -f
@@ -165,7 +170,16 @@
 endif
 endif
 
+# Set FEATURE_TESTS to 'all' so all possible feature checkers are executed.
+# Without this setting the output feature dump file misses some features, for
+# example, liberty. Select all checkers so we won't get an incomplete feature
+# dump file.
 ifeq ($(config),1)
+ifdef MAKECMDGOALS
+ifeq ($(filter feature-dump,$(MAKECMDGOALS)),feature-dump)
+FEATURE_TESTS := all
+endif
+endif
 include config/Makefile
 endif
 
@@ -618,7 +632,7 @@
 	$(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32
 	$(call QUIET_CLEAN, core-gen)   $(RM)  *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)FEATURE-DUMP $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* \
 		$(OUTPUT)util/intel-pt-decoder/inat-tables.c $(OUTPUT)fixdep \
-		$(OUTPUT)tests/llvm-src-{base,kbuild,prologue}.c
+		$(OUTPUT)tests/llvm-src-{base,kbuild,prologue,relocation}.c
 	$(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean
 	$(python-clean)
 

diff --git a/tools/perf/arch/arm/Makefile b/tools/perf/arch/arm/Makefile
index 7fbca17..18b1351 100644
--- a/tools/perf/arch/arm/Makefile
+++ b/tools/perf/arch/arm/Makefile

@@ -1,3 +1,4 @@
 ifndef NO_DWARF
 PERF_HAVE_DWARF_REGS := 1
 endif
+PERF_HAVE_JITDUMP := 1

diff --git a/tools/perf/arch/arm64/Makefile b/tools/perf/arch/arm64/Makefile
index 7fbca17..18b1351 100644
--- a/tools/perf/arch/arm64/Makefile
+++ b/tools/perf/arch/arm64/Makefile

@@ -1,3 +1,4 @@
 ifndef NO_DWARF
 PERF_HAVE_DWARF_REGS := 1
 endif
+PERF_HAVE_JITDUMP := 1

diff --git a/tools/perf/arch/powerpc/Makefile b/tools/perf/arch/powerpc/Makefile
index 7fbca17..56e05f1 100644
--- a/tools/perf/arch/powerpc/Makefile
+++ b/tools/perf/arch/powerpc/Makefile

@@ -1,3 +1,6 @@
 ifndef NO_DWARF
 PERF_HAVE_DWARF_REGS := 1
 endif
+
+HAVE_KVM_STAT_SUPPORT := 1
+PERF_HAVE_JITDUMP := 1

diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build
index 7b8b0d1..c8fe207 100644
--- a/tools/perf/arch/powerpc/util/Build
+++ b/tools/perf/arch/powerpc/util/Build

@@ -1,5 +1,6 @@
 libperf-y += header.o
 libperf-y += sym-handling.o
+libperf-y += kvm-stat.o
 
 libperf-$(CONFIG_DWARF) += dwarf-regs.o
 libperf-$(CONFIG_DWARF) += skip-callchain-idx.o

diff --git a/tools/perf/arch/powerpc/util/book3s_hcalls.h b/tools/perf/arch/powerpc/util/book3s_hcalls.h
new file mode 100644
index 0000000..0dd6b7f
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/book3s_hcalls.h

@@ -0,0 +1,123 @@
+#ifndef ARCH_PERF_BOOK3S_HV_HCALLS_H
+#define ARCH_PERF_BOOK3S_HV_HCALLS_H
+
+/*
+ * PowerPC HCALL codes : hcall code to name mapping
+ */
+#define kvm_trace_symbol_hcall \
+	{0x4, "H_REMOVE"},					\
+	{0x8, "H_ENTER"},					\
+	{0xc, "H_READ"},					\
+	{0x10, "H_CLEAR_MOD"},					\
+	{0x14, "H_CLEAR_REF"},					\
+	{0x18, "H_PROTECT"},					\
+	{0x1c, "H_GET_TCE"},					\
+	{0x20, "H_PUT_TCE"},					\
+	{0x24, "H_SET_SPRG0"},					\
+	{0x28, "H_SET_DABR"},					\
+	{0x2c, "H_PAGE_INIT"},					\
+	{0x30, "H_SET_ASR"},					\
+	{0x34, "H_ASR_ON"},					\
+	{0x38, "H_ASR_OFF"},					\
+	{0x3c, "H_LOGICAL_CI_LOAD"},				\
+	{0x40, "H_LOGICAL_CI_STORE"},				\
+	{0x44, "H_LOGICAL_CACHE_LOAD"},				\
+	{0x48, "H_LOGICAL_CACHE_STORE"},			\
+	{0x4c, "H_LOGICAL_ICBI"},				\
+	{0x50, "H_LOGICAL_DCBF"},				\
+	{0x54, "H_GET_TERM_CHAR"},				\
+	{0x58, "H_PUT_TERM_CHAR"},				\
+	{0x5c, "H_REAL_TO_LOGICAL"},				\
+	{0x60, "H_HYPERVISOR_DATA"},				\
+	{0x64, "H_EOI"},					\
+	{0x68, "H_CPPR"},					\
+	{0x6c, "H_IPI"},					\
+	{0x70, "H_IPOLL"},					\
+	{0x74, "H_XIRR"},					\
+	{0x78, "H_MIGRATE_DMA"},				\
+	{0x7c, "H_PERFMON"},					\
+	{0xdc, "H_REGISTER_VPA"},				\
+	{0xe0, "H_CEDE"},					\
+	{0xe4, "H_CONFER"},					\
+	{0xe8, "H_PROD"},					\
+	{0xec, "H_GET_PPP"},					\
+	{0xf0, "H_SET_PPP"},					\
+	{0xf4, "H_PURR"},					\
+	{0xf8, "H_PIC"},					\
+	{0xfc, "H_REG_CRQ"},					\
+	{0x100, "H_FREE_CRQ"},					\
+	{0x104, "H_VIO_SIGNAL"},				\
+	{0x108, "H_SEND_CRQ"},					\
+	{0x110, "H_COPY_RDMA"},					\
+	{0x114, "H_REGISTER_LOGICAL_LAN"},			\
+	{0x118, "H_FREE_LOGICAL_LAN"},				\
+	{0x11c, "H_ADD_LOGICAL_LAN_BUFFER"},			\
+	{0x120, "H_SEND_LOGICAL_LAN"},				\
+	{0x124, "H_BULK_REMOVE"},				\
+	{0x130, "H_MULTICAST_CTRL"},				\
+	{0x134, "H_SET_XDABR"},					\
+	{0x138, "H_STUFF_TCE"},					\
+	{0x13c, "H_PUT_TCE_INDIRECT"},				\
+	{0x14c, "H_CHANGE_LOGICAL_LAN_MAC"},			\
+	{0x150, "H_VTERM_PARTNER_INFO"},			\
+	{0x154, "H_REGISTER_VTERM"},				\
+	{0x158, "H_FREE_VTERM"},				\
+	{0x15c, "H_RESET_EVENTS"},				\
+	{0x160, "H_ALLOC_RESOURCE"},				\
+	{0x164, "H_FREE_RESOURCE"},				\
+	{0x168, "H_MODIFY_QP"},					\
+	{0x16c, "H_QUERY_QP"},					\
+	{0x170, "H_REREGISTER_PMR"},				\
+	{0x174, "H_REGISTER_SMR"},				\
+	{0x178, "H_QUERY_MR"},					\
+	{0x17c, "H_QUERY_MW"},					\
+	{0x180, "H_QUERY_HCA"},					\
+	{0x184, "H_QUERY_PORT"},				\
+	{0x188, "H_MODIFY_PORT"},				\
+	{0x18c, "H_DEFINE_AQP1"},				\
+	{0x190, "H_GET_TRACE_BUFFER"},				\
+	{0x194, "H_DEFINE_AQP0"},				\
+	{0x198, "H_RESIZE_MR"},					\
+	{0x19c, "H_ATTACH_MCQP"},				\
+	{0x1a0, "H_DETACH_MCQP"},				\
+	{0x1a4, "H_CREATE_RPT"},				\
+	{0x1a8, "H_REMOVE_RPT"},				\
+	{0x1ac, "H_REGISTER_RPAGES"},				\
+	{0x1b0, "H_DISABLE_AND_GETC"},				\
+	{0x1b4, "H_ERROR_DATA"},				\
+	{0x1b8, "H_GET_HCA_INFO"},				\
+	{0x1bc, "H_GET_PERF_COUNT"},				\
+	{0x1c0, "H_MANAGE_TRACE"},				\
+	{0x1d4, "H_FREE_LOGICAL_LAN_BUFFER"},			\
+	{0x1d8, "H_POLL_PENDING"},				\
+	{0x1e4, "H_QUERY_INT_STATE"},				\
+	{0x244, "H_ILLAN_ATTRIBUTES"},				\
+	{0x250, "H_MODIFY_HEA_QP"},				\
+	{0x254, "H_QUERY_HEA_QP"},				\
+	{0x258, "H_QUERY_HEA"},					\
+	{0x25c, "H_QUERY_HEA_PORT"},				\
+	{0x260, "H_MODIFY_HEA_PORT"},				\
+	{0x264, "H_REG_BCMC"},					\
+	{0x268, "H_DEREG_BCMC"},				\
+	{0x26c, "H_REGISTER_HEA_RPAGES"},			\
+	{0x270, "H_DISABLE_AND_GET_HEA"},			\
+	{0x274, "H_GET_HEA_INFO"},				\
+	{0x278, "H_ALLOC_HEA_RESOURCE"},			\
+	{0x284, "H_ADD_CONN"},					\
+	{0x288, "H_DEL_CONN"},					\
+	{0x298, "H_JOIN"},					\
+	{0x2a4, "H_VASI_STATE"},				\
+	{0x2b0, "H_ENABLE_CRQ"},				\
+	{0x2b8, "H_GET_EM_PARMS"},				\
+	{0x2d0, "H_SET_MPP"},					\
+	{0x2d4, "H_GET_MPP"},					\
+	{0x2ec, "H_HOME_NODE_ASSOCIATIVITY"},			\
+	{0x2f4, "H_BEST_ENERGY"},				\
+	{0x2fc, "H_XIRR_X"},					\
+	{0x300, "H_RANDOM"},					\
+	{0x304, "H_COP"},					\
+	{0x314, "H_GET_MPP_X"},					\
+	{0x31c, "H_SET_MODE"},					\
+	{0xf000, "H_RTAS"}					\
+
+#endif

diff --git a/tools/perf/arch/powerpc/util/book3s_hv_exits.h b/tools/perf/arch/powerpc/util/book3s_hv_exits.h
new file mode 100644
index 0000000..e68ba2d
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/book3s_hv_exits.h

@@ -0,0 +1,33 @@
+#ifndef ARCH_PERF_BOOK3S_HV_EXITS_H
+#define ARCH_PERF_BOOK3S_HV_EXITS_H
+
+/*
+ * PowerPC Interrupt vectors : exit code to name mapping
+ */
+
+#define kvm_trace_symbol_exit \
+	{0x0,	"RETURN_TO_HOST"}, \
+	{0x100, "SYSTEM_RESET"}, \
+	{0x200, "MACHINE_CHECK"}, \
+	{0x300, "DATA_STORAGE"}, \
+	{0x380, "DATA_SEGMENT"}, \
+	{0x400, "INST_STORAGE"}, \
+	{0x480, "INST_SEGMENT"}, \
+	{0x500, "EXTERNAL"}, \
+	{0x501, "EXTERNAL_LEVEL"}, \
+	{0x502, "EXTERNAL_HV"}, \
+	{0x600, "ALIGNMENT"}, \
+	{0x700, "PROGRAM"}, \
+	{0x800, "FP_UNAVAIL"}, \
+	{0x900, "DECREMENTER"}, \
+	{0x980, "HV_DECREMENTER"}, \
+	{0xc00, "SYSCALL"}, \
+	{0xd00, "TRACE"}, \
+	{0xe00, "H_DATA_STORAGE"}, \
+	{0xe20, "H_INST_STORAGE"}, \
+	{0xe40, "H_EMUL_ASSIST"}, \
+	{0xf00, "PERFMON"}, \
+	{0xf20, "ALTIVEC"}, \
+	{0xf40, "VSX"}
+
+#endif

diff --git a/tools/perf/arch/powerpc/util/kvm-stat.c b/tools/perf/arch/powerpc/util/kvm-stat.c
new file mode 100644
index 0000000..74eee30
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/kvm-stat.c

@@ -0,0 +1,170 @@
+#include "util/kvm-stat.h"
+#include "util/parse-events.h"
+#include "util/debug.h"
+
+#include "book3s_hv_exits.h"
+#include "book3s_hcalls.h"
+
+#define NR_TPS 4
+
+const char *vcpu_id_str = "vcpu_id";
+const int decode_str_len = 40;
+const char *kvm_entry_trace = "kvm_hv:kvm_guest_enter";
+const char *kvm_exit_trace = "kvm_hv:kvm_guest_exit";
+
+define_exit_reasons_table(hv_exit_reasons, kvm_trace_symbol_exit);
+define_exit_reasons_table(hcall_reasons, kvm_trace_symbol_hcall);
+
+/* Tracepoints specific to ppc_book3s_hv */
+const char *ppc_book3s_hv_kvm_tp[] = {
+	"kvm_hv:kvm_guest_enter",
+	"kvm_hv:kvm_guest_exit",
+	"kvm_hv:kvm_hcall_enter",
+	"kvm_hv:kvm_hcall_exit",
+	NULL,
+};
+
+/* 1 extra placeholder for NULL */
+const char *kvm_events_tp[NR_TPS + 1];
+const char *kvm_exit_reason;
+
+static void hcall_event_get_key(struct perf_evsel *evsel,
+				struct perf_sample *sample,
+				struct event_key *key)
+{
+	key->info = 0;
+	key->key = perf_evsel__intval(evsel, sample, "req");
+}
+
+static const char *get_hcall_exit_reason(u64 exit_code)
+{
+	struct exit_reasons_table *tbl = hcall_reasons;
+
+	while (tbl->reason != NULL) {
+		if (tbl->exit_code == exit_code)
+			return tbl->reason;
+		tbl++;
+	}
+
+	pr_debug("Unknown hcall code: %lld\n",
+	       (unsigned long long)exit_code);
+	return "UNKNOWN";
+}
+
+static bool hcall_event_end(struct perf_evsel *evsel,
+			    struct perf_sample *sample __maybe_unused,
+			    struct event_key *key __maybe_unused)
+{
+	return (!strcmp(evsel->name, kvm_events_tp[3]));
+}
+
+static bool hcall_event_begin(struct perf_evsel *evsel,
+			      struct perf_sample *sample, struct event_key *key)
+{
+	if (!strcmp(evsel->name, kvm_events_tp[2])) {
+		hcall_event_get_key(evsel, sample, key);
+		return true;
+	}
+
+	return false;
+}
+static void hcall_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused,
+				   struct event_key *key,
+				   char *decode)
+{
+	const char *hcall_reason = get_hcall_exit_reason(key->key);
+
+	scnprintf(decode, decode_str_len, "%s", hcall_reason);
+}
+
+static struct kvm_events_ops hcall_events = {
+	.is_begin_event = hcall_event_begin,
+	.is_end_event = hcall_event_end,
+	.decode_key = hcall_event_decode_key,
+	.name = "HCALL-EVENT",
+};
+
+static struct kvm_events_ops exit_events = {
+	.is_begin_event = exit_event_begin,
+	.is_end_event = exit_event_end,
+	.decode_key = exit_event_decode_key,
+	.name = "VM-EXIT"
+};
+
+struct kvm_reg_events_ops kvm_reg_events_ops[] = {
+	{ .name = "vmexit", .ops = &exit_events },
+	{ .name = "hcall", .ops = &hcall_events },
+	{ NULL, NULL },
+};
+
+const char * const kvm_skip_events[] = {
+	NULL,
+};
+
+
+static int is_tracepoint_available(const char *str, struct perf_evlist *evlist)
+{
+	struct parse_events_error err;
+	int ret;
+
+	err.str = NULL;
+	ret = parse_events(evlist, str, &err);
+	if (err.str)
+		pr_err("%s : %s\n", str, err.str);
+	return ret;
+}
+
+static int ppc__setup_book3s_hv(struct perf_kvm_stat *kvm,
+				struct perf_evlist *evlist)
+{
+	const char **events_ptr;
+	int i, nr_tp = 0, err = -1;
+
+	/* Check for book3s_hv tracepoints */
+	for (events_ptr = ppc_book3s_hv_kvm_tp; *events_ptr; events_ptr++) {
+		err = is_tracepoint_available(*events_ptr, evlist);
+		if (err)
+			return -1;
+		nr_tp++;
+	}
+
+	for (i = 0; i < nr_tp; i++)
+		kvm_events_tp[i] = ppc_book3s_hv_kvm_tp[i];
+
+	kvm_events_tp[i] = NULL;
+	kvm_exit_reason = "trap";
+	kvm->exit_reasons = hv_exit_reasons;
+	kvm->exit_reasons_isa = "HV";
+
+	return 0;
+}
+
+/* Wrapper to setup kvm tracepoints */
+static int ppc__setup_kvm_tp(struct perf_kvm_stat *kvm)
+{
+	struct perf_evlist *evlist = perf_evlist__new();
+
+	if (evlist == NULL)
+		return -ENOMEM;
+
+	/* Right now, only supported on book3s_hv */
+	return ppc__setup_book3s_hv(kvm, evlist);
+}
+
+int setup_kvm_events_tp(struct perf_kvm_stat *kvm)
+{
+	return ppc__setup_kvm_tp(kvm);
+}
+
+int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid __maybe_unused)
+{
+	int ret;
+
+	ret = ppc__setup_kvm_tp(kvm);
+	if (ret) {
+		kvm->exit_reasons = NULL;
+		kvm->exit_reasons_isa = NULL;
+	}
+
+	return ret;
+}

diff --git a/tools/perf/arch/s390/util/kvm-stat.c b/tools/perf/arch/s390/util/kvm-stat.c
index a5dbc07..ed57df2 100644
--- a/tools/perf/arch/s390/util/kvm-stat.c
+++ b/tools/perf/arch/s390/util/kvm-stat.c

@@ -10,7 +10,7 @@
  */
 
 #include "../../util/kvm-stat.h"
-#include <asm/kvm_perf.h>
+#include <asm/sie.h>
 
 define_exit_reasons_table(sie_exit_reasons, sie_intercept_code);
 define_exit_reasons_table(sie_icpt_insn_codes, icpt_insn_codes);
@@ -18,6 +18,12 @@
 define_exit_reasons_table(sie_diagnose_codes, diagnose_codes);
 define_exit_reasons_table(sie_icpt_prog_codes, icpt_prog_codes);
 
+const char *vcpu_id_str = "id";
+const int decode_str_len = 40;
+const char *kvm_exit_reason = "icptcode";
+const char *kvm_entry_trace = "kvm:kvm_s390_sie_enter";
+const char *kvm_exit_trace = "kvm:kvm_s390_sie_exit";
+
 static void event_icpt_insn_get_key(struct perf_evsel *evsel,
 				    struct perf_sample *sample,
 				    struct event_key *key)
@@ -73,7 +79,7 @@
 	.name = "VM-EXIT"
 };
 
-const char * const kvm_events_tp[] = {
+const char *kvm_events_tp[] = {
 	"kvm:kvm_s390_sie_enter",
 	"kvm:kvm_s390_sie_exit",
 	"kvm:kvm_s390_intercept_instruction",

diff --git a/tools/perf/arch/x86/Makefile b/tools/perf/arch/x86/Makefile
index 09ba923..269af21 100644
--- a/tools/perf/arch/x86/Makefile
+++ b/tools/perf/arch/x86/Makefile

@@ -3,3 +3,4 @@
 endif
 HAVE_KVM_STAT_SUPPORT := 1
 PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1
+PERF_HAVE_JITDUMP := 1

diff --git a/tools/perf/arch/x86/tests/rdpmc.c b/tools/perf/arch/x86/tests/rdpmc.c
index 7bb0d13..72193f19 100644
--- a/tools/perf/arch/x86/tests/rdpmc.c
+++ b/tools/perf/arch/x86/tests/rdpmc.c

@@ -59,7 +59,7 @@
 		u64 quot, rem;
 
 		quot = (cyc >> time_shift);
-		rem = cyc & ((1 << time_shift) - 1);
+		rem = cyc & (((u64)1 << time_shift) - 1);
 		delta = time_offset + quot * time_mult +
 			((rem * time_mult) >> time_shift);
 
@@ -103,6 +103,7 @@
 
 	sigfillset(&sa.sa_mask);
 	sa.sa_sigaction = segfault_handler;
+	sa.sa_flags = 0;
 	sigaction(SIGSEGV, &sa, NULL);
 
 	fd = sys_perf_event_open(&attr, 0, -1, -1,

diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c
index 8d8150f..d66f9ad 100644
--- a/tools/perf/arch/x86/util/intel-bts.c
+++ b/tools/perf/arch/x86/util/intel-bts.c

@@ -60,7 +60,9 @@
 	u64 misc;
 };
 
-static size_t intel_bts_info_priv_size(struct auxtrace_record *itr __maybe_unused)
+static size_t
+intel_bts_info_priv_size(struct auxtrace_record *itr __maybe_unused,
+			 struct perf_evlist *evlist __maybe_unused)
 {
 	return INTEL_BTS_AUXTRACE_PRIV_SIZE;
 }

diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
index f05daac..a339517 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c

@@ -89,7 +89,7 @@
 
 	*config = attr.config;
 out_free:
-	parse_events__free_terms(terms);
+	parse_events_terms__delete(terms);
 	return err;
 }
 
@@ -273,7 +273,9 @@
 	return attr;
 }
 
-static size_t intel_pt_info_priv_size(struct auxtrace_record *itr __maybe_unused)
+static size_t
+intel_pt_info_priv_size(struct auxtrace_record *itr __maybe_unused,
+			struct perf_evlist *evlist __maybe_unused)
 {
 	return INTEL_PT_AUXTRACE_PRIV_SIZE;
 }

diff --git a/tools/perf/arch/x86/util/kvm-stat.c b/tools/perf/arch/x86/util/kvm-stat.c
index 14e4e66..b63d4be 100644
--- a/tools/perf/arch/x86/util/kvm-stat.c
+++ b/tools/perf/arch/x86/util/kvm-stat.c

@@ -1,5 +1,7 @@
 #include "../../util/kvm-stat.h"
-#include <asm/kvm_perf.h>
+#include <asm/svm.h>
+#include <asm/vmx.h>
+#include <asm/kvm.h>
 
 define_exit_reasons_table(vmx_exit_reasons, VMX_EXIT_REASONS);
 define_exit_reasons_table(svm_exit_reasons, SVM_EXIT_REASONS);
@@ -11,6 +13,12 @@
 	.name = "VM-EXIT"
 };
 
+const char *vcpu_id_str = "vcpu_id";
+const int decode_str_len = 20;
+const char *kvm_exit_reason = "exit_reason";
+const char *kvm_entry_trace = "kvm:kvm_entry";
+const char *kvm_exit_trace = "kvm:kvm_exit";
+
 /*
  * For the mmio events, we treat:
  * the time of MMIO write: kvm_mmio(KVM_TRACE_MMIO_WRITE...) -> kvm_entry
@@ -65,7 +73,7 @@
 				  struct event_key *key,
 				  char *decode)
 {
-	scnprintf(decode, DECODE_STR_LEN, "%#lx:%s",
+	scnprintf(decode, decode_str_len, "%#lx:%s",
 		  (unsigned long)key->key,
 		  key->info == KVM_TRACE_MMIO_WRITE ? "W" : "R");
 }
@@ -109,7 +117,7 @@
 				    struct event_key *key,
 				    char *decode)
 {
-	scnprintf(decode, DECODE_STR_LEN, "%#llx:%s",
+	scnprintf(decode, decode_str_len, "%#llx:%s",
 		  (unsigned long long)key->key,
 		  key->info ? "POUT" : "PIN");
 }
@@ -121,7 +129,7 @@
 	.name = "IO Port Access"
 };
 
-const char * const kvm_events_tp[] = {
+const char *kvm_events_tp[] = {
 	"kvm:kvm_entry",
 	"kvm:kvm_exit",
 	"kvm:kvm_mmio",

diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm.S b/tools/perf/bench/mem-memcpy-x86-64-asm.S
index e4c2c30..5c3cce0 100644
--- a/tools/perf/bench/mem-memcpy-x86-64-asm.S
+++ b/tools/perf/bench/mem-memcpy-x86-64-asm.S

@@ -1,6 +1,11 @@
+
+/* Various wrappers to make the kernel .S file build in user-space: */
+
 #define memcpy MEMCPY /* don't hide glibc's memcpy() */
 #define altinstr_replacement text
 #define globl p2align 4; .globl
+#define _ASM_EXTABLE_FAULT(x, y)
+
 #include "../../../arch/x86/lib/memcpy_64.S"
 /*
  * We need to provide note.GNU-stack section, saying that we want

diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index cc5c126..cfe3663 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c

@@ -245,7 +245,7 @@
 			hists__collapse_resort(hists, NULL);
 			/* Don't sort callchain */
 			perf_evsel__reset_sample_bit(pos, CALLCHAIN);
-			hists__output_resort(hists, NULL);
+			perf_evsel__output_resort(pos, NULL);
 
 			if (symbol_conf.event_group &&
 			    !perf_evsel__is_group_leader(pos))

diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c
index d93bff7..632efc6 100644
--- a/tools/perf/builtin-buildid-cache.c
+++ b/tools/perf/builtin-buildid-cache.c

@@ -38,19 +38,7 @@
 
 static int build_id_cache__kcore_dir(char *dir, size_t sz)
 {
-	struct timeval tv;
-	struct tm tm;
-	char dt[32];
-
-	if (gettimeofday(&tv, NULL) || !localtime_r(&tv.tv_sec, &tm))
-		return -1;
-
-	if (!strftime(dt, sizeof(dt), "%Y%m%d%H%M%S", &tm))
-		return -1;
-
-	scnprintf(dir, sz, "%s%02u", dt, (unsigned)tv.tv_usec / 10000);
-
-	return 0;
+	return fetch_current_timestamp(dir, sz);
 }
 
 static bool same_kallsyms_reloc(const char *from_dir, char *to_dir)

diff --git a/tools/perf/builtin-config.c b/tools/perf/builtin-config.c
index f04e804..c42448e 100644
--- a/tools/perf/builtin-config.c
+++ b/tools/perf/builtin-config.c

@@ -13,8 +13,10 @@
 #include "util/util.h"
 #include "util/debug.h"
 
+static bool use_system_config, use_user_config;
+
 static const char * const config_usage[] = {
-	"perf config [options]",
+	"perf config [<file-option>] [options]",
 	NULL
 };
 
@@ -25,6 +27,8 @@
 static struct option config_options[] = {
 	OPT_SET_UINT('l', "list", &actions,
 		     "show current config variables", ACTION_LIST),
+	OPT_BOOLEAN(0, "system", &use_system_config, "use system config file"),
+	OPT_BOOLEAN(0, "user", &use_user_config, "use user config file"),
 	OPT_END()
 };
 
@@ -42,10 +46,23 @@
 int cmd_config(int argc, const char **argv, const char *prefix __maybe_unused)
 {
 	int ret = 0;
+	char *user_config = mkpath("%s/.perfconfig", getenv("HOME"));
 
 	argc = parse_options(argc, argv, config_options, config_usage,
 			     PARSE_OPT_STOP_AT_NON_OPTION);
 
+	if (use_system_config && use_user_config) {
+		pr_err("Error: only one config file at a time\n");
+		parse_options_usage(config_usage, config_options, "user", 0);
+		parse_options_usage(NULL, config_options, "system", 0);
+		return -1;
+	}
+
+	if (use_system_config)
+		config_exclusive_filename = perf_etc_perfconfig();
+	else if (use_user_config)
+		config_exclusive_filename = user_config;
+
 	switch (actions) {
 	case ACTION_LIST:
 		if (argc) {
@@ -53,9 +70,13 @@
 			parse_options_usage(config_usage, config_options, "l", 1);
 		} else {
 			ret = perf_config(show_config, NULL);
-			if (ret < 0)
+			if (ret < 0) {
+				const char * config_filename = config_exclusive_filename;
+				if (!config_exclusive_filename)
+					config_filename = user_config;
 				pr_err("Nothing configured, "
-				       "please check your ~/.perfconfig file\n");
+				       "please check your %s \n", config_filename);
+			}
 		}
 		break;
 	default:

diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 36ccc2b..4d72359 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c

@@ -1264,8 +1264,6 @@
 	if (ret < 0)
 		return ret;
 
-	perf_config(perf_default_config, NULL);
-
 	argc = parse_options(argc, argv, options, diff_usage, 0);
 
 	if (symbol__init(NULL) < 0)

diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c
index 96c1a4c..49d55e2 100644
--- a/tools/perf/builtin-help.c
+++ b/tools/perf/builtin-help.c

@@ -86,8 +86,7 @@
 		return -1;
 	}
 
-	strbuf_remove(&buffer, 0, strlen("emacsclient"));
-	version = atoi(buffer.buf);
+	version = atoi(buffer.buf + strlen("emacsclient"));
 
 	if (version < 22) {
 		fprintf(stderr,
@@ -273,7 +272,7 @@
 	if (!prefixcmp(var, "man."))
 		return add_man_viewer_info(var, value);
 
-	return perf_default_config(var, value, cb);
+	return 0;
 }
 
 static struct cmdnames main_cmds, other_cmds;

diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 0022e02..7fa6866 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c

@@ -17,6 +17,7 @@
 #include "util/build-id.h"
 #include "util/data.h"
 #include "util/auxtrace.h"
+#include "util/jit.h"
 
 #include <subcmd/parse-options.h>
 
@@ -29,6 +30,7 @@
 	bool			sched_stat;
 	bool			have_auxtrace;
 	bool			strip;
+	bool			jit_mode;
 	const char		*input_name;
 	struct perf_data_file	output;
 	u64			bytes_written;
@@ -71,6 +73,15 @@
 	return perf_event__repipe_synth(tool, event);
 }
 
+#ifdef HAVE_JITDUMP
+static int perf_event__drop_oe(struct perf_tool *tool __maybe_unused,
+			       union perf_event *event __maybe_unused,
+			       struct ordered_events *oe __maybe_unused)
+{
+	return 0;
+}
+#endif
+
 static int perf_event__repipe_op2_synth(struct perf_tool *tool,
 					union perf_event *event,
 					struct perf_session *session
@@ -234,6 +245,31 @@
 	return err;
 }
 
+#ifdef HAVE_JITDUMP
+static int perf_event__jit_repipe_mmap(struct perf_tool *tool,
+				       union perf_event *event,
+				       struct perf_sample *sample,
+				       struct machine *machine)
+{
+	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
+	u64 n = 0;
+	int ret;
+
+	/*
+	 * if jit marker, then inject jit mmaps and generate ELF images
+	 */
+	ret = jit_process(inject->session, &inject->output, machine,
+			  event->mmap.filename, sample->pid, &n);
+	if (ret < 0)
+		return ret;
+	if (ret) {
+		inject->bytes_written += n;
+		return 0;
+	}
+	return perf_event__repipe_mmap(tool, event, sample, machine);
+}
+#endif
+
 static int perf_event__repipe_mmap2(struct perf_tool *tool,
 				   union perf_event *event,
 				   struct perf_sample *sample,
@@ -247,6 +283,31 @@
 	return err;
 }
 
+#ifdef HAVE_JITDUMP
+static int perf_event__jit_repipe_mmap2(struct perf_tool *tool,
+					union perf_event *event,
+					struct perf_sample *sample,
+					struct machine *machine)
+{
+	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
+	u64 n = 0;
+	int ret;
+
+	/*
+	 * if jit marker, then inject jit mmaps and generate ELF images
+	 */
+	ret = jit_process(inject->session, &inject->output, machine,
+			  event->mmap2.filename, sample->pid, &n);
+	if (ret < 0)
+		return ret;
+	if (ret) {
+		inject->bytes_written += n;
+		return 0;
+	}
+	return perf_event__repipe_mmap2(tool, event, sample, machine);
+}
+#endif
+
 static int perf_event__repipe_fork(struct perf_tool *tool,
 				   union perf_event *event,
 				   struct perf_sample *sample,
@@ -626,12 +687,16 @@
 	ret = perf_session__process_events(session);
 
 	if (!file_out->is_pipe) {
-		if (inject->build_ids) {
+		if (inject->build_ids)
 			perf_header__set_feat(&session->header,
 					      HEADER_BUILD_ID);
-			if (inject->have_auxtrace)
-				dsos__hit_all(session);
-		}
+		/*
+		 * Keep all buildids when there is unprocessed AUX data because
+		 * it is not known which ones the AUX trace hits.
+		 */
+		if (perf_header__has_feat(&session->header, HEADER_BUILD_ID) &&
+		    inject->have_auxtrace && !inject->itrace_synth_opts.set)
+			dsos__hit_all(session);
 		/*
 		 * The AUX areas have been removed and replaced with
 		 * synthesized hardware events, so clear the feature flag and
@@ -703,7 +768,7 @@
 	};
 	int ret;
 
-	const struct option options[] = {
+	struct option options[] = {
 		OPT_BOOLEAN('b', "build-ids", &inject.build_ids,
 			    "Inject build-ids into the output stream"),
 		OPT_STRING('i', "input", &inject.input_name, "file",
@@ -713,6 +778,9 @@
 		OPT_BOOLEAN('s', "sched-stat", &inject.sched_stat,
 			    "Merge sched-stat and sched-switch for getting events "
 			    "where and how long tasks slept"),
+#ifdef HAVE_JITDUMP
+		OPT_BOOLEAN('j', "jit", &inject.jit_mode, "merge jitdump files into perf.data file"),
+#endif
 		OPT_INCR('v', "verbose", &verbose,
 			 "be more verbose (show build ids, etc)"),
 		OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, "file",
@@ -729,7 +797,9 @@
 		"perf inject [<options>]",
 		NULL
 	};
-
+#ifndef HAVE_JITDUMP
+	set_option_nobuild(options, 'j', "jit", "NO_LIBELF=1", true);
+#endif
 	argc = parse_options(argc, argv, options, inject_usage, 0);
 
 	/*
@@ -755,6 +825,29 @@
 	if (inject.session == NULL)
 		return -1;
 
+	if (inject.build_ids) {
+		/*
+		 * to make sure the mmap records are ordered correctly
+		 * and so that the correct especially due to jitted code
+		 * mmaps. We cannot generate the buildid hit list and
+		 * inject the jit mmaps at the same time for now.
+		 */
+		inject.tool.ordered_events = true;
+		inject.tool.ordering_requires_timestamps = true;
+	}
+#ifdef HAVE_JITDUMP
+	if (inject.jit_mode) {
+		inject.tool.mmap2	   = perf_event__jit_repipe_mmap2;
+		inject.tool.mmap	   = perf_event__jit_repipe_mmap;
+		inject.tool.ordered_events = true;
+		inject.tool.ordering_requires_timestamps = true;
+		/*
+		 * JIT MMAP injection injects all MMAP events in one go, so it
+		 * does not obey finished_round semantics.
+		 */
+		inject.tool.finished_round = perf_event__drop_oe;
+	}
+#endif
 	ret = symbol__init(&inject.session->header.env);
 	if (ret < 0)
 		goto out_delete;

diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 1180105..4d3340c 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c

@@ -1834,7 +1834,7 @@
 	return cmd_record(i, rec_argv, NULL);
 }
 
-static int kmem_config(const char *var, const char *value, void *cb)
+static int kmem_config(const char *var, const char *value, void *cb __maybe_unused)
 {
 	if (!strcmp(var, "kmem.default")) {
 		if (!strcmp(value, "slab"))
@@ -1847,7 +1847,7 @@
 		return 0;
 	}
 
-	return perf_default_config(var, value, cb);
+	return 0;
 }
 
 int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)

diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 4418d92..bff6664 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c

@@ -30,7 +30,6 @@
 #include <math.h>
 
 #ifdef HAVE_KVM_STAT_SUPPORT
-#include <asm/kvm_perf.h>
 #include "util/kvm-stat.h"
 
 void exit_event_get_key(struct perf_evsel *evsel,
@@ -38,12 +37,12 @@
 			struct event_key *key)
 {
 	key->info = 0;
-	key->key = perf_evsel__intval(evsel, sample, KVM_EXIT_REASON);
+	key->key = perf_evsel__intval(evsel, sample, kvm_exit_reason);
 }
 
 bool kvm_exit_event(struct perf_evsel *evsel)
 {
-	return !strcmp(evsel->name, KVM_EXIT_TRACE);
+	return !strcmp(evsel->name, kvm_exit_trace);
 }
 
 bool exit_event_begin(struct perf_evsel *evsel,
@@ -59,7 +58,7 @@
 
 bool kvm_entry_event(struct perf_evsel *evsel)
 {
-	return !strcmp(evsel->name, KVM_ENTRY_TRACE);
+	return !strcmp(evsel->name, kvm_entry_trace);
 }
 
 bool exit_event_end(struct perf_evsel *evsel,
@@ -91,7 +90,7 @@
 	const char *exit_reason = get_exit_reason(kvm, key->exit_reasons,
 						  key->key);
 
-	scnprintf(decode, DECODE_STR_LEN, "%s", exit_reason);
+	scnprintf(decode, decode_str_len, "%s", exit_reason);
 }
 
 static bool register_kvm_events_ops(struct perf_kvm_stat *kvm)
@@ -357,7 +356,7 @@
 	time_diff = sample->time - time_begin;
 
 	if (kvm->duration && time_diff > kvm->duration) {
-		char decode[DECODE_STR_LEN];
+		char decode[decode_str_len];
 
 		kvm->events_ops->decode_key(kvm, &event->key, decode);
 		if (!skip_event(decode)) {
@@ -385,7 +384,8 @@
 			return NULL;
 		}
 
-		vcpu_record->vcpu_id = perf_evsel__intval(evsel, sample, VCPU_ID);
+		vcpu_record->vcpu_id = perf_evsel__intval(evsel, sample,
+							  vcpu_id_str);
 		thread__set_priv(thread, vcpu_record);
 	}
 
@@ -574,7 +574,7 @@
 
 static void print_result(struct perf_kvm_stat *kvm)
 {
-	char decode[DECODE_STR_LEN];
+	char decode[decode_str_len];
 	struct kvm_event *event;
 	int vcpu = kvm->trace_vcpu;
 
@@ -585,7 +585,7 @@
 
 	pr_info("\n\n");
 	print_vcpu_info(kvm);
-	pr_info("%*s ", DECODE_STR_LEN, kvm->events_ops->name);
+	pr_info("%*s ", decode_str_len, kvm->events_ops->name);
 	pr_info("%10s ", "Samples");
 	pr_info("%9s ", "Samples%");
 
@@ -604,7 +604,7 @@
 		min = get_event_min(event, vcpu);
 
 		kvm->events_ops->decode_key(kvm, &event->key, decode);
-		pr_info("%*s ", DECODE_STR_LEN, decode);
+		pr_info("%*s ", decode_str_len, decode);
 		pr_info("%10llu ", (unsigned long long)ecount);
 		pr_info("%8.2f%% ", (double)ecount / kvm->total_count * 100);
 		pr_info("%8.2f%% ", (double)etime / kvm->total_time * 100);
@@ -1132,6 +1132,11 @@
 		_p;			\
 	})
 
+int __weak setup_kvm_events_tp(struct perf_kvm_stat *kvm __maybe_unused)
+{
+	return 0;
+}
+
 static int
 kvm_events_record(struct perf_kvm_stat *kvm, int argc, const char **argv)
 {
@@ -1148,7 +1153,14 @@
 		NULL
 	};
 	const char * const *events_tp;
+	int ret;
+
 	events_tp_size = 0;
+	ret = setup_kvm_events_tp(kvm);
+	if (ret < 0) {
+		pr_err("Unable to setup the kvm tracepoints\n");
+		return ret;
+	}
 
 	for (events_tp = kvm_events_tp; *events_tp; events_tp++)
 		events_tp_size++;
@@ -1377,6 +1389,12 @@
 	/*
 	 * generate the event list
 	 */
+	err = setup_kvm_events_tp(kvm);
+	if (err < 0) {
+		pr_err("Unable to setup the kvm tracepoints\n");
+		return err;
+	}
+
 	kvm->evlist = kvm_live_event_list();
 	if (kvm->evlist == NULL) {
 		err = -1;

diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
index 3901700..88aeac9 100644
--- a/tools/perf/builtin-mem.c
+++ b/tools/perf/builtin-mem.c

@@ -6,6 +6,8 @@
 #include "util/tool.h"
 #include "util/session.h"
 #include "util/data.h"
+#include "util/mem-events.h"
+#include "util/debug.h"
 
 #define MEM_OPERATION_LOAD	0x1
 #define MEM_OPERATION_STORE	0x2
@@ -21,11 +23,56 @@
 	DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
 };
 
+static int parse_record_events(const struct option *opt,
+			       const char *str, int unset __maybe_unused)
+{
+	struct perf_mem *mem = *(struct perf_mem **)opt->value;
+	int j;
+
+	if (strcmp(str, "list")) {
+		if (!perf_mem_events__parse(str)) {
+			mem->operation = 0;
+			return 0;
+		}
+		exit(-1);
+	}
+
+	for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
+		struct perf_mem_event *e = &perf_mem_events[j];
+
+		fprintf(stderr, "%-13s%-*s%s\n",
+			e->tag,
+			verbose ? 25 : 0,
+			verbose ? perf_mem_events__name(j) : "",
+			e->supported ? ": available" : "");
+	}
+	exit(0);
+}
+
+static const char * const __usage[] = {
+	"perf mem record [<options>] [<command>]",
+	"perf mem record [<options>] -- <command> [<options>]",
+	NULL
+};
+
+static const char * const *record_mem_usage = __usage;
+
 static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
 {
 	int rec_argc, i = 0, j;
 	const char **rec_argv;
 	int ret;
+	struct option options[] = {
+	OPT_CALLBACK('e', "event", &mem, "event",
+		     "event selector. use 'perf mem record -e list' to list available events",
+		     parse_record_events),
+	OPT_INCR('v', "verbose", &verbose,
+		 "be more verbose (show counter open errors, etc)"),
+	OPT_END()
+	};
+
+	argc = parse_options(argc, argv, options, record_mem_usage,
+			     PARSE_OPT_STOP_AT_NON_OPTION);
 
 	rec_argc = argc + 7; /* max number of arguments */
 	rec_argv = calloc(rec_argc + 1, sizeof(char *));
@@ -35,23 +82,40 @@
 	rec_argv[i++] = "record";
 
 	if (mem->operation & MEM_OPERATION_LOAD)
+		perf_mem_events[PERF_MEM_EVENTS__LOAD].record = true;
+
+	if (perf_mem_events[PERF_MEM_EVENTS__LOAD].record)
 		rec_argv[i++] = "-W";
 
 	rec_argv[i++] = "-d";
 
-	if (mem->operation & MEM_OPERATION_LOAD) {
-		rec_argv[i++] = "-e";
-		rec_argv[i++] = "cpu/mem-loads/pp";
-	}
+	for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
+		if (!perf_mem_events[j].record)
+			continue;
 
-	if (mem->operation & MEM_OPERATION_STORE) {
-		rec_argv[i++] = "-e";
-		rec_argv[i++] = "cpu/mem-stores/pp";
-	}
+		if (!perf_mem_events[j].supported) {
+			pr_err("failed: event '%s' not supported\n",
+			       perf_mem_events__name(j));
+			return -1;
+		}
 
-	for (j = 1; j < argc; j++, i++)
+		rec_argv[i++] = "-e";
+		rec_argv[i++] = perf_mem_events__name(j);
+	};
+
+	for (j = 0; j < argc; j++, i++)
 		rec_argv[i] = argv[j];
 
+	if (verbose > 0) {
+		pr_debug("calling: record ");
+
+		while (rec_argv[j]) {
+			pr_debug("%s ", rec_argv[j]);
+			j++;
+		}
+		pr_debug("\n");
+	}
+
 	ret = cmd_record(i, rec_argv, NULL);
 	free(rec_argv);
 	return ret;
@@ -298,6 +362,10 @@
 		NULL
 	};
 
+	if (perf_mem_events__init()) {
+		pr_err("failed: memory events not supported\n");
+		return -1;
+	}
 
 	argc = parse_options_subcommand(argc, argv, mem_options, mem_subcommands,
 					mem_usage, PARSE_OPT_STOP_AT_NON_OPTION);

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 319712a..515510e 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c

@@ -32,6 +32,8 @@
 #include "util/parse-branch-options.h"
 #include "util/parse-regs-options.h"
 #include "util/llvm-utils.h"
+#include "util/bpf-loader.h"
+#include "asm/bug.h"
 
 #include <unistd.h>
 #include <sched.h>
@@ -49,7 +51,9 @@
 	const char		*progname;
 	int			realtime_prio;
 	bool			no_buildid;
+	bool			no_buildid_set;
 	bool			no_buildid_cache;
+	bool			no_buildid_cache_set;
 	bool			buildid_all;
 	unsigned long long	samples;
 };
@@ -320,7 +324,10 @@
 		} else {
 			pr_err("failed to mmap with %d (%s)\n", errno,
 				strerror_r(errno, msg, sizeof(msg)));
-			rc = -errno;
+			if (errno)
+				rc = -errno;
+			else
+				rc = -EINVAL;
 		}
 		goto out;
 	}
@@ -464,6 +471,29 @@
 	perf_header__clear_feat(&session->header, HEADER_STAT);
 }
 
+static void
+record__finish_output(struct record *rec)
+{
+	struct perf_data_file *file = &rec->file;
+	int fd = perf_data_file__fd(file);
+
+	if (file->is_pipe)
+		return;
+
+	rec->session->header.data_size += rec->bytes_written;
+	file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR);
+
+	if (!rec->no_buildid) {
+		process_buildids(rec);
+
+		if (rec->buildid_all)
+			dsos__hit_all(rec->session);
+	}
+	perf_session__write_header(rec->session, rec->evlist, fd, true);
+
+	return;
+}
+
 static volatile int workload_exec_errno;
 
 /*
@@ -482,6 +512,74 @@
 
 static void snapshot_sig_handler(int sig);
 
+static int record__synthesize(struct record *rec)
+{
+	struct perf_session *session = rec->session;
+	struct machine *machine = &session->machines.host;
+	struct perf_data_file *file = &rec->file;
+	struct record_opts *opts = &rec->opts;
+	struct perf_tool *tool = &rec->tool;
+	int fd = perf_data_file__fd(file);
+	int err = 0;
+
+	if (file->is_pipe) {
+		err = perf_event__synthesize_attrs(tool, session,
+						   process_synthesized_event);
+		if (err < 0) {
+			pr_err("Couldn't synthesize attrs.\n");
+			goto out;
+		}
+
+		if (have_tracepoints(&rec->evlist->entries)) {
+			/*
+			 * FIXME err <= 0 here actually means that
+			 * there were no tracepoints so its not really
+			 * an error, just that we don't need to
+			 * synthesize anything.  We really have to
+			 * return this more properly and also
+			 * propagate errors that now are calling die()
+			 */
+			err = perf_event__synthesize_tracing_data(tool,	fd, rec->evlist,
+								  process_synthesized_event);
+			if (err <= 0) {
+				pr_err("Couldn't record tracing data.\n");
+				goto out;
+			}
+			rec->bytes_written += err;
+		}
+	}
+
+	if (rec->opts.full_auxtrace) {
+		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
+					session, process_synthesized_event);
+		if (err)
+			goto out;
+	}
+
+	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
+						 machine);
+	WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
+			   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
+			   "Check /proc/kallsyms permission or run as root.\n");
+
+	err = perf_event__synthesize_modules(tool, process_synthesized_event,
+					     machine);
+	WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
+			   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
+			   "Check /proc/modules permission or run as root.\n");
+
+	if (perf_guest) {
+		machines__process_guests(&session->machines,
+					 perf_event__synthesize_guest_os, tool);
+	}
+
+	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
+					    process_synthesized_event, opts->sample_address,
+					    opts->proc_map_timeout);
+out:
+	return err;
+}
+
 static int __cmd_record(struct record *rec, int argc, const char **argv)
 {
 	int err;
@@ -534,6 +632,16 @@
 		goto out_child;
 	}
 
+	err = bpf__apply_obj_config();
+	if (err) {
+		char errbuf[BUFSIZ];
+
+		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
+		pr_err("ERROR: Apply config to BPF failed: %s\n",
+			 errbuf);
+		goto out_child;
+	}
+
 	/*
 	 * Normally perf_session__new would do this, but it doesn't have the
 	 * evlist.
@@ -566,63 +674,8 @@
 
 	machine = &session->machines.host;
 
-	if (file->is_pipe) {
-		err = perf_event__synthesize_attrs(tool, session,
-						   process_synthesized_event);
-		if (err < 0) {
-			pr_err("Couldn't synthesize attrs.\n");
-			goto out_child;
-		}
-
-		if (have_tracepoints(&rec->evlist->entries)) {
-			/*
-			 * FIXME err <= 0 here actually means that
-			 * there were no tracepoints so its not really
-			 * an error, just that we don't need to
-			 * synthesize anything.  We really have to
-			 * return this more properly and also
-			 * propagate errors that now are calling die()
-			 */
-			err = perf_event__synthesize_tracing_data(tool,	fd, rec->evlist,
-								  process_synthesized_event);
-			if (err <= 0) {
-				pr_err("Couldn't record tracing data.\n");
-				goto out_child;
-			}
-			rec->bytes_written += err;
-		}
-	}
-
-	if (rec->opts.full_auxtrace) {
-		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
-					session, process_synthesized_event);
-		if (err)
-			goto out_delete_session;
-	}
-
-	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
-						 machine);
+	err = record__synthesize(rec);
 	if (err < 0)
-		pr_err("Couldn't record kernel reference relocation symbol\n"
-		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
-		       "Check /proc/kallsyms permission or run as root.\n");
-
-	err = perf_event__synthesize_modules(tool, process_synthesized_event,
-					     machine);
-	if (err < 0)
-		pr_err("Couldn't record kernel module information.\n"
-		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
-		       "Check /proc/modules permission or run as root.\n");
-
-	if (perf_guest) {
-		machines__process_guests(&session->machines,
-					 perf_event__synthesize_guest_os, tool);
-	}
-
-	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
-					    process_synthesized_event, opts->sample_address,
-					    opts->proc_map_timeout);
-	if (err != 0)
 		goto out_child;
 
 	if (rec->realtime_prio) {
@@ -758,18 +811,8 @@
 	/* this will be recalculated during process_buildids() */
 	rec->samples = 0;
 
-	if (!err && !file->is_pipe) {
-		rec->session->header.data_size += rec->bytes_written;
-		file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR);
-
-		if (!rec->no_buildid) {
-			process_buildids(rec);
-
-			if (rec->buildid_all)
-				dsos__hit_all(rec->session);
-		}
-		perf_session__write_header(rec->session, rec->evlist, fd, true);
-	}
+	if (!err)
+		record__finish_output(rec);
 
 	if (!err && !quiet) {
 		char samples[128];
@@ -1097,10 +1140,12 @@
 	OPT_BOOLEAN('P', "period", &record.opts.period, "Record the sample period"),
 	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
 		    "don't sample"),
-	OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
-		    "do not update the buildid cache"),
-	OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
-		    "do not collect buildids in perf.data"),
+	OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
+			&record.no_buildid_cache_set,
+			"do not update the buildid cache"),
+	OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
+			&record.no_buildid_set,
+			"do not collect buildids in perf.data"),
 	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
 		     "monitor event in cgroup name only",
 		     parse_cgroups),
@@ -1136,6 +1181,12 @@
 			"per thread proc mmap processing timeout in ms"),
 	OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
 		    "Record context switch events"),
+	OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
+			 "Configure all used events to run in kernel space.",
+			 PARSE_OPT_EXCLUSIVE),
+	OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
+			 "Configure all used events to run in user space.",
+			 PARSE_OPT_EXCLUSIVE),
 	OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
 		   "clang binary to use for compiling BPF scriptlets"),
 	OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",

diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 2bf537f..7eea49f 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c

@@ -75,7 +75,10 @@
 		return 0;
 	}
 	if (!strcmp(var, "report.percent-limit")) {
-		rep->min_percent = strtof(value, NULL);
+		double pcnt = strtof(value, NULL);
+
+		rep->min_percent = pcnt;
+		callchain_param.min_percent = pcnt;
 		return 0;
 	}
 	if (!strcmp(var, "report.children")) {
@@ -87,7 +90,7 @@
 		return 0;
 	}
 
-	return perf_default_config(var, value, cb);
+	return 0;
 }
 
 static int hist_iter__report_callback(struct hist_entry_iter *iter,
@@ -466,10 +469,11 @@
 	return ret;
 }
 
-static void report__collapse_hists(struct report *rep)
+static int report__collapse_hists(struct report *rep)
 {
 	struct ui_progress prog;
 	struct perf_evsel *pos;
+	int ret = 0;
 
 	ui_progress__init(&prog, rep->nr_entries, "Merging related events...");
 
@@ -481,7 +485,9 @@
 
 		hists->socket_filter = rep->socket_filter;
 
-		hists__collapse_resort(hists, &prog);
+		ret = hists__collapse_resort(hists, &prog);
+		if (ret < 0)
+			break;
 
 		/* Non-group events are considered as leader */
 		if (symbol_conf.event_group &&
@@ -494,6 +500,7 @@
 	}
 
 	ui_progress__finish();
+	return ret;
 }
 
 static void report__output_resort(struct report *rep)
@@ -504,7 +511,7 @@
 	ui_progress__init(&prog, rep->nr_entries, "Sorting events for output...");
 
 	evlist__for_each(rep->session->evlist, pos)
-		hists__output_resort(evsel__hists(pos), &prog);
+		perf_evsel__output_resort(pos, &prog);
 
 	ui_progress__finish();
 }
@@ -561,7 +568,11 @@
 		}
 	}
 
-	report__collapse_hists(rep);
+	ret = report__collapse_hists(rep);
+	if (ret) {
+		ui__error("failed to process hist entry\n");
+		return ret;
+	}
 
 	if (session_done())
 		return 0;
@@ -633,8 +644,10 @@
 		    int unset __maybe_unused)
 {
 	struct report *rep = opt->value;
+	double pcnt = strtof(str, NULL);
 
-	rep->min_percent = strtof(str, NULL);
+	rep->min_percent = pcnt;
+	callchain_param.min_percent = pcnt;
 	return 0;
 }
 
@@ -798,6 +811,8 @@
 		    "only show processor socket that match with this filter"),
 	OPT_BOOLEAN(0, "raw-trace", &symbol_conf.raw_trace,
 		    "Show raw trace event output (do not use print fmt or plugins)"),
+	OPT_BOOLEAN(0, "hierarchy", &symbol_conf.report_hierarchy,
+		    "Show entries in a hierarchy"),
 	OPT_END()
 	};
 	struct perf_data_file file = {
@@ -907,13 +922,19 @@
 		symbol_conf.cumulate_callchain = false;
 	}
 
-	if (setup_sorting(session->evlist) < 0) {
-		if (sort_order)
-			parse_options_usage(report_usage, options, "s", 1);
-		if (field_order)
-			parse_options_usage(sort_order ? NULL : report_usage,
-					    options, "F", 1);
-		goto error;
+	if (symbol_conf.report_hierarchy) {
+		/* disable incompatible options */
+		symbol_conf.event_group = false;
+		symbol_conf.cumulate_callchain = false;
+
+		if (field_order) {
+			pr_err("Error: --hierarchy and --fields options cannot be used together\n");
+			parse_options_usage(report_usage, options, "F", 1);
+			parse_options_usage(NULL, options, "hierarchy", 0);
+			goto error;
+		}
+
+		sort__need_collapse = true;
 	}
 
 	/* Force tty output for header output and per-thread stat. */
@@ -925,6 +946,15 @@
 	else
 		use_browser = 0;
 
+	if (setup_sorting(session->evlist) < 0) {
+		if (sort_order)
+			parse_options_usage(report_usage, options, "s", 1);
+		if (field_order)
+			parse_options_usage(sort_order ? NULL : report_usage,
+					    options, "F", 1);
+		goto error;
+	}
+
 	if (report.header || report.header_only) {
 		perf_session__fprintf_info(session, stdout,
 					   report.show_full_info);

diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index c691214..57f9a7e 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c

@@ -23,6 +23,7 @@
 #include "util/stat.h"
 #include <linux/bitmap.h>
 #include "asm/bug.h"
+#include "util/mem-events.h"
 
 static char const		*script_name;
 static char const		*generate_script_lang;
@@ -58,6 +59,9 @@
 	PERF_OUTPUT_IREGS	    = 1U << 14,
 	PERF_OUTPUT_BRSTACK	    = 1U << 15,
 	PERF_OUTPUT_BRSTACKSYM	    = 1U << 16,
+	PERF_OUTPUT_DATA_SRC	    = 1U << 17,
+	PERF_OUTPUT_WEIGHT	    = 1U << 18,
+	PERF_OUTPUT_BPF_OUTPUT	    = 1U << 19,
 };
 
 struct output_option {
@@ -81,6 +85,9 @@
 	{.str = "iregs", .field = PERF_OUTPUT_IREGS},
 	{.str = "brstack", .field = PERF_OUTPUT_BRSTACK},
 	{.str = "brstacksym", .field = PERF_OUTPUT_BRSTACKSYM},
+	{.str = "data_src", .field = PERF_OUTPUT_DATA_SRC},
+	{.str = "weight",   .field = PERF_OUTPUT_WEIGHT},
+	{.str = "bpf-output",   .field = PERF_OUTPUT_BPF_OUTPUT},
 };
 
 /* default set to maintain compatibility with current format */
@@ -101,7 +108,7 @@
 			      PERF_OUTPUT_SYM | PERF_OUTPUT_DSO |
 			      PERF_OUTPUT_PERIOD,
 
-		.invalid_fields = PERF_OUTPUT_TRACE,
+		.invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT,
 	},
 
 	[PERF_TYPE_SOFTWARE] = {
@@ -111,7 +118,7 @@
 			      PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
 			      PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
 			      PERF_OUTPUT_SYM | PERF_OUTPUT_DSO |
-			      PERF_OUTPUT_PERIOD,
+			      PERF_OUTPUT_PERIOD | PERF_OUTPUT_BPF_OUTPUT,
 
 		.invalid_fields = PERF_OUTPUT_TRACE,
 	},
@@ -121,7 +128,7 @@
 
 		.fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID |
 				  PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
-				  PERF_OUTPUT_EVNAME | PERF_OUTPUT_TRACE,
+				  PERF_OUTPUT_EVNAME | PERF_OUTPUT_TRACE
 	},
 
 	[PERF_TYPE_RAW] = {
@@ -131,9 +138,10 @@
 			      PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
 			      PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
 			      PERF_OUTPUT_SYM | PERF_OUTPUT_DSO |
-			      PERF_OUTPUT_PERIOD,
+			      PERF_OUTPUT_PERIOD |  PERF_OUTPUT_ADDR |
+			      PERF_OUTPUT_DATA_SRC | PERF_OUTPUT_WEIGHT,
 
-		.invalid_fields = PERF_OUTPUT_TRACE,
+		.invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT,
 	},
 
 	[PERF_TYPE_BREAKPOINT] = {
@@ -145,7 +153,7 @@
 			      PERF_OUTPUT_SYM | PERF_OUTPUT_DSO |
 			      PERF_OUTPUT_PERIOD,
 
-		.invalid_fields = PERF_OUTPUT_TRACE,
+		.invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT,
 	},
 };
 
@@ -242,6 +250,16 @@
 					   PERF_OUTPUT_ADDR, allow_user_set))
 		return -EINVAL;
 
+	if (PRINT_FIELD(DATA_SRC) &&
+		perf_evsel__check_stype(evsel, PERF_SAMPLE_DATA_SRC, "DATA_SRC",
+					PERF_OUTPUT_DATA_SRC))
+		return -EINVAL;
+
+	if (PRINT_FIELD(WEIGHT) &&
+		perf_evsel__check_stype(evsel, PERF_SAMPLE_WEIGHT, "WEIGHT",
+					PERF_OUTPUT_WEIGHT))
+		return -EINVAL;
+
 	if (PRINT_FIELD(SYM) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR)) {
 		pr_err("Display of symbols requested but neither sample IP nor "
 			   "sample address\nis selected. Hence, no addresses to convert "
@@ -608,6 +626,84 @@
 	printf("  %-4s ", str);
 }
 
+struct printer_data {
+	int line_no;
+	bool hit_nul;
+	bool is_printable;
+};
+
+static void
+print_sample_bpf_output_printer(enum binary_printer_ops op,
+				unsigned int val,
+				void *extra)
+{
+	unsigned char ch = (unsigned char)val;
+	struct printer_data *printer_data = extra;
+
+	switch (op) {
+	case BINARY_PRINT_DATA_BEGIN:
+		printf("\n");
+		break;
+	case BINARY_PRINT_LINE_BEGIN:
+		printf("%17s", !printer_data->line_no ? "BPF output:" :
+						        "           ");
+		break;
+	case BINARY_PRINT_ADDR:
+		printf(" %04x:", val);
+		break;
+	case BINARY_PRINT_NUM_DATA:
+		printf(" %02x", val);
+		break;
+	case BINARY_PRINT_NUM_PAD:
+		printf("   ");
+		break;
+	case BINARY_PRINT_SEP:
+		printf("  ");
+		break;
+	case BINARY_PRINT_CHAR_DATA:
+		if (printer_data->hit_nul && ch)
+			printer_data->is_printable = false;
+
+		if (!isprint(ch)) {
+			printf("%c", '.');
+
+			if (!printer_data->is_printable)
+				break;
+
+			if (ch == '\0')
+				printer_data->hit_nul = true;
+			else
+				printer_data->is_printable = false;
+		} else {
+			printf("%c", ch);
+		}
+		break;
+	case BINARY_PRINT_CHAR_PAD:
+		printf(" ");
+		break;
+	case BINARY_PRINT_LINE_END:
+		printf("\n");
+		printer_data->line_no++;
+		break;
+	case BINARY_PRINT_DATA_END:
+	default:
+		break;
+	}
+}
+
+static void print_sample_bpf_output(struct perf_sample *sample)
+{
+	unsigned int nr_bytes = sample->raw_size;
+	struct printer_data printer_data = {0, false, true};
+
+	print_binary(sample->raw_data, nr_bytes, 8,
+		     print_sample_bpf_output_printer, &printer_data);
+
+	if (printer_data.is_printable && printer_data.hit_nul)
+		printf("%17s \"%s\"\n", "BPF string:",
+		       (char *)(sample->raw_data));
+}
+
 struct perf_script {
 	struct perf_tool	tool;
 	struct perf_session	*session;
@@ -634,6 +730,23 @@
 	return max;
 }
 
+static size_t data_src__printf(u64 data_src)
+{
+	struct mem_info mi = { .data_src.val = data_src };
+	char decode[100];
+	char out[100];
+	static int maxlen;
+	int len;
+
+	perf_script__meminfo_scnprintf(decode, 100, &mi);
+
+	len = scnprintf(out, 100, "%16" PRIx64 " %s", data_src, decode);
+	if (maxlen < len)
+		maxlen = len;
+
+	return printf("%-*s", maxlen, out);
+}
+
 static void process_event(struct perf_script *script, union perf_event *event,
 			  struct perf_sample *sample, struct perf_evsel *evsel,
 			  struct addr_location *al)
@@ -673,6 +786,12 @@
 	if (PRINT_FIELD(ADDR))
 		print_sample_addr(event, sample, thread, attr);
 
+	if (PRINT_FIELD(DATA_SRC))
+		data_src__printf(sample->data_src);
+
+	if (PRINT_FIELD(WEIGHT))
+		printf("%16" PRIu64, sample->weight);
+
 	if (PRINT_FIELD(IP)) {
 		if (!symbol_conf.use_callchain)
 			printf(" ");
@@ -692,6 +811,9 @@
 	else if (PRINT_FIELD(BRSTACKSYM))
 		print_sample_brstacksym(event, sample, thread, attr);
 
+	if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT))
+		print_sample_bpf_output(sample);
+
 	printf("\n");
 }
 
@@ -1090,23 +1212,6 @@
 	return NULL;
 }
 
-static struct script_spec *script_spec__findnew(const char *spec,
-						struct scripting_ops *ops)
-{
-	struct script_spec *s = script_spec__find(spec);
-
-	if (s)
-		return s;
-
-	s = script_spec__new(spec, ops);
-	if (!s)
-		return NULL;
-
-	script_spec__add(s);
-
-	return s;
-}
-
 int script_spec_register(const char *spec, struct scripting_ops *ops)
 {
 	struct script_spec *s;
@@ -1115,9 +1220,11 @@
 	if (s)
 		return -1;
 
-	s = script_spec__findnew(spec, ops);
+	s = script_spec__new(spec, ops);
 	if (!s)
 		return -1;
+	else
+		script_spec__add(s);
 
 	return 0;
 }

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 038e877..1f19f2f 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c

@@ -122,6 +122,7 @@
 static unsigned int		initial_delay			= 0;
 static unsigned int		unit_width			= 4; /* strlen("unit") */
 static bool			forever				= false;
+static bool			metric_only			= false;
 static struct timespec		ref_time;
 static struct cpu_map		*aggr_map;
 static aggr_get_id_t		aggr_get_id;
@@ -735,6 +736,191 @@
 	}
 }
 
+struct outstate {
+	FILE *fh;
+	bool newline;
+	const char *prefix;
+	int  nfields;
+	int  id, nr;
+	struct perf_evsel *evsel;
+};
+
+#define METRIC_LEN  35
+
+static void new_line_std(void *ctx)
+{
+	struct outstate *os = ctx;
+
+	os->newline = true;
+}
+
+static void do_new_line_std(struct outstate *os)
+{
+	fputc('\n', os->fh);
+	fputs(os->prefix, os->fh);
+	aggr_printout(os->evsel, os->id, os->nr);
+	if (stat_config.aggr_mode == AGGR_NONE)
+		fprintf(os->fh, "        ");
+	fprintf(os->fh, "                                                 ");
+}
+
+static void print_metric_std(void *ctx, const char *color, const char *fmt,
+			     const char *unit, double val)
+{
+	struct outstate *os = ctx;
+	FILE *out = os->fh;
+	int n;
+	bool newline = os->newline;
+
+	os->newline = false;
+
+	if (unit == NULL || fmt == NULL) {
+		fprintf(out, "%-*s", METRIC_LEN, "");
+		return;
+	}
+
+	if (newline)
+		do_new_line_std(os);
+
+	n = fprintf(out, " # ");
+	if (color)
+		n += color_fprintf(out, color, fmt, val);
+	else
+		n += fprintf(out, fmt, val);
+	fprintf(out, " %-*s", METRIC_LEN - n - 1, unit);
+}
+
+static void new_line_csv(void *ctx)
+{
+	struct outstate *os = ctx;
+	int i;
+
+	fputc('\n', os->fh);
+	if (os->prefix)
+		fprintf(os->fh, "%s%s", os->prefix, csv_sep);
+	aggr_printout(os->evsel, os->id, os->nr);
+	for (i = 0; i < os->nfields; i++)
+		fputs(csv_sep, os->fh);
+}
+
+static void print_metric_csv(void *ctx,
+			     const char *color __maybe_unused,
+			     const char *fmt, const char *unit, double val)
+{
+	struct outstate *os = ctx;
+	FILE *out = os->fh;
+	char buf[64], *vals, *ends;
+
+	if (unit == NULL || fmt == NULL) {
+		fprintf(out, "%s%s%s%s", csv_sep, csv_sep, csv_sep, csv_sep);
+		return;
+	}
+	snprintf(buf, sizeof(buf), fmt, val);
+	vals = buf;
+	while (isspace(*vals))
+		vals++;
+	ends = vals;
+	while (isdigit(*ends) || *ends == '.')
+		ends++;
+	*ends = 0;
+	while (isspace(*unit))
+		unit++;
+	fprintf(out, "%s%s%s%s", csv_sep, vals, csv_sep, unit);
+}
+
+#define METRIC_ONLY_LEN 20
+
+/* Filter out some columns that don't work well in metrics only mode */
+
+static bool valid_only_metric(const char *unit)
+{
+	if (!unit)
+		return false;
+	if (strstr(unit, "/sec") ||
+	    strstr(unit, "hz") ||
+	    strstr(unit, "Hz") ||
+	    strstr(unit, "CPUs utilized"))
+		return false;
+	return true;
+}
+
+static const char *fixunit(char *buf, struct perf_evsel *evsel,
+			   const char *unit)
+{
+	if (!strncmp(unit, "of all", 6)) {
+		snprintf(buf, 1024, "%s %s", perf_evsel__name(evsel),
+			 unit);
+		return buf;
+	}
+	return unit;
+}
+
+static void print_metric_only(void *ctx, const char *color, const char *fmt,
+			      const char *unit, double val)
+{
+	struct outstate *os = ctx;
+	FILE *out = os->fh;
+	int n;
+	char buf[1024];
+	unsigned mlen = METRIC_ONLY_LEN;
+
+	if (!valid_only_metric(unit))
+		return;
+	unit = fixunit(buf, os->evsel, unit);
+	if (color)
+		n = color_fprintf(out, color, fmt, val);
+	else
+		n = fprintf(out, fmt, val);
+	if (n > METRIC_ONLY_LEN)
+		n = METRIC_ONLY_LEN;
+	if (mlen < strlen(unit))
+		mlen = strlen(unit) + 1;
+	fprintf(out, "%*s", mlen - n, "");
+}
+
+static void print_metric_only_csv(void *ctx, const char *color __maybe_unused,
+				  const char *fmt,
+				  const char *unit, double val)
+{
+	struct outstate *os = ctx;
+	FILE *out = os->fh;
+	char buf[64], *vals, *ends;
+	char tbuf[1024];
+
+	if (!valid_only_metric(unit))
+		return;
+	unit = fixunit(tbuf, os->evsel, unit);
+	snprintf(buf, sizeof buf, fmt, val);
+	vals = buf;
+	while (isspace(*vals))
+		vals++;
+	ends = vals;
+	while (isdigit(*ends) || *ends == '.')
+		ends++;
+	*ends = 0;
+	fprintf(out, "%s%s", vals, csv_sep);
+}
+
+static void new_line_metric(void *ctx __maybe_unused)
+{
+}
+
+static void print_metric_header(void *ctx, const char *color __maybe_unused,
+				const char *fmt __maybe_unused,
+				const char *unit, double val __maybe_unused)
+{
+	struct outstate *os = ctx;
+	char tbuf[1024];
+
+	if (!valid_only_metric(unit))
+		return;
+	unit = fixunit(tbuf, os->evsel, unit);
+	if (csv_output)
+		fprintf(os->fh, "%s%s", unit, csv_sep);
+	else
+		fprintf(os->fh, "%-*s ", METRIC_ONLY_LEN, unit);
+}
+
 static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 {
 	FILE *output = stat_config.output;
@@ -763,6 +949,28 @@
 		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
 }
 
+static int first_shadow_cpu(struct perf_evsel *evsel, int id)
+{
+	int i;
+
+	if (!aggr_get_id)
+		return 0;
+
+	if (stat_config.aggr_mode == AGGR_NONE)
+		return id;
+
+	if (stat_config.aggr_mode == AGGR_GLOBAL)
+		return 0;
+
+	for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) {
+		int cpu2 = perf_evsel__cpus(evsel)->map[i];
+
+		if (aggr_get_id(evsel_list->cpus, cpu2) == id)
+			return cpu2;
+	}
+	return 0;
+}
+
 static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 {
 	FILE *output = stat_config.output;
@@ -793,22 +1001,124 @@
 		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
 }
 
-static void printout(int id, int nr, struct perf_evsel *counter, double uval)
+static void printout(int id, int nr, struct perf_evsel *counter, double uval,
+		     char *prefix, u64 run, u64 ena, double noise)
 {
-	int cpu = cpu_map__id_to_cpu(id);
+	struct perf_stat_output_ctx out;
+	struct outstate os = {
+		.fh = stat_config.output,
+		.prefix = prefix ? prefix : "",
+		.id = id,
+		.nr = nr,
+		.evsel = counter,
+	};
+	print_metric_t pm = print_metric_std;
+	void (*nl)(void *);
 
-	if (stat_config.aggr_mode == AGGR_GLOBAL)
-		cpu = 0;
+	if (metric_only) {
+		nl = new_line_metric;
+		if (csv_output)
+			pm = print_metric_only_csv;
+		else
+			pm = print_metric_only;
+	} else
+		nl = new_line_std;
 
-	if (nsec_counter(counter))
+	if (csv_output && !metric_only) {
+		static int aggr_fields[] = {
+			[AGGR_GLOBAL] = 0,
+			[AGGR_THREAD] = 1,
+			[AGGR_NONE] = 1,
+			[AGGR_SOCKET] = 2,
+			[AGGR_CORE] = 2,
+		};
+
+		pm = print_metric_csv;
+		nl = new_line_csv;
+		os.nfields = 3;
+		os.nfields += aggr_fields[stat_config.aggr_mode];
+		if (counter->cgrp)
+			os.nfields++;
+	}
+	if (run == 0 || ena == 0 || counter->counts->scaled == -1) {
+		if (metric_only) {
+			pm(&os, NULL, "", "", 0);
+			return;
+		}
+		aggr_printout(counter, id, nr);
+
+		fprintf(stat_config.output, "%*s%s",
+			csv_output ? 0 : 18,
+			counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
+			csv_sep);
+
+		fprintf(stat_config.output, "%-*s%s",
+			csv_output ? 0 : unit_width,
+			counter->unit, csv_sep);
+
+		fprintf(stat_config.output, "%*s",
+			csv_output ? 0 : -25,
+			perf_evsel__name(counter));
+
+		if (counter->cgrp)
+			fprintf(stat_config.output, "%s%s",
+				csv_sep, counter->cgrp->name);
+
+		if (!csv_output)
+			pm(&os, NULL, NULL, "", 0);
+		print_noise(counter, noise);
+		print_running(run, ena);
+		if (csv_output)
+			pm(&os, NULL, NULL, "", 0);
+		return;
+	}
+
+	if (metric_only)
+		/* nothing */;
+	else if (nsec_counter(counter))
 		nsec_printout(id, nr, counter, uval);
 	else
 		abs_printout(id, nr, counter, uval);
 
-	if (!csv_output && !stat_config.interval)
-		perf_stat__print_shadow_stats(stat_config.output, counter,
-					      uval, cpu,
-					      stat_config.aggr_mode);
+	out.print_metric = pm;
+	out.new_line = nl;
+	out.ctx = &os;
+
+	if (csv_output && !metric_only) {
+		print_noise(counter, noise);
+		print_running(run, ena);
+	}
+
+	perf_stat__print_shadow_stats(counter, uval,
+				first_shadow_cpu(counter, id),
+				&out);
+	if (!csv_output && !metric_only) {
+		print_noise(counter, noise);
+		print_running(run, ena);
+	}
+}
+
+static void aggr_update_shadow(void)
+{
+	int cpu, s2, id, s;
+	u64 val;
+	struct perf_evsel *counter;
+
+	for (s = 0; s < aggr_map->nr; s++) {
+		id = aggr_map->map[s];
+		evlist__for_each(evsel_list, counter) {
+			val = 0;
+			for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
+				s2 = aggr_get_id(evsel_list->cpus, cpu);
+				if (s2 != id)
+					continue;
+				val += perf_counts(counter->counts, cpu, 0)->val;
+			}
+			val = val * counter->scale;
+			perf_stat__update_shadow_stats(counter, &val,
+						       first_shadow_cpu(counter, id));
+		}
+	}
 }
 
 static void print_aggr(char *prefix)
@@ -818,12 +1128,23 @@
 	int cpu, s, s2, id, nr;
 	double uval;
 	u64 ena, run, val;
+	bool first;
 
 	if (!(aggr_map || aggr_get_id))
 		return;
 
+	aggr_update_shadow();
+
+	/*
+	 * With metric_only everything is on a single line.
+	 * Without each counter has its own line.
+	 */
 	for (s = 0; s < aggr_map->nr; s++) {
+		if (prefix && metric_only)
+			fprintf(output, "%s", prefix);
+
 		id = aggr_map->map[s];
+		first = true;
 		evlist__for_each(evsel_list, counter) {
 			val = ena = run = 0;
 			nr = 0;
@@ -836,41 +1157,20 @@
 				run += perf_counts(counter->counts, cpu, 0)->run;
 				nr++;
 			}
-			if (prefix)
+			if (first && metric_only) {
+				first = false;
+				aggr_printout(counter, id, nr);
+			}
+			if (prefix && !metric_only)
 				fprintf(output, "%s", prefix);
 
-			if (run == 0 || ena == 0) {
-				aggr_printout(counter, id, nr);
-
-				fprintf(output, "%*s%s",
-					csv_output ? 0 : 18,
-					counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
-					csv_sep);
-
-				fprintf(output, "%-*s%s",
-					csv_output ? 0 : unit_width,
-					counter->unit, csv_sep);
-
-				fprintf(output, "%*s",
-					csv_output ? 0 : -25,
-					perf_evsel__name(counter));
-
-				if (counter->cgrp)
-					fprintf(output, "%s%s",
-						csv_sep, counter->cgrp->name);
-
-				print_running(run, ena);
-				fputc('\n', output);
-				continue;
-			}
 			uval = val * counter->scale;
-			printout(id, nr, counter, uval);
-			if (!csv_output)
-				print_noise(counter, 1.0);
-
-			print_running(run, ena);
-			fputc('\n', output);
+			printout(id, nr, counter, uval, prefix, run, ena, 1.0);
+			if (!metric_only)
+				fputc('\n', output);
 		}
+		if (metric_only)
+			fputc('\n', output);
 	}
 }
 
@@ -895,12 +1195,7 @@
 			fprintf(output, "%s", prefix);
 
 		uval = val * counter->scale;
-		printout(thread, 0, counter, uval);
-
-		if (!csv_output)
-			print_noise(counter, 1.0);
-
-		print_running(run, ena);
+		printout(thread, 0, counter, uval, prefix, run, ena, 1.0);
 		fputc('\n', output);
 	}
 }
@@ -914,43 +1209,19 @@
 	FILE *output = stat_config.output;
 	struct perf_stat_evsel *ps = counter->priv;
 	double avg = avg_stats(&ps->res_stats[0]);
-	int scaled = counter->counts->scaled;
 	double uval;
 	double avg_enabled, avg_running;
 
 	avg_enabled = avg_stats(&ps->res_stats[1]);
 	avg_running = avg_stats(&ps->res_stats[2]);
 
-	if (prefix)
+	if (prefix && !metric_only)
 		fprintf(output, "%s", prefix);
 
-	if (scaled == -1 || !counter->supported) {
-		fprintf(output, "%*s%s",
-			csv_output ? 0 : 18,
-			counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
-			csv_sep);
-		fprintf(output, "%-*s%s",
-			csv_output ? 0 : unit_width,
-			counter->unit, csv_sep);
-		fprintf(output, "%*s",
-			csv_output ? 0 : -25,
-			perf_evsel__name(counter));
-
-		if (counter->cgrp)
-			fprintf(output, "%s%s", csv_sep, counter->cgrp->name);
-
-		print_running(avg_running, avg_enabled);
-		fputc('\n', output);
-		return;
-	}
-
 	uval = avg * counter->scale;
-	printout(-1, 0, counter, uval);
-
-	print_noise(counter, avg);
-
-	print_running(avg_running, avg_enabled);
-	fprintf(output, "\n");
+	printout(-1, 0, counter, uval, prefix, avg_running, avg_enabled, avg);
+	if (!metric_only)
+		fprintf(output, "\n");
 }
 
 /*
@@ -972,41 +1243,80 @@
 		if (prefix)
 			fprintf(output, "%s", prefix);
 
-		if (run == 0 || ena == 0) {
-			fprintf(output, "CPU%*d%s%*s%s",
-				csv_output ? 0 : -4,
-				perf_evsel__cpus(counter)->map[cpu], csv_sep,
-				csv_output ? 0 : 18,
-				counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
-				csv_sep);
-
-				fprintf(output, "%-*s%s",
-					csv_output ? 0 : unit_width,
-					counter->unit, csv_sep);
-
-				fprintf(output, "%*s",
-					csv_output ? 0 : -25,
-					perf_evsel__name(counter));
-
-			if (counter->cgrp)
-				fprintf(output, "%s%s",
-					csv_sep, counter->cgrp->name);
-
-			print_running(run, ena);
-			fputc('\n', output);
-			continue;
-		}
-
 		uval = val * counter->scale;
-		printout(cpu, 0, counter, uval);
-		if (!csv_output)
-			print_noise(counter, 1.0);
-		print_running(run, ena);
+		printout(cpu, 0, counter, uval, prefix, run, ena, 1.0);
 
 		fputc('\n', output);
 	}
 }
 
+static void print_no_aggr_metric(char *prefix)
+{
+	int cpu;
+	int nrcpus = 0;
+	struct perf_evsel *counter;
+	u64 ena, run, val;
+	double uval;
+
+	nrcpus = evsel_list->cpus->nr;
+	for (cpu = 0; cpu < nrcpus; cpu++) {
+		bool first = true;
+
+		if (prefix)
+			fputs(prefix, stat_config.output);
+		evlist__for_each(evsel_list, counter) {
+			if (first) {
+				aggr_printout(counter, cpu, 0);
+				first = false;
+			}
+			val = perf_counts(counter->counts, cpu, 0)->val;
+			ena = perf_counts(counter->counts, cpu, 0)->ena;
+			run = perf_counts(counter->counts, cpu, 0)->run;
+
+			uval = val * counter->scale;
+			printout(cpu, 0, counter, uval, prefix, run, ena, 1.0);
+		}
+		fputc('\n', stat_config.output);
+	}
+}
+
+static int aggr_header_lens[] = {
+	[AGGR_CORE] = 18,
+	[AGGR_SOCKET] = 12,
+	[AGGR_NONE] = 6,
+	[AGGR_THREAD] = 24,
+	[AGGR_GLOBAL] = 0,
+};
+
+static void print_metric_headers(char *prefix)
+{
+	struct perf_stat_output_ctx out;
+	struct perf_evsel *counter;
+	struct outstate os = {
+		.fh = stat_config.output
+	};
+
+	if (prefix)
+		fprintf(stat_config.output, "%s", prefix);
+
+	if (!csv_output)
+		fprintf(stat_config.output, "%*s",
+			aggr_header_lens[stat_config.aggr_mode], "");
+
+	/* Print metrics headers only */
+	evlist__for_each(evsel_list, counter) {
+		os.evsel = counter;
+		out.ctx = &os;
+		out.print_metric = print_metric_header;
+		out.new_line = new_line_metric;
+		os.evsel = counter;
+		perf_stat__print_shadow_stats(counter, 0,
+					      0,
+					      &out);
+	}
+	fputc('\n', stat_config.output);
+}
+
 static void print_interval(char *prefix, struct timespec *ts)
 {
 	FILE *output = stat_config.output;
@@ -1014,7 +1324,7 @@
 
 	sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep);
 
-	if (num_print_interval == 0 && !csv_output) {
+	if (num_print_interval == 0 && !csv_output && !metric_only) {
 		switch (stat_config.aggr_mode) {
 		case AGGR_SOCKET:
 			fprintf(output, "#           time socket cpus             counts %*s events\n", unit_width, "unit");
@@ -1101,6 +1411,17 @@
 	else
 		print_header(argc, argv);
 
+	if (metric_only) {
+		static int num_print_iv;
+
+		if (num_print_iv == 0)
+			print_metric_headers(prefix);
+		if (num_print_iv++ == 25)
+			num_print_iv = 0;
+		if (stat_config.aggr_mode == AGGR_GLOBAL && prefix)
+			fprintf(stat_config.output, "%s", prefix);
+	}
+
 	switch (stat_config.aggr_mode) {
 	case AGGR_CORE:
 	case AGGR_SOCKET:
@@ -1113,10 +1434,16 @@
 	case AGGR_GLOBAL:
 		evlist__for_each(evsel_list, counter)
 			print_counter_aggr(counter, prefix);
+		if (metric_only)
+			fputc('\n', stat_config.output);
 		break;
 	case AGGR_NONE:
-		evlist__for_each(evsel_list, counter)
-			print_counter(counter, prefix);
+		if (metric_only)
+			print_no_aggr_metric(prefix);
+		else {
+			evlist__for_each(evsel_list, counter)
+				print_counter(counter, prefix);
+		}
 		break;
 	case AGGR_UNSET:
 	default:
@@ -1237,6 +1564,8 @@
 		     "aggregate counts per thread", AGGR_THREAD),
 	OPT_UINTEGER('D', "delay", &initial_delay,
 		     "ms to wait before starting measurement after program start"),
+	OPT_BOOLEAN(0, "metric-only", &metric_only,
+			"Only print computed metrics. No raw values"),
 	OPT_END()
 };
 
@@ -1435,7 +1764,7 @@
  */
 static int add_default_attributes(void)
 {
-	struct perf_event_attr default_attrs[] = {
+	struct perf_event_attr default_attrs0[] = {
 
   { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK		},
   { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES	},
@@ -1443,8 +1772,14 @@
   { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS		},
 
   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES		},
+};
+	struct perf_event_attr frontend_attrs[] = {
   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND	},
+};
+	struct perf_event_attr backend_attrs[] = {
   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND	},
+};
+	struct perf_event_attr default_attrs1[] = {
   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS		},
   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS	},
   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES		},
@@ -1561,7 +1896,19 @@
 	}
 
 	if (!evsel_list->nr_entries) {
-		if (perf_evlist__add_default_attrs(evsel_list, default_attrs) < 0)
+		if (perf_evlist__add_default_attrs(evsel_list, default_attrs0) < 0)
+			return -1;
+		if (pmu_have_event("cpu", "stalled-cycles-frontend")) {
+			if (perf_evlist__add_default_attrs(evsel_list,
+						frontend_attrs) < 0)
+				return -1;
+		}
+		if (pmu_have_event("cpu", "stalled-cycles-backend")) {
+			if (perf_evlist__add_default_attrs(evsel_list,
+						backend_attrs) < 0)
+				return -1;
+		}
+		if (perf_evlist__add_default_attrs(evsel_list, default_attrs1) < 0)
 			return -1;
 	}
 
@@ -1825,9 +2172,11 @@
 	if (evsel_list == NULL)
 		return -ENOMEM;
 
+	parse_events__shrink_config_terms();
 	argc = parse_options_subcommand(argc, argv, stat_options, stat_subcommands,
 					(const char **) stat_usage,
 					PARSE_OPT_STOP_AT_NON_OPTION);
+	perf_stat__init_shadow_stats();
 
 	if (csv_sep) {
 		csv_output = true;
@@ -1858,6 +2207,16 @@
 		goto out;
 	}
 
+	if (metric_only && stat_config.aggr_mode == AGGR_THREAD) {
+		fprintf(stderr, "--metric-only is not supported with --per-thread\n");
+		goto out;
+	}
+
+	if (metric_only && run_count > 1) {
+		fprintf(stderr, "--metric-only is not supported with -r\n");
+		goto out;
+	}
+
 	if (output_fd < 0) {
 		fprintf(stderr, "argument to --log-fd must be a > 0\n");
 		parse_options_usage(stat_usage, stat_options, "log-fd", 0);

diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index bf01cbb..94af190 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c

@@ -252,7 +252,8 @@
 	char bf[160];
 	int printed = 0;
 	const int win_width = top->winsize.ws_col - 1;
-	struct hists *hists = evsel__hists(top->sym_evsel);
+	struct perf_evsel *evsel = top->sym_evsel;
+	struct hists *hists = evsel__hists(evsel);
 
 	puts(CONSOLE_CLEAR);
 
@@ -288,7 +289,7 @@
 	}
 
 	hists__collapse_resort(hists, NULL);
-	hists__output_resort(hists, NULL);
+	perf_evsel__output_resort(evsel, NULL);
 
 	hists__output_recalc_col_len(hists, top->print_entries - printed);
 	putchar('\n');
@@ -540,6 +541,7 @@
 static void perf_top__sort_new_samples(void *arg)
 {
 	struct perf_top *t = arg;
+	struct perf_evsel *evsel = t->sym_evsel;
 	struct hists *hists;
 
 	perf_top__reset_sample_counters(t);
@@ -547,7 +549,7 @@
 	if (t->evlist->selected != NULL)
 		t->sym_evsel = t->evlist->selected;
 
-	hists = evsel__hists(t->sym_evsel);
+	hists = evsel__hists(evsel);
 
 	if (t->evlist->enabled) {
 		if (t->zero) {
@@ -559,7 +561,7 @@
 	}
 
 	hists__collapse_resort(hists, NULL);
-	hists__output_resort(hists, NULL);
+	perf_evsel__output_resort(evsel, NULL);
 }
 
 static void *display_thread_tui(void *arg)
@@ -1063,7 +1065,7 @@
 	return parse_callchain_top_opt(arg);
 }
 
-static int perf_top_config(const char *var, const char *value, void *cb)
+static int perf_top_config(const char *var, const char *value, void *cb __maybe_unused)
 {
 	if (!strcmp(var, "top.call-graph"))
 		var = "call-graph.record-mode"; /* fall-through */
@@ -1072,7 +1074,7 @@
 		return 0;
 	}
 
-	return perf_default_config(var, value, cb);
+	return 0;
 }
 
 static int
@@ -1212,6 +1214,8 @@
 		     parse_branch_stack),
 	OPT_BOOLEAN(0, "raw-trace", &symbol_conf.raw_trace,
 		    "Show raw trace event output (do not use print fmt or plugins)"),
+	OPT_BOOLEAN(0, "hierarchy", &symbol_conf.report_hierarchy,
+		    "Show entries in a hierarchy"),
 	OPT_END()
 	};
 	const char * const top_usage[] = {
@@ -1239,10 +1243,30 @@
 		goto out_delete_evlist;
 	}
 
+	if (symbol_conf.report_hierarchy) {
+		/* disable incompatible options */
+		symbol_conf.event_group = false;
+		symbol_conf.cumulate_callchain = false;
+
+		if (field_order) {
+			pr_err("Error: --hierarchy and --fields options cannot be used together\n");
+			parse_options_usage(top_usage, options, "fields", 0);
+			parse_options_usage(NULL, options, "hierarchy", 0);
+			goto out_delete_evlist;
+		}
+	}
+
 	sort__mode = SORT_MODE__TOP;
 	/* display thread wants entries to be collapsed in a different tree */
 	sort__need_collapse = 1;
 
+	if (top.use_stdio)
+		use_browser = 0;
+	else if (top.use_tui)
+		use_browser = 1;
+
+	setup_browser(false);
+
 	if (setup_sorting(top.evlist) < 0) {
 		if (sort_order)
 			parse_options_usage(top_usage, options, "s", 1);
@@ -1252,13 +1276,6 @@
 		goto out_delete_evlist;
 	}
 
-	if (top.use_stdio)
-		use_browser = 0;
-	else if (top.use_tui)
-		use_browser = 1;
-
-	setup_browser(false);
-
 	status = target__validate(target);
 	if (status) {
 		target__strerror(target, status, errbuf, BUFSIZ);

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 20916dd..8dc98c5 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c

@@ -33,6 +33,7 @@
 #include "util/stat.h"
 #include "trace-event.h"
 #include "util/parse-events.h"
+#include "util/bpf-loader.h"
 
 #include <libaudit.h>
 #include <stdlib.h>
@@ -1724,8 +1725,12 @@
 
 	sc->args = sc->tp_format->format.fields;
 	sc->nr_args = sc->tp_format->format.nr_fields;
-	/* drop nr field - not relevant here; does not exist on older kernels */
-	if (sc->args && strcmp(sc->args->name, "nr") == 0) {
+	/*
+	 * We need to check and discard the first variable '__syscall_nr'
+	 * or 'nr' that mean the syscall number. It is needless here.
+	 * So drop '__syscall_nr' or 'nr' field but does not exist on older kernels.
+	 */
+	if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
 		sc->args = sc->args->next;
 		--sc->nr_args;
 	}
@@ -2177,6 +2182,37 @@
 	return 0;
 }
 
+static void bpf_output__printer(enum binary_printer_ops op,
+				unsigned int val, void *extra)
+{
+	FILE *output = extra;
+	unsigned char ch = (unsigned char)val;
+
+	switch (op) {
+	case BINARY_PRINT_CHAR_DATA:
+		fprintf(output, "%c", isprint(ch) ? ch : '.');
+		break;
+	case BINARY_PRINT_DATA_BEGIN:
+	case BINARY_PRINT_LINE_BEGIN:
+	case BINARY_PRINT_ADDR:
+	case BINARY_PRINT_NUM_DATA:
+	case BINARY_PRINT_NUM_PAD:
+	case BINARY_PRINT_SEP:
+	case BINARY_PRINT_CHAR_PAD:
+	case BINARY_PRINT_LINE_END:
+	case BINARY_PRINT_DATA_END:
+	default:
+		break;
+	}
+}
+
+static void bpf_output__fprintf(struct trace *trace,
+				struct perf_sample *sample)
+{
+	print_binary(sample->raw_data, sample->raw_size, 8,
+		     bpf_output__printer, trace->output);
+}
+
 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
 				union perf_event *event __maybe_unused,
 				struct perf_sample *sample)
@@ -2189,7 +2225,9 @@
 
 	fprintf(trace->output, "%s:", evsel->name);
 
-	if (evsel->tp_format) {
+	if (perf_evsel__is_bpf_output(evsel)) {
+		bpf_output__fprintf(trace, sample);
+	} else if (evsel->tp_format) {
 		event_format__fprintf(evsel->tp_format, sample->cpu,
 				      sample->raw_data, sample->raw_size,
 				      trace->output);
@@ -2586,6 +2624,16 @@
 	if (err < 0)
 		goto out_error_open;
 
+	err = bpf__apply_obj_config();
+	if (err) {
+		char errbuf[BUFSIZ];
+
+		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
+		pr_err("ERROR: Apply config to BPF failed: %s\n",
+			 errbuf);
+		goto out_error_open;
+	}
+
 	/*
 	 * Better not use !target__has_task() here because we need to cover the
 	 * case where no threads were specified in the command line, but a

diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile
index 511141b..eca6a91 100644
--- a/tools/perf/config/Makefile
+++ b/tools/perf/config/Makefile

@@ -61,50 +61,45 @@
 
 ifeq ($(LIBUNWIND_LIBS),)
   NO_LIBUNWIND := 1
-else
-  #
-  # For linking with debug library, run like:
-  #
-  #   make DEBUG=1 LIBUNWIND_DIR=/opt/libunwind/
-  #
-  ifdef LIBUNWIND_DIR
-    LIBUNWIND_CFLAGS  = -I$(LIBUNWIND_DIR)/include
-    LIBUNWIND_LDFLAGS = -L$(LIBUNWIND_DIR)/lib
-  endif
-  LIBUNWIND_LDFLAGS += $(LIBUNWIND_LIBS)
-
-  # Set per-feature check compilation flags
-  FEATURE_CHECK_CFLAGS-libunwind = $(LIBUNWIND_CFLAGS)
-  FEATURE_CHECK_LDFLAGS-libunwind = $(LIBUNWIND_LDFLAGS)
-  FEATURE_CHECK_CFLAGS-libunwind-debug-frame = $(LIBUNWIND_CFLAGS)
-  FEATURE_CHECK_LDFLAGS-libunwind-debug-frame = $(LIBUNWIND_LDFLAGS)
 endif
+#
+# For linking with debug library, run like:
+#
+#   make DEBUG=1 LIBUNWIND_DIR=/opt/libunwind/
+#
+ifdef LIBUNWIND_DIR
+  LIBUNWIND_CFLAGS  = -I$(LIBUNWIND_DIR)/include
+  LIBUNWIND_LDFLAGS = -L$(LIBUNWIND_DIR)/lib
+endif
+LIBUNWIND_LDFLAGS += $(LIBUNWIND_LIBS)
+
+# Set per-feature check compilation flags
+FEATURE_CHECK_CFLAGS-libunwind = $(LIBUNWIND_CFLAGS)
+FEATURE_CHECK_LDFLAGS-libunwind = $(LIBUNWIND_LDFLAGS)
+FEATURE_CHECK_CFLAGS-libunwind-debug-frame = $(LIBUNWIND_CFLAGS)
+FEATURE_CHECK_LDFLAGS-libunwind-debug-frame = $(LIBUNWIND_LDFLAGS)
 
 ifeq ($(NO_PERF_REGS),0)
   CFLAGS += -DHAVE_PERF_REGS_SUPPORT
 endif
 
-ifndef NO_LIBELF
-  # for linking with debug library, run like:
-  # make DEBUG=1 LIBDW_DIR=/opt/libdw/
-  ifdef LIBDW_DIR
-    LIBDW_CFLAGS  := -I$(LIBDW_DIR)/include
-    LIBDW_LDFLAGS := -L$(LIBDW_DIR)/lib
-  endif
-  FEATURE_CHECK_CFLAGS-libdw-dwarf-unwind := $(LIBDW_CFLAGS)
-  FEATURE_CHECK_LDFLAGS-libdw-dwarf-unwind := $(LIBDW_LDFLAGS) -ldw
+# for linking with debug library, run like:
+# make DEBUG=1 LIBDW_DIR=/opt/libdw/
+ifdef LIBDW_DIR
+  LIBDW_CFLAGS  := -I$(LIBDW_DIR)/include
+  LIBDW_LDFLAGS := -L$(LIBDW_DIR)/lib
 endif
+FEATURE_CHECK_CFLAGS-libdw-dwarf-unwind := $(LIBDW_CFLAGS)
+FEATURE_CHECK_LDFLAGS-libdw-dwarf-unwind := $(LIBDW_LDFLAGS) -ldw
 
-ifdef LIBBABELTRACE
-  # for linking with debug library, run like:
-  # make DEBUG=1 LIBBABELTRACE_DIR=/opt/libbabeltrace/
-  ifdef LIBBABELTRACE_DIR
-    LIBBABELTRACE_CFLAGS  := -I$(LIBBABELTRACE_DIR)/include
-    LIBBABELTRACE_LDFLAGS := -L$(LIBBABELTRACE_DIR)/lib
-  endif
-  FEATURE_CHECK_CFLAGS-libbabeltrace := $(LIBBABELTRACE_CFLAGS)
-  FEATURE_CHECK_LDFLAGS-libbabeltrace := $(LIBBABELTRACE_LDFLAGS) -lbabeltrace-ctf
+# for linking with debug library, run like:
+# make DEBUG=1 LIBBABELTRACE_DIR=/opt/libbabeltrace/
+ifdef LIBBABELTRACE_DIR
+  LIBBABELTRACE_CFLAGS  := -I$(LIBBABELTRACE_DIR)/include
+  LIBBABELTRACE_LDFLAGS := -L$(LIBBABELTRACE_DIR)/lib
 endif
+FEATURE_CHECK_CFLAGS-libbabeltrace := $(LIBBABELTRACE_CFLAGS)
+FEATURE_CHECK_LDFLAGS-libbabeltrace := $(LIBBABELTRACE_LDFLAGS) -lbabeltrace-ctf
 
 FEATURE_CHECK_CFLAGS-bpf = -I. -I$(srctree)/tools/include -I$(srctree)/arch/$(ARCH)/include/uapi -I$(srctree)/include/uapi
 # include ARCH specific config
@@ -145,28 +140,26 @@
   $(call detected_var,PARSER_DEBUG_FLEX)
 endif
 
-ifndef NO_LIBPYTHON
-  # Try different combinations to accommodate systems that only have
-  # python[2][-config] in weird combinations but always preferring
-  # python2 and python2-config as per pep-0394. If we catch a
-  # python[-config] in version 3, the version check will kill it.
-  PYTHON2 := $(if $(call get-executable,python2),python2,python)
-  override PYTHON := $(call get-executable-or-default,PYTHON,$(PYTHON2))
-  PYTHON2_CONFIG := \
-    $(if $(call get-executable,$(PYTHON)-config),$(PYTHON)-config,python-config)
-  override PYTHON_CONFIG := \
-    $(call get-executable-or-default,PYTHON_CONFIG,$(PYTHON2_CONFIG))
+# Try different combinations to accommodate systems that only have
+# python[2][-config] in weird combinations but always preferring
+# python2 and python2-config as per pep-0394. If we catch a
+# python[-config] in version 3, the version check will kill it.
+PYTHON2 := $(if $(call get-executable,python2),python2,python)
+override PYTHON := $(call get-executable-or-default,PYTHON,$(PYTHON2))
+PYTHON2_CONFIG := \
+  $(if $(call get-executable,$(PYTHON)-config),$(PYTHON)-config,python-config)
+override PYTHON_CONFIG := \
+  $(call get-executable-or-default,PYTHON_CONFIG,$(PYTHON2_CONFIG))
 
-  PYTHON_CONFIG_SQ := $(call shell-sq,$(PYTHON_CONFIG))
+PYTHON_CONFIG_SQ := $(call shell-sq,$(PYTHON_CONFIG))
 
-  PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) --ldflags 2>/dev/null)
-  PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null)
+PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) --ldflags 2>/dev/null)
+PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null)
 
-  FEATURE_CHECK_CFLAGS-libpython := $(PYTHON_EMBED_CCOPTS)
-  FEATURE_CHECK_LDFLAGS-libpython := $(PYTHON_EMBED_LDOPTS)
-  FEATURE_CHECK_CFLAGS-libpython-version := $(PYTHON_EMBED_CCOPTS)
-  FEATURE_CHECK_LDFLAGS-libpython-version := $(PYTHON_EMBED_LDOPTS)
-endif
+FEATURE_CHECK_CFLAGS-libpython := $(PYTHON_EMBED_CCOPTS)
+FEATURE_CHECK_LDFLAGS-libpython := $(PYTHON_EMBED_LDOPTS)
+FEATURE_CHECK_CFLAGS-libpython-version := $(PYTHON_EMBED_CCOPTS)
+FEATURE_CHECK_LDFLAGS-libpython-version := $(PYTHON_EMBED_LDOPTS)
 
 CFLAGS += -fno-omit-frame-pointer
 CFLAGS += -ggdb3
@@ -335,6 +328,13 @@
   endif # NO_LIBBPF
 endif # NO_LIBELF
 
+ifdef PERF_HAVE_JITDUMP
+  ifndef NO_DWARF
+    $(call detected,CONFIG_JITDUMP)
+    CFLAGS += -DHAVE_JITDUMP
+  endif
+endif
+
 ifeq ($(ARCH),powerpc)
   ifndef NO_DWARF
     CFLAGS += -DHAVE_SKIP_CALLCHAIN_IDX
@@ -411,6 +411,17 @@
   endif
 endif
 
+ifndef NO_LIBCRYPTO
+  ifneq ($(feature-libcrypto), 1)
+    msg := $(warning No libcrypto.h found, disables jitted code injection, please install libssl-devel or libssl-dev);
+    NO_LIBCRYPTO := 1
+  else
+    CFLAGS += -DHAVE_LIBCRYPTO_SUPPORT
+    EXTLIBS += -lcrypto
+    $(call detected,CONFIG_CRYPTO)
+  endif
+endif
+
 ifdef NO_NEWT
   NO_SLANG=1
 endif

diff --git a/tools/perf/jvmti/Makefile b/tools/perf/jvmti/Makefile
new file mode 100644
index 0000000..5ce61a1
--- /dev/null
+++ b/tools/perf/jvmti/Makefile

@@ -0,0 +1,89 @@
+ARCH=$(shell uname -m)
+
+ifeq ($(ARCH), x86_64)
+JARCH=amd64
+endif
+ifeq ($(ARCH), armv7l)
+JARCH=armhf
+endif
+ifeq ($(ARCH), armv6l)
+JARCH=armhf
+endif
+ifeq ($(ARCH), aarch64)
+JARCH=aarch64
+endif
+ifeq ($(ARCH), ppc64)
+JARCH=powerpc
+endif
+ifeq ($(ARCH), ppc64le)
+JARCH=powerpc
+endif
+
+DESTDIR=/usr/local
+
+VERSION=1
+REVISION=0
+AGE=0
+
+LN=ln -sf
+RM=rm
+
+SLIBJVMTI=libjvmti.so.$(VERSION).$(REVISION).$(AGE)
+VLIBJVMTI=libjvmti.so.$(VERSION)
+SLDFLAGS=-shared -Wl,-soname -Wl,$(VLIBJVMTI)
+SOLIBEXT=so
+
+# The following works at least on fedora 23, you may need the next
+# line for other distros.
+ifneq (,$(wildcard /usr/sbin/update-java-alternatives))
+JDIR=$(shell /usr/sbin/update-java-alternatives -l | head -1 | cut -d ' ' -f 3)
+else
+  ifneq (,$(wildcard /usr/sbin/alternatives))
+    JDIR=$(shell alternatives --display java | tail -1 | cut -d' ' -f 5 | sed 's%/jre/bin/java.%%g')
+  endif
+endif
+ifndef JDIR
+$(error Could not find alternatives command, you need to set JDIR= to point to the root of your Java directory)
+else
+  ifeq (,$(wildcard $(JDIR)/include/jvmti.h))
+  $(error the openjdk development package appears to me missing, install and try again)
+  endif
+endif
+$(info Using Java from $(JDIR))
+# -lrt required in 32-bit mode for clock_gettime()
+LIBS=-lelf -lrt
+INCDIR=-I $(JDIR)/include -I $(JDIR)/include/linux
+
+TARGETS=$(SLIBJVMTI)
+
+SRCS=libjvmti.c jvmti_agent.c
+OBJS=$(SRCS:.c=.o)
+SOBJS=$(OBJS:.o=.lo)
+OPT=-O2 -g -Werror -Wall
+
+CFLAGS=$(INCDIR) $(OPT)
+
+all: $(TARGETS)
+
+.c.o:
+	$(CC) $(CFLAGS) -c $*.c
+.c.lo:
+	$(CC) -fPIC -DPIC $(CFLAGS) -c $*.c -o $*.lo
+
+$(OBJS) $(SOBJS): Makefile jvmti_agent.h ../util/jitdump.h
+
+$(SLIBJVMTI):  $(SOBJS)
+	$(CC) $(CFLAGS) $(SLDFLAGS)  -o $@ $(SOBJS) $(LIBS)
+	$(LN) $@ libjvmti.$(SOLIBEXT)
+
+clean:
+	$(RM) -f *.o *.so.* *.so *.lo
+
+install:
+	-mkdir -p $(DESTDIR)/lib
+	install -m 755 $(SLIBJVMTI) $(DESTDIR)/lib/
+	(cd $(DESTDIR)/lib; $(LN) $(SLIBJVMTI) $(VLIBJVMTI))
+	(cd $(DESTDIR)/lib; $(LN) $(SLIBJVMTI) libjvmti.$(SOLIBEXT))
+	ldconfig
+
+.SUFFIXES: .c .S .o .lo

diff --git a/tools/perf/jvmti/jvmti_agent.c b/tools/perf/jvmti/jvmti_agent.c
new file mode 100644
index 0000000..6461e02
--- /dev/null
+++ b/tools/perf/jvmti/jvmti_agent.c

@@ -0,0 +1,465 @@
+/*
+ * jvmti_agent.c: JVMTI agent interface
+ *
+ * Adapted from the Oprofile code in opagent.c:
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ * Copyright 2007 OProfile authors
+ * Jens Wilke
+ * Daniel Hansel
+ * Copyright IBM Corporation 2007
+ */
+#include <sys/types.h>
+#include <sys/stat.h> /* for mkdir() */
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <limits.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+#include <sys/mman.h>
+#include <syscall.h> /* for gettid() */
+#include <err.h>
+
+#include "jvmti_agent.h"
+#include "../util/jitdump.h"
+
+#define JIT_LANG "java"
+
+static char jit_path[PATH_MAX];
+static void *marker_addr;
+
+/*
+ * padding buffer
+ */
+static const char pad_bytes[7];
+
+static inline pid_t gettid(void)
+{
+	return (pid_t)syscall(__NR_gettid);
+}
+
+static int get_e_machine(struct jitheader *hdr)
+{
+	ssize_t sret;
+	char id[16];
+	int fd, ret = -1;
+	int m = -1;
+	struct {
+		uint16_t e_type;
+		uint16_t e_machine;
+	} info;
+
+	fd = open("/proc/self/exe", O_RDONLY);
+	if (fd == -1)
+		return -1;
+
+	sret = read(fd, id, sizeof(id));
+	if (sret != sizeof(id))
+		goto error;
+
+	/* check ELF signature */
+	if (id[0] != 0x7f || id[1] != 'E' || id[2] != 'L' || id[3] != 'F')
+		goto error;
+
+	sret = read(fd, &info, sizeof(info));
+	if (sret != sizeof(info))
+		goto error;
+
+	m = info.e_machine;
+	if (m < 0)
+		m = 0; /* ELF EM_NONE */
+
+	hdr->elf_mach = m;
+	ret = 0;
+error:
+	close(fd);
+	return ret;
+}
+
+#define NSEC_PER_SEC	1000000000
+static int perf_clk_id = CLOCK_MONOTONIC;
+
+static inline uint64_t
+timespec_to_ns(const struct timespec *ts)
+{
+        return ((uint64_t) ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec;
+}
+
+static inline uint64_t
+perf_get_timestamp(void)
+{
+	struct timespec ts;
+	int ret;
+
+	ret = clock_gettime(perf_clk_id, &ts);
+	if (ret)
+		return 0;
+
+	return timespec_to_ns(&ts);
+}
+
+static int
+debug_cache_init(void)
+{
+	char str[32];
+	char *base, *p;
+	struct tm tm;
+	time_t t;
+	int ret;
+
+	time(&t);
+	localtime_r(&t, &tm);
+
+	base = getenv("JITDUMPDIR");
+	if (!base)
+		base = getenv("HOME");
+	if (!base)
+		base = ".";
+
+	strftime(str, sizeof(str), JIT_LANG"-jit-%Y%m%d", &tm);
+
+	snprintf(jit_path, PATH_MAX - 1, "%s/.debug/", base);
+
+	ret = mkdir(jit_path, 0755);
+	if (ret == -1) {
+		if (errno != EEXIST) {
+			warn("jvmti: cannot create jit cache dir %s", jit_path);
+			return -1;
+		}
+	}
+
+	snprintf(jit_path, PATH_MAX - 1, "%s/.debug/jit", base);
+	ret = mkdir(jit_path, 0755);
+	if (ret == -1) {
+		if (errno != EEXIST) {
+			warn("cannot create jit cache dir %s", jit_path);
+			return -1;
+		}
+	}
+
+	snprintf(jit_path, PATH_MAX - 1, "%s/.debug/jit/%s.XXXXXXXX", base, str);
+
+	p = mkdtemp(jit_path);
+	if (p != jit_path) {
+		warn("cannot create jit cache dir %s", jit_path);
+		return -1;
+	}
+
+	return 0;
+}
+
+static int
+perf_open_marker_file(int fd)
+{
+	long pgsz;
+
+	pgsz = sysconf(_SC_PAGESIZE);
+	if (pgsz == -1)
+		return -1;
+
+	/*
+	 * we mmap the jitdump to create an MMAP RECORD in perf.data file.
+	 * The mmap is captured either live (perf record running when we mmap)
+	 * or  in deferred mode, via /proc/PID/maps
+	 * the MMAP record is used as a marker of a jitdump file for more meta
+	 * data info about the jitted code. Perf report/annotate detect this
+	 * special filename and process the jitdump file.
+	 *
+	 * mapping must be PROT_EXEC to ensure it is captured by perf record
+	 * even when not using -d option
+	 */
+	marker_addr = mmap(NULL, pgsz, PROT_READ|PROT_EXEC, MAP_PRIVATE, fd, 0);
+	return (marker_addr == MAP_FAILED) ? -1 : 0;
+}
+
+static void
+perf_close_marker_file(void)
+{
+	long pgsz;
+
+	if (!marker_addr)
+		return;
+
+	pgsz = sysconf(_SC_PAGESIZE);
+	if (pgsz == -1)
+		return;
+
+	munmap(marker_addr, pgsz);
+}
+
+void *jvmti_open(void)
+{
+	int pad_cnt;
+	char dump_path[PATH_MAX];
+	struct jitheader header;
+	int fd;
+	FILE *fp;
+
+	/*
+	 * check if clockid is supported
+	 */
+	if (!perf_get_timestamp())
+		warnx("jvmti: kernel does not support %d clock id", perf_clk_id);
+
+	memset(&header, 0, sizeof(header));
+
+	debug_cache_init();
+
+	/*
+	 * jitdump file name
+	 */
+	snprintf(dump_path, PATH_MAX, "%s/jit-%i.dump", jit_path, getpid());
+
+	fd = open(dump_path, O_CREAT|O_TRUNC|O_RDWR, 0666);
+	if (fd == -1)
+		return NULL;
+
+	/*
+	 * create perf.data maker for the jitdump file
+	 */
+	if (perf_open_marker_file(fd)) {
+		warnx("jvmti: failed to create marker file");
+		return NULL;
+	}
+
+	fp = fdopen(fd, "w+");
+	if (!fp) {
+		warn("jvmti: cannot create %s", dump_path);
+		close(fd);
+		goto error;
+	}
+
+	warnx("jvmti: jitdump in %s", dump_path);
+
+	if (get_e_machine(&header)) {
+		warn("get_e_machine failed\n");
+		goto error;
+	}
+
+	header.magic      = JITHEADER_MAGIC;
+	header.version    = JITHEADER_VERSION;
+	header.total_size = sizeof(header);
+	header.pid        = getpid();
+
+	/* calculate amount of padding '\0' */
+	pad_cnt = PADDING_8ALIGNED(header.total_size);
+	header.total_size += pad_cnt;
+
+	header.timestamp = perf_get_timestamp();
+
+	if (!fwrite(&header, sizeof(header), 1, fp)) {
+		warn("jvmti: cannot write dumpfile header");
+		goto error;
+	}
+
+	/* write padding '\0' if necessary */
+	if (pad_cnt && !fwrite(pad_bytes, pad_cnt, 1, fp)) {
+		warn("jvmti: cannot write dumpfile header padding");
+		goto error;
+	}
+
+	return fp;
+error:
+	fclose(fp);
+	return NULL;
+}
+
+int
+jvmti_close(void *agent)
+{
+	struct jr_code_close rec;
+	FILE *fp = agent;
+
+	if (!fp) {
+		warnx("jvmti: incalid fd in close_agent");
+		return -1;
+	}
+
+	rec.p.id = JIT_CODE_CLOSE;
+	rec.p.total_size = sizeof(rec);
+
+	rec.p.timestamp = perf_get_timestamp();
+
+	if (!fwrite(&rec, sizeof(rec), 1, fp))
+		return -1;
+
+	fclose(fp);
+
+	fp = NULL;
+
+	perf_close_marker_file();
+
+	return 0;
+}
+
+int
+jvmti_write_code(void *agent, char const *sym,
+	uint64_t vma, void const *code, unsigned int const size)
+{
+	static int code_generation = 1;
+	struct jr_code_load rec;
+	size_t sym_len;
+	size_t padding_count;
+	FILE *fp = agent;
+	int ret = -1;
+
+	/* don't care about 0 length function, no samples */
+	if (size == 0)
+		return 0;
+
+	if (!fp) {
+		warnx("jvmti: invalid fd in write_native_code");
+		return -1;
+	}
+
+	sym_len = strlen(sym) + 1;
+
+	rec.p.id           = JIT_CODE_LOAD;
+	rec.p.total_size   = sizeof(rec) + sym_len;
+	padding_count      = PADDING_8ALIGNED(rec.p.total_size);
+	rec.p. total_size += padding_count;
+	rec.p.timestamp    = perf_get_timestamp();
+
+	rec.code_size  = size;
+	rec.vma        = vma;
+	rec.code_addr  = vma;
+	rec.pid	       = getpid();
+	rec.tid	       = gettid();
+
+	if (code)
+		rec.p.total_size += size;
+
+	/*
+	 * If JVM is multi-threaded, nultiple concurrent calls to agent
+	 * may be possible, so protect file writes
+	 */
+	flockfile(fp);
+
+	/*
+	 * get code index inside lock to avoid race condition
+	 */
+	rec.code_index = code_generation++;
+
+	ret = fwrite_unlocked(&rec, sizeof(rec), 1, fp);
+	fwrite_unlocked(sym, sym_len, 1, fp);
+
+	if (padding_count)
+		fwrite_unlocked(pad_bytes, padding_count, 1, fp);
+
+	if (code)
+		fwrite_unlocked(code, size, 1, fp);
+
+	funlockfile(fp);
+
+	ret = 0;
+
+	return ret;
+}
+
+int
+jvmti_write_debug_info(void *agent, uint64_t code, const char *file,
+		       jvmti_line_info_t *li, int nr_lines)
+{
+	struct jr_code_debug_info rec;
+	size_t sret, len, size, flen;
+	size_t padding_count;
+	uint64_t addr;
+	const char *fn = file;
+	FILE *fp = agent;
+	int i;
+
+	/*
+	 * no entry to write
+	 */
+	if (!nr_lines)
+		return 0;
+
+	if (!fp) {
+		warnx("jvmti: invalid fd in write_debug_info");
+		return -1;
+	}
+
+	flen = strlen(file) + 1;
+
+	rec.p.id        = JIT_CODE_DEBUG_INFO;
+	size            = sizeof(rec);
+	rec.p.timestamp = perf_get_timestamp();
+	rec.code_addr   = (uint64_t)(uintptr_t)code;
+	rec.nr_entry    = nr_lines;
+
+	/*
+	 * on disk source line info layout:
+	 * uint64_t : addr
+	 * int      : line number
+	 * int      : column discriminator
+	 * file[]   : source file name
+	 * padding  : pad to multiple of 8 bytes
+	 */
+	size += nr_lines * sizeof(struct debug_entry);
+	size += flen * nr_lines;
+	/*
+	 * pad to 8 bytes
+	 */
+	padding_count = PADDING_8ALIGNED(size);
+
+	rec.p.total_size = size + padding_count;
+
+	/*
+	 * If JVM is multi-threaded, nultiple concurrent calls to agent
+	 * may be possible, so protect file writes
+	 */
+	flockfile(fp);
+
+	sret = fwrite_unlocked(&rec, sizeof(rec), 1, fp);
+	if (sret != 1)
+		goto error;
+
+	for (i = 0; i < nr_lines; i++) {
+
+		addr = (uint64_t)li[i].pc;
+		len  = sizeof(addr);
+		sret = fwrite_unlocked(&addr, len, 1, fp);
+		if (sret != 1)
+			goto error;
+
+		len  = sizeof(li[0].line_number);
+		sret = fwrite_unlocked(&li[i].line_number, len, 1, fp);
+		if (sret != 1)
+			goto error;
+
+		len  = sizeof(li[0].discrim);
+		sret = fwrite_unlocked(&li[i].discrim, len, 1, fp);
+		if (sret != 1)
+			goto error;
+
+		sret = fwrite_unlocked(fn, flen, 1, fp);
+		if (sret != 1)
+			goto error;
+	}
+	if (padding_count)
+		sret = fwrite_unlocked(pad_bytes, padding_count, 1, fp);
+		if (sret != 1)
+			goto error;
+
+	funlockfile(fp);
+	return 0;
+error:
+	funlockfile(fp);
+	return -1;
+}

diff --git a/tools/perf/jvmti/jvmti_agent.h b/tools/perf/jvmti/jvmti_agent.h
new file mode 100644
index 0000000..bedf5d0
--- /dev/null
+++ b/tools/perf/jvmti/jvmti_agent.h

@@ -0,0 +1,36 @@
+#ifndef __JVMTI_AGENT_H__
+#define __JVMTI_AGENT_H__
+
+#include <sys/types.h>
+#include <stdint.h>
+#include <jvmti.h>
+
+#define __unused __attribute__((unused))
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+typedef struct {
+	unsigned long	pc;
+	int		line_number;
+	int		discrim; /* discriminator -- 0 for now */
+} jvmti_line_info_t;
+
+void *jvmti_open(void);
+int   jvmti_close(void *agent);
+int   jvmti_write_code(void *agent, char const *symbol_name,
+		       uint64_t vma, void const *code,
+		       const unsigned int code_size);
+
+int   jvmti_write_debug_info(void *agent,
+		             uint64_t code,
+			     const char *file,
+			     jvmti_line_info_t *li,
+			     int nr_lines);
+
+#if defined(__cplusplus)
+}
+
+#endif
+#endif /* __JVMTI_H__ */

diff --git a/tools/perf/jvmti/libjvmti.c b/tools/perf/jvmti/libjvmti.c
new file mode 100644
index 0000000..ac12e4b
--- /dev/null
+++ b/tools/perf/jvmti/libjvmti.c

@@ -0,0 +1,304 @@
+#include <sys/types.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <err.h>
+#include <jvmti.h>
+#include <jvmticmlr.h>
+#include <limits.h>
+
+#include "jvmti_agent.h"
+
+static int has_line_numbers;
+void *jvmti_agent;
+
+static jvmtiError
+do_get_line_numbers(jvmtiEnv *jvmti, void *pc, jmethodID m, jint bci,
+		    jvmti_line_info_t *tab, jint *nr)
+{
+	jint i, lines = 0;
+	jint nr_lines = 0;
+	jvmtiLineNumberEntry *loc_tab = NULL;
+	jvmtiError ret;
+
+	ret = (*jvmti)->GetLineNumberTable(jvmti, m, &nr_lines, &loc_tab);
+	if (ret != JVMTI_ERROR_NONE)
+		return ret;
+
+	for (i = 0; i < nr_lines; i++) {
+		if (loc_tab[i].start_location < bci) {
+			tab[lines].pc = (unsigned long)pc;
+			tab[lines].line_number = loc_tab[i].line_number;
+			tab[lines].discrim = 0; /* not yet used */
+			lines++;
+		} else {
+			break;
+		}
+	}
+	(*jvmti)->Deallocate(jvmti, (unsigned char *)loc_tab);
+	*nr = lines;
+	return JVMTI_ERROR_NONE;
+}
+
+static jvmtiError
+get_line_numbers(jvmtiEnv *jvmti, const void *compile_info, jvmti_line_info_t **tab, int *nr_lines)
+{
+	const jvmtiCompiledMethodLoadRecordHeader *hdr;
+	jvmtiCompiledMethodLoadInlineRecord *rec;
+	jvmtiLineNumberEntry *lne = NULL;
+	PCStackInfo *c;
+	jint nr, ret;
+	int nr_total = 0;
+	int i, lines_total = 0;
+
+	if (!(tab && nr_lines))
+		return JVMTI_ERROR_NULL_POINTER;
+
+	/*
+	 * Phase 1 -- get the number of lines necessary
+	 */
+	for (hdr = compile_info; hdr != NULL; hdr = hdr->next) {
+		if (hdr->kind == JVMTI_CMLR_INLINE_INFO) {
+			rec = (jvmtiCompiledMethodLoadInlineRecord *)hdr;
+			for (i = 0; i < rec->numpcs; i++) {
+				c = rec->pcinfo + i;
+				nr = 0;
+				/*
+				 * unfortunately, need a tab to get the number of lines!
+				 */
+				ret = (*jvmti)->GetLineNumberTable(jvmti, c->methods[0], &nr, &lne);
+				if (ret == JVMTI_ERROR_NONE) {
+					/* free what was allocated for nothing */
+					(*jvmti)->Deallocate(jvmti, (unsigned char *)lne);
+					nr_total += (int)nr;
+				}
+			}
+		}
+	}
+
+	if (nr_total == 0)
+		return JVMTI_ERROR_NOT_FOUND;
+
+	/*
+	 * Phase 2 -- allocate big enough line table
+	 */
+	*tab = malloc(nr_total * sizeof(**tab));
+	if (!*tab)
+		return JVMTI_ERROR_OUT_OF_MEMORY;
+
+	for (hdr = compile_info; hdr != NULL; hdr = hdr->next) {
+		if (hdr->kind == JVMTI_CMLR_INLINE_INFO) {
+			rec = (jvmtiCompiledMethodLoadInlineRecord *)hdr;
+			for (i = 0; i < rec->numpcs; i++) {
+				c = rec->pcinfo + i;
+				nr = 0;
+				ret = do_get_line_numbers(jvmti, c->pc,
+							  c->methods[0],
+							  c->bcis[0],
+							  *tab + lines_total,
+							  &nr);
+				if (ret == JVMTI_ERROR_NONE)
+					lines_total += nr;
+			}
+		}
+	}
+	*nr_lines = lines_total;
+	return JVMTI_ERROR_NONE;
+}
+
+static void JNICALL
+compiled_method_load_cb(jvmtiEnv *jvmti,
+			jmethodID method,
+			jint code_size,
+			void const *code_addr,
+			jint map_length,
+			jvmtiAddrLocationMap const *map,
+			const void *compile_info)
+{
+	jvmti_line_info_t *line_tab = NULL;
+	jclass decl_class;
+	char *class_sign = NULL;
+	char *func_name = NULL;
+	char *func_sign = NULL;
+	char *file_name= NULL;
+	char fn[PATH_MAX];
+	uint64_t addr = (uint64_t)(uintptr_t)code_addr;
+	jvmtiError ret;
+	int nr_lines = 0; /* in line_tab[] */
+	size_t len;
+
+	ret = (*jvmti)->GetMethodDeclaringClass(jvmti, method,
+						&decl_class);
+	if (ret != JVMTI_ERROR_NONE) {
+		warnx("jvmti: cannot get declaring class");
+		return;
+	}
+
+	if (has_line_numbers && map && map_length) {
+		ret = get_line_numbers(jvmti, compile_info, &line_tab, &nr_lines);
+		if (ret != JVMTI_ERROR_NONE) {
+			warnx("jvmti: cannot get line table for method");
+			nr_lines = 0;
+		}
+	}
+
+	ret = (*jvmti)->GetSourceFileName(jvmti, decl_class, &file_name);
+	if (ret != JVMTI_ERROR_NONE) {
+		warnx("jvmti: cannot get source filename ret=%d", ret);
+		goto error;
+	}
+
+	ret = (*jvmti)->GetClassSignature(jvmti, decl_class,
+					  &class_sign, NULL);
+	if (ret != JVMTI_ERROR_NONE) {
+		warnx("jvmti: getclassignature failed");
+		goto error;
+	}
+
+	ret = (*jvmti)->GetMethodName(jvmti, method, &func_name,
+				      &func_sign, NULL);
+	if (ret != JVMTI_ERROR_NONE) {
+		warnx("jvmti: failed getmethodname");
+		goto error;
+	}
+
+	/*
+	 * Assume path name is class hierarchy, this is a common practice with Java programs
+	 */
+	if (*class_sign == 'L') {
+		int j, i = 0;
+		char *p = strrchr(class_sign, '/');
+		if (p) {
+			/* drop the 'L' prefix and copy up to the final '/' */
+			for (i = 0; i < (p - class_sign); i++)
+				fn[i] = class_sign[i+1];
+		}
+		/*
+		 * append file name, we use loops and not string ops to avoid modifying
+		 * class_sign which is used later for the symbol name
+		 */
+		for (j = 0; i < (PATH_MAX - 1) && file_name && j < strlen(file_name); j++, i++)
+			fn[i] = file_name[j];
+		fn[i] = '\0';
+	} else {
+		/* fallback case */
+		strcpy(fn, file_name);
+	}
+	/*
+	 * write source line info record if we have it
+	 */
+	if (jvmti_write_debug_info(jvmti_agent, addr, fn, line_tab, nr_lines))
+		warnx("jvmti: write_debug_info() failed");
+
+	len = strlen(func_name) + strlen(class_sign) + strlen(func_sign) + 2;
+	{
+		char str[len];
+		snprintf(str, len, "%s%s%s", class_sign, func_name, func_sign);
+
+		if (jvmti_write_code(jvmti_agent, str, addr, code_addr, code_size))
+			warnx("jvmti: write_code() failed");
+	}
+error:
+	(*jvmti)->Deallocate(jvmti, (unsigned char *)func_name);
+	(*jvmti)->Deallocate(jvmti, (unsigned char *)func_sign);
+	(*jvmti)->Deallocate(jvmti, (unsigned char *)class_sign);
+	(*jvmti)->Deallocate(jvmti, (unsigned char *)file_name);
+	free(line_tab);
+}
+
+static void JNICALL
+code_generated_cb(jvmtiEnv *jvmti,
+		  char const *name,
+		  void const *code_addr,
+		  jint code_size)
+{
+	uint64_t addr = (uint64_t)(unsigned long)code_addr;
+	int ret;
+
+	ret = jvmti_write_code(jvmti_agent, name, addr, code_addr, code_size);
+	if (ret)
+		warnx("jvmti: write_code() failed for code_generated");
+}
+
+JNIEXPORT jint JNICALL
+Agent_OnLoad(JavaVM *jvm, char *options, void *reserved __unused)
+{
+	jvmtiEventCallbacks cb;
+	jvmtiCapabilities caps1;
+	jvmtiJlocationFormat format;
+	jvmtiEnv *jvmti = NULL;
+	jint ret;
+
+	jvmti_agent = jvmti_open();
+	if (!jvmti_agent) {
+		warnx("jvmti: open_agent failed");
+		return -1;
+	}
+
+	/*
+	 * Request a JVMTI interface version 1 environment
+	 */
+	ret = (*jvm)->GetEnv(jvm, (void *)&jvmti, JVMTI_VERSION_1);
+	if (ret != JNI_OK) {
+		warnx("jvmti: jvmti version 1 not supported");
+		return -1;
+	}
+
+	/*
+	 * acquire method_load capability, we require it
+	 * request line numbers (optional)
+	 */
+	memset(&caps1, 0, sizeof(caps1));
+	caps1.can_generate_compiled_method_load_events = 1;
+
+	ret = (*jvmti)->AddCapabilities(jvmti, &caps1);
+	if (ret != JVMTI_ERROR_NONE) {
+		warnx("jvmti: acquire compiled_method capability failed");
+		return -1;
+	}
+	ret = (*jvmti)->GetJLocationFormat(jvmti, &format);
+        if (ret == JVMTI_ERROR_NONE && format == JVMTI_JLOCATION_JVMBCI) {
+                memset(&caps1, 0, sizeof(caps1));
+                caps1.can_get_line_numbers = 1;
+                caps1.can_get_source_file_name = 1;
+		ret = (*jvmti)->AddCapabilities(jvmti, &caps1);
+                if (ret == JVMTI_ERROR_NONE)
+                        has_line_numbers = 1;
+        }
+
+	memset(&cb, 0, sizeof(cb));
+
+	cb.CompiledMethodLoad   = compiled_method_load_cb;
+	cb.DynamicCodeGenerated = code_generated_cb;
+
+	ret = (*jvmti)->SetEventCallbacks(jvmti, &cb, sizeof(cb));
+	if (ret != JVMTI_ERROR_NONE) {
+		warnx("jvmti: cannot set event callbacks");
+		return -1;
+	}
+
+	ret = (*jvmti)->SetEventNotificationMode(jvmti, JVMTI_ENABLE,
+			JVMTI_EVENT_COMPILED_METHOD_LOAD, NULL);
+	if (ret != JVMTI_ERROR_NONE) {
+		warnx("jvmti: setnotification failed for method_load");
+		return -1;
+	}
+
+	ret = (*jvmti)->SetEventNotificationMode(jvmti, JVMTI_ENABLE,
+			JVMTI_EVENT_DYNAMIC_CODE_GENERATED, NULL);
+	if (ret != JVMTI_ERROR_NONE) {
+		warnx("jvmti: setnotification failed on code_generated");
+		return -1;
+	}
+	return 0;
+}
+
+JNIEXPORT void JNICALL
+Agent_OnUnload(JavaVM *jvm __unused)
+{
+	int ret;
+
+	ret = jvmti_close(jvmti_agent);
+	if (ret)
+		errx(1, "Error: op_close_agent()");
+}

diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index a929618..aaee0a7 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c

@@ -454,11 +454,12 @@
 
 static void execv_dashed_external(const char **argv)
 {
-	struct strbuf cmd = STRBUF_INIT;
+	char *cmd;
 	const char *tmp;
 	int status;
 
-	strbuf_addf(&cmd, "perf-%s", argv[0]);
+	if (asprintf(&cmd, "perf-%s", argv[0]) < 0)
+		goto do_die;
 
 	/*
 	 * argv[0] must be the perf command, but the argv array
@@ -467,7 +468,7 @@
 	 * restore it on error.
 	 */
 	tmp = argv[0];
-	argv[0] = cmd.buf;
+	argv[0] = cmd;
 
 	/*
 	 * if we fail because the command is not found, it is
@@ -475,15 +476,16 @@
 	 */
 	status = run_command_v_opt(argv, 0);
 	if (status != -ERR_RUN_COMMAND_EXEC) {
-		if (IS_RUN_COMMAND_ERR(status))
+		if (IS_RUN_COMMAND_ERR(status)) {
+do_die:
 			die("unable to run '%s'", argv[0]);
+		}
 		exit(-status);
 	}
 	errno = ENOENT; /* as if we called execvp */
 
 	argv[0] = tmp;
-
-	strbuf_release(&cmd);
+	zfree(&cmd);
 }
 
 static int run_argv(int *argcp, const char ***argv)
@@ -546,6 +548,8 @@
 
 	srandom(time(NULL));
 
+	perf_config(perf_default_config, NULL);
+
 	/* get debugfs/tracefs mount point from /proc/mounts */
 	tracing_path_mount();
 
@@ -613,6 +617,8 @@
 	 */
 	pthread__block_sigwinch();
 
+	perf_debug_setup();
+
 	while (1) {
 		static int done_help;
 		int was_alias = run_argv(&argc, &argv);

diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 90129ac..5381a01 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h

@@ -58,6 +58,8 @@
 	bool	     full_auxtrace;
 	bool	     auxtrace_snapshot_mode;
 	bool	     record_switch_events;
+	bool	     all_kernel;
+	bool	     all_user;
 	unsigned int freq;
 	unsigned int mmap_pages;
 	unsigned int auxtrace_mmap_pages;

diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py
index 15c8400..1d95009 100644
--- a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py
+++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py

@@ -71,7 +71,10 @@
 except:
 	if not audit_package_warned:
 		audit_package_warned = True
-		print "Install the audit-libs-python package to get syscall names"
+		print "Install the audit-libs-python package to get syscall names.\n" \
+                    "For example:\n  # apt-get install python-audit (Ubuntu)" \
+                    "\n  # yum install audit-libs-python (Fedora)" \
+                    "\n  etc.\n"
 
 def syscall_name(id):
 	try:

diff --git a/tools/perf/tests/.gitignore b/tools/perf/tests/.gitignore
index bf016c4..8cc30e7 100644
--- a/tools/perf/tests/.gitignore
+++ b/tools/perf/tests/.gitignore

@@ -1,3 +1,4 @@
 llvm-src-base.c
 llvm-src-kbuild.c
 llvm-src-prologue.c
+llvm-src-relocation.c

diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
index 614899b..1ba628e 100644
--- a/tools/perf/tests/Build
+++ b/tools/perf/tests/Build

@@ -31,7 +31,7 @@
 perf-y += parse-no-sample-id-all.o
 perf-y += kmod-path.o
 perf-y += thread-map.o
-perf-y += llvm.o llvm-src-base.o llvm-src-kbuild.o llvm-src-prologue.o
+perf-y += llvm.o llvm-src-base.o llvm-src-kbuild.o llvm-src-prologue.o llvm-src-relocation.o
 perf-y += bpf.o
 perf-y += topology.o
 perf-y += cpumap.o
@@ -59,6 +59,13 @@
 	$(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@
 	$(Q)echo ';' >> $@
 
+$(OUTPUT)tests/llvm-src-relocation.c: tests/bpf-script-test-relocation.c tests/Build
+	$(call rule_mkdir)
+	$(Q)echo '#include <tests/llvm.h>' > $@
+	$(Q)echo 'const char test_llvm__bpf_test_relocation[] =' >> $@
+	$(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@
+	$(Q)echo ';' >> $@
+
 ifeq ($(ARCH),$(filter $(ARCH),x86 arm arm64))
 perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
 endif

diff --git a/tools/perf/tests/bp_signal.c b/tools/perf/tests/bp_signal.c
index fb80c9e..e7664fe 100644
--- a/tools/perf/tests/bp_signal.c
+++ b/tools/perf/tests/bp_signal.c

@@ -29,14 +29,59 @@
 
 static int fd1;
 static int fd2;
+static int fd3;
 static int overflows;
+static int overflows_2;
+
+volatile long the_var;
+
+
+/*
+ * Use ASM to ensure watchpoint and breakpoint can be triggered
+ * at one instruction.
+ */
+#if defined (__x86_64__)
+extern void __test_function(volatile long *ptr);
+asm (
+	".globl __test_function\n"
+	"__test_function:\n"
+	"incq (%rdi)\n"
+	"ret\n");
+#elif defined (__aarch64__)
+extern void __test_function(volatile long *ptr);
+asm (
+	".globl __test_function\n"
+	"__test_function:\n"
+	"str x30, [x0]\n"
+	"ret\n");
+
+#else
+static void __test_function(volatile long *ptr)
+{
+	*ptr = 0x1234;
+}
+#endif
 
 __attribute__ ((noinline))
 static int test_function(void)
 {
+	__test_function(&the_var);
+	the_var++;
 	return time(NULL);
 }
 
+static void sig_handler_2(int signum __maybe_unused,
+			  siginfo_t *oh __maybe_unused,
+			  void *uc __maybe_unused)
+{
+	overflows_2++;
+	if (overflows_2 > 10) {
+		ioctl(fd1, PERF_EVENT_IOC_DISABLE, 0);
+		ioctl(fd2, PERF_EVENT_IOC_DISABLE, 0);
+		ioctl(fd3, PERF_EVENT_IOC_DISABLE, 0);
+	}
+}
+
 static void sig_handler(int signum __maybe_unused,
 			siginfo_t *oh __maybe_unused,
 			void *uc __maybe_unused)
@@ -54,10 +99,11 @@
 		 */
 		ioctl(fd1, PERF_EVENT_IOC_DISABLE, 0);
 		ioctl(fd2, PERF_EVENT_IOC_DISABLE, 0);
+		ioctl(fd3, PERF_EVENT_IOC_DISABLE, 0);
 	}
 }
 
-static int bp_event(void *fn, int setup_signal)
+static int __event(bool is_x, void *addr, int sig)
 {
 	struct perf_event_attr pe;
 	int fd;
@@ -67,8 +113,8 @@
 	pe.size = sizeof(struct perf_event_attr);
 
 	pe.config = 0;
-	pe.bp_type = HW_BREAKPOINT_X;
-	pe.bp_addr = (unsigned long) fn;
+	pe.bp_type = is_x ? HW_BREAKPOINT_X : HW_BREAKPOINT_W;
+	pe.bp_addr = (unsigned long) addr;
 	pe.bp_len = sizeof(long);
 
 	pe.sample_period = 1;
@@ -86,17 +132,25 @@
 		return TEST_FAIL;
 	}
 
-	if (setup_signal) {
-		fcntl(fd, F_SETFL, O_RDWR|O_NONBLOCK|O_ASYNC);
-		fcntl(fd, F_SETSIG, SIGIO);
-		fcntl(fd, F_SETOWN, getpid());
-	}
+	fcntl(fd, F_SETFL, O_RDWR|O_NONBLOCK|O_ASYNC);
+	fcntl(fd, F_SETSIG, sig);
+	fcntl(fd, F_SETOWN, getpid());
 
 	ioctl(fd, PERF_EVENT_IOC_RESET, 0);
 
 	return fd;
 }
 
+static int bp_event(void *addr, int sig)
+{
+	return __event(true, addr, sig);
+}
+
+static int wp_event(void *addr, int sig)
+{
+	return __event(false, addr, sig);
+}
+
 static long long bp_count(int fd)
 {
 	long long count;
@@ -114,7 +168,7 @@
 int test__bp_signal(int subtest __maybe_unused)
 {
 	struct sigaction sa;
-	long long count1, count2;
+	long long count1, count2, count3;
 
 	/* setup SIGIO signal handler */
 	memset(&sa, 0, sizeof(struct sigaction));
@@ -126,21 +180,52 @@
 		return TEST_FAIL;
 	}
 
+	sa.sa_sigaction = (void *) sig_handler_2;
+	if (sigaction(SIGUSR1, &sa, NULL) < 0) {
+		pr_debug("failed setting up signal handler 2\n");
+		return TEST_FAIL;
+	}
+
 	/*
 	 * We create following events:
 	 *
-	 * fd1 - breakpoint event on test_function with SIGIO
+	 * fd1 - breakpoint event on __test_function with SIGIO
 	 *       signal configured. We should get signal
 	 *       notification each time the breakpoint is hit
 	 *
-	 * fd2 - breakpoint event on sig_handler without SIGIO
+	 * fd2 - breakpoint event on sig_handler with SIGUSR1
+	 *       configured. We should get SIGUSR1 each time when
+	 *       breakpoint is hit
+	 *
+	 * fd3 - watchpoint event on __test_function with SIGIO
 	 *       configured.
 	 *
 	 * Following processing should happen:
-	 *   - execute test_function
-	 *   - fd1 event breakpoint hit -> count1 == 1
-	 *   - SIGIO is delivered       -> overflows == 1
-	 *   - fd2 event breakpoint hit -> count2 == 1
+	 *   Exec:               Action:                       Result:
+	 *   incq (%rdi)       - fd1 event breakpoint hit   -> count1 == 1
+	 *                     - SIGIO is delivered
+	 *   sig_handler       - fd2 event breakpoint hit   -> count2 == 1
+	 *                     - SIGUSR1 is delivered
+	 *   sig_handler_2                                  -> overflows_2 == 1  (nested signal)
+	 *   sys_rt_sigreturn  - return from sig_handler_2
+	 *   overflows++                                    -> overflows = 1
+	 *   sys_rt_sigreturn  - return from sig_handler
+	 *   incq (%rdi)       - fd3 event watchpoint hit   -> count3 == 1       (wp and bp in one insn)
+	 *                     - SIGIO is delivered
+	 *   sig_handler       - fd2 event breakpoint hit   -> count2 == 2
+	 *                     - SIGUSR1 is delivered
+	 *   sig_handler_2                                  -> overflows_2 == 2  (nested signal)
+	 *   sys_rt_sigreturn  - return from sig_handler_2
+	 *   overflows++                                    -> overflows = 2
+	 *   sys_rt_sigreturn  - return from sig_handler
+	 *   the_var++         - fd3 event watchpoint hit   -> count3 == 2       (standalone watchpoint)
+	 *                     - SIGIO is delivered
+	 *   sig_handler       - fd2 event breakpoint hit   -> count2 == 3
+	 *                     - SIGUSR1 is delivered
+	 *   sig_handler_2                                  -> overflows_2 == 3  (nested signal)
+	 *   sys_rt_sigreturn  - return from sig_handler_2
+	 *   overflows++                                    -> overflows == 3
+	 *   sys_rt_sigreturn  - return from sig_handler
 	 *
 	 * The test case check following error conditions:
 	 * - we get stuck in signal handler because of debug
@@ -152,11 +237,13 @@
 	 *
 	 */
 
-	fd1 = bp_event(test_function, 1);
-	fd2 = bp_event(sig_handler, 0);
+	fd1 = bp_event(__test_function, SIGIO);
+	fd2 = bp_event(sig_handler, SIGUSR1);
+	fd3 = wp_event((void *)&the_var, SIGIO);
 
 	ioctl(fd1, PERF_EVENT_IOC_ENABLE, 0);
 	ioctl(fd2, PERF_EVENT_IOC_ENABLE, 0);
+	ioctl(fd3, PERF_EVENT_IOC_ENABLE, 0);
 
 	/*
 	 * Kick off the test by trigering 'fd1'
@@ -166,15 +253,18 @@
 
 	ioctl(fd1, PERF_EVENT_IOC_DISABLE, 0);
 	ioctl(fd2, PERF_EVENT_IOC_DISABLE, 0);
+	ioctl(fd3, PERF_EVENT_IOC_DISABLE, 0);
 
 	count1 = bp_count(fd1);
 	count2 = bp_count(fd2);
+	count3 = bp_count(fd3);
 
 	close(fd1);
 	close(fd2);
+	close(fd3);
 
-	pr_debug("count1 %lld, count2 %lld, overflow %d\n",
-		 count1, count2, overflows);
+	pr_debug("count1 %lld, count2 %lld, count3 %lld, overflow %d, overflows_2 %d\n",
+		 count1, count2, count3, overflows, overflows_2);
 
 	if (count1 != 1) {
 		if (count1 == 11)
@@ -183,12 +273,18 @@
 			pr_debug("failed: wrong count for bp1%lld\n", count1);
 	}
 
-	if (overflows != 1)
+	if (overflows != 3)
 		pr_debug("failed: wrong overflow hit\n");
 
-	if (count2 != 1)
+	if (overflows_2 != 3)
+		pr_debug("failed: wrong overflow_2 hit\n");
+
+	if (count2 != 3)
 		pr_debug("failed: wrong count for bp2\n");
 
-	return count1 == 1 && overflows == 1 && count2 == 1 ?
+	if (count3 != 2)
+		pr_debug("failed: wrong count for bp3\n");
+
+	return count1 == 1 && overflows == 3 && count2 == 3 && overflows_2 == 3 && count3 == 2 ?
 		TEST_OK : TEST_FAIL;
 }

diff --git a/tools/perf/tests/bpf-script-test-relocation.c b/tools/perf/tests/bpf-script-test-relocation.c
new file mode 100644
index 0000000..93af774
--- /dev/null
+++ b/tools/perf/tests/bpf-script-test-relocation.c

@@ -0,0 +1,50 @@
+/*
+ * bpf-script-test-relocation.c
+ * Test BPF loader checking relocation
+ */
+#ifndef LINUX_VERSION_CODE
+# error Need LINUX_VERSION_CODE
+# error Example: for 4.2 kernel, put 'clang-opt="-DLINUX_VERSION_CODE=0x40200" into llvm section of ~/.perfconfig'
+#endif
+#define BPF_ANY 0
+#define BPF_MAP_TYPE_ARRAY 2
+#define BPF_FUNC_map_lookup_elem 1
+#define BPF_FUNC_map_update_elem 2
+
+static void *(*bpf_map_lookup_elem)(void *map, void *key) =
+	(void *) BPF_FUNC_map_lookup_elem;
+static void *(*bpf_map_update_elem)(void *map, void *key, void *value, int flags) =
+	(void *) BPF_FUNC_map_update_elem;
+
+struct bpf_map_def {
+	unsigned int type;
+	unsigned int key_size;
+	unsigned int value_size;
+	unsigned int max_entries;
+};
+
+#define SEC(NAME) __attribute__((section(NAME), used))
+struct bpf_map_def SEC("maps") my_table = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.max_entries = 1,
+};
+
+int this_is_a_global_val;
+
+SEC("func=sys_write")
+int bpf_func__sys_write(void *ctx)
+{
+	int key = 0;
+	int value = 0;
+
+	/*
+	 * Incorrect relocation. Should not allow this program be
+	 * loaded into kernel.
+	 */
+	bpf_map_update_elem(&this_is_a_global_val, &key, &value, 0);
+	return 0;
+}
+char _license[] SEC("license") = "GPL";
+int _version SEC("version") = LINUX_VERSION_CODE;

diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c
index 33689a0..199501c 100644
--- a/tools/perf/tests/bpf.c
+++ b/tools/perf/tests/bpf.c

@@ -1,7 +1,11 @@
 #include <stdio.h>
 #include <sys/epoll.h>
+#include <util/util.h>
 #include <util/bpf-loader.h>
 #include <util/evlist.h>
+#include <linux/bpf.h>
+#include <linux/filter.h>
+#include <bpf/bpf.h>
 #include "tests.h"
 #include "llvm.h"
 #include "debug.h"
@@ -71,6 +75,15 @@
 		(NR_ITERS + 1) / 4,
 	},
 #endif
+	{
+		LLVM_TESTCASE_BPF_RELOCATION,
+		"Test BPF relocation checker",
+		"[bpf_relocation_test]",
+		"fix 'perf test LLVM' first",
+		"libbpf error when dealing with relocation",
+		NULL,
+		0,
+	},
 };
 
 static int do_test(struct bpf_object *obj, int (*func)(void),
@@ -99,7 +112,7 @@
 	parse_evlist.error = &parse_error;
 	INIT_LIST_HEAD(&parse_evlist.list);
 
-	err = parse_events_load_bpf_obj(&parse_evlist, &parse_evlist.list, obj);
+	err = parse_events_load_bpf_obj(&parse_evlist, &parse_evlist.list, obj, NULL);
 	if (err || list_empty(&parse_evlist.list)) {
 		pr_debug("Failed to add events selected by BPF\n");
 		return TEST_FAIL;
@@ -190,7 +203,7 @@
 
 	ret = test_llvm__fetch_bpf_obj(&obj_buf, &obj_buf_sz,
 				       bpf_testcase_table[idx].prog_id,
-				       true);
+				       true, NULL);
 	if (ret != TEST_OK || !obj_buf || !obj_buf_sz) {
 		pr_debug("Unable to get BPF object, %s\n",
 			 bpf_testcase_table[idx].msg_compile_fail);
@@ -202,14 +215,21 @@
 
 	obj = prepare_bpf(obj_buf, obj_buf_sz,
 			  bpf_testcase_table[idx].name);
-	if (!obj) {
+	if ((!!bpf_testcase_table[idx].target_func) != (!!obj)) {
+		if (!obj)
+			pr_debug("Fail to load BPF object: %s\n",
+				 bpf_testcase_table[idx].msg_load_fail);
+		else
+			pr_debug("Success unexpectedly: %s\n",
+				 bpf_testcase_table[idx].msg_load_fail);
 		ret = TEST_FAIL;
 		goto out;
 	}
 
-	ret = do_test(obj,
-		      bpf_testcase_table[idx].target_func,
-		      bpf_testcase_table[idx].expect_result);
+	if (obj)
+		ret = do_test(obj,
+			      bpf_testcase_table[idx].target_func,
+			      bpf_testcase_table[idx].expect_result);
 out:
 	bpf__clear();
 	return ret;
@@ -227,6 +247,36 @@
 	return bpf_testcase_table[i].desc;
 }
 
+static int check_env(void)
+{
+	int err;
+	unsigned int kver_int;
+	char license[] = "GPL";
+
+	struct bpf_insn insns[] = {
+		BPF_MOV64_IMM(BPF_REG_0, 1),
+		BPF_EXIT_INSN(),
+	};
+
+	err = fetch_kernel_version(&kver_int, NULL, 0);
+	if (err) {
+		pr_debug("Unable to get kernel version\n");
+		return err;
+	}
+
+	err = bpf_load_program(BPF_PROG_TYPE_KPROBE, insns,
+			       sizeof(insns) / sizeof(insns[0]),
+			       license, kver_int, NULL, 0);
+	if (err < 0) {
+		pr_err("Missing basic BPF support, skip this test: %s\n",
+		       strerror(errno));
+		return err;
+	}
+	close(err);
+
+	return 0;
+}
+
 int test__bpf(int i)
 {
 	int err;
@@ -239,6 +289,9 @@
 		return TEST_SKIP;
 	}
 
+	if (check_env())
+		return TEST_SKIP;
+
 	err = __test__bpf(i);
 	return err;
 }

diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index 313a48c..afc9ad0 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c

@@ -439,7 +439,7 @@
 		.mmap_pages	     = UINT_MAX,
 		.user_freq	     = UINT_MAX,
 		.user_interval	     = ULLONG_MAX,
-		.freq		     = 4000,
+		.freq		     = 500,
 		.target		     = {
 			.uses_mmap   = true,
 		},
@@ -559,7 +559,13 @@
 				evlist = NULL;
 				continue;
 			}
-			pr_debug("perf_evlist__open failed\n");
+
+			if (verbose) {
+				char errbuf[512];
+				perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
+				pr_debug("perf_evlist__open() failed!\n%s\n", errbuf);
+			}
+
 			goto out_put;
 		}
 		break;

diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c
index 5e6a86e..ecf136c 100644
--- a/tools/perf/tests/hists_cumulate.c
+++ b/tools/perf/tests/hists_cumulate.c

@@ -191,7 +191,7 @@
 	 * function since TEST_ASSERT_VAL() returns in case of failure.
 	 */
 	hists__collapse_resort(hists, NULL);
-	hists__output_resort(hists, NULL);
+	perf_evsel__output_resort(hists_to_evsel(hists), NULL);
 
 	if (verbose > 2) {
 		pr_info("use callchain: %d, cumulate callchain: %d\n",

diff --git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c
index 351a424..34b945a 100644
--- a/tools/perf/tests/hists_filter.c
+++ b/tools/perf/tests/hists_filter.c

@@ -145,7 +145,7 @@
 		struct hists *hists = evsel__hists(evsel);
 
 		hists__collapse_resort(hists, NULL);
-		hists__output_resort(hists, NULL);
+		perf_evsel__output_resort(evsel, NULL);
 
 		if (verbose > 2) {
 			pr_info("Normal histogram\n");

diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c
index b2312651..23cce67 100644
--- a/tools/perf/tests/hists_output.c
+++ b/tools/perf/tests/hists_output.c

@@ -156,7 +156,7 @@
 		goto out;
 
 	hists__collapse_resort(hists, NULL);
-	hists__output_resort(hists, NULL);
+	perf_evsel__output_resort(evsel, NULL);
 
 	if (verbose > 2) {
 		pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);
@@ -256,7 +256,7 @@
 		goto out;
 
 	hists__collapse_resort(hists, NULL);
-	hists__output_resort(hists, NULL);
+	perf_evsel__output_resort(evsel, NULL);
 
 	if (verbose > 2) {
 		pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);
@@ -310,7 +310,7 @@
 		goto out;
 
 	hists__collapse_resort(hists, NULL);
-	hists__output_resort(hists, NULL);
+	perf_evsel__output_resort(evsel, NULL);
 
 	if (verbose > 2) {
 		pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);
@@ -388,7 +388,7 @@
 		goto out;
 
 	hists__collapse_resort(hists, NULL);
-	hists__output_resort(hists, NULL);
+	perf_evsel__output_resort(evsel, NULL);
 
 	if (verbose > 2) {
 		pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);
@@ -491,7 +491,7 @@
 		goto out;
 
 	hists__collapse_resort(hists, NULL);
-	hists__output_resort(hists, NULL);
+	perf_evsel__output_resort(evsel, NULL);
 
 	if (verbose > 2) {
 		pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);

diff --git a/tools/perf/tests/llvm.c b/tools/perf/tests/llvm.c
index 06f45c1..cff564f 100644
--- a/tools/perf/tests/llvm.c
+++ b/tools/perf/tests/llvm.c

@@ -6,12 +6,6 @@
 #include "tests.h"
 #include "debug.h"
 
-static int perf_config_cb(const char *var, const char *val,
-			  void *arg __maybe_unused)
-{
-	return perf_default_config(var, val, arg);
-}
-
 #ifdef HAVE_LIBBPF_SUPPORT
 static int test__bpf_parsing(void *obj_buf, size_t obj_buf_sz)
 {
@@ -35,6 +29,7 @@
 static struct {
 	const char *source;
 	const char *desc;
+	bool should_load_fail;
 } bpf_source_table[__LLVM_TESTCASE_MAX] = {
 	[LLVM_TESTCASE_BASE] = {
 		.source = test_llvm__bpf_base_prog,
@@ -48,14 +43,19 @@
 		.source = test_llvm__bpf_test_prologue_prog,
 		.desc = "Compile source for BPF prologue generation test",
 	},
+	[LLVM_TESTCASE_BPF_RELOCATION] = {
+		.source = test_llvm__bpf_test_relocation,
+		.desc = "Compile source for BPF relocation test",
+		.should_load_fail = true,
+	},
 };
 
-
 int
 test_llvm__fetch_bpf_obj(void **p_obj_buf,
 			 size_t *p_obj_buf_sz,
 			 enum test_llvm__testcase idx,
-			 bool force)
+			 bool force,
+			 bool *should_load_fail)
 {
 	const char *source;
 	const char *desc;
@@ -68,8 +68,8 @@
 
 	source = bpf_source_table[idx].source;
 	desc = bpf_source_table[idx].desc;
-
-	perf_config(perf_config_cb, NULL);
+	if (should_load_fail)
+		*should_load_fail = bpf_source_table[idx].should_load_fail;
 
 	/*
 	 * Skip this test if user's .perfconfig doesn't set [llvm] section
@@ -136,14 +136,15 @@
 	int ret;
 	void *obj_buf = NULL;
 	size_t obj_buf_sz = 0;
+	bool should_load_fail = false;
 
 	if ((subtest < 0) || (subtest >= __LLVM_TESTCASE_MAX))
 		return TEST_FAIL;
 
 	ret = test_llvm__fetch_bpf_obj(&obj_buf, &obj_buf_sz,
-				       subtest, false);
+				       subtest, false, &should_load_fail);
 
-	if (ret == TEST_OK) {
+	if (ret == TEST_OK && !should_load_fail) {
 		ret = test__bpf_parsing(obj_buf, obj_buf_sz);
 		if (ret != TEST_OK) {
 			pr_debug("Failed to parse test case '%s'\n",

diff --git a/tools/perf/tests/llvm.h b/tools/perf/tests/llvm.h
index 5150b4d..0eaa604 100644
--- a/tools/perf/tests/llvm.h
+++ b/tools/perf/tests/llvm.h

@@ -7,14 +7,17 @@
 extern const char test_llvm__bpf_base_prog[];
 extern const char test_llvm__bpf_test_kbuild_prog[];
 extern const char test_llvm__bpf_test_prologue_prog[];
+extern const char test_llvm__bpf_test_relocation[];
 
 enum test_llvm__testcase {
 	LLVM_TESTCASE_BASE,
 	LLVM_TESTCASE_KBUILD,
 	LLVM_TESTCASE_BPF_PROLOGUE,
+	LLVM_TESTCASE_BPF_RELOCATION,
 	__LLVM_TESTCASE_MAX,
 };
 
 int test_llvm__fetch_bpf_obj(void **p_obj_buf, size_t *p_obj_buf_sz,
-			     enum test_llvm__testcase index, bool force);
+			     enum test_llvm__testcase index, bool force,
+			     bool *should_load_fail);
 #endif

diff --git a/tools/perf/tests/make b/tools/perf/tests/make
index f918015..cac15d9 100644
--- a/tools/perf/tests/make
+++ b/tools/perf/tests/make

@@ -15,6 +15,7 @@
 PERF := .
 PERF_O := $(PERF)
 O_OPT :=
+FULL_O := $(shell readlink -f $(PERF_O) || echo $(PERF_O))
 
 ifneq ($(O),)
   FULL_O := $(shell readlink -f $(O) || echo $(O))
@@ -79,6 +80,7 @@
 make_no_libbionic   := NO_LIBBIONIC=1
 make_no_auxtrace    := NO_AUXTRACE=1
 make_no_libbpf	    := NO_LIBBPF=1
+make_no_libcrypto   := NO_LIBCRYPTO=1
 make_tags           := tags
 make_cscope         := cscope
 make_help           := help
@@ -102,6 +104,7 @@
 make_minimal        += NO_DEMANGLE=1 NO_LIBELF=1 NO_LIBUNWIND=1 NO_BACKTRACE=1
 make_minimal        += NO_LIBNUMA=1 NO_LIBAUDIT=1 NO_LIBBIONIC=1
 make_minimal        += NO_LIBDW_DWARF_UNWIND=1 NO_AUXTRACE=1 NO_LIBBPF=1
+make_minimal        += NO_LIBCRYPTO=1
 
 # $(run) contains all available tests
 run := make_pure
@@ -110,6 +113,9 @@
 # disable features detection
 ifeq ($(MK),Makefile)
 run += make_clean_all
+MAKE_F := $(MAKE)
+else
+MAKE_F := $(MAKE) -f $(MK)
 endif
 run += make_python_perf_so
 run += make_debug
@@ -260,6 +266,8 @@
 run_O := $(shell shuf -e $(run_O))
 endif
 
+max_width := $(shell echo $(run_O) | sed 's/ /\n/g' | wc -L)
+
 ifdef DEBUG
 d := $(info run   $(run))
 d := $(info run_O $(run_O))
@@ -267,13 +275,13 @@
 
 MAKEFLAGS := --no-print-directory
 
-clean := @(cd $(PERF); make -s -f $(MK) $(O_OPT) clean >/dev/null)
+clean := @(cd $(PERF); $(MAKE_F) -s $(O_OPT) clean >/dev/null)
 
 $(run):
 	$(call clean)
 	@TMP_DEST=$$(mktemp -d); \
-	cmd="cd $(PERF) && make -f $(MK) $(PARALLEL_OPT) $(O_OPT) DESTDIR=$$TMP_DEST $($@)"; \
-	echo "- $@: $$cmd" && echo $$cmd > $@ && \
+	cmd="cd $(PERF) && $(MAKE_F) $($@) $(PARALLEL_OPT) $(O_OPT) DESTDIR=$$TMP_DEST"; \
+	printf "%*.*s: %s\n" $(max_width) $(max_width) "$@" "$$cmd" && echo $$cmd > $@ && \
 	( eval $$cmd ) >> $@ 2>&1; \
 	echo "  test: $(call test,$@)" >> $@ 2>&1; \
 	$(call test,$@) && \
@@ -283,8 +291,8 @@
 	$(call clean)
 	@TMP_O=$$(mktemp -d); \
 	TMP_DEST=$$(mktemp -d); \
-	cmd="cd $(PERF) && make -f $(MK) $(PARALLEL_OPT) O=$$TMP_O DESTDIR=$$TMP_DEST $($(patsubst %_O,%,$@))"; \
-	echo "- $@: $$cmd" && echo $$cmd > $@ && \
+	cmd="cd $(PERF) && $(MAKE_F) $($(patsubst %_O,%,$@)) $(PARALLEL_OPT) O=$$TMP_O DESTDIR=$$TMP_DEST"; \
+	printf "%*.*s: %s\n" $(max_width) $(max_width) "$@" "$$cmd" && echo $$cmd > $@ && \
 	( eval $$cmd ) >> $@ 2>&1 && \
 	echo "  test: $(call test_O,$@)" >> $@ 2>&1; \
 	$(call test_O,$@) && \
@@ -313,11 +321,43 @@
 	(make -C ../../tools $(PARALLEL_OPT) $(K_O_OPT) perf) > $@ 2>&1 && \
 	test -x $(KERNEL_O)/tools/perf/perf && rm -f $@ || (cat $@ ; false)
 
+FEATURES_DUMP_FILE := $(FULL_O)/BUILD_TEST_FEATURE_DUMP
+FEATURES_DUMP_FILE_STATIC := $(FULL_O)/BUILD_TEST_FEATURE_DUMP_STATIC
+
 all: $(run) $(run_O) tarpkg make_kernelsrc make_kernelsrc_tools
 	@echo OK
+	@rm -f $(FEATURES_DUMP_FILE) $(FEATURES_DUMP_FILE_STATIC)
 
 out: $(run_O)
 	@echo OK
+	@rm -f $(FEATURES_DUMP_FILE) $(FEATURES_DUMP_FILE_STATIC)
+
+ifeq ($(REUSE_FEATURES_DUMP),1)
+$(FEATURES_DUMP_FILE):
+	$(call clean)
+	@cmd="cd $(PERF) && make FEATURE_DUMP_COPY=$@ $(O_OPT) feature-dump"; \
+	echo "- $@: $$cmd" && echo $$cmd && \
+	( eval $$cmd ) > /dev/null 2>&1
+
+$(FEATURES_DUMP_FILE_STATIC):
+	$(call clean)
+	@cmd="cd $(PERF) && make FEATURE_DUMP_COPY=$@ $(O_OPT) LDFLAGS='-static' feature-dump"; \
+	echo "- $@: $$cmd" && echo $$cmd && \
+	( eval $$cmd ) > /dev/null 2>&1
+
+# Add feature dump dependency for run/run_O targets
+$(foreach t,$(run) $(run_O),$(eval \
+	$(t): $(if $(findstring make_static,$(t)),\
+		$(FEATURES_DUMP_FILE_STATIC),\
+		$(FEATURES_DUMP_FILE))))
+
+# Append 'FEATURES_DUMP=' option to all test cases. For example:
+# make_no_libbpf: NO_LIBBPF=1  --> NO_LIBBPF=1 FEATURES_DUMP=/a/b/BUILD_TEST_FEATURE_DUMP
+# make_static: LDFLAGS=-static --> LDFLAGS=-static FEATURES_DUMP=/a/b/BUILD_TEST_FEATURE_DUMP_STATIC
+$(foreach t,$(run),$(if $(findstring make_static,$(t)),\
+			$(eval $(t) := $($(t)) FEATURES_DUMP=$(FEATURES_DUMP_FILE_STATIC)),\
+			$(eval $(t) := $($(t)) FEATURES_DUMP=$(FEATURES_DUMP_FILE))))
+endif
 
 .PHONY: all $(run) $(run_O) tarpkg clean make_kernelsrc make_kernelsrc_tools
 endif # ifndef MK

diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index abe8849..7865f68 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c

@@ -1271,6 +1271,38 @@
 	return 0;
 }
 
+static int test__checkevent_config_symbol(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+	TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "insn") == 0);
+	return 0;
+}
+
+static int test__checkevent_config_raw(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+	TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "rawpmu") == 0);
+	return 0;
+}
+
+static int test__checkevent_config_num(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+	TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "numpmu") == 0);
+	return 0;
+}
+
+static int test__checkevent_config_cache(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+	TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "cachepmu") == 0);
+	return 0;
+}
+
 static int count_tracepoints(void)
 {
 	struct dirent *events_ent;
@@ -1579,6 +1611,26 @@
 		.check = test__checkevent_precise_max_modifier,
 		.id    = 47,
 	},
+	{
+		.name  = "instructions/name=insn/",
+		.check = test__checkevent_config_symbol,
+		.id    = 48,
+	},
+	{
+		.name  = "r1234/name=rawpmu/",
+		.check = test__checkevent_config_raw,
+		.id    = 49,
+	},
+	{
+		.name  = "4:0x6530160/name=numpmu/",
+		.check = test__checkevent_config_num,
+		.id    = 50,
+	},
+	{
+		.name  = "L1-dcache-misses/name=cachepmu/",
+		.check = test__checkevent_config_cache,
+		.id    = 51,
+	},
 };
 
 static struct evlist_test test__events_pmu[] = {
@@ -1666,7 +1718,7 @@
 	}
 
 	ret = t->check(&terms);
-	parse_events__free_terms(&terms);
+	parse_events_terms__purge(&terms);
 
 	return ret;
 }

diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c
index f0bfc9e..630b0b4 100644
--- a/tools/perf/tests/vmlinux-kallsyms.c
+++ b/tools/perf/tests/vmlinux-kallsyms.c

@@ -110,7 +110,6 @@
 	 */
 	for (nd = rb_first(&vmlinux_map->dso->symbols[type]); nd; nd = rb_next(nd)) {
 		struct symbol *pair, *first_pair;
-		bool backwards = true;
 
 		sym  = rb_entry(nd, struct symbol, rb_node);
 
@@ -151,27 +150,14 @@
 				continue;
 
 			} else {
-				struct rb_node *nnd;
-detour:
-				nnd = backwards ? rb_prev(&pair->rb_node) :
-						  rb_next(&pair->rb_node);
-				if (nnd) {
-					struct symbol *next = rb_entry(nnd, struct symbol, rb_node);
-
-					if (UM(next->start) == mem_start) {
-						pair = next;
+				pair = machine__find_kernel_symbol_by_name(&kallsyms, type, sym->name, NULL, NULL);
+				if (pair) {
+					if (UM(pair->start) == mem_start)
 						goto next_pair;
-					}
-				}
 
-				if (backwards) {
-					backwards = false;
-					pair = first_pair;
-					goto detour;
+					pr_debug("%#" PRIx64 ": diff name v: %s k: %s\n",
+						 mem_start, sym->name, pair->name);
 				}
-
-				pr_debug("%#" PRIx64 ": diff name v: %s k: %s\n",
-					 mem_start, sym->name, pair->name);
 			}
 		} else
 			pr_debug("%#" PRIx64 ": %s not on kallsyms\n",

diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c
index d372021..af68a9d 100644
--- a/tools/perf/ui/browser.c
+++ b/tools/perf/ui/browser.c

@@ -531,8 +531,8 @@
 		.bg	  = "yellow",
 	},
 	{
-		.colorset = HE_COLORSET_CODE,
-		.name	  = "code",
+		.colorset = HE_COLORSET_JUMP_ARROWS,
+		.name	  = "jump_arrows",
 		.fg	  = "blue",
 		.bg	  = "default",
 	},

diff --git a/tools/perf/ui/browser.h b/tools/perf/ui/browser.h
index 01781de..be3b70e 100644
--- a/tools/perf/ui/browser.h
+++ b/tools/perf/ui/browser.h

@@ -7,7 +7,7 @@
 #define HE_COLORSET_MEDIUM	51
 #define HE_COLORSET_NORMAL	52
 #define HE_COLORSET_SELECTED	53
-#define HE_COLORSET_CODE	54
+#define HE_COLORSET_JUMP_ARROWS	54
 #define HE_COLORSET_ADDR	55
 #define HE_COLORSET_ROOT	56
 

diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index 718bd46..4fc208e 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c

@@ -284,7 +284,7 @@
 		to = (u64)btarget->idx;
 	}
 
-	ui_browser__set_color(browser, HE_COLORSET_CODE);
+	ui_browser__set_color(browser, HE_COLORSET_JUMP_ARROWS);
 	__ui_browser__line_arrow(browser, pcnt_width + 2 + ab->addr_width,
 				 from, to);
 }

diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 08c09ad..4b98165 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c

@@ -32,6 +32,7 @@
 	bool		     show_headers;
 	float		     min_pcnt;
 	u64		     nr_non_filtered_entries;
+	u64		     nr_hierarchy_entries;
 	u64		     nr_callchain_rows;
 };
 
@@ -58,11 +59,11 @@
 
 	for (nd = rb_first(&hists->entries);
 	     (nd = hists__filter_entries(nd, browser->min_pcnt)) != NULL;
-	     nd = rb_next(nd)) {
+	     nd = rb_hierarchy_next(nd)) {
 		struct hist_entry *he =
 			rb_entry(nd, struct hist_entry, rb_node);
 
-		if (he->unfolded)
+		if (he->leaf && he->unfolded)
 			unfolded_rows += he->nr_rows;
 	}
 	return unfolded_rows;
@@ -72,7 +73,9 @@
 {
 	u32 nr_entries;
 
-	if (hist_browser__has_filter(hb))
+	if (symbol_conf.report_hierarchy)
+		nr_entries = hb->nr_hierarchy_entries;
+	else if (hist_browser__has_filter(hb))
 		nr_entries = hb->nr_non_filtered_entries;
 	else
 		nr_entries = hb->hists->nr_entries;
@@ -247,6 +250,38 @@
 	return n;
 }
 
+static int hierarchy_count_rows(struct hist_browser *hb, struct hist_entry *he,
+				bool include_children)
+{
+	int count = 0;
+	struct rb_node *node;
+	struct hist_entry *child;
+
+	if (he->leaf)
+		return callchain__count_rows(&he->sorted_chain);
+
+	if (he->has_no_entry)
+		return 1;
+
+	node = rb_first(&he->hroot_out);
+	while (node) {
+		float percent;
+
+		child = rb_entry(node, struct hist_entry, rb_node);
+		percent = hist_entry__get_percent_limit(child);
+
+		if (!child->filtered && percent >= hb->min_pcnt) {
+			count++;
+
+			if (include_children && child->unfolded)
+				count += hierarchy_count_rows(hb, child, true);
+		}
+
+		node = rb_next(node);
+	}
+	return count;
+}
+
 static bool hist_entry__toggle_fold(struct hist_entry *he)
 {
 	if (!he)
@@ -326,11 +361,17 @@
 
 static void hist_entry__init_have_children(struct hist_entry *he)
 {
-	if (!he->init_have_children) {
+	if (he->init_have_children)
+		return;
+
+	if (he->leaf) {
 		he->has_children = !RB_EMPTY_ROOT(&he->sorted_chain);
 		callchain__init_have_children(&he->sorted_chain);
-		he->init_have_children = true;
+	} else {
+		he->has_children = !RB_EMPTY_ROOT(&he->hroot_out);
 	}
+
+	he->init_have_children = true;
 }
 
 static bool hist_browser__toggle_fold(struct hist_browser *browser)
@@ -349,17 +390,49 @@
 		has_children = callchain_list__toggle_fold(cl);
 
 	if (has_children) {
+		int child_rows = 0;
+
 		hist_entry__init_have_children(he);
 		browser->b.nr_entries -= he->nr_rows;
-		browser->nr_callchain_rows -= he->nr_rows;
 
-		if (he->unfolded)
-			he->nr_rows = callchain__count_rows(&he->sorted_chain);
+		if (he->leaf)
+			browser->nr_callchain_rows -= he->nr_rows;
 		else
+			browser->nr_hierarchy_entries -= he->nr_rows;
+
+		if (symbol_conf.report_hierarchy)
+			child_rows = hierarchy_count_rows(browser, he, true);
+
+		if (he->unfolded) {
+			if (he->leaf)
+				he->nr_rows = callchain__count_rows(&he->sorted_chain);
+			else
+				he->nr_rows = hierarchy_count_rows(browser, he, false);
+
+			/* account grand children */
+			if (symbol_conf.report_hierarchy)
+				browser->b.nr_entries += child_rows - he->nr_rows;
+
+			if (!he->leaf && he->nr_rows == 0) {
+				he->has_no_entry = true;
+				he->nr_rows = 1;
+			}
+		} else {
+			if (symbol_conf.report_hierarchy)
+				browser->b.nr_entries -= child_rows - he->nr_rows;
+
+			if (he->has_no_entry)
+				he->has_no_entry = false;
+
 			he->nr_rows = 0;
+		}
 
 		browser->b.nr_entries += he->nr_rows;
-		browser->nr_callchain_rows += he->nr_rows;
+
+		if (he->leaf)
+			browser->nr_callchain_rows += he->nr_rows;
+		else
+			browser->nr_hierarchy_entries += he->nr_rows;
 
 		return true;
 	}
@@ -422,13 +495,38 @@
 	return n;
 }
 
-static void hist_entry__set_folding(struct hist_entry *he, bool unfold)
+static int hierarchy_set_folding(struct hist_browser *hb, struct hist_entry *he,
+				 bool unfold __maybe_unused)
+{
+	float percent;
+	struct rb_node *nd;
+	struct hist_entry *child;
+	int n = 0;
+
+	for (nd = rb_first(&he->hroot_out); nd; nd = rb_next(nd)) {
+		child = rb_entry(nd, struct hist_entry, rb_node);
+		percent = hist_entry__get_percent_limit(child);
+		if (!child->filtered && percent >= hb->min_pcnt)
+			n++;
+	}
+
+	return n;
+}
+
+static void hist_entry__set_folding(struct hist_entry *he,
+				    struct hist_browser *hb, bool unfold)
 {
 	hist_entry__init_have_children(he);
 	he->unfolded = unfold ? he->has_children : false;
 
 	if (he->has_children) {
-		int n = callchain__set_folding(&he->sorted_chain, unfold);
+		int n;
+
+		if (he->leaf)
+			n = callchain__set_folding(&he->sorted_chain, unfold);
+		else
+			n = hierarchy_set_folding(hb, he, unfold);
+
 		he->nr_rows = unfold ? n : 0;
 	} else
 		he->nr_rows = 0;
@@ -438,19 +536,38 @@
 __hist_browser__set_folding(struct hist_browser *browser, bool unfold)
 {
 	struct rb_node *nd;
-	struct hists *hists = browser->hists;
+	struct hist_entry *he;
+	double percent;
 
-	for (nd = rb_first(&hists->entries);
-	     (nd = hists__filter_entries(nd, browser->min_pcnt)) != NULL;
-	     nd = rb_next(nd)) {
-		struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node);
-		hist_entry__set_folding(he, unfold);
-		browser->nr_callchain_rows += he->nr_rows;
+	nd = rb_first(&browser->hists->entries);
+	while (nd) {
+		he = rb_entry(nd, struct hist_entry, rb_node);
+
+		/* set folding state even if it's currently folded */
+		nd = __rb_hierarchy_next(nd, HMD_FORCE_CHILD);
+
+		hist_entry__set_folding(he, browser, unfold);
+
+		percent = hist_entry__get_percent_limit(he);
+		if (he->filtered || percent < browser->min_pcnt)
+			continue;
+
+		if (!he->depth || unfold)
+			browser->nr_hierarchy_entries++;
+		if (he->leaf)
+			browser->nr_callchain_rows += he->nr_rows;
+		else if (unfold && !hist_entry__has_hierarchy_children(he, browser->min_pcnt)) {
+			browser->nr_hierarchy_entries++;
+			he->has_no_entry = true;
+			he->nr_rows = 1;
+		} else
+			he->has_no_entry = false;
 	}
 }
 
 static void hist_browser__set_folding(struct hist_browser *browser, bool unfold)
 {
+	browser->nr_hierarchy_entries = 0;
 	browser->nr_callchain_rows = 0;
 	__hist_browser__set_folding(browser, unfold);
 
@@ -657,9 +774,24 @@
 	return 1;
 }
 
+static bool check_percent_display(struct rb_node *node, u64 parent_total)
+{
+	struct callchain_node *child;
+
+	if (node == NULL)
+		return false;
+
+	if (rb_next(node))
+		return true;
+
+	child = rb_entry(node, struct callchain_node, rb_node);
+	return callchain_cumul_hits(child) != parent_total;
+}
+
 static int hist_browser__show_callchain_flat(struct hist_browser *browser,
 					     struct rb_root *root,
 					     unsigned short row, u64 total,
+					     u64 parent_total,
 					     print_callchain_entry_fn print,
 					     struct callchain_print_arg *arg,
 					     check_output_full_fn is_output_full)
@@ -669,7 +801,7 @@
 	bool need_percent;
 
 	node = rb_first(root);
-	need_percent = node && rb_next(node);
+	need_percent = check_percent_display(node, parent_total);
 
 	while (node) {
 		struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node);
@@ -763,6 +895,7 @@
 static int hist_browser__show_callchain_folded(struct hist_browser *browser,
 					       struct rb_root *root,
 					       unsigned short row, u64 total,
+					       u64 parent_total,
 					       print_callchain_entry_fn print,
 					       struct callchain_print_arg *arg,
 					       check_output_full_fn is_output_full)
@@ -772,7 +905,7 @@
 	bool need_percent;
 
 	node = rb_first(root);
-	need_percent = node && rb_next(node);
+	need_percent = check_percent_display(node, parent_total);
 
 	while (node) {
 		struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node);
@@ -844,20 +977,24 @@
 	return row - first_row;
 }
 
-static int hist_browser__show_callchain(struct hist_browser *browser,
+static int hist_browser__show_callchain_graph(struct hist_browser *browser,
 					struct rb_root *root, int level,
 					unsigned short row, u64 total,
+					u64 parent_total,
 					print_callchain_entry_fn print,
 					struct callchain_print_arg *arg,
 					check_output_full_fn is_output_full)
 {
 	struct rb_node *node;
 	int first_row = row, offset = level * LEVEL_OFFSET_STEP;
-	u64 new_total;
 	bool need_percent;
+	u64 percent_total = total;
+
+	if (callchain_param.mode == CHAIN_GRAPH_REL)
+		percent_total = parent_total;
 
 	node = rb_first(root);
-	need_percent = node && rb_next(node);
+	need_percent = check_percent_display(node, parent_total);
 
 	while (node) {
 		struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node);
@@ -878,7 +1015,7 @@
 			folded_sign = callchain_list__folded(chain);
 
 			row += hist_browser__show_callchain_list(browser, child,
-							chain, row, total,
+							chain, row, percent_total,
 							was_first && need_percent,
 							offset + extra_offset,
 							print, arg);
@@ -893,13 +1030,9 @@
 		if (folded_sign == '-') {
 			const int new_level = level + (extra_offset ? 2 : 1);
 
-			if (callchain_param.mode == CHAIN_GRAPH_REL)
-				new_total = child->children_hit;
-			else
-				new_total = total;
-
-			row += hist_browser__show_callchain(browser, &child->rb_root,
-							    new_level, row, new_total,
+			row += hist_browser__show_callchain_graph(browser, &child->rb_root,
+							    new_level, row, total,
+							    child->children_hit,
 							    print, arg, is_output_full);
 		}
 		if (is_output_full(browser, row))
@@ -910,6 +1043,45 @@
 	return row - first_row;
 }
 
+static int hist_browser__show_callchain(struct hist_browser *browser,
+					struct hist_entry *entry, int level,
+					unsigned short row,
+					print_callchain_entry_fn print,
+					struct callchain_print_arg *arg,
+					check_output_full_fn is_output_full)
+{
+	u64 total = hists__total_period(entry->hists);
+	u64 parent_total;
+	int printed;
+
+	if (symbol_conf.cumulate_callchain)
+		parent_total = entry->stat_acc->period;
+	else
+		parent_total = entry->stat.period;
+
+	if (callchain_param.mode == CHAIN_FLAT) {
+		printed = hist_browser__show_callchain_flat(browser,
+						&entry->sorted_chain, row,
+						total, parent_total, print, arg,
+						is_output_full);
+	} else if (callchain_param.mode == CHAIN_FOLDED) {
+		printed = hist_browser__show_callchain_folded(browser,
+						&entry->sorted_chain, row,
+						total, parent_total, print, arg,
+						is_output_full);
+	} else {
+		printed = hist_browser__show_callchain_graph(browser,
+						&entry->sorted_chain, level, row,
+						total, parent_total, print, arg,
+						is_output_full);
+	}
+
+	if (arg->is_current_entry)
+		browser->he_selection = entry;
+
+	return printed;
+}
+
 struct hpp_arg {
 	struct ui_browser *b;
 	char folded_sign;
@@ -1006,7 +1178,6 @@
 				    struct hist_entry *entry,
 				    unsigned short row)
 {
-	char s[256];
 	int printed = 0;
 	int width = browser->b.width;
 	char folded_sign = ' ';
@@ -1031,16 +1202,18 @@
 			.folded_sign	= folded_sign,
 			.current_entry	= current_entry,
 		};
-		struct perf_hpp hpp = {
-			.buf		= s,
-			.size		= sizeof(s),
-			.ptr		= &arg,
-		};
 		int column = 0;
 
 		hist_browser__gotorc(browser, row, 0);
 
-		perf_hpp__for_each_format(fmt) {
+		hists__for_each_format(browser->hists, fmt) {
+			char s[2048];
+			struct perf_hpp hpp = {
+				.buf	= s,
+				.size	= sizeof(s),
+				.ptr	= &arg,
+			};
+
 			if (perf_hpp__should_skip(fmt, entry->hists) ||
 			    column++ < browser->b.horiz_scroll)
 				continue;
@@ -1065,11 +1238,18 @@
 			}
 
 			if (fmt->color) {
-				width -= fmt->color(fmt, &hpp, entry);
+				int ret = fmt->color(fmt, &hpp, entry);
+				hist_entry__snprintf_alignment(entry, &hpp, fmt, ret);
+				/*
+				 * fmt->color() already used ui_browser to
+				 * print the non alignment bits, skip it (+ret):
+				 */
+				ui_browser__printf(&browser->b, "%s", s + ret);
 			} else {
-				width -= fmt->entry(fmt, &hpp, entry);
+				hist_entry__snprintf_alignment(entry, &hpp, fmt, fmt->entry(fmt, &hpp, entry));
 				ui_browser__printf(&browser->b, "%s", s);
 			}
+			width -= hpp.buf - s;
 		}
 
 		/* The scroll bar isn't being used */
@@ -1084,43 +1264,246 @@
 		--row_offset;
 
 	if (folded_sign == '-' && row != browser->b.rows) {
-		u64 total = hists__total_period(entry->hists);
 		struct callchain_print_arg arg = {
 			.row_offset = row_offset,
 			.is_current_entry = current_entry,
 		};
 
-		if (callchain_param.mode == CHAIN_GRAPH_REL) {
-			if (symbol_conf.cumulate_callchain)
-				total = entry->stat_acc->period;
-			else
-				total = entry->stat.period;
-		}
-
-		if (callchain_param.mode == CHAIN_FLAT) {
-			printed += hist_browser__show_callchain_flat(browser,
-					&entry->sorted_chain, row, total,
+		printed += hist_browser__show_callchain(browser, entry, 1, row,
 					hist_browser__show_callchain_entry, &arg,
 					hist_browser__check_output_full);
-		} else if (callchain_param.mode == CHAIN_FOLDED) {
-			printed += hist_browser__show_callchain_folded(browser,
-					&entry->sorted_chain, row, total,
-					hist_browser__show_callchain_entry, &arg,
-					hist_browser__check_output_full);
-		} else {
-			printed += hist_browser__show_callchain(browser,
-					&entry->sorted_chain, 1, row, total,
-					hist_browser__show_callchain_entry, &arg,
-					hist_browser__check_output_full);
-		}
-
-		if (arg.is_current_entry)
-			browser->he_selection = entry;
 	}
 
 	return printed;
 }
 
+static int hist_browser__show_hierarchy_entry(struct hist_browser *browser,
+					      struct hist_entry *entry,
+					      unsigned short row,
+					      int level)
+{
+	int printed = 0;
+	int width = browser->b.width;
+	char folded_sign = ' ';
+	bool current_entry = ui_browser__is_current_entry(&browser->b, row);
+	off_t row_offset = entry->row_offset;
+	bool first = true;
+	struct perf_hpp_fmt *fmt;
+	struct perf_hpp_list_node *fmt_node;
+	struct hpp_arg arg = {
+		.b		= &browser->b,
+		.current_entry	= current_entry,
+	};
+	int column = 0;
+	int hierarchy_indent = (entry->hists->nr_hpp_node - 2) * HIERARCHY_INDENT;
+
+	if (current_entry) {
+		browser->he_selection = entry;
+		browser->selection = &entry->ms;
+	}
+
+	hist_entry__init_have_children(entry);
+	folded_sign = hist_entry__folded(entry);
+	arg.folded_sign = folded_sign;
+
+	if (entry->leaf && row_offset) {
+		row_offset--;
+		goto show_callchain;
+	}
+
+	hist_browser__gotorc(browser, row, 0);
+
+	if (current_entry && browser->b.navkeypressed)
+		ui_browser__set_color(&browser->b, HE_COLORSET_SELECTED);
+	else
+		ui_browser__set_color(&browser->b, HE_COLORSET_NORMAL);
+
+	ui_browser__write_nstring(&browser->b, "", level * HIERARCHY_INDENT);
+	width -= level * HIERARCHY_INDENT;
+
+	/* the first hpp_list_node is for overhead columns */
+	fmt_node = list_first_entry(&entry->hists->hpp_formats,
+				    struct perf_hpp_list_node, list);
+	perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+		char s[2048];
+		struct perf_hpp hpp = {
+			.buf		= s,
+			.size		= sizeof(s),
+			.ptr		= &arg,
+		};
+
+		if (perf_hpp__should_skip(fmt, entry->hists) ||
+		    column++ < browser->b.horiz_scroll)
+			continue;
+
+		if (current_entry && browser->b.navkeypressed) {
+			ui_browser__set_color(&browser->b,
+					      HE_COLORSET_SELECTED);
+		} else {
+			ui_browser__set_color(&browser->b,
+					      HE_COLORSET_NORMAL);
+		}
+
+		if (first) {
+			ui_browser__printf(&browser->b, "%c", folded_sign);
+			width--;
+			first = false;
+		} else {
+			ui_browser__printf(&browser->b, "  ");
+			width -= 2;
+		}
+
+		if (fmt->color) {
+			int ret = fmt->color(fmt, &hpp, entry);
+			hist_entry__snprintf_alignment(entry, &hpp, fmt, ret);
+			/*
+			 * fmt->color() already used ui_browser to
+			 * print the non alignment bits, skip it (+ret):
+			 */
+			ui_browser__printf(&browser->b, "%s", s + ret);
+		} else {
+			int ret = fmt->entry(fmt, &hpp, entry);
+			hist_entry__snprintf_alignment(entry, &hpp, fmt, ret);
+			ui_browser__printf(&browser->b, "%s", s);
+		}
+		width -= hpp.buf - s;
+	}
+
+	ui_browser__write_nstring(&browser->b, "", hierarchy_indent);
+	width -= hierarchy_indent;
+
+	if (column >= browser->b.horiz_scroll) {
+		char s[2048];
+		struct perf_hpp hpp = {
+			.buf		= s,
+			.size		= sizeof(s),
+			.ptr		= &arg,
+		};
+
+		if (current_entry && browser->b.navkeypressed) {
+			ui_browser__set_color(&browser->b,
+					      HE_COLORSET_SELECTED);
+		} else {
+			ui_browser__set_color(&browser->b,
+					      HE_COLORSET_NORMAL);
+		}
+
+		perf_hpp_list__for_each_format(entry->hpp_list, fmt) {
+			ui_browser__write_nstring(&browser->b, "", 2);
+			width -= 2;
+
+			/*
+			 * No need to call hist_entry__snprintf_alignment()
+			 * since this fmt is always the last column in the
+			 * hierarchy mode.
+			 */
+			if (fmt->color) {
+				width -= fmt->color(fmt, &hpp, entry);
+			} else {
+				int i = 0;
+
+				width -= fmt->entry(fmt, &hpp, entry);
+				ui_browser__printf(&browser->b, "%s", ltrim(s));
+
+				while (isspace(s[i++]))
+					width++;
+			}
+		}
+	}
+
+	/* The scroll bar isn't being used */
+	if (!browser->b.navkeypressed)
+		width += 1;
+
+	ui_browser__write_nstring(&browser->b, "", width);
+
+	++row;
+	++printed;
+
+show_callchain:
+	if (entry->leaf && folded_sign == '-' && row != browser->b.rows) {
+		struct callchain_print_arg carg = {
+			.row_offset = row_offset,
+		};
+
+		printed += hist_browser__show_callchain(browser, entry,
+					level + 1, row,
+					hist_browser__show_callchain_entry, &carg,
+					hist_browser__check_output_full);
+	}
+
+	return printed;
+}
+
+static int hist_browser__show_no_entry(struct hist_browser *browser,
+				       unsigned short row, int level)
+{
+	int width = browser->b.width;
+	bool current_entry = ui_browser__is_current_entry(&browser->b, row);
+	bool first = true;
+	int column = 0;
+	int ret;
+	struct perf_hpp_fmt *fmt;
+	struct perf_hpp_list_node *fmt_node;
+	int indent = browser->hists->nr_hpp_node - 2;
+
+	if (current_entry) {
+		browser->he_selection = NULL;
+		browser->selection = NULL;
+	}
+
+	hist_browser__gotorc(browser, row, 0);
+
+	if (current_entry && browser->b.navkeypressed)
+		ui_browser__set_color(&browser->b, HE_COLORSET_SELECTED);
+	else
+		ui_browser__set_color(&browser->b, HE_COLORSET_NORMAL);
+
+	ui_browser__write_nstring(&browser->b, "", level * HIERARCHY_INDENT);
+	width -= level * HIERARCHY_INDENT;
+
+	/* the first hpp_list_node is for overhead columns */
+	fmt_node = list_first_entry(&browser->hists->hpp_formats,
+				    struct perf_hpp_list_node, list);
+	perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+		if (perf_hpp__should_skip(fmt, browser->hists) ||
+		    column++ < browser->b.horiz_scroll)
+			continue;
+
+		ret = fmt->width(fmt, NULL, hists_to_evsel(browser->hists));
+
+		if (first) {
+			/* for folded sign */
+			first = false;
+			ret++;
+		} else {
+			/* space between columns */
+			ret += 2;
+		}
+
+		ui_browser__write_nstring(&browser->b, "", ret);
+		width -= ret;
+	}
+
+	ui_browser__write_nstring(&browser->b, "", indent * HIERARCHY_INDENT);
+	width -= indent * HIERARCHY_INDENT;
+
+	if (column >= browser->b.horiz_scroll) {
+		char buf[32];
+
+		ret = snprintf(buf, sizeof(buf), "no entry >= %.2f%%", browser->min_pcnt);
+		ui_browser__printf(&browser->b, "  %s", buf);
+		width -= ret + 2;
+	}
+
+	/* The scroll bar isn't being used */
+	if (!browser->b.navkeypressed)
+		width += 1;
+
+	ui_browser__write_nstring(&browser->b, "", width);
+	return 1;
+}
+
 static int advance_hpp_check(struct perf_hpp *hpp, int inc)
 {
 	advance_hpp(hpp, inc);
@@ -1144,7 +1527,7 @@
 			return ret;
 	}
 
-	perf_hpp__for_each_format(fmt) {
+	hists__for_each_format(browser->hists, fmt) {
 		if (perf_hpp__should_skip(fmt, hists)  || column++ < browser->b.horiz_scroll)
 			continue;
 
@@ -1160,11 +1543,96 @@
 	return ret;
 }
 
+static int hists_browser__scnprintf_hierarchy_headers(struct hist_browser *browser, char *buf, size_t size)
+{
+	struct hists *hists = browser->hists;
+	struct perf_hpp dummy_hpp = {
+		.buf    = buf,
+		.size   = size,
+	};
+	struct perf_hpp_fmt *fmt;
+	struct perf_hpp_list_node *fmt_node;
+	size_t ret = 0;
+	int column = 0;
+	int indent = hists->nr_hpp_node - 2;
+	bool first_node, first_col;
+
+	ret = scnprintf(buf, size, " ");
+	if (advance_hpp_check(&dummy_hpp, ret))
+		return ret;
+
+	/* the first hpp_list_node is for overhead columns */
+	fmt_node = list_first_entry(&hists->hpp_formats,
+				    struct perf_hpp_list_node, list);
+	perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+		if (column++ < browser->b.horiz_scroll)
+			continue;
+
+		ret = fmt->header(fmt, &dummy_hpp, hists_to_evsel(hists));
+		if (advance_hpp_check(&dummy_hpp, ret))
+			break;
+
+		ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, "  ");
+		if (advance_hpp_check(&dummy_hpp, ret))
+			break;
+	}
+
+	ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, "%*s",
+			indent * HIERARCHY_INDENT, "");
+	if (advance_hpp_check(&dummy_hpp, ret))
+		return ret;
+
+	first_node = true;
+	list_for_each_entry_continue(fmt_node, &hists->hpp_formats, list) {
+		if (!first_node) {
+			ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, " / ");
+			if (advance_hpp_check(&dummy_hpp, ret))
+				break;
+		}
+		first_node = false;
+
+		first_col = true;
+		perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+			char *start;
+
+			if (perf_hpp__should_skip(fmt, hists))
+				continue;
+
+			if (!first_col) {
+				ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, "+");
+				if (advance_hpp_check(&dummy_hpp, ret))
+					break;
+			}
+			first_col = false;
+
+			ret = fmt->header(fmt, &dummy_hpp, hists_to_evsel(hists));
+			dummy_hpp.buf[ret] = '\0';
+			rtrim(dummy_hpp.buf);
+
+			start = ltrim(dummy_hpp.buf);
+			ret = strlen(start);
+
+			if (start != dummy_hpp.buf)
+				memmove(dummy_hpp.buf, start, ret + 1);
+
+			if (advance_hpp_check(&dummy_hpp, ret))
+				break;
+		}
+	}
+
+	return ret;
+}
+
 static void hist_browser__show_headers(struct hist_browser *browser)
 {
 	char headers[1024];
 
-	hists_browser__scnprintf_headers(browser, headers, sizeof(headers));
+	if (symbol_conf.report_hierarchy)
+		hists_browser__scnprintf_hierarchy_headers(browser, headers,
+							   sizeof(headers));
+	else
+		hists_browser__scnprintf_headers(browser, headers,
+						 sizeof(headers));
 	ui_browser__gotorc(&browser->b, 0, 0);
 	ui_browser__set_color(&browser->b, HE_COLORSET_ROOT);
 	ui_browser__write_nstring(&browser->b, headers, browser->b.width + 1);
@@ -1196,18 +1664,34 @@
 	hb->he_selection = NULL;
 	hb->selection = NULL;
 
-	for (nd = browser->top; nd; nd = rb_next(nd)) {
+	for (nd = browser->top; nd; nd = rb_hierarchy_next(nd)) {
 		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
 		float percent;
 
-		if (h->filtered)
+		if (h->filtered) {
+			/* let it move to sibling */
+			h->unfolded = false;
 			continue;
+		}
 
 		percent = hist_entry__get_percent_limit(h);
 		if (percent < hb->min_pcnt)
 			continue;
 
-		row += hist_browser__show_entry(hb, h, row);
+		if (symbol_conf.report_hierarchy) {
+			row += hist_browser__show_hierarchy_entry(hb, h, row,
+								  h->depth);
+			if (row == browser->rows)
+				break;
+
+			if (h->has_no_entry) {
+				hist_browser__show_no_entry(hb, row, h->depth + 1);
+				row++;
+			}
+		} else {
+			row += hist_browser__show_entry(hb, h, row);
+		}
+
 		if (row == browser->rows)
 			break;
 	}
@@ -1225,7 +1709,14 @@
 		if (!h->filtered && percent >= min_pcnt)
 			return nd;
 
-		nd = rb_next(nd);
+		/*
+		 * If it's filtered, its all children also were filtered.
+		 * So move to sibling node.
+		 */
+		if (rb_next(nd))
+			nd = rb_next(nd);
+		else
+			nd = rb_hierarchy_next(nd);
 	}
 
 	return NULL;
@@ -1241,7 +1732,7 @@
 		if (!h->filtered && percent >= min_pcnt)
 			return nd;
 
-		nd = rb_prev(nd);
+		nd = rb_hierarchy_prev(nd);
 	}
 
 	return NULL;
@@ -1271,8 +1762,8 @@
 		nd = browser->top;
 		goto do_offset;
 	case SEEK_END:
-		nd = hists__filter_prev_entries(rb_last(browser->entries),
-						hb->min_pcnt);
+		nd = rb_hierarchy_last(rb_last(browser->entries));
+		nd = hists__filter_prev_entries(nd, hb->min_pcnt);
 		first = false;
 		break;
 	default:
@@ -1306,7 +1797,7 @@
 	if (offset > 0) {
 		do {
 			h = rb_entry(nd, struct hist_entry, rb_node);
-			if (h->unfolded) {
+			if (h->unfolded && h->leaf) {
 				u16 remaining = h->nr_rows - h->row_offset;
 				if (offset > remaining) {
 					offset -= remaining;
@@ -1318,7 +1809,8 @@
 					break;
 				}
 			}
-			nd = hists__filter_entries(rb_next(nd), hb->min_pcnt);
+			nd = hists__filter_entries(rb_hierarchy_next(nd),
+						   hb->min_pcnt);
 			if (nd == NULL)
 				break;
 			--offset;
@@ -1327,7 +1819,7 @@
 	} else if (offset < 0) {
 		while (1) {
 			h = rb_entry(nd, struct hist_entry, rb_node);
-			if (h->unfolded) {
+			if (h->unfolded && h->leaf) {
 				if (first) {
 					if (-offset > h->row_offset) {
 						offset += h->row_offset;
@@ -1351,7 +1843,7 @@
 				}
 			}
 
-			nd = hists__filter_prev_entries(rb_prev(nd),
+			nd = hists__filter_prev_entries(rb_hierarchy_prev(nd),
 							hb->min_pcnt);
 			if (nd == NULL)
 				break;
@@ -1364,7 +1856,7 @@
 				 * row_offset at its last entry.
 				 */
 				h = rb_entry(nd, struct hist_entry, rb_node);
-				if (h->unfolded)
+				if (h->unfolded && h->leaf)
 					h->row_offset = h->nr_rows;
 				break;
 			}
@@ -1378,17 +1870,14 @@
 }
 
 static int hist_browser__fprintf_callchain(struct hist_browser *browser,
-					   struct hist_entry *he, FILE *fp)
+					   struct hist_entry *he, FILE *fp,
+					   int level)
 {
-	u64 total = hists__total_period(he->hists);
 	struct callchain_print_arg arg  = {
 		.fp = fp,
 	};
 
-	if (symbol_conf.cumulate_callchain)
-		total = he->stat_acc->period;
-
-	hist_browser__show_callchain(browser, &he->sorted_chain, 1, 0, total,
+	hist_browser__show_callchain(browser, he, level, 0,
 				     hist_browser__fprintf_callchain_entry, &arg,
 				     hist_browser__check_dump_full);
 	return arg.printed;
@@ -1414,7 +1903,7 @@
 	if (symbol_conf.use_callchain)
 		printed += fprintf(fp, "%c ", folded_sign);
 
-	perf_hpp__for_each_format(fmt) {
+	hists__for_each_format(browser->hists, fmt) {
 		if (perf_hpp__should_skip(fmt, he->hists))
 			continue;
 
@@ -1425,12 +1914,71 @@
 			first = false;
 
 		ret = fmt->entry(fmt, &hpp, he);
+		ret = hist_entry__snprintf_alignment(he, &hpp, fmt, ret);
 		advance_hpp(&hpp, ret);
 	}
-	printed += fprintf(fp, "%s\n", rtrim(s));
+	printed += fprintf(fp, "%s\n", s);
 
 	if (folded_sign == '-')
-		printed += hist_browser__fprintf_callchain(browser, he, fp);
+		printed += hist_browser__fprintf_callchain(browser, he, fp, 1);
+
+	return printed;
+}
+
+
+static int hist_browser__fprintf_hierarchy_entry(struct hist_browser *browser,
+						 struct hist_entry *he,
+						 FILE *fp, int level)
+{
+	char s[8192];
+	int printed = 0;
+	char folded_sign = ' ';
+	struct perf_hpp hpp = {
+		.buf = s,
+		.size = sizeof(s),
+	};
+	struct perf_hpp_fmt *fmt;
+	struct perf_hpp_list_node *fmt_node;
+	bool first = true;
+	int ret;
+	int hierarchy_indent = (he->hists->nr_hpp_node - 2) * HIERARCHY_INDENT;
+
+	printed = fprintf(fp, "%*s", level * HIERARCHY_INDENT, "");
+
+	folded_sign = hist_entry__folded(he);
+	printed += fprintf(fp, "%c", folded_sign);
+
+	/* the first hpp_list_node is for overhead columns */
+	fmt_node = list_first_entry(&he->hists->hpp_formats,
+				    struct perf_hpp_list_node, list);
+	perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+		if (!first) {
+			ret = scnprintf(hpp.buf, hpp.size, "  ");
+			advance_hpp(&hpp, ret);
+		} else
+			first = false;
+
+		ret = fmt->entry(fmt, &hpp, he);
+		advance_hpp(&hpp, ret);
+	}
+
+	ret = scnprintf(hpp.buf, hpp.size, "%*s", hierarchy_indent, "");
+	advance_hpp(&hpp, ret);
+
+	perf_hpp_list__for_each_format(he->hpp_list, fmt) {
+		ret = scnprintf(hpp.buf, hpp.size, "  ");
+		advance_hpp(&hpp, ret);
+
+		ret = fmt->entry(fmt, &hpp, he);
+		advance_hpp(&hpp, ret);
+	}
+
+	printed += fprintf(fp, "%s\n", rtrim(s));
+
+	if (he->leaf && folded_sign == '-') {
+		printed += hist_browser__fprintf_callchain(browser, he, fp,
+							   he->depth + 1);
+	}
 
 	return printed;
 }
@@ -1444,8 +1992,16 @@
 	while (nd) {
 		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
 
-		printed += hist_browser__fprintf_entry(browser, h, fp);
-		nd = hists__filter_entries(rb_next(nd), browser->min_pcnt);
+		if (symbol_conf.report_hierarchy) {
+			printed += hist_browser__fprintf_hierarchy_entry(browser,
+									 h, fp,
+									 h->depth);
+		} else {
+			printed += hist_browser__fprintf_entry(browser, h, fp);
+		}
+
+		nd = hists__filter_entries(rb_hierarchy_next(nd),
+					   browser->min_pcnt);
 	}
 
 	return printed;
@@ -1580,11 +2136,18 @@
 	if (hists->uid_filter_str)
 		printed += snprintf(bf + printed, size - printed,
 				    ", UID: %s", hists->uid_filter_str);
-	if (thread)
-		printed += scnprintf(bf + printed, size - printed,
+	if (thread) {
+		if (sort__has_thread) {
+			printed += scnprintf(bf + printed, size - printed,
 				    ", Thread: %s(%d)",
 				     (thread->comm_set ? thread__comm_str(thread) : ""),
 				    thread->tid);
+		} else {
+			printed += scnprintf(bf + printed, size - printed,
+				    ", Thread: %s",
+				     (thread->comm_set ? thread__comm_str(thread) : ""));
+		}
+	}
 	if (dso)
 		printed += scnprintf(bf + printed, size - printed,
 				    ", DSO: %s", dso->short_name);
@@ -1759,15 +2322,24 @@
 {
 	struct thread *thread = act->thread;
 
+	if ((!sort__has_thread && !sort__has_comm) || thread == NULL)
+		return 0;
+
 	if (browser->hists->thread_filter) {
 		pstack__remove(browser->pstack, &browser->hists->thread_filter);
 		perf_hpp__set_elide(HISTC_THREAD, false);
 		thread__zput(browser->hists->thread_filter);
 		ui_helpline__pop();
 	} else {
-		ui_helpline__fpush("To zoom out press ESC or ENTER + \"Zoom out of %s(%d) thread\"",
-				   thread->comm_set ? thread__comm_str(thread) : "",
-				   thread->tid);
+		if (sort__has_thread) {
+			ui_helpline__fpush("To zoom out press ESC or ENTER + \"Zoom out of %s(%d) thread\"",
+					   thread->comm_set ? thread__comm_str(thread) : "",
+					   thread->tid);
+		} else {
+			ui_helpline__fpush("To zoom out press ESC or ENTER + \"Zoom out of %s thread\"",
+					   thread->comm_set ? thread__comm_str(thread) : "");
+		}
+
 		browser->hists->thread_filter = thread__get(thread);
 		perf_hpp__set_elide(HISTC_THREAD, false);
 		pstack__push(browser->pstack, &browser->hists->thread_filter);
@@ -1782,13 +2354,22 @@
 add_thread_opt(struct hist_browser *browser, struct popup_action *act,
 	       char **optstr, struct thread *thread)
 {
-	if (thread == NULL)
+	int ret;
+
+	if ((!sort__has_thread && !sort__has_comm) || thread == NULL)
 		return 0;
 
-	if (asprintf(optstr, "Zoom %s %s(%d) thread",
-		     browser->hists->thread_filter ? "out of" : "into",
-		     thread->comm_set ? thread__comm_str(thread) : "",
-		     thread->tid) < 0)
+	if (sort__has_thread) {
+		ret = asprintf(optstr, "Zoom %s %s(%d) thread",
+			       browser->hists->thread_filter ? "out of" : "into",
+			       thread->comm_set ? thread__comm_str(thread) : "",
+			       thread->tid);
+	} else {
+		ret = asprintf(optstr, "Zoom %s %s thread",
+			       browser->hists->thread_filter ? "out of" : "into",
+			       thread->comm_set ? thread__comm_str(thread) : "");
+	}
+	if (ret < 0)
 		return 0;
 
 	act->thread = thread;
@@ -1801,6 +2382,9 @@
 {
 	struct map *map = act->ms.map;
 
+	if (!sort__has_dso || map == NULL)
+		return 0;
+
 	if (browser->hists->dso_filter) {
 		pstack__remove(browser->pstack, &browser->hists->dso_filter);
 		perf_hpp__set_elide(HISTC_DSO, false);
@@ -1825,7 +2409,7 @@
 add_dso_opt(struct hist_browser *browser, struct popup_action *act,
 	    char **optstr, struct map *map)
 {
-	if (map == NULL)
+	if (!sort__has_dso || map == NULL)
 		return 0;
 
 	if (asprintf(optstr, "Zoom %s %s DSO",
@@ -1850,7 +2434,7 @@
 add_map_opt(struct hist_browser *browser __maybe_unused,
 	    struct popup_action *act, char **optstr, struct map *map)
 {
-	if (map == NULL)
+	if (!sort__has_dso || map == NULL)
 		return 0;
 
 	if (asprintf(optstr, "Browse map details") < 0)
@@ -1952,6 +2536,9 @@
 static int
 do_zoom_socket(struct hist_browser *browser, struct popup_action *act)
 {
+	if (!sort__has_socket || act->socket < 0)
+		return 0;
+
 	if (browser->hists->socket_filter > -1) {
 		pstack__remove(browser->pstack, &browser->hists->socket_filter);
 		browser->hists->socket_filter = -1;
@@ -1971,7 +2558,7 @@
 add_socket_opt(struct hist_browser *browser, struct popup_action *act,
 	       char **optstr, int socket_id)
 {
-	if (socket_id < 0)
+	if (!sort__has_socket || socket_id < 0)
 		return 0;
 
 	if (asprintf(optstr, "Zoom %s Processor Socket %d",
@@ -1989,17 +2576,60 @@
 	u64 nr_entries = 0;
 	struct rb_node *nd = rb_first(&hb->hists->entries);
 
-	if (hb->min_pcnt == 0) {
+	if (hb->min_pcnt == 0 && !symbol_conf.report_hierarchy) {
 		hb->nr_non_filtered_entries = hb->hists->nr_non_filtered_entries;
 		return;
 	}
 
 	while ((nd = hists__filter_entries(nd, hb->min_pcnt)) != NULL) {
 		nr_entries++;
-		nd = rb_next(nd);
+		nd = rb_hierarchy_next(nd);
 	}
 
 	hb->nr_non_filtered_entries = nr_entries;
+	hb->nr_hierarchy_entries = nr_entries;
+}
+
+static void hist_browser__update_percent_limit(struct hist_browser *hb,
+					       double percent)
+{
+	struct hist_entry *he;
+	struct rb_node *nd = rb_first(&hb->hists->entries);
+	u64 total = hists__total_period(hb->hists);
+	u64 min_callchain_hits = total * (percent / 100);
+
+	hb->min_pcnt = callchain_param.min_percent = percent;
+
+	while ((nd = hists__filter_entries(nd, hb->min_pcnt)) != NULL) {
+		he = rb_entry(nd, struct hist_entry, rb_node);
+
+		if (he->has_no_entry) {
+			he->has_no_entry = false;
+			he->nr_rows = 0;
+		}
+
+		if (!he->leaf || !symbol_conf.use_callchain)
+			goto next;
+
+		if (callchain_param.mode == CHAIN_GRAPH_REL) {
+			total = he->stat.period;
+
+			if (symbol_conf.cumulate_callchain)
+				total = he->stat_acc->period;
+
+			min_callchain_hits = total * (percent / 100);
+		}
+
+		callchain_param.sort(&he->sorted_chain, he->callchain,
+				     min_callchain_hits, &callchain_param);
+
+next:
+		nd = __rb_hierarchy_next(nd, HMD_FORCE_CHILD);
+
+		/* force to re-evaluate folding state of callchains */
+		he->init_have_children = false;
+		hist_entry__set_folding(he, hb, false);
+	}
 }
 
 static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
@@ -2037,6 +2667,7 @@
 	"E             Expand all callchains\n"				\
 	"F             Toggle percentage of filtered entries\n"		\
 	"H             Display column headers\n"			\
+	"L             Change percent limit\n"				\
 	"m             Display context menu\n"				\
 	"S             Zoom into current Processor Socket\n"		\
 
@@ -2077,7 +2708,7 @@
 	memset(options, 0, sizeof(options));
 	memset(actions, 0, sizeof(actions));
 
-	perf_hpp__for_each_format(fmt) {
+	hists__for_each_format(browser->hists, fmt) {
 		perf_hpp__reset_width(fmt, hists);
 		/*
 		 * This is done just once, and activates the horizontal scrolling
@@ -2192,6 +2823,24 @@
 				top->zero = !top->zero;
 			}
 			continue;
+		case 'L':
+			if (ui_browser__input_window("Percent Limit",
+					"Please enter the value you want to hide entries under that percent.",
+					buf, "ENTER: OK, ESC: Cancel",
+					delay_secs * 2) == K_ENTER) {
+				char *end;
+				double new_percent = strtod(buf, &end);
+
+				if (new_percent < 0 || new_percent > 100) {
+					ui_browser__warning(&browser->b, delay_secs * 2,
+						"Invalid percent: %.2f", new_percent);
+					continue;
+				}
+
+				hist_browser__update_percent_limit(browser, new_percent);
+				hist_browser__reset(browser);
+			}
+			continue;
 		case K_F1:
 		case 'h':
 		case '?':
@@ -2263,10 +2912,7 @@
 			continue;
 		}
 
-		if (!sort__has_sym)
-			goto add_exit_option;
-
-		if (browser->selection == NULL)
+		if (!sort__has_sym || browser->selection == NULL)
 			goto skip_annotation;
 
 		if (sort__mode == SORT_MODE__BRANCH) {
@@ -2306,11 +2952,16 @@
 					     &options[nr_options],
 					     socked_id);
 		/* perf script support */
+		if (!is_report_browser(hbt))
+			goto skip_scripting;
+
 		if (browser->he_selection) {
-			nr_options += add_script_opt(browser,
-						     &actions[nr_options],
-						     &options[nr_options],
-						     thread, NULL);
+			if (sort__has_thread && thread) {
+				nr_options += add_script_opt(browser,
+							     &actions[nr_options],
+							     &options[nr_options],
+							     thread, NULL);
+			}
 			/*
 			 * Note that browser->selection != NULL
 			 * when browser->he_selection is not NULL,
@@ -2320,16 +2971,18 @@
 			 *
 			 * See hist_browser__show_entry.
 			 */
-			nr_options += add_script_opt(browser,
-						     &actions[nr_options],
-						     &options[nr_options],
-						     NULL, browser->selection->sym);
+			if (sort__has_sym && browser->selection->sym) {
+				nr_options += add_script_opt(browser,
+							     &actions[nr_options],
+							     &options[nr_options],
+							     NULL, browser->selection->sym);
+			}
 		}
 		nr_options += add_script_opt(browser, &actions[nr_options],
 					     &options[nr_options], NULL, NULL);
 		nr_options += add_switch_opt(browser, &actions[nr_options],
 					     &options[nr_options]);
-add_exit_option:
+skip_scripting:
 		nr_options += add_exit_opt(browser, &actions[nr_options],
 					   &options[nr_options]);
 

diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c
index 0f8dcfd..bd9bf7e 100644
--- a/tools/perf/ui/gtk/hists.c
+++ b/tools/perf/ui/gtk/hists.c

@@ -306,7 +306,7 @@
 
 	nr_cols = 0;
 
-	perf_hpp__for_each_format(fmt)
+	hists__for_each_format(hists, fmt)
 		col_types[nr_cols++] = G_TYPE_STRING;
 
 	store = gtk_tree_store_newv(nr_cols, col_types);
@@ -317,7 +317,7 @@
 
 	col_idx = 0;
 
-	perf_hpp__for_each_format(fmt) {
+	hists__for_each_format(hists, fmt) {
 		if (perf_hpp__should_skip(fmt, hists))
 			continue;
 
@@ -367,7 +367,7 @@
 
 		col_idx = 0;
 
-		perf_hpp__for_each_format(fmt) {
+		hists__for_each_format(hists, fmt) {
 			if (perf_hpp__should_skip(fmt, h->hists))
 				continue;
 
@@ -396,6 +396,194 @@
 	gtk_container_add(GTK_CONTAINER(window), view);
 }
 
+static void perf_gtk__add_hierarchy_entries(struct hists *hists,
+					    struct rb_root *root,
+					    GtkTreeStore *store,
+					    GtkTreeIter *parent,
+					    struct perf_hpp *hpp,
+					    float min_pcnt)
+{
+	int col_idx = 0;
+	struct rb_node *node;
+	struct hist_entry *he;
+	struct perf_hpp_fmt *fmt;
+	struct perf_hpp_list_node *fmt_node;
+	u64 total = hists__total_period(hists);
+	int size;
+
+	for (node = rb_first(root); node; node = rb_next(node)) {
+		GtkTreeIter iter;
+		float percent;
+		char *bf;
+
+		he = rb_entry(node, struct hist_entry, rb_node);
+		if (he->filtered)
+			continue;
+
+		percent = hist_entry__get_percent_limit(he);
+		if (percent < min_pcnt)
+			continue;
+
+		gtk_tree_store_append(store, &iter, parent);
+
+		col_idx = 0;
+
+		/* the first hpp_list_node is for overhead columns */
+		fmt_node = list_first_entry(&hists->hpp_formats,
+					    struct perf_hpp_list_node, list);
+		perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+			if (fmt->color)
+				fmt->color(fmt, hpp, he);
+			else
+				fmt->entry(fmt, hpp, he);
+
+			gtk_tree_store_set(store, &iter, col_idx++, hpp->buf, -1);
+		}
+
+		bf = hpp->buf;
+		size = hpp->size;
+		perf_hpp_list__for_each_format(he->hpp_list, fmt) {
+			int ret;
+
+			if (fmt->color)
+				ret = fmt->color(fmt, hpp, he);
+			else
+				ret = fmt->entry(fmt, hpp, he);
+
+			snprintf(hpp->buf + ret, hpp->size - ret, "  ");
+			advance_hpp(hpp, ret + 2);
+		}
+
+		gtk_tree_store_set(store, &iter, col_idx, ltrim(rtrim(bf)), -1);
+
+		if (!he->leaf) {
+			hpp->buf = bf;
+			hpp->size = size;
+
+			perf_gtk__add_hierarchy_entries(hists, &he->hroot_out,
+							store, &iter, hpp,
+							min_pcnt);
+
+			if (!hist_entry__has_hierarchy_children(he, min_pcnt)) {
+				char buf[32];
+				GtkTreeIter child;
+
+				snprintf(buf, sizeof(buf), "no entry >= %.2f%%",
+					 min_pcnt);
+
+				gtk_tree_store_append(store, &child, &iter);
+				gtk_tree_store_set(store, &child, col_idx, buf, -1);
+			}
+		}
+
+		if (symbol_conf.use_callchain && he->leaf) {
+			if (callchain_param.mode == CHAIN_GRAPH_REL)
+				total = symbol_conf.cumulate_callchain ?
+					he->stat_acc->period : he->stat.period;
+
+			perf_gtk__add_callchain(&he->sorted_chain, store, &iter,
+						col_idx, total);
+		}
+	}
+
+}
+
+static void perf_gtk__show_hierarchy(GtkWidget *window, struct hists *hists,
+				     float min_pcnt)
+{
+	struct perf_hpp_fmt *fmt;
+	struct perf_hpp_list_node *fmt_node;
+	GType col_types[MAX_COLUMNS];
+	GtkCellRenderer *renderer;
+	GtkTreeStore *store;
+	GtkWidget *view;
+	int col_idx;
+	int nr_cols = 0;
+	char s[512];
+	char buf[512];
+	bool first_node, first_col;
+	struct perf_hpp hpp = {
+		.buf		= s,
+		.size		= sizeof(s),
+	};
+
+	hists__for_each_format(hists, fmt) {
+		if (perf_hpp__is_sort_entry(fmt) ||
+		    perf_hpp__is_dynamic_entry(fmt))
+			break;
+
+		col_types[nr_cols++] = G_TYPE_STRING;
+	}
+	col_types[nr_cols++] = G_TYPE_STRING;
+
+	store = gtk_tree_store_newv(nr_cols, col_types);
+	view = gtk_tree_view_new();
+	renderer = gtk_cell_renderer_text_new();
+
+	col_idx = 0;
+
+	/* the first hpp_list_node is for overhead columns */
+	fmt_node = list_first_entry(&hists->hpp_formats,
+				    struct perf_hpp_list_node, list);
+	perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+		gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view),
+							    -1, fmt->name,
+							    renderer, "markup",
+							    col_idx++, NULL);
+	}
+
+	/* construct merged column header since sort keys share single column */
+	buf[0] = '\0';
+	first_node = true;
+	list_for_each_entry_continue(fmt_node, &hists->hpp_formats, list) {
+		if (!first_node)
+			strcat(buf, " / ");
+		first_node = false;
+
+		first_col = true;
+		perf_hpp_list__for_each_format(&fmt_node->hpp ,fmt) {
+			if (perf_hpp__should_skip(fmt, hists))
+				continue;
+
+			if (!first_col)
+				strcat(buf, "+");
+			first_col = false;
+
+			fmt->header(fmt, &hpp, hists_to_evsel(hists));
+			strcat(buf, ltrim(rtrim(hpp.buf)));
+		}
+	}
+
+	gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view),
+						    -1, buf,
+						    renderer, "markup",
+						    col_idx++, NULL);
+
+	for (col_idx = 0; col_idx < nr_cols; col_idx++) {
+		GtkTreeViewColumn *column;
+
+		column = gtk_tree_view_get_column(GTK_TREE_VIEW(view), col_idx);
+		gtk_tree_view_column_set_resizable(column, TRUE);
+
+		if (col_idx == 0) {
+			gtk_tree_view_set_expander_column(GTK_TREE_VIEW(view),
+							  column);
+		}
+	}
+
+	gtk_tree_view_set_model(GTK_TREE_VIEW(view), GTK_TREE_MODEL(store));
+	g_object_unref(GTK_TREE_MODEL(store));
+
+	perf_gtk__add_hierarchy_entries(hists, &hists->entries, store,
+					NULL, &hpp, min_pcnt);
+
+	gtk_tree_view_set_rules_hint(GTK_TREE_VIEW(view), TRUE);
+
+	g_signal_connect(view, "row-activated",
+			 G_CALLBACK(on_row_activated), NULL);
+	gtk_container_add(GTK_CONTAINER(window), view);
+}
+
 int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist,
 				  const char *help,
 				  struct hist_browser_timer *hbt __maybe_unused,
@@ -463,7 +651,10 @@
 							GTK_POLICY_AUTOMATIC,
 							GTK_POLICY_AUTOMATIC);
 
-		perf_gtk__show_hists(scrolled_window, hists, min_pcnt);
+		if (symbol_conf.report_hierarchy)
+			perf_gtk__show_hierarchy(scrolled_window, hists, min_pcnt);
+		else
+			perf_gtk__show_hists(scrolled_window, hists, min_pcnt);
 
 		tab_label = gtk_label_new(evname);
 

diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
index bf2a66e..3baeaa6 100644
--- a/tools/perf/ui/hist.c
+++ b/tools/perf/ui/hist.c

@@ -5,6 +5,7 @@
 #include "../util/util.h"
 #include "../util/sort.h"
 #include "../util/evsel.h"
+#include "../util/evlist.h"
 
 /* hist period print (hpp) functions */
 
@@ -371,7 +372,20 @@
 	return 0;
 }
 
-#define HPP__COLOR_PRINT_FNS(_name, _fn)		\
+static bool perf_hpp__is_hpp_entry(struct perf_hpp_fmt *a)
+{
+	return a->header == hpp__header_fn;
+}
+
+static bool hpp__equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b)
+{
+	if (!perf_hpp__is_hpp_entry(a) || !perf_hpp__is_hpp_entry(b))
+		return false;
+
+	return a->idx == b->idx;
+}
+
+#define HPP__COLOR_PRINT_FNS(_name, _fn, _idx)		\
 	{						\
 		.name   = _name,			\
 		.header	= hpp__header_fn,		\
@@ -381,9 +395,11 @@
 		.cmp	= hpp__nop_cmp,			\
 		.collapse = hpp__nop_cmp,		\
 		.sort	= hpp__sort_ ## _fn,		\
+		.idx	= PERF_HPP__ ## _idx,		\
+		.equal	= hpp__equal,			\
 	}
 
-#define HPP__COLOR_ACC_PRINT_FNS(_name, _fn)		\
+#define HPP__COLOR_ACC_PRINT_FNS(_name, _fn, _idx)	\
 	{						\
 		.name   = _name,			\
 		.header	= hpp__header_fn,		\
@@ -393,9 +409,11 @@
 		.cmp	= hpp__nop_cmp,			\
 		.collapse = hpp__nop_cmp,		\
 		.sort	= hpp__sort_ ## _fn,		\
+		.idx	= PERF_HPP__ ## _idx,		\
+		.equal	= hpp__equal,			\
 	}
 
-#define HPP__PRINT_FNS(_name, _fn)			\
+#define HPP__PRINT_FNS(_name, _fn, _idx)		\
 	{						\
 		.name   = _name,			\
 		.header	= hpp__header_fn,		\
@@ -404,22 +422,25 @@
 		.cmp	= hpp__nop_cmp,			\
 		.collapse = hpp__nop_cmp,		\
 		.sort	= hpp__sort_ ## _fn,		\
+		.idx	= PERF_HPP__ ## _idx,		\
+		.equal	= hpp__equal,			\
 	}
 
 struct perf_hpp_fmt perf_hpp__format[] = {
-	HPP__COLOR_PRINT_FNS("Overhead", overhead),
-	HPP__COLOR_PRINT_FNS("sys", overhead_sys),
-	HPP__COLOR_PRINT_FNS("usr", overhead_us),
-	HPP__COLOR_PRINT_FNS("guest sys", overhead_guest_sys),
-	HPP__COLOR_PRINT_FNS("guest usr", overhead_guest_us),
-	HPP__COLOR_ACC_PRINT_FNS("Children", overhead_acc),
-	HPP__PRINT_FNS("Samples", samples),
-	HPP__PRINT_FNS("Period", period)
+	HPP__COLOR_PRINT_FNS("Overhead", overhead, OVERHEAD),
+	HPP__COLOR_PRINT_FNS("sys", overhead_sys, OVERHEAD_SYS),
+	HPP__COLOR_PRINT_FNS("usr", overhead_us, OVERHEAD_US),
+	HPP__COLOR_PRINT_FNS("guest sys", overhead_guest_sys, OVERHEAD_GUEST_SYS),
+	HPP__COLOR_PRINT_FNS("guest usr", overhead_guest_us, OVERHEAD_GUEST_US),
+	HPP__COLOR_ACC_PRINT_FNS("Children", overhead_acc, OVERHEAD_ACC),
+	HPP__PRINT_FNS("Samples", samples, SAMPLES),
+	HPP__PRINT_FNS("Period", period, PERIOD)
 };
 
-LIST_HEAD(perf_hpp__list);
-LIST_HEAD(perf_hpp__sort_list);
-
+struct perf_hpp_list perf_hpp_list = {
+	.fields	= LIST_HEAD_INIT(perf_hpp_list.fields),
+	.sorts	= LIST_HEAD_INIT(perf_hpp_list.sorts),
+};
 
 #undef HPP__COLOR_PRINT_FNS
 #undef HPP__COLOR_ACC_PRINT_FNS
@@ -485,9 +506,16 @@
 		hpp_dimension__add_output(PERF_HPP__PERIOD);
 }
 
-void perf_hpp__column_register(struct perf_hpp_fmt *format)
+void perf_hpp_list__column_register(struct perf_hpp_list *list,
+				    struct perf_hpp_fmt *format)
 {
-	list_add_tail(&format->list, &perf_hpp__list);
+	list_add_tail(&format->list, &list->fields);
+}
+
+void perf_hpp_list__register_sort_field(struct perf_hpp_list *list,
+					struct perf_hpp_fmt *format)
+{
+	list_add_tail(&format->sort_list, &list->sorts);
 }
 
 void perf_hpp__column_unregister(struct perf_hpp_fmt *format)
@@ -495,53 +523,43 @@
 	list_del(&format->list);
 }
 
-void perf_hpp__register_sort_field(struct perf_hpp_fmt *format)
-{
-	list_add_tail(&format->sort_list, &perf_hpp__sort_list);
-}
-
-void perf_hpp__column_enable(unsigned col)
-{
-	BUG_ON(col >= PERF_HPP__MAX_INDEX);
-	perf_hpp__column_register(&perf_hpp__format[col]);
-}
-
-void perf_hpp__column_disable(unsigned col)
-{
-	BUG_ON(col >= PERF_HPP__MAX_INDEX);
-	perf_hpp__column_unregister(&perf_hpp__format[col]);
-}
-
 void perf_hpp__cancel_cumulate(void)
 {
+	struct perf_hpp_fmt *fmt, *acc, *ovh, *tmp;
+
 	if (is_strict_order(field_order))
 		return;
 
-	perf_hpp__column_disable(PERF_HPP__OVERHEAD_ACC);
-	perf_hpp__format[PERF_HPP__OVERHEAD].name = "Overhead";
+	ovh = &perf_hpp__format[PERF_HPP__OVERHEAD];
+	acc = &perf_hpp__format[PERF_HPP__OVERHEAD_ACC];
+
+	perf_hpp_list__for_each_format_safe(&perf_hpp_list, fmt, tmp) {
+		if (acc->equal(acc, fmt)) {
+			perf_hpp__column_unregister(fmt);
+			continue;
+		}
+
+		if (ovh->equal(ovh, fmt))
+			fmt->name = "Overhead";
+	}
 }
 
-void perf_hpp__setup_output_field(void)
+static bool fmt_equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b)
+{
+	return a->equal && a->equal(a, b);
+}
+
+void perf_hpp__setup_output_field(struct perf_hpp_list *list)
 {
 	struct perf_hpp_fmt *fmt;
 
 	/* append sort keys to output field */
-	perf_hpp__for_each_sort_list(fmt) {
-		if (!list_empty(&fmt->list))
-			continue;
+	perf_hpp_list__for_each_sort_list(list, fmt) {
+		struct perf_hpp_fmt *pos;
 
-		/*
-		 * sort entry fields are dynamically created,
-		 * so they can share a same sort key even though
-		 * the list is empty.
-		 */
-		if (perf_hpp__is_sort_entry(fmt)) {
-			struct perf_hpp_fmt *pos;
-
-			perf_hpp__for_each_format(pos) {
-				if (perf_hpp__same_sort_entry(pos, fmt))
-					goto next;
-			}
+		perf_hpp_list__for_each_format(list, pos) {
+			if (fmt_equal(fmt, pos))
+				goto next;
 		}
 
 		perf_hpp__column_register(fmt);
@@ -550,27 +568,17 @@
 	}
 }
 
-void perf_hpp__append_sort_keys(void)
+void perf_hpp__append_sort_keys(struct perf_hpp_list *list)
 {
 	struct perf_hpp_fmt *fmt;
 
 	/* append output fields to sort keys */
-	perf_hpp__for_each_format(fmt) {
-		if (!list_empty(&fmt->sort_list))
-			continue;
+	perf_hpp_list__for_each_format(list, fmt) {
+		struct perf_hpp_fmt *pos;
 
-		/*
-		 * sort entry fields are dynamically created,
-		 * so they can share a same sort key even though
-		 * the list is empty.
-		 */
-		if (perf_hpp__is_sort_entry(fmt)) {
-			struct perf_hpp_fmt *pos;
-
-			perf_hpp__for_each_sort_list(pos) {
-				if (perf_hpp__same_sort_entry(pos, fmt))
-					goto next;
-			}
+		perf_hpp_list__for_each_sort_list(list, pos) {
+			if (fmt_equal(fmt, pos))
+				goto next;
 		}
 
 		perf_hpp__register_sort_field(fmt);
@@ -579,20 +587,29 @@
 	}
 }
 
-void perf_hpp__reset_output_field(void)
+
+static void fmt_free(struct perf_hpp_fmt *fmt)
+{
+	if (fmt->free)
+		fmt->free(fmt);
+}
+
+void perf_hpp__reset_output_field(struct perf_hpp_list *list)
 {
 	struct perf_hpp_fmt *fmt, *tmp;
 
 	/* reset output fields */
-	perf_hpp__for_each_format_safe(fmt, tmp) {
+	perf_hpp_list__for_each_format_safe(list, fmt, tmp) {
 		list_del_init(&fmt->list);
 		list_del_init(&fmt->sort_list);
+		fmt_free(fmt);
 	}
 
 	/* reset sort keys */
-	perf_hpp__for_each_sort_list_safe(fmt, tmp) {
+	perf_hpp_list__for_each_sort_list_safe(list, fmt, tmp) {
 		list_del_init(&fmt->list);
 		list_del_init(&fmt->sort_list);
+		fmt_free(fmt);
 	}
 }
 
@@ -606,7 +623,7 @@
 	bool first = true;
 	struct perf_hpp dummy_hpp;
 
-	perf_hpp__for_each_format(fmt) {
+	hists__for_each_format(hists, fmt) {
 		if (perf_hpp__should_skip(fmt, hists))
 			continue;
 
@@ -624,22 +641,39 @@
 	return ret;
 }
 
+unsigned int hists__overhead_width(struct hists *hists)
+{
+	struct perf_hpp_fmt *fmt;
+	int ret = 0;
+	bool first = true;
+	struct perf_hpp dummy_hpp;
+
+	hists__for_each_format(hists, fmt) {
+		if (perf_hpp__is_sort_entry(fmt) || perf_hpp__is_dynamic_entry(fmt))
+			break;
+
+		if (first)
+			first = false;
+		else
+			ret += 2;
+
+		ret += fmt->width(fmt, &dummy_hpp, hists_to_evsel(hists));
+	}
+
+	return ret;
+}
+
 void perf_hpp__reset_width(struct perf_hpp_fmt *fmt, struct hists *hists)
 {
-	int idx;
-
 	if (perf_hpp__is_sort_entry(fmt))
 		return perf_hpp__reset_sort_width(fmt, hists);
 
-	for (idx = 0; idx < PERF_HPP__MAX_INDEX; idx++) {
-		if (fmt == &perf_hpp__format[idx])
-			break;
-	}
-
-	if (idx == PERF_HPP__MAX_INDEX)
+	if (perf_hpp__is_dynamic_entry(fmt))
 		return;
 
-	switch (idx) {
+	BUG_ON(fmt->idx >= PERF_HPP__MAX_INDEX);
+
+	switch (fmt->idx) {
 	case PERF_HPP__OVERHEAD:
 	case PERF_HPP__OVERHEAD_SYS:
 	case PERF_HPP__OVERHEAD_US:
@@ -667,7 +701,7 @@
 	struct perf_hpp_fmt *fmt;
 	const char *ptr = width_list_str;
 
-	perf_hpp__for_each_format(fmt) {
+	perf_hpp_list__for_each_format(&perf_hpp_list, fmt) {
 		char *p;
 
 		int len = strtol(ptr, &p, 10);
@@ -679,3 +713,71 @@
 			break;
 	}
 }
+
+static int add_hierarchy_fmt(struct hists *hists, struct perf_hpp_fmt *fmt)
+{
+	struct perf_hpp_list_node *node = NULL;
+	struct perf_hpp_fmt *fmt_copy;
+	bool found = false;
+	bool skip = perf_hpp__should_skip(fmt, hists);
+
+	list_for_each_entry(node, &hists->hpp_formats, list) {
+		if (node->level == fmt->level) {
+			found = true;
+			break;
+		}
+	}
+
+	if (!found) {
+		node = malloc(sizeof(*node));
+		if (node == NULL)
+			return -1;
+
+		node->skip = skip;
+		node->level = fmt->level;
+		perf_hpp_list__init(&node->hpp);
+
+		hists->nr_hpp_node++;
+		list_add_tail(&node->list, &hists->hpp_formats);
+	}
+
+	fmt_copy = perf_hpp_fmt__dup(fmt);
+	if (fmt_copy == NULL)
+		return -1;
+
+	if (!skip)
+		node->skip = false;
+
+	list_add_tail(&fmt_copy->list, &node->hpp.fields);
+	list_add_tail(&fmt_copy->sort_list, &node->hpp.sorts);
+
+	return 0;
+}
+
+int perf_hpp__setup_hists_formats(struct perf_hpp_list *list,
+				  struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel;
+	struct perf_hpp_fmt *fmt;
+	struct hists *hists;
+	int ret;
+
+	if (!symbol_conf.report_hierarchy)
+		return 0;
+
+	evlist__for_each(evlist, evsel) {
+		hists = evsel__hists(evsel);
+
+		perf_hpp_list__for_each_sort_list(list, fmt) {
+			if (perf_hpp__is_dynamic_entry(fmt) &&
+			    !perf_hpp__defined_dynamic_entry(fmt, hists))
+				continue;
+
+			ret = add_hierarchy_fmt(hists, fmt);
+			if (ret < 0)
+				return ret;
+		}
+	}
+
+	return 0;
+}

diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c
index 387110d..7aff5ac 100644
--- a/tools/perf/ui/stdio/hist.c
+++ b/tools/perf/ui/stdio/hist.c

@@ -165,8 +165,28 @@
 	return ret;
 }
 
+/*
+ * If have one single callchain root, don't bother printing
+ * its percentage (100 % in fractal mode and the same percentage
+ * than the hist in graph mode). This also avoid one level of column.
+ *
+ * However when percent-limit applied, it's possible that single callchain
+ * node have different (non-100% in fractal mode) percentage.
+ */
+static bool need_percent_display(struct rb_node *node, u64 parent_samples)
+{
+	struct callchain_node *cnode;
+
+	if (rb_next(node))
+		return true;
+
+	cnode = rb_entry(node, struct callchain_node, rb_node);
+	return callchain_cumul_hits(cnode) != parent_samples;
+}
+
 static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root,
-				       u64 total_samples, int left_margin)
+				       u64 total_samples, u64 parent_samples,
+				       int left_margin)
 {
 	struct callchain_node *cnode;
 	struct callchain_list *chain;
@@ -177,13 +197,8 @@
 	int ret = 0;
 	char bf[1024];
 
-	/*
-	 * If have one single callchain root, don't bother printing
-	 * its percentage (100 % in fractal mode and the same percentage
-	 * than the hist in graph mode). This also avoid one level of column.
-	 */
 	node = rb_first(root);
-	if (node && !rb_next(node)) {
+	if (node && !need_percent_display(node, parent_samples)) {
 		cnode = rb_entry(node, struct callchain_node, rb_node);
 		list_for_each_entry(chain, &cnode->val, list) {
 			/*
@@ -213,9 +228,15 @@
 		root = &cnode->rb_root;
 	}
 
+	if (callchain_param.mode == CHAIN_GRAPH_REL)
+		total_samples = parent_samples;
+
 	ret += __callchain__fprintf_graph(fp, root, total_samples,
 					  1, 1, left_margin);
-	ret += fprintf(fp, "\n");
+	if (ret) {
+		/* do not add a blank line if it printed nothing */
+		ret += fprintf(fp, "\n");
+	}
 
 	return ret;
 }
@@ -323,16 +344,19 @@
 					    u64 total_samples, int left_margin,
 					    FILE *fp)
 {
+	u64 parent_samples = he->stat.period;
+
+	if (symbol_conf.cumulate_callchain)
+		parent_samples = he->stat_acc->period;
+
 	switch (callchain_param.mode) {
 	case CHAIN_GRAPH_REL:
-		return callchain__fprintf_graph(fp, &he->sorted_chain,
-						symbol_conf.cumulate_callchain ?
-						he->stat_acc->period : he->stat.period,
-						left_margin);
+		return callchain__fprintf_graph(fp, &he->sorted_chain, total_samples,
+						parent_samples, left_margin);
 		break;
 	case CHAIN_GRAPH_ABS:
 		return callchain__fprintf_graph(fp, &he->sorted_chain, total_samples,
-						left_margin);
+						parent_samples, left_margin);
 		break;
 	case CHAIN_FLAT:
 		return callchain__fprintf_flat(fp, &he->sorted_chain, total_samples);
@@ -349,30 +373,6 @@
 	return 0;
 }
 
-static size_t hist_entry__callchain_fprintf(struct hist_entry *he,
-					    struct hists *hists,
-					    FILE *fp)
-{
-	int left_margin = 0;
-	u64 total_period = hists->stats.total_period;
-
-	if (field_order == NULL && (sort_order == NULL ||
-				    !prefixcmp(sort_order, "comm"))) {
-		struct perf_hpp_fmt *fmt;
-
-		perf_hpp__for_each_format(fmt) {
-			if (!perf_hpp__is_sort_entry(fmt))
-				continue;
-
-			/* must be 'comm' sort entry */
-			left_margin = fmt->width(fmt, NULL, hists_to_evsel(hists));
-			left_margin -= thread__comm_len(he->thread);
-			break;
-		}
-	}
-	return hist_entry_callchain__fprintf(he, total_period, left_margin, fp);
-}
-
 static int hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp)
 {
 	const char *sep = symbol_conf.field_sep;
@@ -384,7 +384,7 @@
 	if (symbol_conf.exclude_other && !he->parent)
 		return 0;
 
-	perf_hpp__for_each_format(fmt) {
+	hists__for_each_format(he->hists, fmt) {
 		if (perf_hpp__should_skip(fmt, he->hists))
 			continue;
 
@@ -403,12 +403,94 @@
 		else
 			ret = fmt->entry(fmt, hpp, he);
 
+		ret = hist_entry__snprintf_alignment(he, hpp, fmt, ret);
 		advance_hpp(hpp, ret);
 	}
 
 	return hpp->buf - start;
 }
 
+static int hist_entry__hierarchy_fprintf(struct hist_entry *he,
+					 struct perf_hpp *hpp,
+					 struct hists *hists,
+					 FILE *fp)
+{
+	const char *sep = symbol_conf.field_sep;
+	struct perf_hpp_fmt *fmt;
+	struct perf_hpp_list_node *fmt_node;
+	char *buf = hpp->buf;
+	size_t size = hpp->size;
+	int ret, printed = 0;
+	bool first = true;
+
+	if (symbol_conf.exclude_other && !he->parent)
+		return 0;
+
+	ret = scnprintf(hpp->buf, hpp->size, "%*s", he->depth * HIERARCHY_INDENT, "");
+	advance_hpp(hpp, ret);
+
+	/* the first hpp_list_node is for overhead columns */
+	fmt_node = list_first_entry(&hists->hpp_formats,
+				    struct perf_hpp_list_node, list);
+	perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+		/*
+		 * If there's no field_sep, we still need
+		 * to display initial '  '.
+		 */
+		if (!sep || !first) {
+			ret = scnprintf(hpp->buf, hpp->size, "%s", sep ?: "  ");
+			advance_hpp(hpp, ret);
+		} else
+			first = false;
+
+		if (perf_hpp__use_color() && fmt->color)
+			ret = fmt->color(fmt, hpp, he);
+		else
+			ret = fmt->entry(fmt, hpp, he);
+
+		ret = hist_entry__snprintf_alignment(he, hpp, fmt, ret);
+		advance_hpp(hpp, ret);
+	}
+
+	if (!sep)
+		ret = scnprintf(hpp->buf, hpp->size, "%*s",
+				(hists->nr_hpp_node - 2) * HIERARCHY_INDENT, "");
+	advance_hpp(hpp, ret);
+
+	printed += fprintf(fp, "%s", buf);
+
+	perf_hpp_list__for_each_format(he->hpp_list, fmt) {
+		hpp->buf  = buf;
+		hpp->size = size;
+
+		/*
+		 * No need to call hist_entry__snprintf_alignment() since this
+		 * fmt is always the last column in the hierarchy mode.
+		 */
+		if (perf_hpp__use_color() && fmt->color)
+			fmt->color(fmt, hpp, he);
+		else
+			fmt->entry(fmt, hpp, he);
+
+		/*
+		 * dynamic entries are right-aligned but we want left-aligned
+		 * in the hierarchy mode
+		 */
+		printed += fprintf(fp, "%s%s", sep ?: "  ", ltrim(buf));
+	}
+	printed += putc('\n', fp);
+
+	if (symbol_conf.use_callchain && he->leaf) {
+		u64 total = hists__total_period(hists);
+
+		printed += hist_entry_callchain__fprintf(he, total, 0, fp);
+		goto out;
+	}
+
+out:
+	return printed;
+}
+
 static int hist_entry__fprintf(struct hist_entry *he, size_t size,
 			       struct hists *hists,
 			       char *bf, size_t bfsz, FILE *fp)
@@ -418,24 +500,134 @@
 		.buf		= bf,
 		.size		= size,
 	};
+	u64 total_period = hists->stats.total_period;
 
 	if (size == 0 || size > bfsz)
 		size = hpp.size = bfsz;
 
+	if (symbol_conf.report_hierarchy)
+		return hist_entry__hierarchy_fprintf(he, &hpp, hists, fp);
+
 	hist_entry__snprintf(he, &hpp);
 
 	ret = fprintf(fp, "%s\n", bf);
 
 	if (symbol_conf.use_callchain)
-		ret += hist_entry__callchain_fprintf(he, hists, fp);
+		ret += hist_entry_callchain__fprintf(he, total_period, 0, fp);
 
 	return ret;
 }
 
+static int print_hierarchy_indent(const char *sep, int indent,
+				  const char *line, FILE *fp)
+{
+	if (sep != NULL || indent < 2)
+		return 0;
+
+	return fprintf(fp, "%-.*s", (indent - 2) * HIERARCHY_INDENT, line);
+}
+
+static int print_hierarchy_header(struct hists *hists, struct perf_hpp *hpp,
+				  const char *sep, FILE *fp)
+{
+	bool first_node, first_col;
+	int indent;
+	int depth;
+	unsigned width = 0;
+	unsigned header_width = 0;
+	struct perf_hpp_fmt *fmt;
+	struct perf_hpp_list_node *fmt_node;
+
+	indent = hists->nr_hpp_node;
+
+	/* preserve max indent depth for column headers */
+	print_hierarchy_indent(sep, indent, spaces, fp);
+
+	/* the first hpp_list_node is for overhead columns */
+	fmt_node = list_first_entry(&hists->hpp_formats,
+				    struct perf_hpp_list_node, list);
+
+	perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+		fmt->header(fmt, hpp, hists_to_evsel(hists));
+		fprintf(fp, "%s%s", hpp->buf, sep ?: "  ");
+	}
+
+	/* combine sort headers with ' / ' */
+	first_node = true;
+	list_for_each_entry_continue(fmt_node, &hists->hpp_formats, list) {
+		if (!first_node)
+			header_width += fprintf(fp, " / ");
+		first_node = false;
+
+		first_col = true;
+		perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+			if (perf_hpp__should_skip(fmt, hists))
+				continue;
+
+			if (!first_col)
+				header_width += fprintf(fp, "+");
+			first_col = false;
+
+			fmt->header(fmt, hpp, hists_to_evsel(hists));
+			rtrim(hpp->buf);
+
+			header_width += fprintf(fp, "%s", ltrim(hpp->buf));
+		}
+	}
+
+	fprintf(fp, "\n# ");
+
+	/* preserve max indent depth for initial dots */
+	print_hierarchy_indent(sep, indent, dots, fp);
+
+	/* the first hpp_list_node is for overhead columns */
+	fmt_node = list_first_entry(&hists->hpp_formats,
+				    struct perf_hpp_list_node, list);
+
+	first_col = true;
+	perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+		if (!first_col)
+			fprintf(fp, "%s", sep ?: "..");
+		first_col = false;
+
+		width = fmt->width(fmt, hpp, hists_to_evsel(hists));
+		fprintf(fp, "%.*s", width, dots);
+	}
+
+	depth = 0;
+	list_for_each_entry_continue(fmt_node, &hists->hpp_formats, list) {
+		first_col = true;
+		width = depth * HIERARCHY_INDENT;
+
+		perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+			if (perf_hpp__should_skip(fmt, hists))
+				continue;
+
+			if (!first_col)
+				width++;  /* for '+' sign between column header */
+			first_col = false;
+
+			width += fmt->width(fmt, hpp, hists_to_evsel(hists));
+		}
+
+		if (width > header_width)
+			header_width = width;
+
+		depth++;
+	}
+
+	fprintf(fp, "%s%-.*s", sep ?: "  ", header_width, dots);
+
+	fprintf(fp, "\n#\n");
+
+	return 2;
+}
+
 size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
 		      int max_cols, float min_pcnt, FILE *fp)
 {
 	struct perf_hpp_fmt *fmt;
+	struct perf_hpp_list_node *fmt_node;
 	struct rb_node *nd;
 	size_t ret = 0;
 	unsigned int width;
@@ -449,10 +641,11 @@
 	bool first = true;
 	size_t linesz;
 	char *line = NULL;
+	unsigned indent;
 
 	init_rem_hits();
 
-	perf_hpp__for_each_format(fmt)
+	hists__for_each_format(hists, fmt)
 		perf_hpp__reset_width(fmt, hists);
 
 	if (symbol_conf.col_width_list_str)
@@ -463,7 +656,16 @@
 
 	fprintf(fp, "# ");
 
-	perf_hpp__for_each_format(fmt) {
+	if (symbol_conf.report_hierarchy) {
+		list_for_each_entry(fmt_node, &hists->hpp_formats, list) {
+			perf_hpp_list__for_each_format(&fmt_node->hpp, fmt)
+				perf_hpp__reset_width(fmt, hists);
+		}
+		nr_rows += print_hierarchy_header(hists, &dummy_hpp, sep, fp);
+		goto print_entries;
+	}
+
+	hists__for_each_format(hists, fmt) {
 		if (perf_hpp__should_skip(fmt, hists))
 			continue;
 
@@ -487,7 +689,7 @@
 
 	fprintf(fp, "# ");
 
-	perf_hpp__for_each_format(fmt) {
+	hists__for_each_format(hists, fmt) {
 		unsigned int i;
 
 		if (perf_hpp__should_skip(fmt, hists))
@@ -520,7 +722,9 @@
 		goto out;
 	}
 
-	for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
+	indent = hists__overhead_width(hists) + 4;
+
+	for (nd = rb_first(&hists->entries); nd; nd = __rb_hierarchy_next(nd, HMD_FORCE_CHILD)) {
 		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
 		float percent;
 
@@ -536,6 +740,20 @@
 		if (max_rows && ++nr_rows >= max_rows)
 			break;
 
+		/*
+		 * If all children are filtered out or percent-limited,
+		 * display "no entry >= x.xx%" message.
+		 */
+		if (!h->leaf && !hist_entry__has_hierarchy_children(h, min_pcnt)) {
+			int depth = hists->nr_hpp_node + h->depth + 1;
+
+			print_hierarchy_indent(sep, depth, spaces, fp);
+			fprintf(fp, "%*sno entry >= %.2f%%\n", indent, "", min_pcnt);
+
+			if (max_rows && ++nr_rows >= max_rows)
+				break;
+		}
+
 		if (h->ms.map == NULL && verbose > 1) {
 			__map_groups__fprintf_maps(h->thread->mg,
 						   MAP__FUNCTION, fp);

diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 5eec53a..eea25e2 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build

@@ -82,6 +82,7 @@
 libperf-y += parse-regs-options.o
 libperf-y += term.o
 libperf-y += help-unknown-cmd.o
+libperf-y += mem-events.o
 
 libperf-$(CONFIG_LIBBPF) += bpf-loader.o
 libperf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o
@@ -105,8 +106,17 @@
 
 libperf-$(CONFIG_ZLIB) += zlib.o
 libperf-$(CONFIG_LZMA) += lzma.o
+libperf-y += demangle-java.o
+
+ifdef CONFIG_JITDUMP
+libperf-$(CONFIG_LIBELF) += jitdump.o
+libperf-$(CONFIG_LIBELF) += genelf.o
+libperf-$(CONFIG_LIBELF) += genelf_debug.o
+endif
 
 CFLAGS_config.o   += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
+# avoid compiler warnings in 32-bit mode
+CFLAGS_genelf_debug.o  += -Wno-packed
 
 $(OUTPUT)util/parse-events-flex.c: util/parse-events.l $(OUTPUT)util/parse-events-bison.c
 	$(call rule_mkdir)

diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 360fda0..ec164fe 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c

@@ -478,10 +478,11 @@
 			 heap_array[last].ordinal);
 }
 
-size_t auxtrace_record__info_priv_size(struct auxtrace_record *itr)
+size_t auxtrace_record__info_priv_size(struct auxtrace_record *itr,
+				       struct perf_evlist *evlist)
 {
 	if (itr)
-		return itr->info_priv_size(itr);
+		return itr->info_priv_size(itr, evlist);
 	return 0;
 }
 
@@ -852,7 +853,7 @@
 	int err;
 
 	pr_debug2("Synthesizing auxtrace information\n");
-	priv_size = auxtrace_record__info_priv_size(itr);
+	priv_size = auxtrace_record__info_priv_size(itr, session->evlist);
 	ev = zalloc(sizeof(struct auxtrace_info_event) + priv_size);
 	if (!ev)
 		return -ENOMEM;

diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index b86f90db..e5a8e2d 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h

@@ -293,7 +293,8 @@
 	int (*recording_options)(struct auxtrace_record *itr,
 				 struct perf_evlist *evlist,
 				 struct record_opts *opts);
-	size_t (*info_priv_size)(struct auxtrace_record *itr);
+	size_t (*info_priv_size)(struct auxtrace_record *itr,
+				 struct perf_evlist *evlist);
 	int (*info_fill)(struct auxtrace_record *itr,
 			 struct perf_session *session,
 			 struct auxtrace_info_event *auxtrace_info,
@@ -429,7 +430,8 @@
 int auxtrace_record__options(struct auxtrace_record *itr,
 			     struct perf_evlist *evlist,
 			     struct record_opts *opts);
-size_t auxtrace_record__info_priv_size(struct auxtrace_record *itr);
+size_t auxtrace_record__info_priv_size(struct auxtrace_record *itr,
+				       struct perf_evlist *evlist);
 int auxtrace_record__info_fill(struct auxtrace_record *itr,
 			       struct perf_session *session,
 			       struct auxtrace_info_event *auxtrace_info,

diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index 540a7ef..0967ce6 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c

@@ -7,6 +7,7 @@
 
 #include <linux/bpf.h>
 #include <bpf/libbpf.h>
+#include <bpf/bpf.h>
 #include <linux/err.h>
 #include <linux/string.h>
 #include "perf.h"
@@ -16,6 +17,7 @@
 #include "llvm-utils.h"
 #include "probe-event.h"
 #include "probe-finder.h" // for MAX_PROBES
+#include "parse-events.h"
 #include "llvm-utils.h"
 
 #define DEFINE_PRINT_FN(name, level) \
@@ -108,8 +110,8 @@
 }
 
 static void
-bpf_prog_priv__clear(struct bpf_program *prog __maybe_unused,
-		     void *_priv)
+clear_prog_priv(struct bpf_program *prog __maybe_unused,
+		void *_priv)
 {
 	struct bpf_prog_priv *priv = _priv;
 
@@ -337,7 +339,7 @@
 	}
 	pr_debug("bpf: config '%s' is ok\n", config_str);
 
-	err = bpf_program__set_private(prog, priv, bpf_prog_priv__clear);
+	err = bpf_program__set_private(prog, priv, clear_prog_priv);
 	if (err) {
 		pr_debug("Failed to set priv for program '%s'\n", config_str);
 		goto errout;
@@ -739,6 +741,682 @@
 	return 0;
 }
 
+enum bpf_map_op_type {
+	BPF_MAP_OP_SET_VALUE,
+	BPF_MAP_OP_SET_EVSEL,
+};
+
+enum bpf_map_key_type {
+	BPF_MAP_KEY_ALL,
+	BPF_MAP_KEY_RANGES,
+};
+
+struct bpf_map_op {
+	struct list_head list;
+	enum bpf_map_op_type op_type;
+	enum bpf_map_key_type key_type;
+	union {
+		struct parse_events_array array;
+	} k;
+	union {
+		u64 value;
+		struct perf_evsel *evsel;
+	} v;
+};
+
+struct bpf_map_priv {
+	struct list_head ops_list;
+};
+
+static void
+bpf_map_op__delete(struct bpf_map_op *op)
+{
+	if (!list_empty(&op->list))
+		list_del(&op->list);
+	if (op->key_type == BPF_MAP_KEY_RANGES)
+		parse_events__clear_array(&op->k.array);
+	free(op);
+}
+
+static void
+bpf_map_priv__purge(struct bpf_map_priv *priv)
+{
+	struct bpf_map_op *pos, *n;
+
+	list_for_each_entry_safe(pos, n, &priv->ops_list, list) {
+		list_del_init(&pos->list);
+		bpf_map_op__delete(pos);
+	}
+}
+
+static void
+bpf_map_priv__clear(struct bpf_map *map __maybe_unused,
+		    void *_priv)
+{
+	struct bpf_map_priv *priv = _priv;
+
+	bpf_map_priv__purge(priv);
+	free(priv);
+}
+
+static int
+bpf_map_op_setkey(struct bpf_map_op *op, struct parse_events_term *term)
+{
+	op->key_type = BPF_MAP_KEY_ALL;
+	if (!term)
+		return 0;
+
+	if (term->array.nr_ranges) {
+		size_t memsz = term->array.nr_ranges *
+				sizeof(op->k.array.ranges[0]);
+
+		op->k.array.ranges = memdup(term->array.ranges, memsz);
+		if (!op->k.array.ranges) {
+			pr_debug("No enough memory to alloc indices for map\n");
+			return -ENOMEM;
+		}
+		op->key_type = BPF_MAP_KEY_RANGES;
+		op->k.array.nr_ranges = term->array.nr_ranges;
+	}
+	return 0;
+}
+
+static struct bpf_map_op *
+bpf_map_op__new(struct parse_events_term *term)
+{
+	struct bpf_map_op *op;
+	int err;
+
+	op = zalloc(sizeof(*op));
+	if (!op) {
+		pr_debug("Failed to alloc bpf_map_op\n");
+		return ERR_PTR(-ENOMEM);
+	}
+	INIT_LIST_HEAD(&op->list);
+
+	err = bpf_map_op_setkey(op, term);
+	if (err) {
+		free(op);
+		return ERR_PTR(err);
+	}
+	return op;
+}
+
+static int
+bpf_map__add_op(struct bpf_map *map, struct bpf_map_op *op)
+{
+	struct bpf_map_priv *priv;
+	const char *map_name;
+	int err;
+
+	map_name = bpf_map__get_name(map);
+	err = bpf_map__get_private(map, (void **)&priv);
+	if (err) {
+		pr_debug("Failed to get private from map %s\n", map_name);
+		return err;
+	}
+
+	if (!priv) {
+		priv = zalloc(sizeof(*priv));
+		if (!priv) {
+			pr_debug("No enough memory to alloc map private\n");
+			return -ENOMEM;
+		}
+		INIT_LIST_HEAD(&priv->ops_list);
+
+		if (bpf_map__set_private(map, priv, bpf_map_priv__clear)) {
+			free(priv);
+			return -BPF_LOADER_ERRNO__INTERNAL;
+		}
+	}
+
+	list_add_tail(&op->list, &priv->ops_list);
+	return 0;
+}
+
+static struct bpf_map_op *
+bpf_map__add_newop(struct bpf_map *map, struct parse_events_term *term)
+{
+	struct bpf_map_op *op;
+	int err;
+
+	op = bpf_map_op__new(term);
+	if (IS_ERR(op))
+		return op;
+
+	err = bpf_map__add_op(map, op);
+	if (err) {
+		bpf_map_op__delete(op);
+		return ERR_PTR(err);
+	}
+	return op;
+}
+
+static int
+__bpf_map__config_value(struct bpf_map *map,
+			struct parse_events_term *term)
+{
+	struct bpf_map_def def;
+	struct bpf_map_op *op;
+	const char *map_name;
+	int err;
+
+	map_name = bpf_map__get_name(map);
+
+	err = bpf_map__get_def(map, &def);
+	if (err) {
+		pr_debug("Unable to get map definition from '%s'\n",
+			 map_name);
+		return -BPF_LOADER_ERRNO__INTERNAL;
+	}
+
+	if (def.type != BPF_MAP_TYPE_ARRAY) {
+		pr_debug("Map %s type is not BPF_MAP_TYPE_ARRAY\n",
+			 map_name);
+		return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE;
+	}
+	if (def.key_size < sizeof(unsigned int)) {
+		pr_debug("Map %s has incorrect key size\n", map_name);
+		return -BPF_LOADER_ERRNO__OBJCONF_MAP_KEYSIZE;
+	}
+	switch (def.value_size) {
+	case 1:
+	case 2:
+	case 4:
+	case 8:
+		break;
+	default:
+		pr_debug("Map %s has incorrect value size\n", map_name);
+		return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUESIZE;
+	}
+
+	op = bpf_map__add_newop(map, term);
+	if (IS_ERR(op))
+		return PTR_ERR(op);
+	op->op_type = BPF_MAP_OP_SET_VALUE;
+	op->v.value = term->val.num;
+	return 0;
+}
+
+static int
+bpf_map__config_value(struct bpf_map *map,
+		      struct parse_events_term *term,
+		      struct perf_evlist *evlist __maybe_unused)
+{
+	if (!term->err_val) {
+		pr_debug("Config value not set\n");
+		return -BPF_LOADER_ERRNO__OBJCONF_CONF;
+	}
+
+	if (term->type_val != PARSE_EVENTS__TERM_TYPE_NUM) {
+		pr_debug("ERROR: wrong value type for 'value'\n");
+		return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE;
+	}
+
+	return __bpf_map__config_value(map, term);
+}
+
+static int
+__bpf_map__config_event(struct bpf_map *map,
+			struct parse_events_term *term,
+			struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel;
+	struct bpf_map_def def;
+	struct bpf_map_op *op;
+	const char *map_name;
+	int err;
+
+	map_name = bpf_map__get_name(map);
+	evsel = perf_evlist__find_evsel_by_str(evlist, term->val.str);
+	if (!evsel) {
+		pr_debug("Event (for '%s') '%s' doesn't exist\n",
+			 map_name, term->val.str);
+		return -BPF_LOADER_ERRNO__OBJCONF_MAP_NOEVT;
+	}
+
+	err = bpf_map__get_def(map, &def);
+	if (err) {
+		pr_debug("Unable to get map definition from '%s'\n",
+			 map_name);
+		return err;
+	}
+
+	/*
+	 * No need to check key_size and value_size:
+	 * kernel has already checked them.
+	 */
+	if (def.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
+		pr_debug("Map %s type is not BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
+			 map_name);
+		return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE;
+	}
+
+	op = bpf_map__add_newop(map, term);
+	if (IS_ERR(op))
+		return PTR_ERR(op);
+	op->op_type = BPF_MAP_OP_SET_EVSEL;
+	op->v.evsel = evsel;
+	return 0;
+}
+
+static int
+bpf_map__config_event(struct bpf_map *map,
+		      struct parse_events_term *term,
+		      struct perf_evlist *evlist)
+{
+	if (!term->err_val) {
+		pr_debug("Config value not set\n");
+		return -BPF_LOADER_ERRNO__OBJCONF_CONF;
+	}
+
+	if (term->type_val != PARSE_EVENTS__TERM_TYPE_STR) {
+		pr_debug("ERROR: wrong value type for 'event'\n");
+		return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE;
+	}
+
+	return __bpf_map__config_event(map, term, evlist);
+}
+
+struct bpf_obj_config__map_func {
+	const char *config_opt;
+	int (*config_func)(struct bpf_map *, struct parse_events_term *,
+			   struct perf_evlist *);
+};
+
+struct bpf_obj_config__map_func bpf_obj_config__map_funcs[] = {
+	{"value", bpf_map__config_value},
+	{"event", bpf_map__config_event},
+};
+
+static int
+config_map_indices_range_check(struct parse_events_term *term,
+			       struct bpf_map *map,
+			       const char *map_name)
+{
+	struct parse_events_array *array = &term->array;
+	struct bpf_map_def def;
+	unsigned int i;
+	int err;
+
+	if (!array->nr_ranges)
+		return 0;
+	if (!array->ranges) {
+		pr_debug("ERROR: map %s: array->nr_ranges is %d but range array is NULL\n",
+			 map_name, (int)array->nr_ranges);
+		return -BPF_LOADER_ERRNO__INTERNAL;
+	}
+
+	err = bpf_map__get_def(map, &def);
+	if (err) {
+		pr_debug("ERROR: Unable to get map definition from '%s'\n",
+			 map_name);
+		return -BPF_LOADER_ERRNO__INTERNAL;
+	}
+
+	for (i = 0; i < array->nr_ranges; i++) {
+		unsigned int start = array->ranges[i].start;
+		size_t length = array->ranges[i].length;
+		unsigned int idx = start + length - 1;
+
+		if (idx >= def.max_entries) {
+			pr_debug("ERROR: index %d too large\n", idx);
+			return -BPF_LOADER_ERRNO__OBJCONF_MAP_IDX2BIG;
+		}
+	}
+	return 0;
+}
+
+static int
+bpf__obj_config_map(struct bpf_object *obj,
+		    struct parse_events_term *term,
+		    struct perf_evlist *evlist,
+		    int *key_scan_pos)
+{
+	/* key is "map:<mapname>.<config opt>" */
+	char *map_name = strdup(term->config + sizeof("map:") - 1);
+	struct bpf_map *map;
+	int err = -BPF_LOADER_ERRNO__OBJCONF_OPT;
+	char *map_opt;
+	size_t i;
+
+	if (!map_name)
+		return -ENOMEM;
+
+	map_opt = strchr(map_name, '.');
+	if (!map_opt) {
+		pr_debug("ERROR: Invalid map config: %s\n", map_name);
+		goto out;
+	}
+
+	*map_opt++ = '\0';
+	if (*map_opt == '\0') {
+		pr_debug("ERROR: Invalid map option: %s\n", term->config);
+		goto out;
+	}
+
+	map = bpf_object__get_map_by_name(obj, map_name);
+	if (!map) {
+		pr_debug("ERROR: Map %s doesn't exist\n", map_name);
+		err = -BPF_LOADER_ERRNO__OBJCONF_MAP_NOTEXIST;
+		goto out;
+	}
+
+	*key_scan_pos += strlen(map_opt);
+	err = config_map_indices_range_check(term, map, map_name);
+	if (err)
+		goto out;
+	*key_scan_pos -= strlen(map_opt);
+
+	for (i = 0; i < ARRAY_SIZE(bpf_obj_config__map_funcs); i++) {
+		struct bpf_obj_config__map_func *func =
+				&bpf_obj_config__map_funcs[i];
+
+		if (strcmp(map_opt, func->config_opt) == 0) {
+			err = func->config_func(map, term, evlist);
+			goto out;
+		}
+	}
+
+	pr_debug("ERROR: Invalid map config option '%s'\n", map_opt);
+	err = -BPF_LOADER_ERRNO__OBJCONF_MAP_OPT;
+out:
+	free(map_name);
+	if (!err)
+		key_scan_pos += strlen(map_opt);
+	return err;
+}
+
+int bpf__config_obj(struct bpf_object *obj,
+		    struct parse_events_term *term,
+		    struct perf_evlist *evlist,
+		    int *error_pos)
+{
+	int key_scan_pos = 0;
+	int err;
+
+	if (!obj || !term || !term->config)
+		return -EINVAL;
+
+	if (!prefixcmp(term->config, "map:")) {
+		key_scan_pos = sizeof("map:") - 1;
+		err = bpf__obj_config_map(obj, term, evlist, &key_scan_pos);
+		goto out;
+	}
+	err = -BPF_LOADER_ERRNO__OBJCONF_OPT;
+out:
+	if (error_pos)
+		*error_pos = key_scan_pos;
+	return err;
+
+}
+
+typedef int (*map_config_func_t)(const char *name, int map_fd,
+				 struct bpf_map_def *pdef,
+				 struct bpf_map_op *op,
+				 void *pkey, void *arg);
+
+static int
+foreach_key_array_all(map_config_func_t func,
+		      void *arg, const char *name,
+		      int map_fd, struct bpf_map_def *pdef,
+		      struct bpf_map_op *op)
+{
+	unsigned int i;
+	int err;
+
+	for (i = 0; i < pdef->max_entries; i++) {
+		err = func(name, map_fd, pdef, op, &i, arg);
+		if (err) {
+			pr_debug("ERROR: failed to insert value to %s[%u]\n",
+				 name, i);
+			return err;
+		}
+	}
+	return 0;
+}
+
+static int
+foreach_key_array_ranges(map_config_func_t func, void *arg,
+			 const char *name, int map_fd,
+			 struct bpf_map_def *pdef,
+			 struct bpf_map_op *op)
+{
+	unsigned int i, j;
+	int err;
+
+	for (i = 0; i < op->k.array.nr_ranges; i++) {
+		unsigned int start = op->k.array.ranges[i].start;
+		size_t length = op->k.array.ranges[i].length;
+
+		for (j = 0; j < length; j++) {
+			unsigned int idx = start + j;
+
+			err = func(name, map_fd, pdef, op, &idx, arg);
+			if (err) {
+				pr_debug("ERROR: failed to insert value to %s[%u]\n",
+					 name, idx);
+				return err;
+			}
+		}
+	}
+	return 0;
+}
+
+static int
+bpf_map_config_foreach_key(struct bpf_map *map,
+			   map_config_func_t func,
+			   void *arg)
+{
+	int err, map_fd;
+	const char *name;
+	struct bpf_map_op *op;
+	struct bpf_map_def def;
+	struct bpf_map_priv *priv;
+
+	name = bpf_map__get_name(map);
+
+	err = bpf_map__get_private(map, (void **)&priv);
+	if (err) {
+		pr_debug("ERROR: failed to get private from map %s\n", name);
+		return -BPF_LOADER_ERRNO__INTERNAL;
+	}
+	if (!priv || list_empty(&priv->ops_list)) {
+		pr_debug("INFO: nothing to config for map %s\n", name);
+		return 0;
+	}
+
+	err = bpf_map__get_def(map, &def);
+	if (err) {
+		pr_debug("ERROR: failed to get definition from map %s\n", name);
+		return -BPF_LOADER_ERRNO__INTERNAL;
+	}
+	map_fd = bpf_map__get_fd(map);
+	if (map_fd < 0) {
+		pr_debug("ERROR: failed to get fd from map %s\n", name);
+		return map_fd;
+	}
+
+	list_for_each_entry(op, &priv->ops_list, list) {
+		switch (def.type) {
+		case BPF_MAP_TYPE_ARRAY:
+		case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
+			switch (op->key_type) {
+			case BPF_MAP_KEY_ALL:
+				err = foreach_key_array_all(func, arg, name,
+							    map_fd, &def, op);
+				break;
+			case BPF_MAP_KEY_RANGES:
+				err = foreach_key_array_ranges(func, arg, name,
+							       map_fd, &def,
+							       op);
+				break;
+			default:
+				pr_debug("ERROR: keytype for map '%s' invalid\n",
+					 name);
+				return -BPF_LOADER_ERRNO__INTERNAL;
+			}
+			if (err)
+				return err;
+			break;
+		default:
+			pr_debug("ERROR: type of '%s' incorrect\n", name);
+			return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE;
+		}
+	}
+
+	return 0;
+}
+
+static int
+apply_config_value_for_key(int map_fd, void *pkey,
+			   size_t val_size, u64 val)
+{
+	int err = 0;
+
+	switch (val_size) {
+	case 1: {
+		u8 _val = (u8)(val);
+		err = bpf_map_update_elem(map_fd, pkey, &_val, BPF_ANY);
+		break;
+	}
+	case 2: {
+		u16 _val = (u16)(val);
+		err = bpf_map_update_elem(map_fd, pkey, &_val, BPF_ANY);
+		break;
+	}
+	case 4: {
+		u32 _val = (u32)(val);
+		err = bpf_map_update_elem(map_fd, pkey, &_val, BPF_ANY);
+		break;
+	}
+	case 8: {
+		err = bpf_map_update_elem(map_fd, pkey, &val, BPF_ANY);
+		break;
+	}
+	default:
+		pr_debug("ERROR: invalid value size\n");
+		return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUESIZE;
+	}
+	if (err && errno)
+		err = -errno;
+	return err;
+}
+
+static int
+apply_config_evsel_for_key(const char *name, int map_fd, void *pkey,
+			   struct perf_evsel *evsel)
+{
+	struct xyarray *xy = evsel->fd;
+	struct perf_event_attr *attr;
+	unsigned int key, events;
+	bool check_pass = false;
+	int *evt_fd;
+	int err;
+
+	if (!xy) {
+		pr_debug("ERROR: evsel not ready for map %s\n", name);
+		return -BPF_LOADER_ERRNO__INTERNAL;
+	}
+
+	if (xy->row_size / xy->entry_size != 1) {
+		pr_debug("ERROR: Dimension of target event is incorrect for map %s\n",
+			 name);
+		return -BPF_LOADER_ERRNO__OBJCONF_MAP_EVTDIM;
+	}
+
+	attr = &evsel->attr;
+	if (attr->inherit) {
+		pr_debug("ERROR: Can't put inherit event into map %s\n", name);
+		return -BPF_LOADER_ERRNO__OBJCONF_MAP_EVTINH;
+	}
+
+	if (perf_evsel__is_bpf_output(evsel))
+		check_pass = true;
+	if (attr->type == PERF_TYPE_RAW)
+		check_pass = true;
+	if (attr->type == PERF_TYPE_HARDWARE)
+		check_pass = true;
+	if (!check_pass) {
+		pr_debug("ERROR: Event type is wrong for map %s\n", name);
+		return -BPF_LOADER_ERRNO__OBJCONF_MAP_EVTTYPE;
+	}
+
+	events = xy->entries / (xy->row_size / xy->entry_size);
+	key = *((unsigned int *)pkey);
+	if (key >= events) {
+		pr_debug("ERROR: there is no event %d for map %s\n",
+			 key, name);
+		return -BPF_LOADER_ERRNO__OBJCONF_MAP_MAPSIZE;
+	}
+	evt_fd = xyarray__entry(xy, key, 0);
+	err = bpf_map_update_elem(map_fd, pkey, evt_fd, BPF_ANY);
+	if (err && errno)
+		err = -errno;
+	return err;
+}
+
+static int
+apply_obj_config_map_for_key(const char *name, int map_fd,
+			     struct bpf_map_def *pdef __maybe_unused,
+			     struct bpf_map_op *op,
+			     void *pkey, void *arg __maybe_unused)
+{
+	int err;
+
+	switch (op->op_type) {
+	case BPF_MAP_OP_SET_VALUE:
+		err = apply_config_value_for_key(map_fd, pkey,
+						 pdef->value_size,
+						 op->v.value);
+		break;
+	case BPF_MAP_OP_SET_EVSEL:
+		err = apply_config_evsel_for_key(name, map_fd, pkey,
+						 op->v.evsel);
+		break;
+	default:
+		pr_debug("ERROR: unknown value type for '%s'\n", name);
+		err = -BPF_LOADER_ERRNO__INTERNAL;
+	}
+	return err;
+}
+
+static int
+apply_obj_config_map(struct bpf_map *map)
+{
+	return bpf_map_config_foreach_key(map,
+					  apply_obj_config_map_for_key,
+					  NULL);
+}
+
+static int
+apply_obj_config_object(struct bpf_object *obj)
+{
+	struct bpf_map *map;
+	int err;
+
+	bpf_map__for_each(map, obj) {
+		err = apply_obj_config_map(map);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+int bpf__apply_obj_config(void)
+{
+	struct bpf_object *obj, *tmp;
+	int err;
+
+	bpf_object__for_each_safe(obj, tmp) {
+		err = apply_obj_config_object(obj);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
 #define ERRNO_OFFSET(e)		((e) - __BPF_LOADER_ERRNO__START)
 #define ERRCODE_OFFSET(c)	ERRNO_OFFSET(BPF_LOADER_ERRNO__##c)
 #define NR_ERRNO	(__BPF_LOADER_ERRNO__END - __BPF_LOADER_ERRNO__START)
@@ -753,6 +1431,20 @@
 	[ERRCODE_OFFSET(PROLOGUE)]	= "Failed to generate prologue",
 	[ERRCODE_OFFSET(PROLOGUE2BIG)]	= "Prologue too big for program",
 	[ERRCODE_OFFSET(PROLOGUEOOB)]	= "Offset out of bound for prologue",
+	[ERRCODE_OFFSET(OBJCONF_OPT)]	= "Invalid object config option",
+	[ERRCODE_OFFSET(OBJCONF_CONF)]	= "Config value not set (missing '=')",
+	[ERRCODE_OFFSET(OBJCONF_MAP_OPT)]	= "Invalid object map config option",
+	[ERRCODE_OFFSET(OBJCONF_MAP_NOTEXIST)]	= "Target map doesn't exist",
+	[ERRCODE_OFFSET(OBJCONF_MAP_VALUE)]	= "Incorrect value type for map",
+	[ERRCODE_OFFSET(OBJCONF_MAP_TYPE)]	= "Incorrect map type",
+	[ERRCODE_OFFSET(OBJCONF_MAP_KEYSIZE)]	= "Incorrect map key size",
+	[ERRCODE_OFFSET(OBJCONF_MAP_VALUESIZE)]	= "Incorrect map value size",
+	[ERRCODE_OFFSET(OBJCONF_MAP_NOEVT)]	= "Event not found for map setting",
+	[ERRCODE_OFFSET(OBJCONF_MAP_MAPSIZE)]	= "Invalid map size for event setting",
+	[ERRCODE_OFFSET(OBJCONF_MAP_EVTDIM)]	= "Event dimension too large",
+	[ERRCODE_OFFSET(OBJCONF_MAP_EVTINH)]	= "Doesn't support inherit event",
+	[ERRCODE_OFFSET(OBJCONF_MAP_EVTTYPE)]	= "Wrong event type for map",
+	[ERRCODE_OFFSET(OBJCONF_MAP_IDX2BIG)]	= "Index too large",
 };
 
 static int
@@ -872,3 +1564,29 @@
 	bpf__strerror_end(buf, size);
 	return 0;
 }
+
+int bpf__strerror_config_obj(struct bpf_object *obj __maybe_unused,
+			     struct parse_events_term *term __maybe_unused,
+			     struct perf_evlist *evlist __maybe_unused,
+			     int *error_pos __maybe_unused, int err,
+			     char *buf, size_t size)
+{
+	bpf__strerror_head(err, buf, size);
+	bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE,
+			    "Can't use this config term with this map type");
+	bpf__strerror_end(buf, size);
+	return 0;
+}
+
+int bpf__strerror_apply_obj_config(int err, char *buf, size_t size)
+{
+	bpf__strerror_head(err, buf, size);
+	bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_EVTDIM,
+			    "Cannot set event to BPF map in multi-thread tracing");
+	bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_EVTINH,
+			    "%s (Hint: use -i to turn off inherit)", emsg);
+	bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_EVTTYPE,
+			    "Can only put raw, hardware and BPF output event into a BPF map");
+	bpf__strerror_end(buf, size);
+	return 0;
+}

diff --git a/tools/perf/util/bpf-loader.h b/tools/perf/util/bpf-loader.h
index 6fdc045..be43119 100644
--- a/tools/perf/util/bpf-loader.h
+++ b/tools/perf/util/bpf-loader.h

@@ -10,6 +10,7 @@
 #include <string.h>
 #include <bpf/libbpf.h>
 #include "probe-event.h"
+#include "evlist.h"
 #include "debug.h"
 
 enum bpf_loader_errno {
@@ -24,10 +25,25 @@
 	BPF_LOADER_ERRNO__PROLOGUE,	/* Failed to generate prologue */
 	BPF_LOADER_ERRNO__PROLOGUE2BIG,	/* Prologue too big for program */
 	BPF_LOADER_ERRNO__PROLOGUEOOB,	/* Offset out of bound for prologue */
+	BPF_LOADER_ERRNO__OBJCONF_OPT,	/* Invalid object config option */
+	BPF_LOADER_ERRNO__OBJCONF_CONF,	/* Config value not set (lost '=')) */
+	BPF_LOADER_ERRNO__OBJCONF_MAP_OPT,	/* Invalid object map config option */
+	BPF_LOADER_ERRNO__OBJCONF_MAP_NOTEXIST,	/* Target map not exist */
+	BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE,	/* Incorrect value type for map */
+	BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE,	/* Incorrect map type */
+	BPF_LOADER_ERRNO__OBJCONF_MAP_KEYSIZE,	/* Incorrect map key size */
+	BPF_LOADER_ERRNO__OBJCONF_MAP_VALUESIZE,/* Incorrect map value size */
+	BPF_LOADER_ERRNO__OBJCONF_MAP_NOEVT,	/* Event not found for map setting */
+	BPF_LOADER_ERRNO__OBJCONF_MAP_MAPSIZE,	/* Invalid map size for event setting */
+	BPF_LOADER_ERRNO__OBJCONF_MAP_EVTDIM,	/* Event dimension too large */
+	BPF_LOADER_ERRNO__OBJCONF_MAP_EVTINH,	/* Doesn't support inherit event */
+	BPF_LOADER_ERRNO__OBJCONF_MAP_EVTTYPE,	/* Wrong event type for map */
+	BPF_LOADER_ERRNO__OBJCONF_MAP_IDX2BIG,	/* Index too large */
 	__BPF_LOADER_ERRNO__END,
 };
 
 struct bpf_object;
+struct parse_events_term;
 #define PERF_BPF_PROBE_GROUP "perf_bpf_probe"
 
 typedef int (*bpf_prog_iter_callback_t)(struct probe_trace_event *tev,
@@ -53,6 +69,16 @@
 		       char *buf, size_t size);
 int bpf__foreach_tev(struct bpf_object *obj,
 		     bpf_prog_iter_callback_t func, void *arg);
+
+int bpf__config_obj(struct bpf_object *obj, struct parse_events_term *term,
+		    struct perf_evlist *evlist, int *error_pos);
+int bpf__strerror_config_obj(struct bpf_object *obj,
+			     struct parse_events_term *term,
+			     struct perf_evlist *evlist,
+			     int *error_pos, int err, char *buf,
+			     size_t size);
+int bpf__apply_obj_config(void);
+int bpf__strerror_apply_obj_config(int err, char *buf, size_t size);
 #else
 static inline struct bpf_object *
 bpf__prepare_load(const char *filename __maybe_unused,
@@ -84,6 +110,21 @@
 }
 
 static inline int
+bpf__config_obj(struct bpf_object *obj __maybe_unused,
+		struct parse_events_term *term __maybe_unused,
+		struct perf_evlist *evlist __maybe_unused,
+		int *error_pos __maybe_unused)
+{
+	return 0;
+}
+
+static inline int
+bpf__apply_obj_config(void)
+{
+	return 0;
+}
+
+static inline int
 __bpf_strerror(char *buf, size_t size)
 {
 	if (!size)
@@ -118,5 +159,23 @@
 {
 	return __bpf_strerror(buf, size);
 }
+
+static inline int
+bpf__strerror_config_obj(struct bpf_object *obj __maybe_unused,
+			 struct parse_events_term *term __maybe_unused,
+			 struct perf_evlist *evlist __maybe_unused,
+			 int *error_pos __maybe_unused,
+			 int err __maybe_unused,
+			 char *buf, size_t size)
+{
+	return __bpf_strerror(buf, size);
+}
+
+static inline int
+bpf__strerror_apply_obj_config(int err __maybe_unused,
+			       char *buf, size_t size)
+{
+	return __bpf_strerror(buf, size);
+}
 #endif
 #endif

diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index 6a7e273..f1479ee 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c

@@ -166,6 +166,50 @@
 	return build_id__filename(build_id_hex, bf, size);
 }
 
+bool dso__build_id_is_kmod(const struct dso *dso, char *bf, size_t size)
+{
+	char *id_name, *ch;
+	struct stat sb;
+
+	id_name = dso__build_id_filename(dso, bf, size);
+	if (!id_name)
+		goto err;
+	if (access(id_name, F_OK))
+		goto err;
+	if (lstat(id_name, &sb) == -1)
+		goto err;
+	if ((size_t)sb.st_size > size - 1)
+		goto err;
+	if (readlink(id_name, bf, size - 1) < 0)
+		goto err;
+
+	bf[sb.st_size] = '\0';
+
+	/*
+	 * link should be:
+	 * ../../lib/modules/4.4.0-rc4/kernel/net/ipv4/netfilter/nf_nat_ipv4.ko/a09fe3eb3147dafa4e3b31dbd6257e4d696bdc92
+	 */
+	ch = strrchr(bf, '/');
+	if (!ch)
+		goto err;
+	if (ch - 3 < bf)
+		goto err;
+
+	return strncmp(".ko", ch - 3, 3) == 0;
+err:
+	/*
+	 * If dso__build_id_filename work, get id_name again,
+	 * because id_name points to bf and is broken.
+	 */
+	if (id_name)
+		id_name = dso__build_id_filename(dso, bf, size);
+	pr_err("Invalid build id: %s\n", id_name ? :
+					 dso->long_name ? :
+					 dso->short_name ? :
+					 "[unknown]");
+	return false;
+}
+
 #define dsos__for_each_with_build_id(pos, head)	\
 	list_for_each_entry(pos, head, node)	\
 		if (!pos->has_build_id)		\
@@ -211,6 +255,7 @@
 	dsos__for_each_with_build_id(pos, &machine->dsos.head) {
 		const char *name;
 		size_t name_len;
+		bool in_kernel = false;
 
 		if (!pos->hit)
 			continue;
@@ -227,8 +272,11 @@
 			name_len = pos->long_name_len + 1;
 		}
 
+		in_kernel = pos->kernel ||
+				is_kernel_module(name,
+					PERF_RECORD_MISC_CPUMODE_UNKNOWN);
 		err = write_buildid(name, name_len, pos->build_id, machine->pid,
-				    pos->kernel ? kmisc : umisc, fd);
+				    in_kernel ? kmisc : umisc, fd);
 		if (err)
 			break;
 	}

diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h
index 27a14a8..64af3e2 100644
--- a/tools/perf/util/build-id.h
+++ b/tools/perf/util/build-id.h

@@ -16,6 +16,7 @@
 int filename__sprintf_build_id(const char *pathname, char *sbuild_id);
 
 char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size);
+bool dso__build_id_is_kmod(const struct dso *dso, char *bf, size_t size);
 
 int build_id__mark_dso_hit(struct perf_tool *tool, union perf_event *event,
 			   struct perf_sample *sample, struct perf_evsel *evsel,

diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h
index 07b5d63..3ca453f 100644
--- a/tools/perf/util/cache.h
+++ b/tools/perf/util/cache.h

@@ -23,6 +23,8 @@
 #define PERF_TRACEFS_ENVIRONMENT "PERF_TRACEFS_DIR"
 #define PERF_PAGER_ENVIRONMENT "PERF_PAGER"
 
+extern const char *config_exclusive_filename;
+
 typedef int (*config_fn_t)(const char *, const char *, void *);
 extern int perf_default_config(const char *, const char *, void *);
 extern int perf_config(config_fn_t fn, void *);
@@ -31,6 +33,7 @@
 extern int perf_config_bool(const char *, const char *);
 extern int config_error_nonbool(const char *);
 extern const char *perf_config_dirname(const char *, const char *);
+extern const char *perf_etc_perfconfig(void);
 
 char *alias_lookup(const char *alias);
 int split_cmdline(char *cmdline, const char ***argv);

diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 53c43eb..24b4bd0 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c

@@ -416,7 +416,7 @@
 /*
  * Fill the node with callchain values
  */
-static void
+static int
 fill_node(struct callchain_node *node, struct callchain_cursor *cursor)
 {
 	struct callchain_cursor_node *cursor_node;
@@ -433,7 +433,7 @@
 		call = zalloc(sizeof(*call));
 		if (!call) {
 			perror("not enough memory for the code path tree");
-			return;
+			return -1;
 		}
 		call->ip = cursor_node->ip;
 		call->ms.sym = cursor_node->sym;
@@ -443,6 +443,7 @@
 		callchain_cursor_advance(cursor);
 		cursor_node = callchain_cursor_current(cursor);
 	}
+	return 0;
 }
 
 static struct callchain_node *
@@ -453,7 +454,19 @@
 	struct callchain_node *new;
 
 	new = create_child(parent, false);
-	fill_node(new, cursor);
+	if (new == NULL)
+		return NULL;
+
+	if (fill_node(new, cursor) < 0) {
+		struct callchain_list *call, *tmp;
+
+		list_for_each_entry_safe(call, tmp, &new->val, list) {
+			list_del(&call->list);
+			free(call);
+		}
+		free(new);
+		return NULL;
+	}
 
 	new->children_hit = 0;
 	new->hit = period;
@@ -462,16 +475,32 @@
 	return new;
 }
 
-static s64 match_chain(struct callchain_cursor_node *node,
-		      struct callchain_list *cnode)
+enum match_result {
+	MATCH_ERROR  = -1,
+	MATCH_EQ,
+	MATCH_LT,
+	MATCH_GT,
+};
+
+static enum match_result match_chain(struct callchain_cursor_node *node,
+				     struct callchain_list *cnode)
 {
 	struct symbol *sym = node->sym;
+	u64 left, right;
 
 	if (cnode->ms.sym && sym &&
-	    callchain_param.key == CCKEY_FUNCTION)
-		return cnode->ms.sym->start - sym->start;
-	else
-		return cnode->ip - node->ip;
+	    callchain_param.key == CCKEY_FUNCTION) {
+		left = cnode->ms.sym->start;
+		right = sym->start;
+	} else {
+		left = cnode->ip;
+		right = node->ip;
+	}
+
+	if (left == right)
+		return MATCH_EQ;
+
+	return left > right ? MATCH_GT : MATCH_LT;
 }
 
 /*
@@ -479,7 +508,7 @@
  * give a part of its callchain to the created child.
  * Then create another child to host the given callchain of new branch
  */
-static void
+static int
 split_add_child(struct callchain_node *parent,
 		struct callchain_cursor *cursor,
 		struct callchain_list *to_split,
@@ -491,6 +520,8 @@
 
 	/* split */
 	new = create_child(parent, true);
+	if (new == NULL)
+		return -1;
 
 	/* split the callchain and move a part to the new child */
 	old_tail = parent->val.prev;
@@ -524,6 +555,8 @@
 
 		node = callchain_cursor_current(cursor);
 		new = add_child(parent, cursor, period);
+		if (new == NULL)
+			return -1;
 
 		/*
 		 * This is second child since we moved parent's children
@@ -534,7 +567,7 @@
 		cnode = list_first_entry(&first->val, struct callchain_list,
 					 list);
 
-		if (match_chain(node, cnode) < 0)
+		if (match_chain(node, cnode) == MATCH_LT)
 			pp = &p->rb_left;
 		else
 			pp = &p->rb_right;
@@ -545,14 +578,15 @@
 		parent->hit = period;
 		parent->count = 1;
 	}
+	return 0;
 }
 
-static int
+static enum match_result
 append_chain(struct callchain_node *root,
 	     struct callchain_cursor *cursor,
 	     u64 period);
 
-static void
+static int
 append_chain_children(struct callchain_node *root,
 		      struct callchain_cursor *cursor,
 		      u64 period)
@@ -564,36 +598,42 @@
 
 	node = callchain_cursor_current(cursor);
 	if (!node)
-		return;
+		return -1;
 
 	/* lookup in childrens */
 	while (*p) {
-		s64 ret;
+		enum match_result ret;
 
 		parent = *p;
 		rnode = rb_entry(parent, struct callchain_node, rb_node_in);
 
 		/* If at least first entry matches, rely to children */
 		ret = append_chain(rnode, cursor, period);
-		if (ret == 0)
+		if (ret == MATCH_EQ)
 			goto inc_children_hit;
+		if (ret == MATCH_ERROR)
+			return -1;
 
-		if (ret < 0)
+		if (ret == MATCH_LT)
 			p = &parent->rb_left;
 		else
 			p = &parent->rb_right;
 	}
 	/* nothing in children, add to the current node */
 	rnode = add_child(root, cursor, period);
+	if (rnode == NULL)
+		return -1;
+
 	rb_link_node(&rnode->rb_node_in, parent, p);
 	rb_insert_color(&rnode->rb_node_in, &root->rb_root_in);
 
 inc_children_hit:
 	root->children_hit += period;
 	root->children_count++;
+	return 0;
 }
 
-static int
+static enum match_result
 append_chain(struct callchain_node *root,
 	     struct callchain_cursor *cursor,
 	     u64 period)
@@ -602,7 +642,7 @@
 	u64 start = cursor->pos;
 	bool found = false;
 	u64 matches;
-	int cmp = 0;
+	enum match_result cmp = MATCH_ERROR;
 
 	/*
 	 * Lookup in the current node
@@ -618,7 +658,7 @@
 			break;
 
 		cmp = match_chain(node, cnode);
-		if (cmp)
+		if (cmp != MATCH_EQ)
 			break;
 
 		found = true;
@@ -628,7 +668,7 @@
 
 	/* matches not, relay no the parent */
 	if (!found) {
-		WARN_ONCE(!cmp, "Chain comparison error\n");
+		WARN_ONCE(cmp == MATCH_ERROR, "Chain comparison error\n");
 		return cmp;
 	}
 
@@ -636,21 +676,25 @@
 
 	/* we match only a part of the node. Split it and add the new chain */
 	if (matches < root->val_nr) {
-		split_add_child(root, cursor, cnode, start, matches, period);
-		return 0;
+		if (split_add_child(root, cursor, cnode, start, matches,
+				    period) < 0)
+			return MATCH_ERROR;
+
+		return MATCH_EQ;
 	}
 
 	/* we match 100% of the path, increment the hit */
 	if (matches == root->val_nr && cursor->pos == cursor->nr) {
 		root->hit += period;
 		root->count++;
-		return 0;
+		return MATCH_EQ;
 	}
 
 	/* We match the node and still have a part remaining */
-	append_chain_children(root, cursor, period);
+	if (append_chain_children(root, cursor, period) < 0)
+		return MATCH_ERROR;
 
-	return 0;
+	return MATCH_EQ;
 }
 
 int callchain_append(struct callchain_root *root,
@@ -662,7 +706,8 @@
 
 	callchain_cursor_commit(cursor);
 
-	append_chain_children(&root->node, cursor, period);
+	if (append_chain_children(&root->node, cursor, period) < 0)
+		return -1;
 
 	if (cursor->nr > root->max_depth)
 		root->max_depth = cursor->nr;
@@ -690,7 +735,8 @@
 
 	if (src->hit) {
 		callchain_cursor_commit(cursor);
-		append_chain_children(dst, cursor, src->hit);
+		if (append_chain_children(dst, cursor, src->hit) < 0)
+			return -1;
 	}
 
 	n = rb_first(&src->rb_root_in);

diff --git a/tools/perf/util/color.c b/tools/perf/util/color.c
index e5fb88b..43e84aa 100644
--- a/tools/perf/util/color.c
+++ b/tools/perf/util/color.c

@@ -32,14 +32,15 @@
 	return 0;
 }
 
-int perf_color_default_config(const char *var, const char *value, void *cb)
+int perf_color_default_config(const char *var, const char *value,
+			      void *cb __maybe_unused)
 {
 	if (!strcmp(var, "color.ui")) {
 		perf_use_color_default = perf_config_colorbool(var, value, -1);
 		return 0;
 	}
 
-	return perf_default_config(var, value, cb);
+	return 0;
 }
 
 static int __color_vsnprintf(char *bf, size_t size, const char *color,

diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
index d3e12e3..4e72763 100644
--- a/tools/perf/util/config.c
+++ b/tools/perf/util/config.c

@@ -26,7 +26,7 @@
 static int config_linenr;
 static int config_file_eof;
 
-static const char *config_exclusive_filename;
+const char *config_exclusive_filename;
 
 static int get_next_char(void)
 {
@@ -434,7 +434,7 @@
 	return ret;
 }
 
-static const char *perf_etc_perfconfig(void)
+const char *perf_etc_perfconfig(void)
 {
 	static const char *system_wide;
 	if (!system_wide)

diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index fa93509..9bcf2be 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c

@@ -8,6 +8,10 @@
 #include <linux/bitmap.h>
 #include "asm/bug.h"
 
+static int max_cpu_num;
+static int max_node_num;
+static int *cpunode_map;
+
 static struct cpu_map *cpu_map__default_new(void)
 {
 	struct cpu_map *cpus;
@@ -486,6 +490,32 @@
 		pr_err("Failed to read max nodes, using default of %d\n", max_node_num);
 }
 
+int cpu__max_node(void)
+{
+	if (unlikely(!max_node_num))
+		set_max_node_num();
+
+	return max_node_num;
+}
+
+int cpu__max_cpu(void)
+{
+	if (unlikely(!max_cpu_num))
+		set_max_cpu_num();
+
+	return max_cpu_num;
+}
+
+int cpu__get_node(int cpu)
+{
+	if (unlikely(cpunode_map == NULL)) {
+		pr_debug("cpu_map not initialized\n");
+		return -1;
+	}
+
+	return cpunode_map[cpu];
+}
+
 static int init_cpunode_map(void)
 {
 	int i;

diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index 71c41b9..81a2562 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h

@@ -57,37 +57,11 @@
 	return map ? map->map[0] == -1 : true;
 }
 
-int max_cpu_num;
-int max_node_num;
-int *cpunode_map;
-
 int cpu__setup_cpunode_map(void);
 
-static inline int cpu__max_node(void)
-{
-	if (unlikely(!max_node_num))
-		pr_debug("cpu_map not initialized\n");
-
-	return max_node_num;
-}
-
-static inline int cpu__max_cpu(void)
-{
-	if (unlikely(!max_cpu_num))
-		pr_debug("cpu_map not initialized\n");
-
-	return max_cpu_num;
-}
-
-static inline int cpu__get_node(int cpu)
-{
-	if (unlikely(cpunode_map == NULL)) {
-		pr_debug("cpu_map not initialized\n");
-		return -1;
-	}
-
-	return cpunode_map[cpu];
-}
+int cpu__max_node(void);
+int cpu__max_cpu(void);
+int cpu__get_node(int cpu);
 
 int cpu_map__build_map(struct cpu_map *cpus, struct cpu_map **res,
 		       int (*f)(struct cpu_map *map, int cpu, void *data),

diff --git a/tools/perf/util/ctype.c b/tools/perf/util/ctype.c
index aada3ac..d4a5a21 100644
--- a/tools/perf/util/ctype.c
+++ b/tools/perf/util/ctype.c

@@ -32,8 +32,17 @@
 
 const char *graph_line =
 	"_____________________________________________________________________"
+	"_____________________________________________________________________"
 	"_____________________________________________________________________";
 const char *graph_dotted_line =
 	"---------------------------------------------------------------------"
 	"---------------------------------------------------------------------"
 	"---------------------------------------------------------------------";
+const char *spaces =
+	"                                                                     "
+	"                                                                     "
+	"                                                                     ";
+const char *dots =
+	"....................................................................."
+	"....................................................................."
+	".....................................................................";

diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index 34cd1e4..811af89 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c

@@ -352,6 +352,84 @@
 	return ret;
 }
 
+static int
+add_bpf_output_values(struct bt_ctf_event_class *event_class,
+		      struct bt_ctf_event *event,
+		      struct perf_sample *sample)
+{
+	struct bt_ctf_field_type *len_type, *seq_type;
+	struct bt_ctf_field *len_field, *seq_field;
+	unsigned int raw_size = sample->raw_size;
+	unsigned int nr_elements = raw_size / sizeof(u32);
+	unsigned int i;
+	int ret;
+
+	if (nr_elements * sizeof(u32) != raw_size)
+		pr_warning("Incorrect raw_size (%u) in bpf output event, skip %lu bytes\n",
+			   raw_size, nr_elements * sizeof(u32) - raw_size);
+
+	len_type = bt_ctf_event_class_get_field_by_name(event_class, "raw_len");
+	len_field = bt_ctf_field_create(len_type);
+	if (!len_field) {
+		pr_err("failed to create 'raw_len' for bpf output event\n");
+		ret = -1;
+		goto put_len_type;
+	}
+
+	ret = bt_ctf_field_unsigned_integer_set_value(len_field, nr_elements);
+	if (ret) {
+		pr_err("failed to set field value for raw_len\n");
+		goto put_len_field;
+	}
+	ret = bt_ctf_event_set_payload(event, "raw_len", len_field);
+	if (ret) {
+		pr_err("failed to set payload to raw_len\n");
+		goto put_len_field;
+	}
+
+	seq_type = bt_ctf_event_class_get_field_by_name(event_class, "raw_data");
+	seq_field = bt_ctf_field_create(seq_type);
+	if (!seq_field) {
+		pr_err("failed to create 'raw_data' for bpf output event\n");
+		ret = -1;
+		goto put_seq_type;
+	}
+
+	ret = bt_ctf_field_sequence_set_length(seq_field, len_field);
+	if (ret) {
+		pr_err("failed to set length of 'raw_data'\n");
+		goto put_seq_field;
+	}
+
+	for (i = 0; i < nr_elements; i++) {
+		struct bt_ctf_field *elem_field =
+			bt_ctf_field_sequence_get_field(seq_field, i);
+
+		ret = bt_ctf_field_unsigned_integer_set_value(elem_field,
+				((u32 *)(sample->raw_data))[i]);
+
+		bt_ctf_field_put(elem_field);
+		if (ret) {
+			pr_err("failed to set raw_data[%d]\n", i);
+			goto put_seq_field;
+		}
+	}
+
+	ret = bt_ctf_event_set_payload(event, "raw_data", seq_field);
+	if (ret)
+		pr_err("failed to set payload for raw_data\n");
+
+put_seq_field:
+	bt_ctf_field_put(seq_field);
+put_seq_type:
+	bt_ctf_field_type_put(seq_type);
+put_len_field:
+	bt_ctf_field_put(len_field);
+put_len_type:
+	bt_ctf_field_type_put(len_type);
+	return ret;
+}
+
 static int add_generic_values(struct ctf_writer *cw,
 			      struct bt_ctf_event *event,
 			      struct perf_evsel *evsel,
@@ -597,6 +675,12 @@
 			return -1;
 	}
 
+	if (perf_evsel__is_bpf_output(evsel)) {
+		ret = add_bpf_output_values(event_class, event, sample);
+		if (ret)
+			return -1;
+	}
+
 	cs = ctf_stream(cw, get_sample_cpu(cw, sample, evsel));
 	if (cs) {
 		if (is_flush_needed(cs))
@@ -744,6 +828,25 @@
 	return ret;
 }
 
+static int add_bpf_output_types(struct ctf_writer *cw,
+				struct bt_ctf_event_class *class)
+{
+	struct bt_ctf_field_type *len_type = cw->data.u32;
+	struct bt_ctf_field_type *seq_base_type = cw->data.u32_hex;
+	struct bt_ctf_field_type *seq_type;
+	int ret;
+
+	ret = bt_ctf_event_class_add_field(class, len_type, "raw_len");
+	if (ret)
+		return ret;
+
+	seq_type = bt_ctf_field_type_sequence_create(seq_base_type, "raw_len");
+	if (!seq_type)
+		return -1;
+
+	return bt_ctf_event_class_add_field(class, seq_type, "raw_data");
+}
+
 static int add_generic_types(struct ctf_writer *cw, struct perf_evsel *evsel,
 			     struct bt_ctf_event_class *event_class)
 {
@@ -755,7 +858,8 @@
 	 *                              ctf event header
 	 *   PERF_SAMPLE_READ         - TODO
 	 *   PERF_SAMPLE_CALLCHAIN    - TODO
-	 *   PERF_SAMPLE_RAW          - tracepoint fields are handled separately
+	 *   PERF_SAMPLE_RAW          - tracepoint fields and BPF output
+	 *                              are handled separately
 	 *   PERF_SAMPLE_BRANCH_STACK - TODO
 	 *   PERF_SAMPLE_REGS_USER    - TODO
 	 *   PERF_SAMPLE_STACK_USER   - TODO
@@ -824,6 +928,12 @@
 			goto err;
 	}
 
+	if (perf_evsel__is_bpf_output(evsel)) {
+		ret = add_bpf_output_types(cw, event_class);
+		if (ret)
+			goto err;
+	}
+
 	ret = bt_ctf_stream_class_add_event_class(cw->stream_class, event_class);
 	if (ret) {
 		pr("Failed to add event class into stream.\n");
@@ -858,6 +968,23 @@
 	return 0;
 }
 
+static void cleanup_events(struct perf_session *session)
+{
+	struct perf_evlist *evlist = session->evlist;
+	struct perf_evsel *evsel;
+
+	evlist__for_each(evlist, evsel) {
+		struct evsel_priv *priv;
+
+		priv = evsel->priv;
+		bt_ctf_event_class_put(priv->event_class);
+		zfree(&evsel->priv);
+	}
+
+	perf_evlist__delete(evlist);
+	session->evlist = NULL;
+}
+
 static int setup_streams(struct ctf_writer *cw, struct perf_session *session)
 {
 	struct ctf_stream **stream;
@@ -953,6 +1080,12 @@
 	    bt_ctf_field_type_integer_set_base(type, BT_CTF_INTEGER_BASE_HEXADECIMAL))
 		goto err;
 
+#if __BYTE_ORDER == __BIG_ENDIAN
+	bt_ctf_field_type_set_byte_order(type, BT_CTF_BYTE_ORDER_BIG_ENDIAN);
+#else
+	bt_ctf_field_type_set_byte_order(type, BT_CTF_BYTE_ORDER_LITTLE_ENDIAN);
+#endif
+
 	pr2("Created type: INTEGER %d-bit %ssigned %s\n",
 	    size, sign ? "un" : "", hex ? "hex" : "");
 	return type;
@@ -1100,7 +1233,7 @@
 		return 0;
 	}
 
-	return perf_default_config(var, value, cb);
+	return 0;
 }
 
 int bt_convert__perf2ctf(const char *input, const char *path, bool force)
@@ -1171,6 +1304,7 @@
 		(double) c.events_size / 1024.0 / 1024.0,
 		c.events_count);
 
+	cleanup_events(session);
 	perf_session__delete(session);
 	ctf_writer__cleanup(cw);
 

diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
index 86d9c73..8c4212a 100644
--- a/tools/perf/util/debug.c
+++ b/tools/perf/util/debug.c

@@ -5,6 +5,7 @@
 #include <string.h>
 #include <stdarg.h>
 #include <stdio.h>
+#include <api/debug.h>
 
 #include "cache.h"
 #include "color.h"
@@ -22,7 +23,7 @@
 static int redirect_to_stderr;
 int debug_data_convert;
 
-static int _eprintf(int level, int var, const char *fmt, va_list args)
+int veprintf(int level, int var, const char *fmt, va_list args)
 {
 	int ret = 0;
 
@@ -36,24 +37,19 @@
 	return ret;
 }
 
-int veprintf(int level, int var, const char *fmt, va_list args)
-{
-	return _eprintf(level, var, fmt, args);
-}
-
 int eprintf(int level, int var, const char *fmt, ...)
 {
 	va_list args;
 	int ret;
 
 	va_start(args, fmt);
-	ret = _eprintf(level, var, fmt, args);
+	ret = veprintf(level, var, fmt, args);
 	va_end(args);
 
 	return ret;
 }
 
-static int __eprintf_time(u64 t, const char *fmt, va_list args)
+static int veprintf_time(u64 t, const char *fmt, va_list args)
 {
 	int ret = 0;
 	u64 secs, usecs, nsecs = t;
@@ -75,7 +71,7 @@
 
 	if (var >= level) {
 		va_start(args, fmt);
-		ret = __eprintf_time(t, fmt, args);
+		ret = veprintf_time(t, fmt, args);
 		va_end(args);
 	}
 
@@ -91,7 +87,7 @@
 	va_list args;
 
 	va_start(args, fmt);
-	_eprintf(1, verbose, fmt, args);
+	veprintf(1, verbose, fmt, args);
 	va_end(args);
 	eprintf(1, verbose, "\n");
 }
@@ -110,40 +106,61 @@
 	return ret;
 }
 
+static void trace_event_printer(enum binary_printer_ops op,
+				unsigned int val, void *extra)
+{
+	const char *color = PERF_COLOR_BLUE;
+	union perf_event *event = (union perf_event *)extra;
+	unsigned char ch = (unsigned char)val;
+
+	switch (op) {
+	case BINARY_PRINT_DATA_BEGIN:
+		printf(".");
+		color_fprintf(stdout, color, "\n. ... raw event: size %d bytes\n",
+				event->header.size);
+		break;
+	case BINARY_PRINT_LINE_BEGIN:
+		printf(".");
+		break;
+	case BINARY_PRINT_ADDR:
+		color_fprintf(stdout, color, "  %04x: ", val);
+		break;
+	case BINARY_PRINT_NUM_DATA:
+		color_fprintf(stdout, color, " %02x", val);
+		break;
+	case BINARY_PRINT_NUM_PAD:
+		color_fprintf(stdout, color, "   ");
+		break;
+	case BINARY_PRINT_SEP:
+		color_fprintf(stdout, color, "  ");
+		break;
+	case BINARY_PRINT_CHAR_DATA:
+		color_fprintf(stdout, color, "%c",
+			      isprint(ch) ? ch : '.');
+		break;
+	case BINARY_PRINT_CHAR_PAD:
+		color_fprintf(stdout, color, " ");
+		break;
+	case BINARY_PRINT_LINE_END:
+		color_fprintf(stdout, color, "\n");
+		break;
+	case BINARY_PRINT_DATA_END:
+		printf("\n");
+		break;
+	default:
+		break;
+	}
+}
+
 void trace_event(union perf_event *event)
 {
 	unsigned char *raw_event = (void *)event;
-	const char *color = PERF_COLOR_BLUE;
-	int i, j;
 
 	if (!dump_trace)
 		return;
 
-	printf(".");
-	color_fprintf(stdout, color, "\n. ... raw event: size %d bytes\n",
-		      event->header.size);
-
-	for (i = 0; i < event->header.size; i++) {
-		if ((i & 15) == 0) {
-			printf(".");
-			color_fprintf(stdout, color, "  %04x: ", i);
-		}
-
-		color_fprintf(stdout, color, " %02x", raw_event[i]);
-
-		if (((i & 15) == 15) || i == event->header.size-1) {
-			color_fprintf(stdout, color, "  ");
-			for (j = 0; j < 15-(i & 15); j++)
-				color_fprintf(stdout, color, "   ");
-			for (j = i & ~15; j <= i; j++) {
-				color_fprintf(stdout, color, "%c",
-					      isprint(raw_event[j]) ?
-					      raw_event[j] : '.');
-			}
-			color_fprintf(stdout, color, "\n");
-		}
-	}
-	printf(".\n");
+	print_binary(raw_event, event->header.size, 16,
+		     trace_event_printer, event);
 }
 
 static struct debug_variable {
@@ -192,3 +209,23 @@
 	free(s);
 	return 0;
 }
+
+#define DEBUG_WRAPPER(__n, __l)				\
+static int pr_ ## __n ## _wrapper(const char *fmt, ...)	\
+{							\
+	va_list args;					\
+	int ret;					\
+							\
+	va_start(args, fmt);				\
+	ret = veprintf(__l, verbose, fmt, args);	\
+	va_end(args);					\
+	return ret;					\
+}
+
+DEBUG_WRAPPER(warning, 0);
+DEBUG_WRAPPER(debug, 1);
+
+void perf_debug_setup(void)
+{
+	libapi_set_print(pr_warning_wrapper, pr_warning_wrapper, pr_debug_wrapper);
+}

diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h
index 8b9a088..14bafda 100644
--- a/tools/perf/util/debug.h
+++ b/tools/perf/util/debug.h

@@ -53,5 +53,6 @@
 int veprintf(int level, int var, const char *fmt, va_list args);
 
 int perf_debug_option(const char *str);
+void perf_debug_setup(void);
 
 #endif	/* __PERF_DEBUG_H */

diff --git a/tools/perf/util/demangle-java.c b/tools/perf/util/demangle-java.c
new file mode 100644
index 0000000..3e6062a
--- /dev/null
+++ b/tools/perf/util/demangle-java.c

@@ -0,0 +1,199 @@
+#include <sys/types.h>
+#include <stdio.h>
+#include <string.h>
+#include "util.h"
+#include "debug.h"
+#include "symbol.h"
+
+#include "demangle-java.h"
+
+enum {
+	MODE_PREFIX = 0,
+	MODE_CLASS  = 1,
+	MODE_FUNC   = 2,
+	MODE_TYPE   = 3,
+	MODE_CTYPE  = 3, /* class arg */
+};
+
+#define BASE_ENT(c, n)	[c - 'A']=n
+static const char *base_types['Z' - 'A' + 1] = {
+	BASE_ENT('B', "byte" ),
+	BASE_ENT('C', "char" ),
+	BASE_ENT('D', "double" ),
+	BASE_ENT('F', "float" ),
+	BASE_ENT('I', "int" ),
+	BASE_ENT('J', "long" ),
+	BASE_ENT('S', "short" ),
+	BASE_ENT('Z', "bool" ),
+};
+
+/*
+ * demangle Java symbol between str and end positions and stores
+ * up to maxlen characters into buf. The parser starts in mode.
+ *
+ * Use MODE_PREFIX to process entire prototype till end position
+ * Use MODE_TYPE to process return type if str starts on return type char
+ *
+ *  Return:
+ *	success: buf
+ *	error  : NULL
+ */
+static char *
+__demangle_java_sym(const char *str, const char *end, char *buf, int maxlen, int mode)
+{
+	int rlen = 0;
+	int array = 0;
+	int narg = 0;
+	const char *q;
+
+	if (!end)
+		end = str + strlen(str);
+
+	for (q = str; q != end; q++) {
+
+		if (rlen == (maxlen - 1))
+			break;
+
+		switch (*q) {
+		case 'L':
+			if (mode == MODE_PREFIX || mode == MODE_CTYPE) {
+				if (mode == MODE_CTYPE) {
+					if (narg)
+						rlen += scnprintf(buf + rlen, maxlen - rlen, ", ");
+					narg++;
+				}
+				rlen += scnprintf(buf + rlen, maxlen - rlen, "class ");
+				if (mode == MODE_PREFIX)
+					mode = MODE_CLASS;
+			} else
+				buf[rlen++] = *q;
+			break;
+		case 'B':
+		case 'C':
+		case 'D':
+		case 'F':
+		case 'I':
+		case 'J':
+		case 'S':
+		case 'Z':
+			if (mode == MODE_TYPE) {
+				if (narg)
+					rlen += scnprintf(buf + rlen, maxlen - rlen, ", ");
+				rlen += scnprintf(buf + rlen, maxlen - rlen, "%s", base_types[*q - 'A']);
+				while (array--)
+					rlen += scnprintf(buf + rlen, maxlen - rlen, "[]");
+				array = 0;
+				narg++;
+			} else
+				buf[rlen++] = *q;
+			break;
+		case 'V':
+			if (mode == MODE_TYPE) {
+				rlen += scnprintf(buf + rlen, maxlen - rlen, "void");
+				while (array--)
+					rlen += scnprintf(buf + rlen, maxlen - rlen, "[]");
+				array = 0;
+			} else
+				buf[rlen++] = *q;
+			break;
+		case '[':
+			if (mode != MODE_TYPE)
+				goto error;
+			array++;
+			break;
+		case '(':
+			if (mode != MODE_FUNC)
+				goto error;
+			buf[rlen++] = *q;
+			mode = MODE_TYPE;
+			break;
+		case ')':
+			if (mode != MODE_TYPE)
+				goto error;
+			buf[rlen++] = *q;
+			narg = 0;
+			break;
+		case ';':
+			if (mode != MODE_CLASS && mode != MODE_CTYPE)
+				goto error;
+			/* safe because at least one other char to process */
+			if (isalpha(*(q + 1)))
+				rlen += scnprintf(buf + rlen, maxlen - rlen, ".");
+			if (mode == MODE_CLASS)
+				mode = MODE_FUNC;
+			else if (mode == MODE_CTYPE)
+				mode = MODE_TYPE;
+			break;
+		case '/':
+			if (mode != MODE_CLASS && mode != MODE_CTYPE)
+				goto error;
+			rlen += scnprintf(buf + rlen, maxlen - rlen, ".");
+			break;
+		default :
+			buf[rlen++] = *q;
+		}
+	}
+	buf[rlen] = '\0';
+	return buf;
+error:
+	return NULL;
+}
+
+/*
+ * Demangle Java function signature (openJDK, not GCJ)
+ * input:
+ * 	str: string to parse. String is not modified
+ *    flags: comobination of JAVA_DEMANGLE_* flags to modify demangling
+ * return:
+ *	if input can be demangled, then a newly allocated string is returned.
+ *	if input cannot be demangled, then NULL is returned
+ *
+ * Note: caller is responsible for freeing demangled string
+ */
+char *
+java_demangle_sym(const char *str, int flags)
+{
+	char *buf, *ptr;
+	char *p;
+	size_t len, l1 = 0;
+
+	if (!str)
+		return NULL;
+
+	/* find start of retunr type */
+	p = strrchr(str, ')');
+	if (!p)
+		return NULL;
+
+	/*
+	 * expansion factor estimated to 3x
+	 */
+	len = strlen(str) * 3 + 1;
+	buf = malloc(len);
+	if (!buf)
+		return NULL;
+
+	buf[0] = '\0';
+	if (!(flags & JAVA_DEMANGLE_NORET)) {
+		/*
+		 * get return type first
+		 */
+		ptr = __demangle_java_sym(p + 1, NULL, buf, len, MODE_TYPE);
+		if (!ptr)
+			goto error;
+
+		/* add space between return type and function prototype */
+		l1 = strlen(buf);
+		buf[l1++] = ' ';
+	}
+
+	/* process function up to return type */
+	ptr = __demangle_java_sym(str, p + 1, buf + l1, len - l1, MODE_PREFIX);
+	if (!ptr)
+		goto error;
+
+	return buf;
+error:
+	free(buf);
+	return NULL;
+}

diff --git a/tools/perf/util/demangle-java.h b/tools/perf/util/demangle-java.h
new file mode 100644
index 0000000..a981c1f
--- /dev/null
+++ b/tools/perf/util/demangle-java.h

@@ -0,0 +1,10 @@
+#ifndef __PERF_DEMANGLE_JAVA
+#define __PERF_DEMANGLE_JAVA 1
+/*
+ * demangle function flags
+ */
+#define JAVA_DEMANGLE_NORET	0x1 /* do not process return type */
+
+char * java_demangle_sym(const char *str, int flags);
+
+#endif /* __PERF_DEMANGLE_JAVA */

diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index e8e9a9d..8e639543 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c

@@ -52,6 +52,11 @@
 			debuglink--;
 		if (*debuglink == '/')
 			debuglink++;
+
+		ret = -1;
+		if (!is_regular_file(filename))
+			break;
+
 		ret = filename__read_debuglink(filename, debuglink,
 					       size - (debuglink - filename));
 		}

diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
index 7dd5939..49a11d9 100644
--- a/tools/perf/util/env.c
+++ b/tools/perf/util/env.c

@@ -6,6 +6,8 @@
 
 void perf_env__exit(struct perf_env *env)
 {
+	int i;
+
 	zfree(&env->hostname);
 	zfree(&env->os_release);
 	zfree(&env->version);
@@ -19,6 +21,10 @@
 	zfree(&env->numa_nodes);
 	zfree(&env->pmu_mappings);
 	zfree(&env->cpu);
+
+	for (i = 0; i < env->caches_cnt; i++)
+		cpu_cache_level__free(&env->caches[i]);
+	zfree(&env->caches);
 }
 
 int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[])
@@ -75,3 +81,10 @@
 	env->nr_cpus_avail = nr_cpus;
 	return 0;
 }
+
+void cpu_cache_level__free(struct cpu_cache_level *cache)
+{
+	free(cache->type);
+	free(cache->map);
+	free(cache->size);
+}

diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index 0132b95..56cffb6 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h

@@ -1,11 +1,23 @@
 #ifndef __PERF_ENV_H
 #define __PERF_ENV_H
 
+#include <linux/types.h>
+
 struct cpu_topology_map {
 	int	socket_id;
 	int	core_id;
 };
 
+struct cpu_cache_level {
+	u32	level;
+	u32	line_size;
+	u32	sets;
+	u32	ways;
+	char	*type;
+	char	*size;
+	char	*map;
+};
+
 struct perf_env {
 	char			*hostname;
 	char			*os_release;
@@ -31,6 +43,8 @@
 	char			*numa_nodes;
 	char			*pmu_mappings;
 	struct cpu_topology_map	*cpu;
+	struct cpu_cache_level	*caches;
+	int			 caches_cnt;
 };
 
 extern struct perf_env perf_env;
@@ -41,4 +55,5 @@
 
 int perf_env__read_cpu_topology_map(struct perf_env *env);
 
+void cpu_cache_level__free(struct cpu_cache_level *cache);
 #endif /* __PERF_ENV_H */

diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 85155e9..7bad5c3 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c

@@ -282,7 +282,7 @@
 		strcpy(execname, "");
 
 		/* 00400000-0040c000 r-xp 00000000 fd:01 41038  /bin/cat */
-		n = sscanf(bf, "%"PRIx64"-%"PRIx64" %s %"PRIx64" %x:%x %u %s\n",
+		n = sscanf(bf, "%"PRIx64"-%"PRIx64" %s %"PRIx64" %x:%x %u %[^\n]\n",
 		       &event->mmap2.start, &event->mmap2.len, prot,
 		       &event->mmap2.pgoff, &event->mmap2.maj,
 		       &event->mmap2.min,

diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index d81f13d..86a0383 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c

@@ -1181,12 +1181,12 @@
 	 */
 	if (cpus != evlist->cpus) {
 		cpu_map__put(evlist->cpus);
-		evlist->cpus = cpus;
+		evlist->cpus = cpu_map__get(cpus);
 	}
 
 	if (threads != evlist->threads) {
 		thread_map__put(evlist->threads);
-		evlist->threads = threads;
+		evlist->threads = thread_map__get(threads);
 	}
 
 	perf_evlist__propagate_maps(evlist);
@@ -1223,6 +1223,9 @@
 	int err = 0;
 
 	evlist__for_each(evlist, evsel) {
+		if (evsel->attr.type != PERF_TYPE_TRACEPOINT)
+			continue;
+
 		err = perf_evsel__set_filter(evsel, filter);
 		if (err)
 			break;
@@ -1624,7 +1627,7 @@
 	return printed + fprintf(fp, "\n");
 }
 
-int perf_evlist__strerror_open(struct perf_evlist *evlist __maybe_unused,
+int perf_evlist__strerror_open(struct perf_evlist *evlist,
 			       int err, char *buf, size_t size)
 {
 	int printed, value;
@@ -1652,7 +1655,25 @@
 				    "Hint:\tTry: 'sudo sh -c \"echo -1 > /proc/sys/kernel/perf_event_paranoid\"'\n"
 				    "Hint:\tThe current value is %d.", value);
 		break;
+	case EINVAL: {
+		struct perf_evsel *first = perf_evlist__first(evlist);
+		int max_freq;
+
+		if (sysctl__read_int("kernel/perf_event_max_sample_rate", &max_freq) < 0)
+			goto out_default;
+
+		if (first->attr.sample_freq < (u64)max_freq)
+			goto out_default;
+
+		printed = scnprintf(buf, size,
+				    "Error:\t%s.\n"
+				    "Hint:\tCheck /proc/sys/kernel/perf_event_max_sample_rate.\n"
+				    "Hint:\tThe current value is %d and %" PRIu64 " is being requested.",
+				    emsg, max_freq, first->attr.sample_freq);
+		break;
+	}
 	default:
+out_default:
 		scnprintf(buf, size, "%s", emsg);
 		break;
 	}
@@ -1723,3 +1744,19 @@
 
 	tracking_evsel->tracking = true;
 }
+
+struct perf_evsel *
+perf_evlist__find_evsel_by_str(struct perf_evlist *evlist,
+			       const char *str)
+{
+	struct perf_evsel *evsel;
+
+	evlist__for_each(evlist, evsel) {
+		if (!evsel->name)
+			continue;
+		if (strcmp(str, evsel->name) == 0)
+			return evsel;
+	}
+
+	return NULL;
+}

diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 7c4d9a2..a0d1522 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h

@@ -294,4 +294,7 @@
 				     struct perf_evsel *tracking_evsel);
 
 void perf_event_attr__set_max_precise_ip(struct perf_event_attr *attr);
+
+struct perf_evsel *
+perf_evlist__find_evsel_by_str(struct perf_evlist *evlist, const char *str);
 #endif /* __PERF_EVLIST_H */

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index cdbaf9b..0902fe4 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c

@@ -225,6 +225,11 @@
 	if (evsel != NULL)
 		perf_evsel__init(evsel, attr, idx);
 
+	if (perf_evsel__is_bpf_output(evsel)) {
+		evsel->attr.sample_type |= PERF_SAMPLE_RAW;
+		evsel->attr.sample_period = 1;
+	}
+
 	return evsel;
 }
 
@@ -898,6 +903,16 @@
 	if (evsel->precise_max)
 		perf_event_attr__set_max_precise_ip(attr);
 
+	if (opts->all_user) {
+		attr->exclude_kernel = 1;
+		attr->exclude_user   = 0;
+	}
+
+	if (opts->all_kernel) {
+		attr->exclude_kernel = 0;
+		attr->exclude_user   = 1;
+	}
+
 	/*
 	 * Apply event specific term settings,
 	 * it overloads any global configuration.
@@ -2362,12 +2377,15 @@
 	case EPERM:
 	case EACCES:
 		return scnprintf(msg, size,
-		 "You may not have permission to collect %sstats.\n"
-		 "Consider tweaking /proc/sys/kernel/perf_event_paranoid:\n"
-		 " -1 - Not paranoid at all\n"
-		 "  0 - Disallow raw tracepoint access for unpriv\n"
-		 "  1 - Disallow cpu events for unpriv\n"
-		 "  2 - Disallow kernel profiling for unpriv",
+		 "You may not have permission to collect %sstats.\n\n"
+		 "Consider tweaking /proc/sys/kernel/perf_event_paranoid,\n"
+		 "which controls use of the performance events system by\n"
+		 "unprivileged users (without CAP_SYS_ADMIN).\n\n"
+		 "The default value is 1:\n\n"
+		 "  -1: Allow use of (almost) all events by all users\n"
+		 ">= 0: Disallow raw tracepoint access by users without CAP_IOC_LOCK\n"
+		 ">= 1: Disallow CPU event access by users without CAP_SYS_ADMIN\n"
+		 ">= 2: Disallow kernel profiling by users without CAP_SYS_ADMIN",
 				 target->system_wide ? "system-wide " : "");
 	case ENOENT:
 		return scnprintf(msg, size, "The %s event is not supported.",

diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 8e75434..501ea6e 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h

@@ -93,10 +93,8 @@
 	const char		*unit;
 	struct event_format	*tp_format;
 	off_t			id_offset;
-	union {
-		void		*priv;
-		u64		db_id;
-	};
+	void			*priv;
+	u64			db_id;
 	struct cgroup_sel	*cgrp;
 	void			*handler;
 	struct cpu_map		*cpus;
@@ -364,6 +362,14 @@
 #undef FUNCTION_EVENT
 }
 
+static inline bool perf_evsel__is_bpf_output(struct perf_evsel *evsel)
+{
+	struct perf_event_attr *attr = &evsel->attr;
+
+	return (attr->config == PERF_COUNT_SW_BPF_OUTPUT) &&
+		(attr->type == PERF_TYPE_SOFTWARE);
+}
+
 struct perf_attr_details {
 	bool freq;
 	bool verbose;

diff --git a/tools/perf/util/genelf.c b/tools/perf/util/genelf.c
new file mode 100644
index 0000000..c1ef805
--- /dev/null
+++ b/tools/perf/util/genelf.c

@@ -0,0 +1,449 @@
+/*
+ * genelf.c
+ * Copyright (C) 2014, Google, Inc
+ *
+ * Contributed by:
+ * 	Stephane Eranian <eranian@gmail.com>
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+
+#include <sys/types.h>
+#include <stdio.h>
+#include <getopt.h>
+#include <stddef.h>
+#include <libelf.h>
+#include <string.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <fcntl.h>
+#include <err.h>
+#include <dwarf.h>
+
+#include "perf.h"
+#include "genelf.h"
+#include "../util/jitdump.h"
+
+#define JVMTI
+
+#define BUILD_ID_URANDOM /* different uuid for each run */
+
+#ifdef HAVE_LIBCRYPTO
+
+#define BUILD_ID_MD5
+#undef BUILD_ID_SHA	/* does not seem to work well when linked with Java */
+#undef BUILD_ID_URANDOM /* different uuid for each run */
+
+#ifdef BUILD_ID_SHA
+#include <openssl/sha.h>
+#endif
+
+#ifdef BUILD_ID_MD5
+#include <openssl/md5.h>
+#endif
+#endif
+
+
+typedef struct {
+  unsigned int namesz;  /* Size of entry's owner string */
+  unsigned int descsz;  /* Size of the note descriptor */
+  unsigned int type;    /* Interpretation of the descriptor */
+  char         name[0]; /* Start of the name+desc data */
+} Elf_Note;
+
+struct options {
+	char *output;
+	int fd;
+};
+
+static char shd_string_table[] = {
+	0,
+	'.', 't', 'e', 'x', 't', 0,			/*  1 */
+	'.', 's', 'h', 's', 't', 'r', 't', 'a', 'b', 0, /*  7 */
+	'.', 's', 'y', 'm', 't', 'a', 'b', 0,		/* 17 */
+	'.', 's', 't', 'r', 't', 'a', 'b', 0,		/* 25 */
+	'.', 'n', 'o', 't', 'e', '.', 'g', 'n', 'u', '.', 'b', 'u', 'i', 'l', 'd', '-', 'i', 'd', 0, /* 33 */
+	'.', 'd', 'e', 'b', 'u', 'g', '_', 'l', 'i', 'n', 'e', 0, /* 52 */
+	'.', 'd', 'e', 'b', 'u', 'g', '_', 'i', 'n', 'f', 'o', 0, /* 64 */
+	'.', 'd', 'e', 'b', 'u', 'g', '_', 'a', 'b', 'b', 'r', 'e', 'v', 0, /* 76 */
+};
+
+static struct buildid_note {
+	Elf_Note desc;		/* descsz: size of build-id, must be multiple of 4 */
+	char	 name[4];	/* GNU\0 */
+	char	 build_id[20];
+} bnote;
+
+static Elf_Sym symtab[]={
+	/* symbol 0 MUST be the undefined symbol */
+	{ .st_name  = 0, /* index in sym_string table */
+	  .st_info  = ELF_ST_TYPE(STT_NOTYPE),
+	  .st_shndx = 0, /* for now */
+	  .st_value = 0x0,
+	  .st_other = ELF_ST_VIS(STV_DEFAULT),
+	  .st_size  = 0,
+	},
+	{ .st_name  = 1, /* index in sym_string table */
+	  .st_info  = ELF_ST_BIND(STB_LOCAL) | ELF_ST_TYPE(STT_FUNC),
+	  .st_shndx = 1,
+	  .st_value = 0, /* for now */
+	  .st_other = ELF_ST_VIS(STV_DEFAULT),
+	  .st_size  = 0, /* for now */
+	}
+};
+
+#ifdef BUILD_ID_URANDOM
+static void
+gen_build_id(struct buildid_note *note,
+	     unsigned long load_addr __maybe_unused,
+	     const void *code __maybe_unused,
+	     size_t csize __maybe_unused)
+{
+	int fd;
+	size_t sz = sizeof(note->build_id);
+	ssize_t sret;
+
+	fd = open("/dev/urandom", O_RDONLY);
+	if (fd == -1)
+		err(1, "cannot access /dev/urandom for builid");
+
+	sret = read(fd, note->build_id, sz);
+
+	close(fd);
+
+	if (sret != (ssize_t)sz)
+		memset(note->build_id, 0, sz);
+}
+#endif
+
+#ifdef BUILD_ID_SHA
+static void
+gen_build_id(struct buildid_note *note,
+	     unsigned long load_addr __maybe_unused,
+	     const void *code,
+	     size_t csize)
+{
+	if (sizeof(note->build_id) < SHA_DIGEST_LENGTH)
+		errx(1, "build_id too small for SHA1");
+
+	SHA1(code, csize, (unsigned char *)note->build_id);
+}
+#endif
+
+#ifdef BUILD_ID_MD5
+static void
+gen_build_id(struct buildid_note *note, unsigned long load_addr, const void *code, size_t csize)
+{
+	MD5_CTX context;
+
+	if (sizeof(note->build_id) < 16)
+		errx(1, "build_id too small for MD5");
+
+	MD5_Init(&context);
+	MD5_Update(&context, &load_addr, sizeof(load_addr));
+	MD5_Update(&context, code, csize);
+	MD5_Final((unsigned char *)note->build_id, &context);
+}
+#endif
+
+/*
+ * fd: file descriptor open for writing for the output file
+ * load_addr: code load address (could be zero, just used for buildid)
+ * sym: function name (for native code - used as the symbol)
+ * code: the native code
+ * csize: the code size in bytes
+ */
+int
+jit_write_elf(int fd, uint64_t load_addr, const char *sym,
+	      const void *code, int csize,
+	      void *debug, int nr_debug_entries)
+{
+	Elf *e;
+	Elf_Data *d;
+	Elf_Scn *scn;
+	Elf_Ehdr *ehdr;
+	Elf_Shdr *shdr;
+	char *strsym = NULL;
+	int symlen;
+	int retval = -1;
+
+	if (elf_version(EV_CURRENT) == EV_NONE) {
+		warnx("ELF initialization failed");
+		return -1;
+	}
+
+	e = elf_begin(fd, ELF_C_WRITE, NULL);
+	if (!e) {
+		warnx("elf_begin failed");
+		goto error;
+	}
+
+	/*
+	 * setup ELF header
+	 */
+	ehdr = elf_newehdr(e);
+	if (!ehdr) {
+		warnx("cannot get ehdr");
+		goto error;
+	}
+
+	ehdr->e_ident[EI_DATA] = GEN_ELF_ENDIAN;
+	ehdr->e_ident[EI_CLASS] = GEN_ELF_CLASS;
+	ehdr->e_machine = GEN_ELF_ARCH;
+	ehdr->e_type = ET_DYN;
+	ehdr->e_entry = GEN_ELF_TEXT_OFFSET;
+	ehdr->e_version = EV_CURRENT;
+	ehdr->e_shstrndx= 2; /* shdr index for section name */
+
+	/*
+	 * setup text section
+	 */
+	scn = elf_newscn(e);
+	if (!scn) {
+		warnx("cannot create section");
+		goto error;
+	}
+
+	d = elf_newdata(scn);
+	if (!d) {
+		warnx("cannot get new data");
+		goto error;
+	}
+
+	d->d_align = 16;
+	d->d_off = 0LL;
+	d->d_buf = (void *)code;
+	d->d_type = ELF_T_BYTE;
+	d->d_size = csize;
+	d->d_version = EV_CURRENT;
+
+	shdr = elf_getshdr(scn);
+	if (!shdr) {
+		warnx("cannot get section header");
+		goto error;
+	}
+
+	shdr->sh_name = 1;
+	shdr->sh_type = SHT_PROGBITS;
+	shdr->sh_addr = GEN_ELF_TEXT_OFFSET;
+	shdr->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+	shdr->sh_entsize = 0;
+
+	/*
+	 * setup section headers string table
+	 */
+	scn = elf_newscn(e);
+	if (!scn) {
+		warnx("cannot create section");
+		goto error;
+	}
+
+	d = elf_newdata(scn);
+	if (!d) {
+		warnx("cannot get new data");
+		goto error;
+	}
+
+	d->d_align = 1;
+	d->d_off = 0LL;
+	d->d_buf = shd_string_table;
+	d->d_type = ELF_T_BYTE;
+	d->d_size = sizeof(shd_string_table);
+	d->d_version = EV_CURRENT;
+
+	shdr = elf_getshdr(scn);
+	if (!shdr) {
+		warnx("cannot get section header");
+		goto error;
+	}
+
+	shdr->sh_name = 7; /* offset of '.shstrtab' in shd_string_table */
+	shdr->sh_type = SHT_STRTAB;
+	shdr->sh_flags = 0;
+	shdr->sh_entsize = 0;
+
+	/*
+	 * setup symtab section
+	 */
+	symtab[1].st_size  = csize;
+	symtab[1].st_value = GEN_ELF_TEXT_OFFSET;
+
+	scn = elf_newscn(e);
+	if (!scn) {
+		warnx("cannot create section");
+		goto error;
+	}
+
+	d = elf_newdata(scn);
+	if (!d) {
+		warnx("cannot get new data");
+		goto error;
+	}
+
+	d->d_align = 8;
+	d->d_off = 0LL;
+	d->d_buf = symtab;
+	d->d_type = ELF_T_SYM;
+	d->d_size = sizeof(symtab);
+	d->d_version = EV_CURRENT;
+
+	shdr = elf_getshdr(scn);
+	if (!shdr) {
+		warnx("cannot get section header");
+		goto error;
+	}
+
+	shdr->sh_name = 17; /* offset of '.symtab' in shd_string_table */
+	shdr->sh_type = SHT_SYMTAB;
+	shdr->sh_flags = 0;
+	shdr->sh_entsize = sizeof(Elf_Sym);
+	shdr->sh_link = 4; /* index of .strtab section */
+
+	/*
+	 * setup symbols string table
+	 * 2 = 1 for 0 in 1st entry, 1 for the 0 at end of symbol for 2nd entry
+	 */
+	symlen = 2 + strlen(sym);
+	strsym = calloc(1, symlen);
+	if (!strsym) {
+		warnx("cannot allocate strsym");
+		goto error;
+	}
+	strcpy(strsym + 1, sym);
+
+	scn = elf_newscn(e);
+	if (!scn) {
+		warnx("cannot create section");
+		goto error;
+	}
+
+	d = elf_newdata(scn);
+	if (!d) {
+		warnx("cannot get new data");
+		goto error;
+	}
+
+	d->d_align = 1;
+	d->d_off = 0LL;
+	d->d_buf = strsym;
+	d->d_type = ELF_T_BYTE;
+	d->d_size = symlen;
+	d->d_version = EV_CURRENT;
+
+	shdr = elf_getshdr(scn);
+	if (!shdr) {
+		warnx("cannot get section header");
+		goto error;
+	}
+
+	shdr->sh_name = 25; /* offset in shd_string_table */
+	shdr->sh_type = SHT_STRTAB;
+	shdr->sh_flags = 0;
+	shdr->sh_entsize = 0;
+
+	/*
+	 * setup build-id section
+	 */
+	scn = elf_newscn(e);
+	if (!scn) {
+		warnx("cannot create section");
+		goto error;
+	}
+
+	d = elf_newdata(scn);
+	if (!d) {
+		warnx("cannot get new data");
+		goto error;
+	}
+
+	/*
+	 * build-id generation
+	 */
+	gen_build_id(&bnote, load_addr, code, csize);
+	bnote.desc.namesz = sizeof(bnote.name); /* must include 0 termination */
+	bnote.desc.descsz = sizeof(bnote.build_id);
+	bnote.desc.type   = NT_GNU_BUILD_ID;
+	strcpy(bnote.name, "GNU");
+
+	d->d_align = 4;
+	d->d_off = 0LL;
+	d->d_buf = &bnote;
+	d->d_type = ELF_T_BYTE;
+	d->d_size = sizeof(bnote);
+	d->d_version = EV_CURRENT;
+
+	shdr = elf_getshdr(scn);
+	if (!shdr) {
+		warnx("cannot get section header");
+		goto error;
+	}
+
+	shdr->sh_name = 33; /* offset in shd_string_table */
+	shdr->sh_type = SHT_NOTE;
+	shdr->sh_addr = 0x0;
+	shdr->sh_flags = SHF_ALLOC;
+	shdr->sh_size = sizeof(bnote);
+	shdr->sh_entsize = 0;
+
+	if (debug && nr_debug_entries) {
+		retval = jit_add_debug_info(e, load_addr, debug, nr_debug_entries);
+		if (retval)
+			goto error;
+	} else {
+		if (elf_update(e, ELF_C_WRITE) < 0) {
+			warnx("elf_update 4 failed");
+			goto error;
+		}
+	}
+
+	retval = 0;
+error:
+	(void)elf_end(e);
+
+	free(strsym);
+
+
+	return retval;
+}
+
+#ifndef JVMTI
+
+static unsigned char x86_code[] = {
+    0xBB, 0x2A, 0x00, 0x00, 0x00, /* movl $42, %ebx */
+    0xB8, 0x01, 0x00, 0x00, 0x00, /* movl $1, %eax */
+    0xCD, 0x80            /* int $0x80 */
+};
+
+static struct options options;
+
+int main(int argc, char **argv)
+{
+	int c, fd, ret;
+
+	while ((c = getopt(argc, argv, "o:h")) != -1) {
+		switch (c) {
+		case 'o':
+			options.output = optarg;
+			break;
+		case 'h':
+			printf("Usage: genelf -o output_file [-h]\n");
+			return 0;
+		default:
+			errx(1, "unknown option");
+		}
+	}
+
+	fd = open(options.output, O_CREAT|O_TRUNC|O_RDWR, 0666);
+	if (fd == -1)
+		err(1, "cannot create file %s", options.output);
+
+	ret = jit_write_elf(fd, "main", x86_code, sizeof(x86_code));
+	close(fd);
+
+	if (ret != 0)
+		unlink(options.output);
+
+	return ret;
+}
+#endif

diff --git a/tools/perf/util/genelf.h b/tools/perf/util/genelf.h
new file mode 100644
index 0000000..45bf9c6
--- /dev/null
+++ b/tools/perf/util/genelf.h

@@ -0,0 +1,67 @@
+#ifndef __GENELF_H__
+#define __GENELF_H__
+
+/* genelf.c */
+extern int jit_write_elf(int fd, uint64_t code_addr, const char *sym,
+			 const void *code, int csize,
+			 void *debug, int nr_debug_entries);
+/* genelf_debug.c */
+extern int jit_add_debug_info(Elf *e, uint64_t code_addr,
+			      void *debug, int nr_debug_entries);
+
+#if   defined(__arm__)
+#define GEN_ELF_ARCH	EM_ARM
+#define GEN_ELF_ENDIAN	ELFDATA2LSB
+#define GEN_ELF_CLASS	ELFCLASS32
+#elif defined(__aarch64__)
+#define GEN_ELF_ARCH	EM_AARCH64
+#define GEN_ELF_ENDIAN	ELFDATA2LSB
+#define GEN_ELF_CLASS	ELFCLASS64
+#elif defined(__x86_64__)
+#define GEN_ELF_ARCH	EM_X86_64
+#define GEN_ELF_ENDIAN	ELFDATA2LSB
+#define GEN_ELF_CLASS	ELFCLASS64
+#elif defined(__i386__)
+#define GEN_ELF_ARCH	EM_386
+#define GEN_ELF_ENDIAN	ELFDATA2LSB
+#define GEN_ELF_CLASS	ELFCLASS32
+#elif defined(__ppcle__)
+#define GEN_ELF_ARCH	EM_PPC
+#define GEN_ELF_ENDIAN	ELFDATA2LSB
+#define GEN_ELF_CLASS	ELFCLASS64
+#elif defined(__powerpc__)
+#define GEN_ELF_ARCH	EM_PPC64
+#define GEN_ELF_ENDIAN	ELFDATA2MSB
+#define GEN_ELF_CLASS	ELFCLASS64
+#elif defined(__powerpcle__)
+#define GEN_ELF_ARCH	EM_PPC64
+#define GEN_ELF_ENDIAN	ELFDATA2LSB
+#define GEN_ELF_CLASS	ELFCLASS64
+#else
+#error "unsupported architecture"
+#endif
+
+#if GEN_ELF_CLASS == ELFCLASS64
+#define elf_newehdr	elf64_newehdr
+#define elf_getshdr	elf64_getshdr
+#define Elf_Ehdr	Elf64_Ehdr
+#define Elf_Shdr	Elf64_Shdr
+#define Elf_Sym		Elf64_Sym
+#define ELF_ST_TYPE(a)	ELF64_ST_TYPE(a)
+#define ELF_ST_BIND(a)	ELF64_ST_BIND(a)
+#define ELF_ST_VIS(a)	ELF64_ST_VISIBILITY(a)
+#else
+#define elf_newehdr	elf32_newehdr
+#define elf_getshdr	elf32_getshdr
+#define Elf_Ehdr	Elf32_Ehdr
+#define Elf_Shdr	Elf32_Shdr
+#define Elf_Sym		Elf32_Sym
+#define ELF_ST_TYPE(a)	ELF32_ST_TYPE(a)
+#define ELF_ST_BIND(a)	ELF32_ST_BIND(a)
+#define ELF_ST_VIS(a)	ELF32_ST_VISIBILITY(a)
+#endif
+
+/* The .text section is directly after the ELF header */
+#define GEN_ELF_TEXT_OFFSET sizeof(Elf_Ehdr)
+
+#endif

diff --git a/tools/perf/util/genelf_debug.c b/tools/perf/util/genelf_debug.c
new file mode 100644
index 0000000..5980f7d
--- /dev/null
+++ b/tools/perf/util/genelf_debug.c

@@ -0,0 +1,610 @@
+/*
+ * genelf_debug.c
+ * Copyright (C) 2015, Google, Inc
+ *
+ * Contributed by:
+ * 	Stephane Eranian <eranian@google.com>
+ *
+ * Released under the GPL v2.
+ *
+ * based on GPLv2 source code from Oprofile
+ * @remark Copyright 2007 OProfile authors
+ * @author Philippe Elie
+ */
+#include <sys/types.h>
+#include <stdio.h>
+#include <getopt.h>
+#include <stddef.h>
+#include <libelf.h>
+#include <string.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <fcntl.h>
+#include <err.h>
+#include <dwarf.h>
+
+#include "perf.h"
+#include "genelf.h"
+#include "../util/jitdump.h"
+
+#define BUFFER_EXT_DFL_SIZE	(4 * 1024)
+
+typedef uint32_t uword;
+typedef uint16_t uhalf;
+typedef int32_t  sword;
+typedef int16_t  shalf;
+typedef uint8_t  ubyte;
+typedef int8_t   sbyte;
+
+struct buffer_ext {
+	size_t cur_pos;
+	size_t max_sz;
+	void *data;
+};
+
+static void
+buffer_ext_dump(struct buffer_ext *be, const char *msg)
+{
+	size_t i;
+	warnx("DUMP for %s", msg);
+	for (i = 0 ; i < be->cur_pos; i++)
+		warnx("%4zu 0x%02x", i, (((char *)be->data)[i]) & 0xff);
+}
+
+static inline int
+buffer_ext_add(struct buffer_ext *be, void *addr, size_t sz)
+{
+	void *tmp;
+	size_t be_sz = be->max_sz;
+
+retry:
+	if ((be->cur_pos + sz) < be_sz) {
+		memcpy(be->data + be->cur_pos, addr, sz);
+		be->cur_pos += sz;
+		return 0;
+	}
+
+	if (!be_sz)
+		be_sz = BUFFER_EXT_DFL_SIZE;
+	else
+		be_sz <<= 1;
+
+	tmp = realloc(be->data, be_sz);
+	if (!tmp)
+		return -1;
+
+	be->data   = tmp;
+	be->max_sz = be_sz;
+
+	goto retry;
+}
+
+static void
+buffer_ext_init(struct buffer_ext *be)
+{
+	be->data = NULL;
+	be->cur_pos = 0;
+	be->max_sz = 0;
+}
+
+static inline size_t
+buffer_ext_size(struct buffer_ext *be)
+{
+	return be->cur_pos;
+}
+
+static inline void *
+buffer_ext_addr(struct buffer_ext *be)
+{
+	return be->data;
+}
+
+struct debug_line_header {
+	// Not counting this field
+	uword total_length;
+	// version number (2 currently)
+	uhalf version;
+	// relative offset from next field to
+	// program statement
+	uword prolog_length;
+	ubyte minimum_instruction_length;
+	ubyte default_is_stmt;
+	// line_base - see DWARF 2 specs
+	sbyte line_base;
+	// line_range - see DWARF 2 specs
+	ubyte line_range;
+	// number of opcode + 1
+	ubyte opcode_base;
+	/* follow the array of opcode args nr: ubytes [nr_opcode_base] */
+	/* follow the search directories index, zero terminated string
+	 * terminated by an empty string.
+	 */
+	/* follow an array of { filename, LEB128, LEB128, LEB128 }, first is
+	 * the directory index entry, 0 means current directory, then mtime
+	 * and filesize, last entry is followed by en empty string.
+	 */
+	/* follow the first program statement */
+} __attribute__((packed));
+
+/* DWARF 2 spec talk only about one possible compilation unit header while
+ * binutils can handle two flavours of dwarf 2, 32 and 64 bits, this is not
+ * related to the used arch, an ELF 32 can hold more than 4 Go of debug
+ * information. For now we handle only DWARF 2 32 bits comp unit. It'll only
+ * become a problem if we generate more than 4GB of debug information.
+ */
+struct compilation_unit_header {
+	uword total_length;
+	uhalf version;
+	uword debug_abbrev_offset;
+	ubyte pointer_size;
+} __attribute__((packed));
+
+#define DW_LNS_num_opcode (DW_LNS_set_isa + 1)
+
+/* field filled at run time are marked with -1 */
+static struct debug_line_header const default_debug_line_header = {
+	.total_length = -1,
+	.version = 2,
+	.prolog_length = -1,
+	.minimum_instruction_length = 1,	/* could be better when min instruction size != 1 */
+	.default_is_stmt = 1,	/* we don't take care about basic block */
+	.line_base = -5,	/* sensible value for line base ... */
+	.line_range = -14,     /* ... and line range are guessed statically */
+	.opcode_base = DW_LNS_num_opcode
+};
+
+static ubyte standard_opcode_length[] =
+{
+	0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1
+};
+#if 0
+{
+	[DW_LNS_advance_pc]   = 1,
+	[DW_LNS_advance_line] = 1,
+	[DW_LNS_set_file] =  1,
+	[DW_LNS_set_column] = 1,
+	[DW_LNS_fixed_advance_pc] = 1,
+	[DW_LNS_set_isa] = 1,
+};
+#endif
+
+/* field filled at run time are marked with -1 */
+static struct compilation_unit_header default_comp_unit_header = {
+	.total_length = -1,
+	.version = 2,
+	.debug_abbrev_offset = 0,     /* we reuse the same abbrev entries for all comp unit */
+	.pointer_size = sizeof(void *)
+};
+
+static void emit_uword(struct buffer_ext *be, uword data)
+{
+	buffer_ext_add(be, &data, sizeof(uword));
+}
+
+static void emit_string(struct buffer_ext *be, const char *s)
+{
+	buffer_ext_add(be, (void *)s, strlen(s) + 1);
+}
+
+static void emit_unsigned_LEB128(struct buffer_ext *be,
+				 unsigned long data)
+{
+	do {
+		ubyte cur = data & 0x7F;
+		data >>= 7;
+		if (data)
+			cur |= 0x80;
+		buffer_ext_add(be, &cur, 1);
+	} while (data);
+}
+
+static void emit_signed_LEB128(struct buffer_ext *be, long data)
+{
+	int more = 1;
+	int negative = data < 0;
+	int size = sizeof(long) * CHAR_BIT;
+	while (more) {
+		ubyte cur = data & 0x7F;
+		data >>= 7;
+		if (negative)
+			data |= - (1 << (size - 7));
+		if ((data == 0 && !(cur & 0x40)) ||
+		    (data == -1l && (cur & 0x40)))
+			more = 0;
+		else
+			cur |= 0x80;
+		buffer_ext_add(be, &cur, 1);
+	}
+}
+
+static void emit_extended_opcode(struct buffer_ext *be, ubyte opcode,
+				 void *data, size_t data_len)
+{
+	buffer_ext_add(be, (char *)"", 1);
+
+	emit_unsigned_LEB128(be, data_len + 1);
+
+	buffer_ext_add(be, &opcode, 1);
+	buffer_ext_add(be, data, data_len);
+}
+
+static void emit_opcode(struct buffer_ext *be, ubyte opcode)
+{
+	buffer_ext_add(be, &opcode, 1);
+}
+
+static void emit_opcode_signed(struct buffer_ext  *be,
+			       ubyte opcode, long data)
+{
+	buffer_ext_add(be, &opcode, 1);
+	emit_signed_LEB128(be, data);
+}
+
+static void emit_opcode_unsigned(struct buffer_ext *be, ubyte opcode,
+				 unsigned long data)
+{
+	buffer_ext_add(be, &opcode, 1);
+	emit_unsigned_LEB128(be, data);
+}
+
+static void emit_advance_pc(struct buffer_ext *be, unsigned long delta_pc)
+{
+	emit_opcode_unsigned(be, DW_LNS_advance_pc, delta_pc);
+}
+
+static void emit_advance_lineno(struct buffer_ext  *be, long delta_lineno)
+{
+	emit_opcode_signed(be, DW_LNS_advance_line, delta_lineno);
+}
+
+static void emit_lne_end_of_sequence(struct buffer_ext *be)
+{
+	emit_extended_opcode(be, DW_LNE_end_sequence, NULL, 0);
+}
+
+static void emit_set_file(struct buffer_ext *be, unsigned long idx)
+{
+	emit_opcode_unsigned(be, DW_LNS_set_file, idx);
+}
+
+static void emit_lne_define_filename(struct buffer_ext *be,
+				     const char *filename)
+{
+	buffer_ext_add(be, (void *)"", 1);
+
+	/* LNE field, strlen(filename) + zero termination, 3 bytes for: the dir entry, timestamp, filesize */
+	emit_unsigned_LEB128(be, strlen(filename) + 5);
+	emit_opcode(be, DW_LNE_define_file);
+	emit_string(be, filename);
+	/* directory index 0=do not know */
+        emit_unsigned_LEB128(be, 0);
+	/* last modification date on file 0=do not know */
+        emit_unsigned_LEB128(be, 0);
+	/* filesize 0=do not know */
+        emit_unsigned_LEB128(be, 0);
+}
+
+static void emit_lne_set_address(struct buffer_ext *be,
+				 void *address)
+{
+	emit_extended_opcode(be, DW_LNE_set_address, &address, sizeof(unsigned long));
+}
+
+static ubyte get_special_opcode(struct debug_entry *ent,
+				unsigned int last_line,
+				unsigned long last_vma)
+{
+	unsigned int temp;
+	unsigned long delta_addr;
+
+	/*
+	 * delta from line_base
+	 */
+	temp = (ent->lineno - last_line) - default_debug_line_header.line_base;
+
+	if (temp >= default_debug_line_header.line_range)
+		return 0;
+
+	/*
+	 * delta of addresses
+	 */
+	delta_addr = (ent->addr - last_vma) / default_debug_line_header.minimum_instruction_length;
+
+	/* This is not sufficient to ensure opcode will be in [0-256] but
+	 * sufficient to ensure when summing with the delta lineno we will
+	 * not overflow the unsigned long opcode */
+
+	if (delta_addr <= 256 / default_debug_line_header.line_range) {
+		unsigned long opcode = temp +
+			(delta_addr * default_debug_line_header.line_range) +
+			default_debug_line_header.opcode_base;
+
+		return opcode <= 255 ? opcode : 0;
+	}
+	return 0;
+}
+
+static void emit_lineno_info(struct buffer_ext *be,
+			     struct debug_entry *ent, size_t nr_entry,
+			     unsigned long code_addr)
+{
+	size_t i;
+
+	/*
+	 * Machine state at start of a statement program
+	 * address = 0
+	 * file    = 1
+	 * line    = 1
+	 * column  = 0
+	 * is_stmt = default_is_stmt as given in the debug_line_header
+	 * basic block = 0
+	 * end sequence = 0
+	 */
+
+	/* start state of the state machine we take care of */
+	unsigned long last_vma = code_addr;
+	char const  *cur_filename = NULL;
+	unsigned long cur_file_idx = 0;
+	int last_line = 1;
+
+	emit_lne_set_address(be, (void *)code_addr);
+
+	for (i = 0; i < nr_entry; i++, ent = debug_entry_next(ent)) {
+		int need_copy = 0;
+		ubyte special_opcode;
+
+		/*
+		 * check if filename changed, if so add it
+		 */
+		if (!cur_filename || strcmp(cur_filename, ent->name)) {
+			emit_lne_define_filename(be, ent->name);
+			cur_filename = ent->name;
+			emit_set_file(be, ++cur_file_idx);
+			need_copy = 1;
+		}
+
+		special_opcode = get_special_opcode(ent, last_line, last_vma);
+		if (special_opcode != 0) {
+			last_line = ent->lineno;
+			last_vma  = ent->addr;
+			emit_opcode(be, special_opcode);
+		} else {
+			/*
+			 * lines differ, emit line delta
+			 */
+			if (last_line != ent->lineno) {
+				emit_advance_lineno(be, ent->lineno - last_line);
+				last_line = ent->lineno;
+				need_copy = 1;
+			}
+			/*
+			 * addresses differ, emit address delta
+			 */
+			if (last_vma != ent->addr) {
+				emit_advance_pc(be, ent->addr - last_vma);
+				last_vma = ent->addr;
+				need_copy = 1;
+			}
+			/*
+			 * add new row to matrix
+			 */
+			if (need_copy)
+				emit_opcode(be, DW_LNS_copy);
+		}
+	}
+}
+
+static void add_debug_line(struct buffer_ext *be,
+	struct debug_entry *ent, size_t nr_entry,
+	unsigned long code_addr)
+{
+	struct debug_line_header * dbg_header;
+	size_t old_size;
+
+	old_size = buffer_ext_size(be);
+
+	buffer_ext_add(be, (void *)&default_debug_line_header,
+		 sizeof(default_debug_line_header));
+
+	buffer_ext_add(be, &standard_opcode_length,  sizeof(standard_opcode_length));
+
+	// empty directory entry
+	buffer_ext_add(be, (void *)"", 1);
+
+	// empty filename directory
+	buffer_ext_add(be, (void *)"", 1);
+
+	dbg_header = buffer_ext_addr(be) + old_size;
+	dbg_header->prolog_length = (buffer_ext_size(be) - old_size) -
+		offsetof(struct debug_line_header, minimum_instruction_length);
+
+	emit_lineno_info(be, ent, nr_entry, code_addr);
+
+	emit_lne_end_of_sequence(be);
+
+	dbg_header = buffer_ext_addr(be) + old_size;
+	dbg_header->total_length = (buffer_ext_size(be) - old_size) -
+		offsetof(struct debug_line_header, version);
+}
+
+static void
+add_debug_abbrev(struct buffer_ext *be)
+{
+        emit_unsigned_LEB128(be, 1);
+        emit_unsigned_LEB128(be, DW_TAG_compile_unit);
+        emit_unsigned_LEB128(be, DW_CHILDREN_yes);
+        emit_unsigned_LEB128(be, DW_AT_stmt_list);
+        emit_unsigned_LEB128(be, DW_FORM_data4);
+        emit_unsigned_LEB128(be, 0);
+        emit_unsigned_LEB128(be, 0);
+        emit_unsigned_LEB128(be, 0);
+}
+
+static void
+add_compilation_unit(struct buffer_ext *be,
+		     size_t offset_debug_line)
+{
+	struct compilation_unit_header *comp_unit_header;
+	size_t old_size = buffer_ext_size(be);
+
+	buffer_ext_add(be, &default_comp_unit_header,
+		       sizeof(default_comp_unit_header));
+
+	emit_unsigned_LEB128(be, 1);
+	emit_uword(be, offset_debug_line);
+
+	comp_unit_header = buffer_ext_addr(be) + old_size;
+	comp_unit_header->total_length = (buffer_ext_size(be) - old_size) -
+		offsetof(struct compilation_unit_header, version);
+}
+
+static int
+jit_process_debug_info(uint64_t code_addr,
+		       void *debug, int nr_debug_entries,
+		       struct buffer_ext *dl,
+		       struct buffer_ext *da,
+		       struct buffer_ext *di)
+{
+	struct debug_entry *ent = debug;
+	int i;
+
+	for (i = 0; i < nr_debug_entries; i++) {
+		ent->addr = ent->addr - code_addr;
+		ent = debug_entry_next(ent);
+	}
+	add_compilation_unit(di, buffer_ext_size(dl));
+	add_debug_line(dl, debug, nr_debug_entries, 0);
+	add_debug_abbrev(da);
+	if (0) buffer_ext_dump(da, "abbrev");
+
+	return 0;
+}
+
+int
+jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_entries)
+{
+	Elf_Data *d;
+	Elf_Scn *scn;
+	Elf_Shdr *shdr;
+	struct buffer_ext dl, di, da;
+	int ret;
+
+	buffer_ext_init(&dl);
+	buffer_ext_init(&di);
+	buffer_ext_init(&da);
+
+	ret = jit_process_debug_info(code_addr, debug, nr_debug_entries, &dl, &da, &di);
+	if (ret)
+		return -1;
+	/*
+	 * setup .debug_line section
+	 */
+	scn = elf_newscn(e);
+	if (!scn) {
+		warnx("cannot create section");
+		return -1;
+	}
+
+	d = elf_newdata(scn);
+	if (!d) {
+		warnx("cannot get new data");
+		return -1;
+	}
+
+	d->d_align = 1;
+	d->d_off = 0LL;
+	d->d_buf = buffer_ext_addr(&dl);
+	d->d_type = ELF_T_BYTE;
+	d->d_size = buffer_ext_size(&dl);
+	d->d_version = EV_CURRENT;
+
+	shdr = elf_getshdr(scn);
+	if (!shdr) {
+		warnx("cannot get section header");
+		return -1;
+	}
+
+	shdr->sh_name = 52; /* .debug_line */
+	shdr->sh_type = SHT_PROGBITS;
+	shdr->sh_addr = 0; /* must be zero or == sh_offset -> dynamic object */
+	shdr->sh_flags = 0;
+	shdr->sh_entsize = 0;
+
+	/*
+	 * setup .debug_info section
+	 */
+	scn = elf_newscn(e);
+	if (!scn) {
+		warnx("cannot create section");
+		return -1;
+	}
+
+	d = elf_newdata(scn);
+	if (!d) {
+		warnx("cannot get new data");
+		return -1;
+	}
+
+	d->d_align = 1;
+	d->d_off = 0LL;
+	d->d_buf = buffer_ext_addr(&di);
+	d->d_type = ELF_T_BYTE;
+	d->d_size = buffer_ext_size(&di);
+	d->d_version = EV_CURRENT;
+
+	shdr = elf_getshdr(scn);
+	if (!shdr) {
+		warnx("cannot get section header");
+		return -1;
+	}
+
+	shdr->sh_name = 64; /* .debug_info */
+	shdr->sh_type = SHT_PROGBITS;
+	shdr->sh_addr = 0; /* must be zero or == sh_offset -> dynamic object */
+	shdr->sh_flags = 0;
+	shdr->sh_entsize = 0;
+
+	/*
+	 * setup .debug_abbrev section
+	 */
+	scn = elf_newscn(e);
+	if (!scn) {
+		warnx("cannot create section");
+		return -1;
+	}
+
+	d = elf_newdata(scn);
+	if (!d) {
+		warnx("cannot get new data");
+		return -1;
+	}
+
+	d->d_align = 1;
+	d->d_off = 0LL;
+	d->d_buf = buffer_ext_addr(&da);
+	d->d_type = ELF_T_BYTE;
+	d->d_size = buffer_ext_size(&da);
+	d->d_version = EV_CURRENT;
+
+	shdr = elf_getshdr(scn);
+	if (!shdr) {
+		warnx("cannot get section header");
+		return -1;
+	}
+
+	shdr->sh_name = 76; /* .debug_info */
+	shdr->sh_type = SHT_PROGBITS;
+	shdr->sh_addr = 0; /* must be zero or == sh_offset -> dynamic object */
+	shdr->sh_flags = 0;
+	shdr->sh_entsize = 0;
+
+	/*
+	 * now we update the ELF image with all the sections
+	 */
+	if (elf_update(e, ELF_C_WRITE) < 0) {
+		warnx("elf_update debug failed");
+		return -1;
+	}
+	return 0;
+}

diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index f50b723..73e38e4 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c

@@ -23,6 +23,8 @@
 #include "strbuf.h"
 #include "build-id.h"
 #include "data.h"
+#include <api/fs/fs.h>
+#include "asm/bug.h"
 
 /*
  * magic2 = "PERFILE2"
@@ -868,6 +870,199 @@
 	return err;
 }
 
+static int cpu_cache_level__sort(const void *a, const void *b)
+{
+	struct cpu_cache_level *cache_a = (struct cpu_cache_level *)a;
+	struct cpu_cache_level *cache_b = (struct cpu_cache_level *)b;
+
+	return cache_a->level - cache_b->level;
+}
+
+static bool cpu_cache_level__cmp(struct cpu_cache_level *a, struct cpu_cache_level *b)
+{
+	if (a->level != b->level)
+		return false;
+
+	if (a->line_size != b->line_size)
+		return false;
+
+	if (a->sets != b->sets)
+		return false;
+
+	if (a->ways != b->ways)
+		return false;
+
+	if (strcmp(a->type, b->type))
+		return false;
+
+	if (strcmp(a->size, b->size))
+		return false;
+
+	if (strcmp(a->map, b->map))
+		return false;
+
+	return true;
+}
+
+static int cpu_cache_level__read(struct cpu_cache_level *cache, u32 cpu, u16 level)
+{
+	char path[PATH_MAX], file[PATH_MAX];
+	struct stat st;
+	size_t len;
+
+	scnprintf(path, PATH_MAX, "devices/system/cpu/cpu%d/cache/index%d/", cpu, level);
+	scnprintf(file, PATH_MAX, "%s/%s", sysfs__mountpoint(), path);
+
+	if (stat(file, &st))
+		return 1;
+
+	scnprintf(file, PATH_MAX, "%s/level", path);
+	if (sysfs__read_int(file, (int *) &cache->level))
+		return -1;
+
+	scnprintf(file, PATH_MAX, "%s/coherency_line_size", path);
+	if (sysfs__read_int(file, (int *) &cache->line_size))
+		return -1;
+
+	scnprintf(file, PATH_MAX, "%s/number_of_sets", path);
+	if (sysfs__read_int(file, (int *) &cache->sets))
+		return -1;
+
+	scnprintf(file, PATH_MAX, "%s/ways_of_associativity", path);
+	if (sysfs__read_int(file, (int *) &cache->ways))
+		return -1;
+
+	scnprintf(file, PATH_MAX, "%s/type", path);
+	if (sysfs__read_str(file, &cache->type, &len))
+		return -1;
+
+	cache->type[len] = 0;
+	cache->type = rtrim(cache->type);
+
+	scnprintf(file, PATH_MAX, "%s/size", path);
+	if (sysfs__read_str(file, &cache->size, &len)) {
+		free(cache->type);
+		return -1;
+	}
+
+	cache->size[len] = 0;
+	cache->size = rtrim(cache->size);
+
+	scnprintf(file, PATH_MAX, "%s/shared_cpu_list", path);
+	if (sysfs__read_str(file, &cache->map, &len)) {
+		free(cache->map);
+		free(cache->type);
+		return -1;
+	}
+
+	cache->map[len] = 0;
+	cache->map = rtrim(cache->map);
+	return 0;
+}
+
+static void cpu_cache_level__fprintf(FILE *out, struct cpu_cache_level *c)
+{
+	fprintf(out, "L%d %-15s %8s [%s]\n", c->level, c->type, c->size, c->map);
+}
+
+static int build_caches(struct cpu_cache_level caches[], u32 size, u32 *cntp)
+{
+	u32 i, cnt = 0;
+	long ncpus;
+	u32 nr, cpu;
+	u16 level;
+
+	ncpus = sysconf(_SC_NPROCESSORS_CONF);
+	if (ncpus < 0)
+		return -1;
+
+	nr = (u32)(ncpus & UINT_MAX);
+
+	for (cpu = 0; cpu < nr; cpu++) {
+		for (level = 0; level < 10; level++) {
+			struct cpu_cache_level c;
+			int err;
+
+			err = cpu_cache_level__read(&c, cpu, level);
+			if (err < 0)
+				return err;
+
+			if (err == 1)
+				break;
+
+			for (i = 0; i < cnt; i++) {
+				if (cpu_cache_level__cmp(&c, &caches[i]))
+					break;
+			}
+
+			if (i == cnt)
+				caches[cnt++] = c;
+			else
+				cpu_cache_level__free(&c);
+
+			if (WARN_ONCE(cnt == size, "way too many cpu caches.."))
+				goto out;
+		}
+	}
+ out:
+	*cntp = cnt;
+	return 0;
+}
+
+#define MAX_CACHES 2000
+
+static int write_cache(int fd, struct perf_header *h __maybe_unused,
+			  struct perf_evlist *evlist __maybe_unused)
+{
+	struct cpu_cache_level caches[MAX_CACHES];
+	u32 cnt = 0, i, version = 1;
+	int ret;
+
+	ret = build_caches(caches, MAX_CACHES, &cnt);
+	if (ret)
+		goto out;
+
+	qsort(&caches, cnt, sizeof(struct cpu_cache_level), cpu_cache_level__sort);
+
+	ret = do_write(fd, &version, sizeof(u32));
+	if (ret < 0)
+		goto out;
+
+	ret = do_write(fd, &cnt, sizeof(u32));
+	if (ret < 0)
+		goto out;
+
+	for (i = 0; i < cnt; i++) {
+		struct cpu_cache_level *c = &caches[i];
+
+		#define _W(v)					\
+			ret = do_write(fd, &c->v, sizeof(u32));	\
+			if (ret < 0)				\
+				goto out;
+
+		_W(level)
+		_W(line_size)
+		_W(sets)
+		_W(ways)
+		#undef _W
+
+		#define _W(v)						\
+			ret = do_write_string(fd, (const char *) c->v);	\
+			if (ret < 0)					\
+				goto out;
+
+		_W(type)
+		_W(size)
+		_W(map)
+		#undef _W
+	}
+
+out:
+	for (i = 0; i < cnt; i++)
+		cpu_cache_level__free(&caches[i]);
+	return ret;
+}
+
 static int write_stat(int fd __maybe_unused,
 		      struct perf_header *h __maybe_unused,
 		      struct perf_evlist *evlist __maybe_unused)
@@ -1172,6 +1367,18 @@
 	fprintf(fp, "# contains stat data\n");
 }
 
+static void print_cache(struct perf_header *ph __maybe_unused,
+			int fd __maybe_unused, FILE *fp __maybe_unused)
+{
+	int i;
+
+	fprintf(fp, "# CPU cache info:\n");
+	for (i = 0; i < ph->env.caches_cnt; i++) {
+		fprintf(fp, "#  ");
+		cpu_cache_level__fprintf(fp, &ph->env.caches[i]);
+	}
+}
+
 static void print_pmu_mappings(struct perf_header *ph, int fd __maybe_unused,
 			       FILE *fp)
 {
@@ -1920,6 +2127,68 @@
 	return err;
 }
 
+static int process_cache(struct perf_file_section *section __maybe_unused,
+			 struct perf_header *ph __maybe_unused, int fd __maybe_unused,
+			 void *data __maybe_unused)
+{
+	struct cpu_cache_level *caches;
+	u32 cnt, i, version;
+
+	if (readn(fd, &version, sizeof(version)) != sizeof(version))
+		return -1;
+
+	if (ph->needs_swap)
+		version = bswap_32(version);
+
+	if (version != 1)
+		return -1;
+
+	if (readn(fd, &cnt, sizeof(cnt)) != sizeof(cnt))
+		return -1;
+
+	if (ph->needs_swap)
+		cnt = bswap_32(cnt);
+
+	caches = zalloc(sizeof(*caches) * cnt);
+	if (!caches)
+		return -1;
+
+	for (i = 0; i < cnt; i++) {
+		struct cpu_cache_level c;
+
+		#define _R(v)						\
+			if (readn(fd, &c.v, sizeof(u32)) != sizeof(u32))\
+				goto out_free_caches;			\
+			if (ph->needs_swap)				\
+				c.v = bswap_32(c.v);			\
+
+		_R(level)
+		_R(line_size)
+		_R(sets)
+		_R(ways)
+		#undef _R
+
+		#define _R(v)				\
+			c.v = do_read_string(fd, ph);	\
+			if (!c.v)			\
+				goto out_free_caches;
+
+		_R(type)
+		_R(size)
+		_R(map)
+		#undef _R
+
+		caches[i] = c;
+	}
+
+	ph->env.caches = caches;
+	ph->env.caches_cnt = cnt;
+	return 0;
+out_free_caches:
+	free(caches);
+	return -1;
+}
+
 struct feature_ops {
 	int (*write)(int fd, struct perf_header *h, struct perf_evlist *evlist);
 	void (*print)(struct perf_header *h, int fd, FILE *fp);
@@ -1962,6 +2231,7 @@
 	FEAT_OPP(HEADER_GROUP_DESC,	group_desc),
 	FEAT_OPP(HEADER_AUXTRACE,	auxtrace),
 	FEAT_OPA(HEADER_STAT,		stat),
+	FEAT_OPF(HEADER_CACHE,		cache),
 };
 
 struct header_print_data {

diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index cff9892..3d87ca8 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h

@@ -32,6 +32,7 @@
 	HEADER_GROUP_DESC,
 	HEADER_AUXTRACE,
 	HEADER_STAT,
+	HEADER_CACHE,
 	HEADER_LAST_FEATURE,
 	HEADER_FEAT_BITS	= 256,
 };

diff --git a/tools/perf/util/help-unknown-cmd.c b/tools/perf/util/help-unknown-cmd.c
index dc1e41c..43a98a4 100644
--- a/tools/perf/util/help-unknown-cmd.c
+++ b/tools/perf/util/help-unknown-cmd.c

@@ -6,7 +6,8 @@
 static int autocorrect;
 static struct cmdnames aliases;
 
-static int perf_unknown_cmd_config(const char *var, const char *value, void *cb)
+static int perf_unknown_cmd_config(const char *var, const char *value,
+				   void *cb __maybe_unused)
 {
 	if (!strcmp(var, "help.autocorrect"))
 		autocorrect = perf_config_int(var,value);
@@ -14,7 +15,7 @@
 	if (!prefixcmp(var, "alias."))
 		add_cmdname(&aliases, var + 6, strlen(var + 6));
 
-	return perf_default_config(var, value, cb);
+	return 0;
 }
 
 static int levenshtein_compare(const void *p1, const void *p2)

diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 68a7612..290b3cb 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c

@@ -179,6 +179,9 @@
 	if (h->transaction)
 		hists__new_col_len(hists, HISTC_TRANSACTION,
 				   hist_entry__transaction_len());
+
+	if (h->trace_output)
+		hists__new_col_len(hists, HISTC_TRACE, strlen(h->trace_output));
 }
 
 void hists__output_recalc_col_len(struct hists *hists, int max_rows)
@@ -245,6 +248,8 @@
 	/* XXX need decay for weight too? */
 }
 
+static void hists__delete_entry(struct hists *hists, struct hist_entry *he);
+
 static bool hists__decay_entry(struct hists *hists, struct hist_entry *he)
 {
 	u64 prev_period = he->stat.period;
@@ -260,21 +265,45 @@
 
 	diff = prev_period - he->stat.period;
 
-	hists->stats.total_period -= diff;
-	if (!he->filtered)
-		hists->stats.total_non_filtered_period -= diff;
+	if (!he->depth) {
+		hists->stats.total_period -= diff;
+		if (!he->filtered)
+			hists->stats.total_non_filtered_period -= diff;
+	}
+
+	if (!he->leaf) {
+		struct hist_entry *child;
+		struct rb_node *node = rb_first(&he->hroot_out);
+		while (node) {
+			child = rb_entry(node, struct hist_entry, rb_node);
+			node = rb_next(node);
+
+			if (hists__decay_entry(hists, child))
+				hists__delete_entry(hists, child);
+		}
+	}
 
 	return he->stat.period == 0;
 }
 
 static void hists__delete_entry(struct hists *hists, struct hist_entry *he)
 {
-	rb_erase(&he->rb_node, &hists->entries);
+	struct rb_root *root_in;
+	struct rb_root *root_out;
 
-	if (sort__need_collapse)
-		rb_erase(&he->rb_node_in, &hists->entries_collapsed);
-	else
-		rb_erase(&he->rb_node_in, hists->entries_in);
+	if (he->parent_he) {
+		root_in  = &he->parent_he->hroot_in;
+		root_out = &he->parent_he->hroot_out;
+	} else {
+		if (sort__need_collapse)
+			root_in = &hists->entries_collapsed;
+		else
+			root_in = hists->entries_in;
+		root_out = &hists->entries;
+	}
+
+	rb_erase(&he->rb_node_in, root_in);
+	rb_erase(&he->rb_node, root_out);
 
 	--hists->nr_entries;
 	if (!he->filtered)
@@ -393,6 +422,9 @@
 		}
 		INIT_LIST_HEAD(&he->pairs.node);
 		thread__get(he->thread);
+
+		if (!symbol_conf.report_hierarchy)
+			he->leaf = true;
 	}
 
 	return he;
@@ -405,6 +437,16 @@
 	return 0;
 }
 
+static void hist_entry__add_callchain_period(struct hist_entry *he, u64 period)
+{
+	if (!symbol_conf.use_callchain)
+		return;
+
+	he->hists->callchain_period += period;
+	if (!he->filtered)
+		he->hists->callchain_non_filtered_period += period;
+}
+
 static struct hist_entry *hists__findnew_entry(struct hists *hists,
 					       struct hist_entry *entry,
 					       struct addr_location *al,
@@ -432,8 +474,10 @@
 		cmp = hist_entry__cmp(he, entry);
 
 		if (!cmp) {
-			if (sample_self)
+			if (sample_self) {
 				he_stat__add_period(&he->stat, period, weight);
+				hist_entry__add_callchain_period(he, period);
+			}
 			if (symbol_conf.cumulate_callchain)
 				he_stat__add_period(he->stat_acc, period, weight);
 
@@ -466,6 +510,8 @@
 	if (!he)
 		return NULL;
 
+	if (sample_self)
+		hist_entry__add_callchain_period(he, period);
 	hists->nr_entries++;
 
 	rb_link_node(&he->rb_node_in, parent, p);
@@ -951,10 +997,15 @@
 int64_t
 hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
 {
+	struct hists *hists = left->hists;
 	struct perf_hpp_fmt *fmt;
 	int64_t cmp = 0;
 
-	perf_hpp__for_each_sort_list(fmt) {
+	hists__for_each_sort_list(hists, fmt) {
+		if (perf_hpp__is_dynamic_entry(fmt) &&
+		    !perf_hpp__defined_dynamic_entry(fmt, hists))
+			continue;
+
 		cmp = fmt->cmp(fmt, left, right);
 		if (cmp)
 			break;
@@ -966,10 +1017,15 @@
 int64_t
 hist_entry__collapse(struct hist_entry *left, struct hist_entry *right)
 {
+	struct hists *hists = left->hists;
 	struct perf_hpp_fmt *fmt;
 	int64_t cmp = 0;
 
-	perf_hpp__for_each_sort_list(fmt) {
+	hists__for_each_sort_list(hists, fmt) {
+		if (perf_hpp__is_dynamic_entry(fmt) &&
+		    !perf_hpp__defined_dynamic_entry(fmt, hists))
+			continue;
+
 		cmp = fmt->collapse(fmt, left, right);
 		if (cmp)
 			break;
@@ -1006,17 +1062,250 @@
 }
 
 /*
+ * If this is not the last column, then we need to pad it according to the
+ * pre-calculated max lenght for this column, otherwise don't bother adding
+ * spaces because that would break viewing this with, for instance, 'less',
+ * that would show tons of trailing spaces when a long C++ demangled method
+ * names is sampled.
+*/
+int hist_entry__snprintf_alignment(struct hist_entry *he, struct perf_hpp *hpp,
+				   struct perf_hpp_fmt *fmt, int printed)
+{
+	if (!list_is_last(&fmt->list, &he->hists->hpp_list->fields)) {
+		const int width = fmt->width(fmt, hpp, hists_to_evsel(he->hists));
+		if (printed < width) {
+			advance_hpp(hpp, printed);
+			printed = scnprintf(hpp->buf, hpp->size, "%-*s", width - printed, " ");
+		}
+	}
+
+	return printed;
+}
+
+/*
  * collapse the histogram
  */
 
-bool hists__collapse_insert_entry(struct hists *hists __maybe_unused,
-				  struct rb_root *root, struct hist_entry *he)
+static void hists__apply_filters(struct hists *hists, struct hist_entry *he);
+static void hists__remove_entry_filter(struct hists *hists, struct hist_entry *he,
+				       enum hist_filter type);
+
+typedef bool (*fmt_chk_fn)(struct perf_hpp_fmt *fmt);
+
+static bool check_thread_entry(struct perf_hpp_fmt *fmt)
+{
+	return perf_hpp__is_thread_entry(fmt) || perf_hpp__is_comm_entry(fmt);
+}
+
+static void hist_entry__check_and_remove_filter(struct hist_entry *he,
+						enum hist_filter type,
+						fmt_chk_fn check)
+{
+	struct perf_hpp_fmt *fmt;
+	bool type_match = false;
+	struct hist_entry *parent = he->parent_he;
+
+	switch (type) {
+	case HIST_FILTER__THREAD:
+		if (symbol_conf.comm_list == NULL &&
+		    symbol_conf.pid_list == NULL &&
+		    symbol_conf.tid_list == NULL)
+			return;
+		break;
+	case HIST_FILTER__DSO:
+		if (symbol_conf.dso_list == NULL)
+			return;
+		break;
+	case HIST_FILTER__SYMBOL:
+		if (symbol_conf.sym_list == NULL)
+			return;
+		break;
+	case HIST_FILTER__PARENT:
+	case HIST_FILTER__GUEST:
+	case HIST_FILTER__HOST:
+	case HIST_FILTER__SOCKET:
+	default:
+		return;
+	}
+
+	/* if it's filtered by own fmt, it has to have filter bits */
+	perf_hpp_list__for_each_format(he->hpp_list, fmt) {
+		if (check(fmt)) {
+			type_match = true;
+			break;
+		}
+	}
+
+	if (type_match) {
+		/*
+		 * If the filter is for current level entry, propagate
+		 * filter marker to parents.  The marker bit was
+		 * already set by default so it only needs to clear
+		 * non-filtered entries.
+		 */
+		if (!(he->filtered & (1 << type))) {
+			while (parent) {
+				parent->filtered &= ~(1 << type);
+				parent = parent->parent_he;
+			}
+		}
+	} else {
+		/*
+		 * If current entry doesn't have matching formats, set
+		 * filter marker for upper level entries.  it will be
+		 * cleared if its lower level entries is not filtered.
+		 *
+		 * For lower-level entries, it inherits parent's
+		 * filter bit so that lower level entries of a
+		 * non-filtered entry won't set the filter marker.
+		 */
+		if (parent == NULL)
+			he->filtered |= (1 << type);
+		else
+			he->filtered |= (parent->filtered & (1 << type));
+	}
+}
+
+static void hist_entry__apply_hierarchy_filters(struct hist_entry *he)
+{
+	hist_entry__check_and_remove_filter(he, HIST_FILTER__THREAD,
+					    check_thread_entry);
+
+	hist_entry__check_and_remove_filter(he, HIST_FILTER__DSO,
+					    perf_hpp__is_dso_entry);
+
+	hist_entry__check_and_remove_filter(he, HIST_FILTER__SYMBOL,
+					    perf_hpp__is_sym_entry);
+
+	hists__apply_filters(he->hists, he);
+}
+
+static struct hist_entry *hierarchy_insert_entry(struct hists *hists,
+						 struct rb_root *root,
+						 struct hist_entry *he,
+						 struct hist_entry *parent_he,
+						 struct perf_hpp_list *hpp_list)
+{
+	struct rb_node **p = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct hist_entry *iter, *new;
+	struct perf_hpp_fmt *fmt;
+	int64_t cmp;
+
+	while (*p != NULL) {
+		parent = *p;
+		iter = rb_entry(parent, struct hist_entry, rb_node_in);
+
+		cmp = 0;
+		perf_hpp_list__for_each_sort_list(hpp_list, fmt) {
+			cmp = fmt->collapse(fmt, iter, he);
+			if (cmp)
+				break;
+		}
+
+		if (!cmp) {
+			he_stat__add_stat(&iter->stat, &he->stat);
+			return iter;
+		}
+
+		if (cmp < 0)
+			p = &parent->rb_left;
+		else
+			p = &parent->rb_right;
+	}
+
+	new = hist_entry__new(he, true);
+	if (new == NULL)
+		return NULL;
+
+	hists->nr_entries++;
+
+	/* save related format list for output */
+	new->hpp_list = hpp_list;
+	new->parent_he = parent_he;
+
+	hist_entry__apply_hierarchy_filters(new);
+
+	/* some fields are now passed to 'new' */
+	perf_hpp_list__for_each_sort_list(hpp_list, fmt) {
+		if (perf_hpp__is_trace_entry(fmt) || perf_hpp__is_dynamic_entry(fmt))
+			he->trace_output = NULL;
+		else
+			new->trace_output = NULL;
+
+		if (perf_hpp__is_srcline_entry(fmt))
+			he->srcline = NULL;
+		else
+			new->srcline = NULL;
+
+		if (perf_hpp__is_srcfile_entry(fmt))
+			he->srcfile = NULL;
+		else
+			new->srcfile = NULL;
+	}
+
+	rb_link_node(&new->rb_node_in, parent, p);
+	rb_insert_color(&new->rb_node_in, root);
+	return new;
+}
+
+static int hists__hierarchy_insert_entry(struct hists *hists,
+					 struct rb_root *root,
+					 struct hist_entry *he)
+{
+	struct perf_hpp_list_node *node;
+	struct hist_entry *new_he = NULL;
+	struct hist_entry *parent = NULL;
+	int depth = 0;
+	int ret = 0;
+
+	list_for_each_entry(node, &hists->hpp_formats, list) {
+		/* skip period (overhead) and elided columns */
+		if (node->level == 0 || node->skip)
+			continue;
+
+		/* insert copy of 'he' for each fmt into the hierarchy */
+		new_he = hierarchy_insert_entry(hists, root, he, parent, &node->hpp);
+		if (new_he == NULL) {
+			ret = -1;
+			break;
+		}
+
+		root = &new_he->hroot_in;
+		new_he->depth = depth++;
+		parent = new_he;
+	}
+
+	if (new_he) {
+		new_he->leaf = true;
+
+		if (symbol_conf.use_callchain) {
+			callchain_cursor_reset(&callchain_cursor);
+			if (callchain_merge(&callchain_cursor,
+					    new_he->callchain,
+					    he->callchain) < 0)
+				ret = -1;
+		}
+	}
+
+	/* 'he' is no longer used */
+	hist_entry__delete(he);
+
+	/* return 0 (or -1) since it already applied filters */
+	return ret;
+}
+
+int hists__collapse_insert_entry(struct hists *hists, struct rb_root *root,
+				 struct hist_entry *he)
 {
 	struct rb_node **p = &root->rb_node;
 	struct rb_node *parent = NULL;
 	struct hist_entry *iter;
 	int64_t cmp;
 
+	if (symbol_conf.report_hierarchy)
+		return hists__hierarchy_insert_entry(hists, root, he);
+
 	while (*p != NULL) {
 		parent = *p;
 		iter = rb_entry(parent, struct hist_entry, rb_node_in);
@@ -1024,18 +1313,21 @@
 		cmp = hist_entry__collapse(iter, he);
 
 		if (!cmp) {
+			int ret = 0;
+
 			he_stat__add_stat(&iter->stat, &he->stat);
 			if (symbol_conf.cumulate_callchain)
 				he_stat__add_stat(iter->stat_acc, he->stat_acc);
 
 			if (symbol_conf.use_callchain) {
 				callchain_cursor_reset(&callchain_cursor);
-				callchain_merge(&callchain_cursor,
-						iter->callchain,
-						he->callchain);
+				if (callchain_merge(&callchain_cursor,
+						    iter->callchain,
+						    he->callchain) < 0)
+					ret = -1;
 			}
 			hist_entry__delete(he);
-			return false;
+			return ret;
 		}
 
 		if (cmp < 0)
@@ -1047,7 +1339,7 @@
 
 	rb_link_node(&he->rb_node_in, parent, p);
 	rb_insert_color(&he->rb_node_in, root);
-	return true;
+	return 1;
 }
 
 struct rb_root *hists__get_rotate_entries_in(struct hists *hists)
@@ -1073,14 +1365,15 @@
 	hists__filter_entry_by_socket(hists, he);
 }
 
-void hists__collapse_resort(struct hists *hists, struct ui_progress *prog)
+int hists__collapse_resort(struct hists *hists, struct ui_progress *prog)
 {
 	struct rb_root *root;
 	struct rb_node *next;
 	struct hist_entry *n;
+	int ret;
 
 	if (!sort__need_collapse)
-		return;
+		return 0;
 
 	hists->nr_entries = 0;
 
@@ -1095,7 +1388,11 @@
 		next = rb_next(&n->rb_node_in);
 
 		rb_erase(&n->rb_node_in, root);
-		if (hists__collapse_insert_entry(hists, &hists->entries_collapsed, n)) {
+		ret = hists__collapse_insert_entry(hists, &hists->entries_collapsed, n);
+		if (ret < 0)
+			return -1;
+
+		if (ret) {
 			/*
 			 * If it wasn't combined with one of the entries already
 			 * collapsed, we need to apply the filters that may have
@@ -1106,14 +1403,16 @@
 		if (prog)
 			ui_progress__update(prog, 1);
 	}
+	return 0;
 }
 
 static int hist_entry__sort(struct hist_entry *a, struct hist_entry *b)
 {
+	struct hists *hists = a->hists;
 	struct perf_hpp_fmt *fmt;
 	int64_t cmp = 0;
 
-	perf_hpp__for_each_sort_list(fmt) {
+	hists__for_each_sort_list(hists, fmt) {
 		if (perf_hpp__should_skip(fmt, a->hists))
 			continue;
 
@@ -1154,6 +1453,113 @@
 	hists->stats.total_period += h->stat.period;
 }
 
+static void hierarchy_recalc_total_periods(struct hists *hists)
+{
+	struct rb_node *node;
+	struct hist_entry *he;
+
+	node = rb_first(&hists->entries);
+
+	hists->stats.total_period = 0;
+	hists->stats.total_non_filtered_period = 0;
+
+	/*
+	 * recalculate total period using top-level entries only
+	 * since lower level entries only see non-filtered entries
+	 * but upper level entries have sum of both entries.
+	 */
+	while (node) {
+		he = rb_entry(node, struct hist_entry, rb_node);
+		node = rb_next(node);
+
+		hists->stats.total_period += he->stat.period;
+		if (!he->filtered)
+			hists->stats.total_non_filtered_period += he->stat.period;
+	}
+}
+
+static void hierarchy_insert_output_entry(struct rb_root *root,
+					  struct hist_entry *he)
+{
+	struct rb_node **p = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct hist_entry *iter;
+	struct perf_hpp_fmt *fmt;
+
+	while (*p != NULL) {
+		parent = *p;
+		iter = rb_entry(parent, struct hist_entry, rb_node);
+
+		if (hist_entry__sort(he, iter) > 0)
+			p = &parent->rb_left;
+		else
+			p = &parent->rb_right;
+	}
+
+	rb_link_node(&he->rb_node, parent, p);
+	rb_insert_color(&he->rb_node, root);
+
+	/* update column width of dynamic entry */
+	perf_hpp_list__for_each_sort_list(he->hpp_list, fmt) {
+		if (perf_hpp__is_dynamic_entry(fmt))
+			fmt->sort(fmt, he, NULL);
+	}
+}
+
+static void hists__hierarchy_output_resort(struct hists *hists,
+					   struct ui_progress *prog,
+					   struct rb_root *root_in,
+					   struct rb_root *root_out,
+					   u64 min_callchain_hits,
+					   bool use_callchain)
+{
+	struct rb_node *node;
+	struct hist_entry *he;
+
+	*root_out = RB_ROOT;
+	node = rb_first(root_in);
+
+	while (node) {
+		he = rb_entry(node, struct hist_entry, rb_node_in);
+		node = rb_next(node);
+
+		hierarchy_insert_output_entry(root_out, he);
+
+		if (prog)
+			ui_progress__update(prog, 1);
+
+		if (!he->leaf) {
+			hists__hierarchy_output_resort(hists, prog,
+						       &he->hroot_in,
+						       &he->hroot_out,
+						       min_callchain_hits,
+						       use_callchain);
+			hists->nr_entries++;
+			if (!he->filtered) {
+				hists->nr_non_filtered_entries++;
+				hists__calc_col_len(hists, he);
+			}
+
+			continue;
+		}
+
+		if (!use_callchain)
+			continue;
+
+		if (callchain_param.mode == CHAIN_GRAPH_REL) {
+			u64 total = he->stat.period;
+
+			if (symbol_conf.cumulate_callchain)
+				total = he->stat_acc->period;
+
+			min_callchain_hits = total * (callchain_param.min_percent / 100);
+		}
+
+		callchain_param.sort(&he->sorted_chain, he->callchain,
+				     min_callchain_hits, &callchain_param);
+	}
+}
+
 static void __hists__insert_output_entry(struct rb_root *entries,
 					 struct hist_entry *he,
 					 u64 min_callchain_hits,
@@ -1162,10 +1568,20 @@
 	struct rb_node **p = &entries->rb_node;
 	struct rb_node *parent = NULL;
 	struct hist_entry *iter;
+	struct perf_hpp_fmt *fmt;
 
-	if (use_callchain)
+	if (use_callchain) {
+		if (callchain_param.mode == CHAIN_GRAPH_REL) {
+			u64 total = he->stat.period;
+
+			if (symbol_conf.cumulate_callchain)
+				total = he->stat_acc->period;
+
+			min_callchain_hits = total * (callchain_param.min_percent / 100);
+		}
 		callchain_param.sort(&he->sorted_chain, he->callchain,
 				      min_callchain_hits, &callchain_param);
+	}
 
 	while (*p != NULL) {
 		parent = *p;
@@ -1179,23 +1595,41 @@
 
 	rb_link_node(&he->rb_node, parent, p);
 	rb_insert_color(&he->rb_node, entries);
+
+	perf_hpp_list__for_each_sort_list(&perf_hpp_list, fmt) {
+		if (perf_hpp__is_dynamic_entry(fmt) &&
+		    perf_hpp__defined_dynamic_entry(fmt, he->hists))
+			fmt->sort(fmt, he, NULL);  /* update column width */
+	}
 }
 
-void hists__output_resort(struct hists *hists, struct ui_progress *prog)
+static void output_resort(struct hists *hists, struct ui_progress *prog,
+			  bool use_callchain)
 {
 	struct rb_root *root;
 	struct rb_node *next;
 	struct hist_entry *n;
+	u64 callchain_total;
 	u64 min_callchain_hits;
-	struct perf_evsel *evsel = hists_to_evsel(hists);
-	bool use_callchain;
 
-	if (evsel && symbol_conf.use_callchain && !symbol_conf.show_ref_callgraph)
-		use_callchain = evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN;
-	else
-		use_callchain = symbol_conf.use_callchain;
+	callchain_total = hists->callchain_period;
+	if (symbol_conf.filter_relative)
+		callchain_total = hists->callchain_non_filtered_period;
 
-	min_callchain_hits = hists->stats.total_period * (callchain_param.min_percent / 100);
+	min_callchain_hits = callchain_total * (callchain_param.min_percent / 100);
+
+	hists__reset_stats(hists);
+	hists__reset_col_len(hists);
+
+	if (symbol_conf.report_hierarchy) {
+		hists__hierarchy_output_resort(hists, prog,
+					       &hists->entries_collapsed,
+					       &hists->entries,
+					       min_callchain_hits,
+					       use_callchain);
+		hierarchy_recalc_total_periods(hists);
+		return;
+	}
 
 	if (sort__need_collapse)
 		root = &hists->entries_collapsed;
@@ -1205,9 +1639,6 @@
 	next = rb_first(root);
 	hists->entries = RB_ROOT;
 
-	hists__reset_stats(hists);
-	hists__reset_col_len(hists);
-
 	while (next) {
 		n = rb_entry(next, struct hist_entry, rb_node_in);
 		next = rb_next(&n->rb_node_in);
@@ -1223,15 +1654,136 @@
 	}
 }
 
+void perf_evsel__output_resort(struct perf_evsel *evsel, struct ui_progress *prog)
+{
+	bool use_callchain;
+
+	if (evsel && symbol_conf.use_callchain && !symbol_conf.show_ref_callgraph)
+		use_callchain = evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN;
+	else
+		use_callchain = symbol_conf.use_callchain;
+
+	output_resort(evsel__hists(evsel), prog, use_callchain);
+}
+
+void hists__output_resort(struct hists *hists, struct ui_progress *prog)
+{
+	output_resort(hists, prog, symbol_conf.use_callchain);
+}
+
+static bool can_goto_child(struct hist_entry *he, enum hierarchy_move_dir hmd)
+{
+	if (he->leaf || hmd == HMD_FORCE_SIBLING)
+		return false;
+
+	if (he->unfolded || hmd == HMD_FORCE_CHILD)
+		return true;
+
+	return false;
+}
+
+struct rb_node *rb_hierarchy_last(struct rb_node *node)
+{
+	struct hist_entry *he = rb_entry(node, struct hist_entry, rb_node);
+
+	while (can_goto_child(he, HMD_NORMAL)) {
+		node = rb_last(&he->hroot_out);
+		he = rb_entry(node, struct hist_entry, rb_node);
+	}
+	return node;
+}
+
+struct rb_node *__rb_hierarchy_next(struct rb_node *node, enum hierarchy_move_dir hmd)
+{
+	struct hist_entry *he = rb_entry(node, struct hist_entry, rb_node);
+
+	if (can_goto_child(he, hmd))
+		node = rb_first(&he->hroot_out);
+	else
+		node = rb_next(node);
+
+	while (node == NULL) {
+		he = he->parent_he;
+		if (he == NULL)
+			break;
+
+		node = rb_next(&he->rb_node);
+	}
+	return node;
+}
+
+struct rb_node *rb_hierarchy_prev(struct rb_node *node)
+{
+	struct hist_entry *he = rb_entry(node, struct hist_entry, rb_node);
+
+	node = rb_prev(node);
+	if (node)
+		return rb_hierarchy_last(node);
+
+	he = he->parent_he;
+	if (he == NULL)
+		return NULL;
+
+	return &he->rb_node;
+}
+
+bool hist_entry__has_hierarchy_children(struct hist_entry *he, float limit)
+{
+	struct rb_node *node;
+	struct hist_entry *child;
+	float percent;
+
+	if (he->leaf)
+		return false;
+
+	node = rb_first(&he->hroot_out);
+	child = rb_entry(node, struct hist_entry, rb_node);
+
+	while (node && child->filtered) {
+		node = rb_next(node);
+		child = rb_entry(node, struct hist_entry, rb_node);
+	}
+
+	if (node)
+		percent = hist_entry__get_percent_limit(child);
+	else
+		percent = 0;
+
+	return node && percent >= limit;
+}
+
 static void hists__remove_entry_filter(struct hists *hists, struct hist_entry *h,
 				       enum hist_filter filter)
 {
 	h->filtered &= ~(1 << filter);
+
+	if (symbol_conf.report_hierarchy) {
+		struct hist_entry *parent = h->parent_he;
+
+		while (parent) {
+			he_stat__add_stat(&parent->stat, &h->stat);
+
+			parent->filtered &= ~(1 << filter);
+
+			if (parent->filtered)
+				goto next;
+
+			/* force fold unfiltered entry for simplicity */
+			parent->unfolded = false;
+			parent->has_no_entry = false;
+			parent->row_offset = 0;
+			parent->nr_rows = 0;
+next:
+			parent = parent->parent_he;
+		}
+	}
+
 	if (h->filtered)
 		return;
 
 	/* force fold unfiltered entry for simplicity */
 	h->unfolded = false;
+	h->has_no_entry = false;
 	h->row_offset = 0;
 	h->nr_rows = 0;
 
@@ -1254,28 +1806,6 @@
 	return false;
 }
 
-void hists__filter_by_dso(struct hists *hists)
-{
-	struct rb_node *nd;
-
-	hists->stats.nr_non_filtered_samples = 0;
-
-	hists__reset_filter_stats(hists);
-	hists__reset_col_len(hists);
-
-	for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
-		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
-
-		if (symbol_conf.exclude_other && !h->parent)
-			continue;
-
-		if (hists__filter_entry_by_dso(hists, h))
-			continue;
-
-		hists__remove_entry_filter(hists, h, HIST_FILTER__DSO);
-	}
-}
-
 static bool hists__filter_entry_by_thread(struct hists *hists,
 					  struct hist_entry *he)
 {
@@ -1288,25 +1818,6 @@
 	return false;
 }
 
-void hists__filter_by_thread(struct hists *hists)
-{
-	struct rb_node *nd;
-
-	hists->stats.nr_non_filtered_samples = 0;
-
-	hists__reset_filter_stats(hists);
-	hists__reset_col_len(hists);
-
-	for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
-		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
-
-		if (hists__filter_entry_by_thread(hists, h))
-			continue;
-
-		hists__remove_entry_filter(hists, h, HIST_FILTER__THREAD);
-	}
-}
-
 static bool hists__filter_entry_by_symbol(struct hists *hists,
 					  struct hist_entry *he)
 {
@@ -1320,25 +1831,6 @@
 	return false;
 }
 
-void hists__filter_by_symbol(struct hists *hists)
-{
-	struct rb_node *nd;
-
-	hists->stats.nr_non_filtered_samples = 0;
-
-	hists__reset_filter_stats(hists);
-	hists__reset_col_len(hists);
-
-	for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
-		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
-
-		if (hists__filter_entry_by_symbol(hists, h))
-			continue;
-
-		hists__remove_entry_filter(hists, h, HIST_FILTER__SYMBOL);
-	}
-}
-
 static bool hists__filter_entry_by_socket(struct hists *hists,
 					  struct hist_entry *he)
 {
@@ -1351,7 +1843,9 @@
 	return false;
 }
 
-void hists__filter_by_socket(struct hists *hists)
+typedef bool (*filter_fn_t)(struct hists *hists, struct hist_entry *he);
+
+static void hists__filter_by_type(struct hists *hists, int type, filter_fn_t filter)
 {
 	struct rb_node *nd;
 
@@ -1363,13 +1857,157 @@
 	for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
 		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
 
-		if (hists__filter_entry_by_socket(hists, h))
+		if (filter(hists, h))
 			continue;
 
-		hists__remove_entry_filter(hists, h, HIST_FILTER__SOCKET);
+		hists__remove_entry_filter(hists, h, type);
 	}
 }
 
+static void resort_filtered_entry(struct rb_root *root, struct hist_entry *he)
+{
+	struct rb_node **p = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct hist_entry *iter;
+	struct rb_root new_root = RB_ROOT;
+	struct rb_node *nd;
+
+	while (*p != NULL) {
+		parent = *p;
+		iter = rb_entry(parent, struct hist_entry, rb_node);
+
+		if (hist_entry__sort(he, iter) > 0)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	rb_link_node(&he->rb_node, parent, p);
+	rb_insert_color(&he->rb_node, root);
+
+	if (he->leaf || he->filtered)
+		return;
+
+	nd = rb_first(&he->hroot_out);
+	while (nd) {
+		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
+
+		nd = rb_next(nd);
+		rb_erase(&h->rb_node, &he->hroot_out);
+
+		resort_filtered_entry(&new_root, h);
+	}
+
+	he->hroot_out = new_root;
+}
+
+static void hists__filter_hierarchy(struct hists *hists, int type, const void *arg)
+{
+	struct rb_node *nd;
+	struct rb_root new_root = RB_ROOT;
+
+	hists->stats.nr_non_filtered_samples = 0;
+
+	hists__reset_filter_stats(hists);
+	hists__reset_col_len(hists);
+
+	nd = rb_first(&hists->entries);
+	while (nd) {
+		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
+		int ret;
+
+		ret = hist_entry__filter(h, type, arg);
+
+		/*
+		 * case 1. non-matching type
+		 * zero out the period, set filter marker and move to child
+		 */
+		if (ret < 0) {
+			memset(&h->stat, 0, sizeof(h->stat));
+			h->filtered |= (1 << type);
+
+			nd = __rb_hierarchy_next(&h->rb_node, HMD_FORCE_CHILD);
+		}
+		/*
+		 * case 2. matched type (filter out)
+		 * set filter marker and move to next
+		 */
+		else if (ret == 1) {
+			h->filtered |= (1 << type);
+
+			nd = __rb_hierarchy_next(&h->rb_node, HMD_FORCE_SIBLING);
+		}
+		/*
+		 * case 3. ok (not filtered)
+		 * add period to hists and parents, erase the filter marker
+		 * and move to next sibling
+		 */
+		else {
+			hists__remove_entry_filter(hists, h, type);
+
+			nd = __rb_hierarchy_next(&h->rb_node, HMD_FORCE_SIBLING);
+		}
+	}
+
+	hierarchy_recalc_total_periods(hists);
+
+	/*
+	 * resort output after applying a new filter since filter in a lower
+	 * hierarchy can change periods in a upper hierarchy.
+	 */
+	nd = rb_first(&hists->entries);
+	while (nd) {
+		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
+
+		nd = rb_next(nd);
+		rb_erase(&h->rb_node, &hists->entries);
+
+		resort_filtered_entry(&new_root, h);
+	}
+
+	hists->entries = new_root;
+}
+
+void hists__filter_by_thread(struct hists *hists)
+{
+	if (symbol_conf.report_hierarchy)
+		hists__filter_hierarchy(hists, HIST_FILTER__THREAD,
+					hists->thread_filter);
+	else
+		hists__filter_by_type(hists, HIST_FILTER__THREAD,
+				      hists__filter_entry_by_thread);
+}
+
+void hists__filter_by_dso(struct hists *hists)
+{
+	if (symbol_conf.report_hierarchy)
+		hists__filter_hierarchy(hists, HIST_FILTER__DSO,
+					hists->dso_filter);
+	else
+		hists__filter_by_type(hists, HIST_FILTER__DSO,
+				      hists__filter_entry_by_dso);
+}
+
+void hists__filter_by_symbol(struct hists *hists)
+{
+	if (symbol_conf.report_hierarchy)
+		hists__filter_hierarchy(hists, HIST_FILTER__SYMBOL,
+					hists->symbol_filter_str);
+	else
+		hists__filter_by_type(hists, HIST_FILTER__SYMBOL,
+				      hists__filter_entry_by_symbol);
+}
+
+void hists__filter_by_socket(struct hists *hists)
+{
+	if (symbol_conf.report_hierarchy)
+		hists__filter_hierarchy(hists, HIST_FILTER__SOCKET,
+					&hists->socket_filter);
+	else
+		hists__filter_by_type(hists, HIST_FILTER__SOCKET,
+				      hists__filter_entry_by_socket);
+}
+
 void events_stats__inc(struct events_stats *stats, u32 type)
 {
 	++stats->nr_events[0];
@@ -1585,7 +2223,7 @@
 	return 0;
 }
 
-int __hists__init(struct hists *hists)
+int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list)
 {
 	memset(hists, 0, sizeof(*hists));
 	hists->entries_in_array[0] = hists->entries_in_array[1] = RB_ROOT;
@@ -1594,6 +2232,8 @@
 	hists->entries = RB_ROOT;
 	pthread_mutex_init(&hists->lock, NULL);
 	hists->socket_filter = -1;
+	hists->hpp_list = hpp_list;
+	INIT_LIST_HEAD(&hists->hpp_formats);
 	return 0;
 }
 
@@ -1622,15 +2262,26 @@
 static void hists_evsel__exit(struct perf_evsel *evsel)
 {
 	struct hists *hists = evsel__hists(evsel);
+	struct perf_hpp_fmt *fmt, *pos;
+	struct perf_hpp_list_node *node, *tmp;
 
 	hists__delete_all_entries(hists);
+
+	list_for_each_entry_safe(node, tmp, &hists->hpp_formats, list) {
+		perf_hpp_list__for_each_format_safe(&node->hpp, fmt, pos) {
+			list_del(&fmt->list);
+			free(fmt);
+		}
+		list_del(&node->list);
+		free(node);
+	}
 }
 
 static int hists_evsel__init(struct perf_evsel *evsel)
 {
 	struct hists *hists = evsel__hists(evsel);
 
-	__hists__init(hists);
+	__hists__init(hists, &perf_hpp_list);
 	return 0;
 }
 
@@ -1649,3 +2300,9 @@
 
 	return err;
 }
+
+void perf_hpp_list__init(struct perf_hpp_list *list)
+{
+	INIT_LIST_HEAD(&list->fields);
+	INIT_LIST_HEAD(&list->sorts);
+}

diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index d4ec482..ead18c8 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h

@@ -66,6 +66,8 @@
 	struct rb_root		entries_collapsed;
 	u64			nr_entries;
 	u64			nr_non_filtered_entries;
+	u64			callchain_period;
+	u64			callchain_non_filtered_period;
 	struct thread		*thread_filter;
 	const struct dso	*dso_filter;
 	const char		*uid_filter_str;
@@ -75,6 +77,9 @@
 	u64			event_stream;
 	u16			col_len[HISTC_NR_COLS];
 	int			socket_filter;
+	struct perf_hpp_list	*hpp_list;
+	struct list_head	hpp_formats;
+	int			nr_hpp_node;
 };
 
 struct hist_entry_iter;
@@ -121,15 +126,21 @@
 int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
 			 int max_stack_depth, void *arg);
 
+struct perf_hpp;
+struct perf_hpp_fmt;
+
 int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right);
 int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right);
 int hist_entry__transaction_len(void);
 int hist_entry__sort_snprintf(struct hist_entry *he, char *bf, size_t size,
 			      struct hists *hists);
+int hist_entry__snprintf_alignment(struct hist_entry *he, struct perf_hpp *hpp,
+				   struct perf_hpp_fmt *fmt, int printed);
 void hist_entry__delete(struct hist_entry *he);
 
+void perf_evsel__output_resort(struct perf_evsel *evsel, struct ui_progress *prog);
 void hists__output_resort(struct hists *hists, struct ui_progress *prog);
-void hists__collapse_resort(struct hists *hists, struct ui_progress *prog);
+int hists__collapse_resort(struct hists *hists, struct ui_progress *prog);
 
 void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel);
 void hists__delete_entries(struct hists *hists);
@@ -185,10 +196,10 @@
 }
 
 int hists__init(void);
-int __hists__init(struct hists *hists);
+int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list);
 
 struct rb_root *hists__get_rotate_entries_in(struct hists *hists);
-bool hists__collapse_insert_entry(struct hists *hists __maybe_unused,
+int hists__collapse_insert_entry(struct hists *hists,
 				  struct rb_root *root, struct hist_entry *he);
 
 struct perf_hpp {
@@ -214,28 +225,64 @@
 			    struct hist_entry *a, struct hist_entry *b);
 	int64_t (*sort)(struct perf_hpp_fmt *fmt,
 			struct hist_entry *a, struct hist_entry *b);
+	bool (*equal)(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b);
+	void (*free)(struct perf_hpp_fmt *fmt);
 
 	struct list_head list;
 	struct list_head sort_list;
 	bool elide;
 	int len;
 	int user_len;
+	int idx;
+	int level;
 };
 
-extern struct list_head perf_hpp__list;
-extern struct list_head perf_hpp__sort_list;
+struct perf_hpp_list {
+	struct list_head fields;
+	struct list_head sorts;
+};
 
-#define perf_hpp__for_each_format(format) \
-	list_for_each_entry(format, &perf_hpp__list, list)
+extern struct perf_hpp_list perf_hpp_list;
 
-#define perf_hpp__for_each_format_safe(format, tmp)	\
-	list_for_each_entry_safe(format, tmp, &perf_hpp__list, list)
+struct perf_hpp_list_node {
+	struct list_head	list;
+	struct perf_hpp_list	hpp;
+	int			level;
+	bool			skip;
+};
 
-#define perf_hpp__for_each_sort_list(format) \
-	list_for_each_entry(format, &perf_hpp__sort_list, sort_list)
+void perf_hpp_list__column_register(struct perf_hpp_list *list,
+				    struct perf_hpp_fmt *format);
+void perf_hpp_list__register_sort_field(struct perf_hpp_list *list,
+					struct perf_hpp_fmt *format);
 
-#define perf_hpp__for_each_sort_list_safe(format, tmp)	\
-	list_for_each_entry_safe(format, tmp, &perf_hpp__sort_list, sort_list)
+static inline void perf_hpp__column_register(struct perf_hpp_fmt *format)
+{
+	perf_hpp_list__column_register(&perf_hpp_list, format);
+}
+
+static inline void perf_hpp__register_sort_field(struct perf_hpp_fmt *format)
+{
+	perf_hpp_list__register_sort_field(&perf_hpp_list, format);
+}
+
+#define perf_hpp_list__for_each_format(_list, format) \
+	list_for_each_entry(format, &(_list)->fields, list)
+
+#define perf_hpp_list__for_each_format_safe(_list, format, tmp)	\
+	list_for_each_entry_safe(format, tmp, &(_list)->fields, list)
+
+#define perf_hpp_list__for_each_sort_list(_list, format) \
+	list_for_each_entry(format, &(_list)->sorts, sort_list)
+
+#define perf_hpp_list__for_each_sort_list_safe(_list, format, tmp)	\
+	list_for_each_entry_safe(format, tmp, &(_list)->sorts, sort_list)
+
+#define hists__for_each_format(hists, format) \
+	perf_hpp_list__for_each_format((hists)->hpp_list, fmt)
+
+#define hists__for_each_sort_list(hists, format) \
+	perf_hpp_list__for_each_sort_list((hists)->hpp_list, fmt)
 
 extern struct perf_hpp_fmt perf_hpp__format[];
 
@@ -254,21 +301,29 @@
 };
 
 void perf_hpp__init(void);
-void perf_hpp__column_register(struct perf_hpp_fmt *format);
 void perf_hpp__column_unregister(struct perf_hpp_fmt *format);
-void perf_hpp__column_enable(unsigned col);
-void perf_hpp__column_disable(unsigned col);
 void perf_hpp__cancel_cumulate(void);
+void perf_hpp__setup_output_field(struct perf_hpp_list *list);
+void perf_hpp__reset_output_field(struct perf_hpp_list *list);
+void perf_hpp__append_sort_keys(struct perf_hpp_list *list);
+int perf_hpp__setup_hists_formats(struct perf_hpp_list *list,
+				  struct perf_evlist *evlist);
 
-void perf_hpp__register_sort_field(struct perf_hpp_fmt *format);
-void perf_hpp__setup_output_field(void);
-void perf_hpp__reset_output_field(void);
-void perf_hpp__append_sort_keys(void);
 
 bool perf_hpp__is_sort_entry(struct perf_hpp_fmt *format);
-bool perf_hpp__same_sort_entry(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b);
 bool perf_hpp__is_dynamic_entry(struct perf_hpp_fmt *format);
 bool perf_hpp__defined_dynamic_entry(struct perf_hpp_fmt *fmt, struct hists *hists);
+bool perf_hpp__is_trace_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_srcline_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_srcfile_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_thread_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_comm_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_dso_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_sym_entry(struct perf_hpp_fmt *fmt);
+
+struct perf_hpp_fmt *perf_hpp_fmt__dup(struct perf_hpp_fmt *fmt);
+
+int hist_entry__filter(struct hist_entry *he, int type, const void *arg);
 
 static inline bool perf_hpp__should_skip(struct perf_hpp_fmt *format,
 					 struct hists *hists)
@@ -372,6 +427,7 @@
 #endif
 
 unsigned int hists__sort_list_width(struct hists *hists);
+unsigned int hists__overhead_width(struct hists *hists);
 
 void hist__account_cycles(struct branch_stack *bs, struct addr_location *al,
 			  struct perf_sample *sample, bool nonany_branch_mode);
@@ -381,4 +437,26 @@
 			    const char *arg, int unset __maybe_unused);
 int perf_hist_config(const char *var, const char *value);
 
+void perf_hpp_list__init(struct perf_hpp_list *list);
+
+enum hierarchy_move_dir {
+	HMD_NORMAL,
+	HMD_FORCE_SIBLING,
+	HMD_FORCE_CHILD,
+};
+
+struct rb_node *rb_hierarchy_last(struct rb_node *node);
+struct rb_node *__rb_hierarchy_next(struct rb_node *node,
+				    enum hierarchy_move_dir hmd);
+struct rb_node *rb_hierarchy_prev(struct rb_node *node);
+
+static inline struct rb_node *rb_hierarchy_next(struct rb_node *node)
+{
+	return __rb_hierarchy_next(node, HMD_NORMAL);
+}
+
+#define HIERARCHY_INDENT  3
+
+bool hist_entry__has_hierarchy_children(struct hist_entry *he, float limit);
+
 #endif	/* __PERF_HIST_H */

diff --git a/tools/perf/util/jit.h b/tools/perf/util/jit.h
new file mode 100644
index 0000000..a1e99da
--- /dev/null
+++ b/tools/perf/util/jit.h

@@ -0,0 +1,15 @@
+#ifndef __JIT_H__
+#define __JIT_H__
+
+#include <data.h>
+
+extern int jit_process(struct perf_session *session,
+		       struct perf_data_file *output,
+		       struct machine *machine,
+		       char *filename,
+		       pid_t pid,
+		       u64 *nbytes);
+
+extern int jit_inject_record(const char *filename);
+
+#endif /* __JIT_H__ */

diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c
new file mode 100644
index 0000000..cd272cc
--- /dev/null
+++ b/tools/perf/util/jitdump.c

@@ -0,0 +1,697 @@
+#include <sys/types.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <byteswap.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+
+#include "util.h"
+#include "event.h"
+#include "debug.h"
+#include "evlist.h"
+#include "symbol.h"
+#include "strlist.h"
+#include <elf.h>
+
+#include "session.h"
+#include "jit.h"
+#include "jitdump.h"
+#include "genelf.h"
+#include "../builtin.h"
+
+struct jit_buf_desc {
+	struct perf_data_file *output;
+	struct perf_session *session;
+	struct machine *machine;
+	union jr_entry   *entry;
+	void             *buf;
+	uint64_t	 sample_type;
+	size_t           bufsize;
+	FILE             *in;
+	bool		 needs_bswap; /* handles cross-endianess */
+	void		 *debug_data;
+	size_t		 nr_debug_entries;
+	uint32_t         code_load_count;
+	u64		 bytes_written;
+	struct rb_root   code_root;
+	char		 dir[PATH_MAX];
+};
+
+struct debug_line_info {
+	unsigned long vma;
+	unsigned int lineno;
+	/* The filename format is unspecified, absolute path, relative etc. */
+	char const filename[0];
+};
+
+struct jit_tool {
+	struct perf_tool tool;
+	struct perf_data_file	output;
+	struct perf_data_file	input;
+	u64 bytes_written;
+};
+
+#define hmax(a, b) ((a) > (b) ? (a) : (b))
+#define get_jit_tool(t) (container_of(tool, struct jit_tool, tool))
+
+static int
+jit_emit_elf(char *filename,
+	     const char *sym,
+	     uint64_t code_addr,
+	     const void *code,
+	     int csize,
+	     void *debug,
+	     int nr_debug_entries)
+{
+	int ret, fd;
+
+	if (verbose > 0)
+		fprintf(stderr, "write ELF image %s\n", filename);
+
+	fd = open(filename, O_CREAT|O_TRUNC|O_WRONLY, 0644);
+	if (fd == -1) {
+		pr_warning("cannot create jit ELF %s: %s\n", filename, strerror(errno));
+		return -1;
+	}
+
+        ret = jit_write_elf(fd, code_addr, sym, (const void *)code, csize, debug, nr_debug_entries);
+
+        close(fd);
+
+        if (ret)
+                unlink(filename);
+
+	return ret;
+}
+
+static void
+jit_close(struct jit_buf_desc *jd)
+{
+	if (!(jd && jd->in))
+		return;
+	funlockfile(jd->in);
+	fclose(jd->in);
+	jd->in = NULL;
+}
+
+static int
+jit_validate_events(struct perf_session *session)
+{
+	struct perf_evsel *evsel;
+
+	/*
+	 * check that all events use CLOCK_MONOTONIC
+	 */
+	evlist__for_each(session->evlist, evsel) {
+		if (evsel->attr.use_clockid == 0 || evsel->attr.clockid != CLOCK_MONOTONIC)
+			return -1;
+	}
+	return 0;
+}
+
+static int
+jit_open(struct jit_buf_desc *jd, const char *name)
+{
+	struct jitheader header;
+	struct jr_prefix *prefix;
+	ssize_t bs, bsz = 0;
+	void *n, *buf = NULL;
+	int ret, retval = -1;
+
+	jd->in = fopen(name, "r");
+	if (!jd->in)
+		return -1;
+
+	bsz = hmax(sizeof(header), sizeof(*prefix));
+
+	buf = malloc(bsz);
+	if (!buf)
+		goto error;
+
+	/*
+	 * protect from writer modifying the file while we are reading it
+	 */
+	flockfile(jd->in);
+
+	ret = fread(buf, sizeof(header), 1, jd->in);
+	if (ret != 1)
+		goto error;
+
+	memcpy(&header, buf, sizeof(header));
+
+	if (header.magic != JITHEADER_MAGIC) {
+		if (header.magic != JITHEADER_MAGIC_SW)
+			goto error;
+		jd->needs_bswap = true;
+	}
+
+	if (jd->needs_bswap) {
+		header.version    = bswap_32(header.version);
+		header.total_size = bswap_32(header.total_size);
+		header.pid	  = bswap_32(header.pid);
+		header.elf_mach   = bswap_32(header.elf_mach);
+		header.timestamp  = bswap_64(header.timestamp);
+		header.flags      = bswap_64(header.flags);
+	}
+
+	if (verbose > 2)
+		pr_debug("version=%u\nhdr.size=%u\nts=0x%llx\npid=%d\nelf_mach=%d\n",
+			header.version,
+			header.total_size,
+			(unsigned long long)header.timestamp,
+			header.pid,
+			header.elf_mach);
+
+	if (header.flags & JITDUMP_FLAGS_RESERVED) {
+		pr_err("jitdump file contains invalid or unsupported flags 0x%llx\n",
+		       (unsigned long long)header.flags & JITDUMP_FLAGS_RESERVED);
+		goto error;
+	}
+
+	/*
+	 * validate event is using the correct clockid
+	 */
+	if (jit_validate_events(jd->session)) {
+		pr_err("error, jitted code must be sampled with perf record -k 1\n");
+		goto error;
+	}
+
+	bs = header.total_size - sizeof(header);
+
+	if (bs > bsz) {
+		n = realloc(buf, bs);
+		if (!n)
+			goto error;
+		bsz = bs;
+		buf = n;
+		/* read extra we do not know about */
+		ret = fread(buf, bs - bsz, 1, jd->in);
+		if (ret != 1)
+			goto error;
+	}
+	/*
+	 * keep dirname for generating files and mmap records
+	 */
+	strcpy(jd->dir, name);
+	dirname(jd->dir);
+
+	return 0;
+error:
+	funlockfile(jd->in);
+	fclose(jd->in);
+	return retval;
+}
+
+static union jr_entry *
+jit_get_next_entry(struct jit_buf_desc *jd)
+{
+	struct jr_prefix *prefix;
+	union jr_entry *jr;
+	void *addr;
+	size_t bs, size;
+	int id, ret;
+
+	if (!(jd && jd->in))
+		return NULL;
+
+	if (jd->buf == NULL) {
+		size_t sz = getpagesize();
+		if (sz < sizeof(*prefix))
+			sz = sizeof(*prefix);
+
+		jd->buf = malloc(sz);
+		if (jd->buf == NULL)
+			return NULL;
+
+		jd->bufsize = sz;
+	}
+
+	prefix = jd->buf;
+
+	/*
+	 * file is still locked at this point
+	 */
+	ret = fread(prefix, sizeof(*prefix), 1, jd->in);
+	if (ret  != 1)
+		return NULL;
+
+	if (jd->needs_bswap) {
+		prefix->id   	   = bswap_32(prefix->id);
+		prefix->total_size = bswap_32(prefix->total_size);
+		prefix->timestamp  = bswap_64(prefix->timestamp);
+	}
+	id   = prefix->id;
+	size = prefix->total_size;
+
+	bs = (size_t)size;
+	if (bs < sizeof(*prefix))
+		return NULL;
+
+	if (id >= JIT_CODE_MAX) {
+		pr_warning("next_entry: unknown prefix %d, skipping\n", id);
+		return NULL;
+	}
+	if (bs > jd->bufsize) {
+		void *n;
+		n = realloc(jd->buf, bs);
+		if (!n)
+			return NULL;
+		jd->buf = n;
+		jd->bufsize = bs;
+	}
+
+	addr = ((void *)jd->buf) + sizeof(*prefix);
+
+	ret = fread(addr, bs - sizeof(*prefix), 1, jd->in);
+	if (ret != 1)
+		return NULL;
+
+	jr = (union jr_entry *)jd->buf;
+
+	switch(id) {
+	case JIT_CODE_DEBUG_INFO:
+		if (jd->needs_bswap) {
+			uint64_t n;
+			jr->info.code_addr = bswap_64(jr->info.code_addr);
+			jr->info.nr_entry  = bswap_64(jr->info.nr_entry);
+			for (n = 0 ; n < jr->info.nr_entry; n++) {
+				jr->info.entries[n].addr    = bswap_64(jr->info.entries[n].addr);
+				jr->info.entries[n].lineno  = bswap_32(jr->info.entries[n].lineno);
+				jr->info.entries[n].discrim = bswap_32(jr->info.entries[n].discrim);
+			}
+		}
+		break;
+	case JIT_CODE_CLOSE:
+		break;
+	case JIT_CODE_LOAD:
+		if (jd->needs_bswap) {
+			jr->load.pid       = bswap_32(jr->load.pid);
+			jr->load.tid       = bswap_32(jr->load.tid);
+			jr->load.vma       = bswap_64(jr->load.vma);
+			jr->load.code_addr = bswap_64(jr->load.code_addr);
+			jr->load.code_size = bswap_64(jr->load.code_size);
+			jr->load.code_index= bswap_64(jr->load.code_index);
+		}
+		jd->code_load_count++;
+		break;
+	case JIT_CODE_MOVE:
+		if (jd->needs_bswap) {
+			jr->move.pid           = bswap_32(jr->move.pid);
+			jr->move.tid           = bswap_32(jr->move.tid);
+			jr->move.vma           = bswap_64(jr->move.vma);
+			jr->move.old_code_addr = bswap_64(jr->move.old_code_addr);
+			jr->move.new_code_addr = bswap_64(jr->move.new_code_addr);
+			jr->move.code_size     = bswap_64(jr->move.code_size);
+			jr->move.code_index    = bswap_64(jr->move.code_index);
+		}
+		break;
+	case JIT_CODE_MAX:
+	default:
+		return NULL;
+	}
+	return jr;
+}
+
+static int
+jit_inject_event(struct jit_buf_desc *jd, union perf_event *event)
+{
+	ssize_t size;
+
+	size = perf_data_file__write(jd->output, event, event->header.size);
+	if (size < 0)
+		return -1;
+
+	jd->bytes_written += size;
+	return 0;
+}
+
+static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr)
+{
+	struct perf_sample sample;
+	union perf_event *event;
+	struct perf_tool *tool = jd->session->tool;
+	uint64_t code, addr;
+	uintptr_t uaddr;
+	char *filename;
+	struct stat st;
+	size_t size;
+	u16 idr_size;
+	const char *sym;
+	uint32_t count;
+	int ret, csize;
+	pid_t pid, tid;
+	struct {
+		u32 pid, tid;
+		u64 time;
+	} *id;
+
+	pid   = jr->load.pid;
+	tid   = jr->load.tid;
+	csize = jr->load.code_size;
+	addr  = jr->load.code_addr;
+	sym   = (void *)((unsigned long)jr + sizeof(jr->load));
+	code  = (unsigned long)jr + jr->load.p.total_size - csize;
+	count = jr->load.code_index;
+	idr_size = jd->machine->id_hdr_size;
+
+	event = calloc(1, sizeof(*event) + idr_size);
+	if (!event)
+		return -1;
+
+	filename = event->mmap2.filename;
+	size = snprintf(filename, PATH_MAX, "%s/jitted-%d-%u.so",
+			jd->dir,
+			pid,
+			count);
+
+	size++; /* for \0 */
+
+	size = PERF_ALIGN(size, sizeof(u64));
+	uaddr = (uintptr_t)code;
+	ret = jit_emit_elf(filename, sym, addr, (const void *)uaddr, csize, jd->debug_data, jd->nr_debug_entries);
+
+	if (jd->debug_data && jd->nr_debug_entries) {
+		free(jd->debug_data);
+		jd->debug_data = NULL;
+		jd->nr_debug_entries = 0;
+	}
+
+	if (ret) {
+		free(event);
+		return -1;
+	}
+	if (stat(filename, &st))
+		memset(&st, 0, sizeof(stat));
+
+	event->mmap2.header.type = PERF_RECORD_MMAP2;
+	event->mmap2.header.misc = PERF_RECORD_MISC_USER;
+	event->mmap2.header.size = (sizeof(event->mmap2) -
+			(sizeof(event->mmap2.filename) - size) + idr_size);
+
+	event->mmap2.pgoff = GEN_ELF_TEXT_OFFSET;
+	event->mmap2.start = addr;
+	event->mmap2.len   = csize;
+	event->mmap2.pid   = pid;
+	event->mmap2.tid   = tid;
+	event->mmap2.ino   = st.st_ino;
+	event->mmap2.maj   = major(st.st_dev);
+	event->mmap2.min   = minor(st.st_dev);
+	event->mmap2.prot  = st.st_mode;
+	event->mmap2.flags = MAP_SHARED;
+	event->mmap2.ino_generation = 1;
+
+	id = (void *)((unsigned long)event + event->mmap.header.size - idr_size);
+	if (jd->sample_type & PERF_SAMPLE_TID) {
+		id->pid  = pid;
+		id->tid  = tid;
+	}
+	if (jd->sample_type & PERF_SAMPLE_TIME)
+		id->time = jr->load.p.timestamp;
+
+	/*
+	 * create pseudo sample to induce dso hit increment
+	 * use first address as sample address
+	 */
+	memset(&sample, 0, sizeof(sample));
+	sample.pid  = pid;
+	sample.tid  = tid;
+	sample.time = id->time;
+	sample.ip   = addr;
+
+	ret = perf_event__process_mmap2(tool, event, &sample, jd->machine);
+	if (ret)
+		return ret;
+
+	ret = jit_inject_event(jd, event);
+	/*
+	 * mark dso as use to generate buildid in the header
+	 */
+	if (!ret)
+		build_id__mark_dso_hit(tool, event, &sample, NULL, jd->machine);
+
+	return ret;
+}
+
+static int jit_repipe_code_move(struct jit_buf_desc *jd, union jr_entry *jr)
+{
+	struct perf_sample sample;
+	union perf_event *event;
+	struct perf_tool *tool = jd->session->tool;
+	char *filename;
+	size_t size;
+	struct stat st;
+	u16 idr_size;
+	int ret;
+	pid_t pid, tid;
+	struct {
+		u32 pid, tid;
+		u64 time;
+	} *id;
+
+	pid = jr->move.pid;
+	tid =  jr->move.tid;
+	idr_size = jd->machine->id_hdr_size;
+
+	/*
+	 * +16 to account for sample_id_all (hack)
+	 */
+	event = calloc(1, sizeof(*event) + 16);
+	if (!event)
+		return -1;
+
+	filename = event->mmap2.filename;
+	size = snprintf(filename, PATH_MAX, "%s/jitted-%d-%"PRIu64,
+	         jd->dir,
+	         pid,
+		 jr->move.code_index);
+
+	size++; /* for \0 */
+
+	if (stat(filename, &st))
+		memset(&st, 0, sizeof(stat));
+
+	size = PERF_ALIGN(size, sizeof(u64));
+
+	event->mmap2.header.type = PERF_RECORD_MMAP2;
+	event->mmap2.header.misc = PERF_RECORD_MISC_USER;
+	event->mmap2.header.size = (sizeof(event->mmap2) -
+			(sizeof(event->mmap2.filename) - size) + idr_size);
+	event->mmap2.pgoff = GEN_ELF_TEXT_OFFSET;
+	event->mmap2.start = jr->move.new_code_addr;
+	event->mmap2.len   = jr->move.code_size;
+	event->mmap2.pid   = pid;
+	event->mmap2.tid   = tid;
+	event->mmap2.ino   = st.st_ino;
+	event->mmap2.maj   = major(st.st_dev);
+	event->mmap2.min   = minor(st.st_dev);
+	event->mmap2.prot  = st.st_mode;
+	event->mmap2.flags = MAP_SHARED;
+	event->mmap2.ino_generation = 1;
+
+	id = (void *)((unsigned long)event + event->mmap.header.size - idr_size);
+	if (jd->sample_type & PERF_SAMPLE_TID) {
+		id->pid  = pid;
+		id->tid  = tid;
+	}
+	if (jd->sample_type & PERF_SAMPLE_TIME)
+		id->time = jr->load.p.timestamp;
+
+	/*
+	 * create pseudo sample to induce dso hit increment
+	 * use first address as sample address
+	 */
+	memset(&sample, 0, sizeof(sample));
+	sample.pid  = pid;
+	sample.tid  = tid;
+	sample.time = id->time;
+	sample.ip   = jr->move.new_code_addr;
+
+	ret = perf_event__process_mmap2(tool, event, &sample, jd->machine);
+	if (ret)
+		return ret;
+
+	ret = jit_inject_event(jd, event);
+	if (!ret)
+		build_id__mark_dso_hit(tool, event, &sample, NULL, jd->machine);
+
+	return ret;
+}
+
+static int jit_repipe_debug_info(struct jit_buf_desc *jd, union jr_entry *jr)
+{
+	void *data;
+	size_t sz;
+
+	if (!(jd && jr))
+		return -1;
+
+	sz  = jr->prefix.total_size - sizeof(jr->info);
+	data = malloc(sz);
+	if (!data)
+		return -1;
+
+	memcpy(data, &jr->info.entries, sz);
+
+	jd->debug_data       = data;
+
+	/*
+	 * we must use nr_entry instead of size here because
+	 * we cannot distinguish actual entry from padding otherwise
+	 */
+	jd->nr_debug_entries = jr->info.nr_entry;
+
+	return 0;
+}
+
+static int
+jit_process_dump(struct jit_buf_desc *jd)
+{
+	union jr_entry *jr;
+	int ret;
+
+	while ((jr = jit_get_next_entry(jd))) {
+		switch(jr->prefix.id) {
+		case JIT_CODE_LOAD:
+			ret = jit_repipe_code_load(jd, jr);
+			break;
+		case JIT_CODE_MOVE:
+			ret = jit_repipe_code_move(jd, jr);
+			break;
+		case JIT_CODE_DEBUG_INFO:
+			ret = jit_repipe_debug_info(jd, jr);
+			break;
+		default:
+			ret = 0;
+			continue;
+		}
+	}
+	return ret;
+}
+
+static int
+jit_inject(struct jit_buf_desc *jd, char *path)
+{
+	int ret;
+
+	if (verbose > 0)
+		fprintf(stderr, "injecting: %s\n", path);
+
+	ret = jit_open(jd, path);
+	if (ret)
+		return -1;
+
+	ret = jit_process_dump(jd);
+
+	jit_close(jd);
+
+	if (verbose > 0)
+		fprintf(stderr, "injected: %s (%d)\n", path, ret);
+
+	return 0;
+}
+
+/*
+ * File must be with pattern .../jit-XXXX.dump
+ * where XXXX is the PID of the process which did the mmap()
+ * as captured in the RECORD_MMAP record
+ */
+static int
+jit_detect(char *mmap_name, pid_t pid)
+ {
+	char *p;
+	char *end = NULL;
+	pid_t pid2;
+
+	if (verbose > 2)
+		fprintf(stderr, "jit marker trying : %s\n", mmap_name);
+	/*
+	 * get file name
+	 */
+	p = strrchr(mmap_name, '/');
+	if (!p)
+		return -1;
+
+	/*
+	 * match prefix
+	 */
+	if (strncmp(p, "/jit-", 5))
+		return -1;
+
+	/*
+	 * skip prefix
+	 */
+	p += 5;
+
+	/*
+	 * must be followed by a pid
+	 */
+	if (!isdigit(*p))
+		return -1;
+
+	pid2 = (int)strtol(p, &end, 10);
+	if (!end)
+		return -1;
+
+	/*
+	 * pid does not match mmap pid
+	 * pid==0 in system-wide mode (synthesized)
+	 */
+	if (pid && pid2 != pid)
+		return -1;
+	/*
+	 * validate suffix
+	 */
+	if (strcmp(end, ".dump"))
+		return -1;
+
+	if (verbose > 0)
+		fprintf(stderr, "jit marker found: %s\n", mmap_name);
+
+	return 0;
+}
+
+int
+jit_process(struct perf_session *session,
+	    struct perf_data_file *output,
+	    struct machine *machine,
+	    char *filename,
+	    pid_t pid,
+	    u64 *nbytes)
+{
+	struct perf_evsel *first;
+	struct jit_buf_desc jd;
+	int ret;
+
+	/*
+	 * first, detect marker mmap (i.e., the jitdump mmap)
+	 */
+	if (jit_detect(filename, pid))
+		return 0;
+
+	memset(&jd, 0, sizeof(jd));
+
+	jd.session = session;
+	jd.output  = output;
+	jd.machine = machine;
+
+	/*
+	 * track sample_type to compute id_all layout
+	 * perf sets the same sample type to all events as of now
+	 */
+	first = perf_evlist__first(session->evlist);
+	jd.sample_type = first->attr.sample_type;
+
+	*nbytes = 0;
+
+	ret = jit_inject(&jd, filename);
+	if (!ret) {
+		*nbytes = jd.bytes_written;
+		ret = 1;
+	}
+
+	return ret;
+}

diff --git a/tools/perf/util/jitdump.h b/tools/perf/util/jitdump.h
new file mode 100644
index 0000000..b66c1f5
--- /dev/null
+++ b/tools/perf/util/jitdump.h

@@ -0,0 +1,124 @@
+/*
+ * jitdump.h: jitted code info encapsulation file format
+ *
+ * Adapted from OProfile GPLv2 support jidump.h:
+ * Copyright 2007 OProfile authors
+ * Jens Wilke
+ * Daniel Hansel
+ * Copyright IBM Corporation 2007
+ */
+#ifndef JITDUMP_H
+#define JITDUMP_H
+
+#include <sys/time.h>
+#include <time.h>
+#include <stdint.h>
+
+/* JiTD */
+#define JITHEADER_MAGIC		0x4A695444
+#define JITHEADER_MAGIC_SW	0x4454694A
+
+#define PADDING_8ALIGNED(x) ((((x) + 7) & 7) ^ 7)
+
+#define JITHEADER_VERSION 1
+
+enum jitdump_flags_bits {
+	JITDUMP_FLAGS_MAX_BIT,
+};
+
+#define JITDUMP_FLAGS_RESERVED (JITDUMP_FLAGS_MAX_BIT < 64 ? \
+				(~((1ULL << JITDUMP_FLAGS_MAX_BIT) - 1)) : 0)
+
+struct jitheader {
+	uint32_t magic;		/* characters "jItD" */
+	uint32_t version;	/* header version */
+	uint32_t total_size;	/* total size of header */
+	uint32_t elf_mach;	/* elf mach target */
+	uint32_t pad1;		/* reserved */
+	uint32_t pid;		/* JIT process id */
+	uint64_t timestamp;	/* timestamp */
+	uint64_t flags;		/* flags */
+};
+
+enum jit_record_type {
+	JIT_CODE_LOAD		= 0,
+        JIT_CODE_MOVE           = 1,
+	JIT_CODE_DEBUG_INFO	= 2,
+	JIT_CODE_CLOSE		= 3,
+
+	JIT_CODE_MAX,
+};
+
+/* record prefix (mandatory in each record) */
+struct jr_prefix {
+	uint32_t id;
+	uint32_t total_size;
+	uint64_t timestamp;
+};
+
+struct jr_code_load {
+	struct jr_prefix p;
+
+	uint32_t pid;
+	uint32_t tid;
+	uint64_t vma;
+	uint64_t code_addr;
+	uint64_t code_size;
+	uint64_t code_index;
+};
+
+struct jr_code_close {
+	struct jr_prefix p;
+};
+
+struct jr_code_move {
+	struct jr_prefix p;
+
+	uint32_t pid;
+	uint32_t tid;
+	uint64_t vma;
+	uint64_t old_code_addr;
+	uint64_t new_code_addr;
+	uint64_t code_size;
+	uint64_t code_index;
+};
+
+struct debug_entry {
+	uint64_t addr;
+	int lineno;	    /* source line number starting at 1 */
+	int discrim;	    /* column discriminator, 0 is default */
+	const char name[0]; /* null terminated filename, \xff\0 if same as previous entry */
+};
+
+struct jr_code_debug_info {
+	struct jr_prefix p;
+
+	uint64_t code_addr;
+	uint64_t nr_entry;
+	struct debug_entry entries[0];
+};
+
+union jr_entry {
+        struct jr_code_debug_info info;
+        struct jr_code_close close;
+        struct jr_code_load load;
+        struct jr_code_move move;
+        struct jr_prefix prefix;
+};
+
+static inline struct debug_entry *
+debug_entry_next(struct debug_entry *ent)
+{
+	void *a = ent + 1;
+	size_t l = strlen(ent->name) + 1;
+	return a + l;
+}
+
+static inline char *
+debug_entry_file(struct debug_entry *ent)
+{
+	void *a = ent + 1;
+	return a;
+}
+
+#endif /* !JITDUMP_H */

diff --git a/tools/perf/util/kvm-stat.h b/tools/perf/util/kvm-stat.h
index ae825d4..d01e735 100644
--- a/tools/perf/util/kvm-stat.h
+++ b/tools/perf/util/kvm-stat.h

@@ -122,6 +122,7 @@
 
 bool kvm_exit_event(struct perf_evsel *evsel);
 bool kvm_entry_event(struct perf_evsel *evsel);
+int setup_kvm_events_tp(struct perf_kvm_stat *kvm);
 
 #define define_exit_reasons_table(name, symbols)	\
 	static struct exit_reasons_table name[] = {	\
@@ -133,8 +134,13 @@
  */
 int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid);
 
-extern const char * const kvm_events_tp[];
+extern const char *kvm_events_tp[];
 extern struct kvm_reg_events_ops kvm_reg_events_ops[];
 extern const char * const kvm_skip_events[];
+extern const char *vcpu_id_str;
+extern const int decode_str_len;
+extern const char *kvm_exit_reason;
+extern const char *kvm_entry_trace;
+extern const char *kvm_exit_trace;
 
 #endif /* __PERF_KVM_STAT_H */

diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index 2c2b443..1a3e45b 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h

@@ -180,6 +180,16 @@
 }
 
 static inline
+struct symbol *machine__find_kernel_symbol_by_name(struct machine *machine,
+						   enum map_type type, const char *name,
+						   struct map **mapp,
+						   symbol_filter_t filter)
+{
+	return map_groups__find_symbol_by_name(&machine->kmaps, type, name,
+					       mapp, filter);
+}
+
+static inline
 struct symbol *machine__find_kernel_function(struct machine *machine, u64 addr,
 					     struct map **mapp,
 					     symbol_filter_t filter)

diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
new file mode 100644
index 0000000..75465f8
--- /dev/null
+++ b/tools/perf/util/mem-events.c

@@ -0,0 +1,255 @@
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <api/fs/fs.h>
+#include "mem-events.h"
+#include "debug.h"
+#include "symbol.h"
+
+#define E(t, n, s) { .tag = t, .name = n, .sysfs_name = s }
+
+struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = {
+	E("ldlat-loads",	"cpu/mem-loads,ldlat=30/P",	"mem-loads"),
+	E("ldlat-stores",	"cpu/mem-stores/P",		"mem-stores"),
+};
+#undef E
+
+#undef E
+
+char *perf_mem_events__name(int i)
+{
+	return (char *)perf_mem_events[i].name;
+}
+
+int perf_mem_events__parse(const char *str)
+{
+	char *tok, *saveptr = NULL;
+	bool found = false;
+	char *buf;
+	int j;
+
+	/* We need buffer that we know we can write to. */
+	buf = malloc(strlen(str) + 1);
+	if (!buf)
+		return -ENOMEM;
+
+	strcpy(buf, str);
+
+	tok = strtok_r((char *)buf, ",", &saveptr);
+
+	while (tok) {
+		for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
+			struct perf_mem_event *e = &perf_mem_events[j];
+
+			if (strstr(e->tag, tok))
+				e->record = found = true;
+		}
+
+		tok = strtok_r(NULL, ",", &saveptr);
+	}
+
+	free(buf);
+
+	if (found)
+		return 0;
+
+	pr_err("failed: event '%s' not found, use '-e list' to get list of available events\n", str);
+	return -1;
+}
+
+int perf_mem_events__init(void)
+{
+	const char *mnt = sysfs__mount();
+	bool found = false;
+	int j;
+
+	if (!mnt)
+		return -ENOENT;
+
+	for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
+		char path[PATH_MAX];
+		struct perf_mem_event *e = &perf_mem_events[j];
+		struct stat st;
+
+		scnprintf(path, PATH_MAX, "%s/devices/cpu/events/%s",
+			  mnt, e->sysfs_name);
+
+		if (!stat(path, &st))
+			e->supported = found = true;
+	}
+
+	return found ? 0 : -ENOENT;
+}
+
+static const char * const tlb_access[] = {
+	"N/A",
+	"HIT",
+	"MISS",
+	"L1",
+	"L2",
+	"Walker",
+	"Fault",
+};
+
+int perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
+{
+	size_t l = 0, i;
+	u64 m = PERF_MEM_TLB_NA;
+	u64 hit, miss;
+
+	sz -= 1; /* -1 for null termination */
+	out[0] = '\0';
+
+	if (mem_info)
+		m = mem_info->data_src.mem_dtlb;
+
+	hit = m & PERF_MEM_TLB_HIT;
+	miss = m & PERF_MEM_TLB_MISS;
+
+	/* already taken care of */
+	m &= ~(PERF_MEM_TLB_HIT|PERF_MEM_TLB_MISS);
+
+	for (i = 0; m && i < ARRAY_SIZE(tlb_access); i++, m >>= 1) {
+		if (!(m & 0x1))
+			continue;
+		if (l) {
+			strcat(out, " or ");
+			l += 4;
+		}
+		l += scnprintf(out + l, sz - l, tlb_access[i]);
+	}
+	if (*out == '\0')
+		l += scnprintf(out, sz - l, "N/A");
+	if (hit)
+		l += scnprintf(out + l, sz - l, " hit");
+	if (miss)
+		l += scnprintf(out + l, sz - l, " miss");
+
+	return l;
+}
+
+static const char * const mem_lvl[] = {
+	"N/A",
+	"HIT",
+	"MISS",
+	"L1",
+	"LFB",
+	"L2",
+	"L3",
+	"Local RAM",
+	"Remote RAM (1 hop)",
+	"Remote RAM (2 hops)",
+	"Remote Cache (1 hop)",
+	"Remote Cache (2 hops)",
+	"I/O",
+	"Uncached",
+};
+
+int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
+{
+	size_t i, l = 0;
+	u64 m =  PERF_MEM_LVL_NA;
+	u64 hit, miss;
+
+	if (mem_info)
+		m  = mem_info->data_src.mem_lvl;
+
+	sz -= 1; /* -1 for null termination */
+	out[0] = '\0';
+
+	hit = m & PERF_MEM_LVL_HIT;
+	miss = m & PERF_MEM_LVL_MISS;
+
+	/* already taken care of */
+	m &= ~(PERF_MEM_LVL_HIT|PERF_MEM_LVL_MISS);
+
+	for (i = 0; m && i < ARRAY_SIZE(mem_lvl); i++, m >>= 1) {
+		if (!(m & 0x1))
+			continue;
+		if (l) {
+			strcat(out, " or ");
+			l += 4;
+		}
+		l += scnprintf(out + l, sz - l, mem_lvl[i]);
+	}
+	if (*out == '\0')
+		l += scnprintf(out, sz - l, "N/A");
+	if (hit)
+		l += scnprintf(out + l, sz - l, " hit");
+	if (miss)
+		l += scnprintf(out + l, sz - l, " miss");
+
+	return l;
+}
+
+static const char * const snoop_access[] = {
+	"N/A",
+	"None",
+	"Miss",
+	"Hit",
+	"HitM",
+};
+
+int perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
+{
+	size_t i, l = 0;
+	u64 m = PERF_MEM_SNOOP_NA;
+
+	sz -= 1; /* -1 for null termination */
+	out[0] = '\0';
+
+	if (mem_info)
+		m = mem_info->data_src.mem_snoop;
+
+	for (i = 0; m && i < ARRAY_SIZE(snoop_access); i++, m >>= 1) {
+		if (!(m & 0x1))
+			continue;
+		if (l) {
+			strcat(out, " or ");
+			l += 4;
+		}
+		l += scnprintf(out + l, sz - l, snoop_access[i]);
+	}
+
+	if (*out == '\0')
+		l += scnprintf(out, sz - l, "N/A");
+
+	return l;
+}
+
+int perf_mem__lck_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
+{
+	u64 mask = PERF_MEM_LOCK_NA;
+	int l;
+
+	if (mem_info)
+		mask = mem_info->data_src.mem_lock;
+
+	if (mask & PERF_MEM_LOCK_NA)
+		l = scnprintf(out, sz, "N/A");
+	else if (mask & PERF_MEM_LOCK_LOCKED)
+		l = scnprintf(out, sz, "Yes");
+	else
+		l = scnprintf(out, sz, "No");
+
+	return l;
+}
+
+int perf_script__meminfo_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
+{
+	int i = 0;
+
+	i += perf_mem__lvl_scnprintf(out, sz, mem_info);
+	i += scnprintf(out + i, sz - i, "|SNP ");
+	i += perf_mem__snp_scnprintf(out + i, sz - i, mem_info);
+	i += scnprintf(out + i, sz - i, "|TLB ");
+	i += perf_mem__tlb_scnprintf(out + i, sz - i, mem_info);
+	i += scnprintf(out + i, sz - i, "|LCK ");
+	i += perf_mem__lck_scnprintf(out + i, sz - i, mem_info);
+
+	return i;
+}

diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h
new file mode 100644
index 0000000..5d6d930
--- /dev/null
+++ b/tools/perf/util/mem-events.h

@@ -0,0 +1,35 @@
+#ifndef __PERF_MEM_EVENTS_H
+#define __PERF_MEM_EVENTS_H
+
+#include <stdbool.h>
+
+struct perf_mem_event {
+	bool		record;
+	bool		supported;
+	const char	*tag;
+	const char	*name;
+	const char	*sysfs_name;
+};
+
+enum {
+	PERF_MEM_EVENTS__LOAD,
+	PERF_MEM_EVENTS__STORE,
+	PERF_MEM_EVENTS__MAX,
+};
+
+extern struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX];
+
+int perf_mem_events__parse(const char *str);
+int perf_mem_events__init(void);
+
+char *perf_mem_events__name(int i);
+
+struct mem_info;
+int perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
+int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
+int perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
+int perf_mem__lck_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
+
+int perf_script__meminfo_scnprintf(char *bf, size_t size, struct mem_info *mem_info);
+
+#endif /* __PERF_MEM_EVENTS_H */

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 813d9b2..4c19d5e 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c

@@ -279,7 +279,24 @@
 	return "unknown";
 }
 
+static int parse_events__is_name_term(struct parse_events_term *term)
+{
+	return term->type_term == PARSE_EVENTS__TERM_TYPE_NAME;
+}
 
+static char *get_config_name(struct list_head *head_terms)
+{
+	struct parse_events_term *term;
+
+	if (!head_terms)
+		return NULL;
+
+	list_for_each_entry(term, head_terms, list)
+		if (parse_events__is_name_term(term))
+			return term->val.str;
+
+	return NULL;
+}
 
 static struct perf_evsel *
 __add_event(struct list_head *list, int *idx,
@@ -333,11 +350,25 @@
 	return -1;
 }
 
+typedef int config_term_func_t(struct perf_event_attr *attr,
+			       struct parse_events_term *term,
+			       struct parse_events_error *err);
+static int config_term_common(struct perf_event_attr *attr,
+			      struct parse_events_term *term,
+			      struct parse_events_error *err);
+static int config_attr(struct perf_event_attr *attr,
+		       struct list_head *head,
+		       struct parse_events_error *err,
+		       config_term_func_t config_term);
+
 int parse_events_add_cache(struct list_head *list, int *idx,
-			   char *type, char *op_result1, char *op_result2)
+			   char *type, char *op_result1, char *op_result2,
+			   struct parse_events_error *err,
+			   struct list_head *head_config)
 {
 	struct perf_event_attr attr;
-	char name[MAX_NAME_LEN];
+	LIST_HEAD(config_terms);
+	char name[MAX_NAME_LEN], *config_name;
 	int cache_type = -1, cache_op = -1, cache_result = -1;
 	char *op_result[2] = { op_result1, op_result2 };
 	int i, n;
@@ -351,6 +382,7 @@
 	if (cache_type == -1)
 		return -EINVAL;
 
+	config_name = get_config_name(head_config);
 	n = snprintf(name, MAX_NAME_LEN, "%s", type);
 
 	for (i = 0; (i < 2) && (op_result[i]); i++) {
@@ -391,7 +423,16 @@
 	memset(&attr, 0, sizeof(attr));
 	attr.config = cache_type | (cache_op << 8) | (cache_result << 16);
 	attr.type = PERF_TYPE_HW_CACHE;
-	return add_event(list, idx, &attr, name, NULL);
+
+	if (head_config) {
+		if (config_attr(&attr, head_config, err,
+				config_term_common))
+			return -EINVAL;
+
+		if (get_config_terms(head_config, &config_terms))
+			return -ENOMEM;
+	}
+	return add_event(list, idx, &attr, config_name ? : name, &config_terms);
 }
 
 static void tracepoint_error(struct parse_events_error *e, int err,
@@ -540,6 +581,7 @@
 struct __add_bpf_event_param {
 	struct parse_events_evlist *data;
 	struct list_head *list;
+	struct list_head *head_config;
 };
 
 static int add_bpf_event(struct probe_trace_event *tev, int fd,
@@ -556,7 +598,8 @@
 		 tev->group, tev->event, fd);
 
 	err = parse_events_add_tracepoint(&new_evsels, &evlist->idx, tev->group,
-					  tev->event, evlist->error, NULL);
+					  tev->event, evlist->error,
+					  param->head_config);
 	if (err) {
 		struct perf_evsel *evsel, *tmp;
 
@@ -581,11 +624,12 @@
 
 int parse_events_load_bpf_obj(struct parse_events_evlist *data,
 			      struct list_head *list,
-			      struct bpf_object *obj)
+			      struct bpf_object *obj,
+			      struct list_head *head_config)
 {
 	int err;
 	char errbuf[BUFSIZ];
-	struct __add_bpf_event_param param = {data, list};
+	struct __add_bpf_event_param param = {data, list, head_config};
 	static bool registered_unprobe_atexit = false;
 
 	if (IS_ERR(obj) || !obj) {
@@ -631,17 +675,99 @@
 	return err;
 }
 
+static int
+parse_events_config_bpf(struct parse_events_evlist *data,
+			struct bpf_object *obj,
+			struct list_head *head_config)
+{
+	struct parse_events_term *term;
+	int error_pos;
+
+	if (!head_config || list_empty(head_config))
+		return 0;
+
+	list_for_each_entry(term, head_config, list) {
+		char errbuf[BUFSIZ];
+		int err;
+
+		if (term->type_term != PARSE_EVENTS__TERM_TYPE_USER) {
+			snprintf(errbuf, sizeof(errbuf),
+				 "Invalid config term for BPF object");
+			errbuf[BUFSIZ - 1] = '\0';
+
+			data->error->idx = term->err_term;
+			data->error->str = strdup(errbuf);
+			return -EINVAL;
+		}
+
+		err = bpf__config_obj(obj, term, data->evlist, &error_pos);
+		if (err) {
+			bpf__strerror_config_obj(obj, term, data->evlist,
+						 &error_pos, err, errbuf,
+						 sizeof(errbuf));
+			data->error->help = strdup(
+"Hint:\tValid config terms:\n"
+"     \tmap:[<arraymap>].value<indices>=[value]\n"
+"     \tmap:[<eventmap>].event<indices>=[event]\n"
+"\n"
+"     \twhere <indices> is something like [0,3...5] or [all]\n"
+"     \t(add -v to see detail)");
+			data->error->str = strdup(errbuf);
+			if (err == -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE)
+				data->error->idx = term->err_val;
+			else
+				data->error->idx = term->err_term + error_pos;
+			return err;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Split config terms:
+ * perf record -e bpf.c/call-graph=fp,map:array.value[0]=1/ ...
+ *  'call-graph=fp' is 'evt config', should be applied to each
+ *  events in bpf.c.
+ * 'map:array.value[0]=1' is 'obj config', should be processed
+ * with parse_events_config_bpf.
+ *
+ * Move object config terms from the first list to obj_head_config.
+ */
+static void
+split_bpf_config_terms(struct list_head *evt_head_config,
+		       struct list_head *obj_head_config)
+{
+	struct parse_events_term *term, *temp;
+
+	/*
+	 * Currectly, all possible user config term
+	 * belong to bpf object. parse_events__is_hardcoded_term()
+	 * happends to be a good flag.
+	 *
+	 * See parse_events_config_bpf() and
+	 * config_term_tracepoint().
+	 */
+	list_for_each_entry_safe(term, temp, evt_head_config, list)
+		if (!parse_events__is_hardcoded_term(term))
+			list_move_tail(&term->list, obj_head_config);
+}
+
 int parse_events_load_bpf(struct parse_events_evlist *data,
 			  struct list_head *list,
 			  char *bpf_file_name,
-			  bool source)
+			  bool source,
+			  struct list_head *head_config)
 {
+	int err;
 	struct bpf_object *obj;
+	LIST_HEAD(obj_head_config);
+
+	if (head_config)
+		split_bpf_config_terms(head_config, &obj_head_config);
 
 	obj = bpf__prepare_load(bpf_file_name, source);
 	if (IS_ERR(obj)) {
 		char errbuf[BUFSIZ];
-		int err;
 
 		err = PTR_ERR(obj);
 
@@ -659,7 +785,18 @@
 		return err;
 	}
 
-	return parse_events_load_bpf_obj(data, list, obj);
+	err = parse_events_load_bpf_obj(data, list, obj, head_config);
+	if (err)
+		return err;
+	err = parse_events_config_bpf(data, obj, &obj_head_config);
+
+	/*
+	 * Caller doesn't know anything about obj_head_config,
+	 * so combine them together again before returnning.
+	 */
+	if (head_config)
+		list_splice_tail(&obj_head_config, head_config);
+	return err;
 }
 
 static int
@@ -746,9 +883,59 @@
 	return -EINVAL;
 }
 
-typedef int config_term_func_t(struct perf_event_attr *attr,
-			       struct parse_events_term *term,
-			       struct parse_events_error *err);
+/*
+ * Update according to parse-events.l
+ */
+static const char *config_term_names[__PARSE_EVENTS__TERM_TYPE_NR] = {
+	[PARSE_EVENTS__TERM_TYPE_USER]			= "<sysfs term>",
+	[PARSE_EVENTS__TERM_TYPE_CONFIG]		= "config",
+	[PARSE_EVENTS__TERM_TYPE_CONFIG1]		= "config1",
+	[PARSE_EVENTS__TERM_TYPE_CONFIG2]		= "config2",
+	[PARSE_EVENTS__TERM_TYPE_NAME]			= "name",
+	[PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD]		= "period",
+	[PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ]		= "freq",
+	[PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE]	= "branch_type",
+	[PARSE_EVENTS__TERM_TYPE_TIME]			= "time",
+	[PARSE_EVENTS__TERM_TYPE_CALLGRAPH]		= "call-graph",
+	[PARSE_EVENTS__TERM_TYPE_STACKSIZE]		= "stack-size",
+	[PARSE_EVENTS__TERM_TYPE_NOINHERIT]		= "no-inherit",
+	[PARSE_EVENTS__TERM_TYPE_INHERIT]		= "inherit",
+};
+
+static bool config_term_shrinked;
+
+static bool
+config_term_avail(int term_type, struct parse_events_error *err)
+{
+	if (term_type < 0 || term_type >= __PARSE_EVENTS__TERM_TYPE_NR) {
+		err->str = strdup("Invalid term_type");
+		return false;
+	}
+	if (!config_term_shrinked)
+		return true;
+
+	switch (term_type) {
+	case PARSE_EVENTS__TERM_TYPE_CONFIG:
+	case PARSE_EVENTS__TERM_TYPE_CONFIG1:
+	case PARSE_EVENTS__TERM_TYPE_CONFIG2:
+	case PARSE_EVENTS__TERM_TYPE_NAME:
+		return true;
+	default:
+		if (!err)
+			return false;
+
+		/* term_type is validated so indexing is safe */
+		if (asprintf(&err->str, "'%s' is not usable in 'perf stat'",
+			     config_term_names[term_type]) < 0)
+			err->str = NULL;
+		return false;
+	}
+}
+
+void parse_events__shrink_config_terms(void)
+{
+	config_term_shrinked = true;
+}
 
 static int config_term_common(struct perf_event_attr *attr,
 			      struct parse_events_term *term,
@@ -815,6 +1002,17 @@
 		return -EINVAL;
 	}
 
+	/*
+	 * Check term availbility after basic checking so
+	 * PARSE_EVENTS__TERM_TYPE_USER can be found and filtered.
+	 *
+	 * If check availbility at the entry of this function,
+	 * user will see "'<sysfs term>' is not usable in 'perf stat'"
+	 * if an invalid config term is provided for legacy events
+	 * (for example, instructions/badterm/...), which is confusing.
+	 */
+	if (!config_term_avail(term->type_term, err))
+		return -EINVAL;
 	return 0;
 #undef CHECK_TYPE_VAL
 }
@@ -961,23 +1159,8 @@
 			return -ENOMEM;
 	}
 
-	return add_event(list, &data->idx, &attr, NULL, &config_terms);
-}
-
-static int parse_events__is_name_term(struct parse_events_term *term)
-{
-	return term->type_term == PARSE_EVENTS__TERM_TYPE_NAME;
-}
-
-static char *pmu_event_name(struct list_head *head_terms)
-{
-	struct parse_events_term *term;
-
-	list_for_each_entry(term, head_terms, list)
-		if (parse_events__is_name_term(term))
-			return term->val.str;
-
-	return NULL;
+	return add_event(list, &data->idx, &attr,
+			 get_config_name(head_config), &config_terms);
 }
 
 int parse_events_add_pmu(struct parse_events_evlist *data,
@@ -1024,7 +1207,7 @@
 		return -EINVAL;
 
 	evsel = __add_event(list, &data->idx, &attr,
-			    pmu_event_name(head_config), pmu->cpus,
+			    get_config_name(head_config), pmu->cpus,
 			    &config_terms);
 	if (evsel) {
 		evsel->unit = info.unit;
@@ -1386,8 +1569,7 @@
 		return 0;
 	}
 
-	if (data.terms)
-		parse_events__free_terms(data.terms);
+	parse_events_terms__delete(data.terms);
 	return ret;
 }
 
@@ -1395,9 +1577,10 @@
 		 struct parse_events_error *err)
 {
 	struct parse_events_evlist data = {
-		.list  = LIST_HEAD_INIT(data.list),
-		.idx   = evlist->nr_entries,
-		.error = err,
+		.list   = LIST_HEAD_INIT(data.list),
+		.idx    = evlist->nr_entries,
+		.error  = err,
+		.evlist = evlist,
 	};
 	int ret;
 
@@ -2068,12 +2251,29 @@
 			term->err_term, term->err_val);
 }
 
-void parse_events__free_terms(struct list_head *terms)
+void parse_events_terms__purge(struct list_head *terms)
 {
 	struct parse_events_term *term, *h;
 
-	list_for_each_entry_safe(term, h, terms, list)
+	list_for_each_entry_safe(term, h, terms, list) {
+		if (term->array.nr_ranges)
+			free(term->array.ranges);
+		list_del_init(&term->list);
 		free(term);
+	}
+}
+
+void parse_events_terms__delete(struct list_head *terms)
+{
+	if (!terms)
+		return;
+	parse_events_terms__purge(terms);
+	free(terms);
+}
+
+void parse_events__clear_array(struct parse_events_array *a)
+{
+	free(a->ranges);
 }
 
 void parse_events_evlist_error(struct parse_events_evlist *data,
@@ -2088,6 +2288,33 @@
 	WARN_ONCE(!err->str, "WARNING: failed to allocate error string");
 }
 
+static void config_terms_list(char *buf, size_t buf_sz)
+{
+	int i;
+	bool first = true;
+
+	buf[0] = '\0';
+	for (i = 0; i < __PARSE_EVENTS__TERM_TYPE_NR; i++) {
+		const char *name = config_term_names[i];
+
+		if (!config_term_avail(i, NULL))
+			continue;
+		if (!name)
+			continue;
+		if (name[0] == '<')
+			continue;
+
+		if (strlen(buf) + strlen(name) + 2 >= buf_sz)
+			return;
+
+		if (!first)
+			strcat(buf, ",");
+		else
+			first = false;
+		strcat(buf, name);
+	}
+}
+
 /*
  * Return string contains valid config terms of an event.
  * @additional_terms: For terms such as PMU sysfs terms.
@@ -2095,17 +2322,18 @@
 char *parse_events_formats_error_string(char *additional_terms)
 {
 	char *str;
-	static const char *static_terms = "config,config1,config2,name,"
-					  "period,freq,branch_type,time,"
-					  "call-graph,stack-size\n";
+	/* "branch_type" is the longest name */
+	char static_terms[__PARSE_EVENTS__TERM_TYPE_NR *
+			  (sizeof("branch_type") - 1)];
 
+	config_terms_list(static_terms, sizeof(static_terms));
 	/* valid terms */
 	if (additional_terms) {
-		if (!asprintf(&str, "valid terms: %s,%s",
-			      additional_terms, static_terms))
+		if (asprintf(&str, "valid terms: %s,%s",
+			     additional_terms, static_terms) < 0)
 			goto fail;
 	} else {
-		if (!asprintf(&str, "valid terms: %s", static_terms))
+		if (asprintf(&str, "valid terms: %s", static_terms) < 0)
 			goto fail;
 	}
 	return str;

diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index f1a6db1..67e4930 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h

@@ -68,11 +68,21 @@
 	PARSE_EVENTS__TERM_TYPE_CALLGRAPH,
 	PARSE_EVENTS__TERM_TYPE_STACKSIZE,
 	PARSE_EVENTS__TERM_TYPE_NOINHERIT,
-	PARSE_EVENTS__TERM_TYPE_INHERIT
+	PARSE_EVENTS__TERM_TYPE_INHERIT,
+	__PARSE_EVENTS__TERM_TYPE_NR,
+};
+
+struct parse_events_array {
+	size_t nr_ranges;
+	struct {
+		unsigned int start;
+		size_t length;
+	} *ranges;
 };
 
 struct parse_events_term {
 	char *config;
+	struct parse_events_array array;
 	union {
 		char *str;
 		u64  num;
@@ -98,12 +108,14 @@
 	int			   idx;
 	int			   nr_groups;
 	struct parse_events_error *error;
+	struct perf_evlist	  *evlist;
 };
 
 struct parse_events_terms {
 	struct list_head *terms;
 };
 
+void parse_events__shrink_config_terms(void);
 int parse_events__is_hardcoded_term(struct parse_events_term *term);
 int parse_events_term__num(struct parse_events_term **term,
 			   int type_term, char *config, u64 num,
@@ -115,7 +127,9 @@
 			      char *config, unsigned idx);
 int parse_events_term__clone(struct parse_events_term **new,
 			     struct parse_events_term *term);
-void parse_events__free_terms(struct list_head *terms);
+void parse_events_terms__delete(struct list_head *terms);
+void parse_events_terms__purge(struct list_head *terms);
+void parse_events__clear_array(struct parse_events_array *a);
 int parse_events__modifier_event(struct list_head *list, char *str, bool add);
 int parse_events__modifier_group(struct list_head *list, char *event_mod);
 int parse_events_name(struct list_head *list, char *name);
@@ -126,18 +140,22 @@
 int parse_events_load_bpf(struct parse_events_evlist *data,
 			  struct list_head *list,
 			  char *bpf_file_name,
-			  bool source);
+			  bool source,
+			  struct list_head *head_config);
 /* Provide this function for perf test */
 struct bpf_object;
 int parse_events_load_bpf_obj(struct parse_events_evlist *data,
 			      struct list_head *list,
-			      struct bpf_object *obj);
+			      struct bpf_object *obj,
+			      struct list_head *head_config);
 int parse_events_add_numeric(struct parse_events_evlist *data,
 			     struct list_head *list,
 			     u32 type, u64 config,
 			     struct list_head *head_config);
 int parse_events_add_cache(struct list_head *list, int *idx,
-			   char *type, char *op_result1, char *op_result2);
+			   char *type, char *op_result1, char *op_result2,
+			   struct parse_events_error *error,
+			   struct list_head *head_config);
 int parse_events_add_breakpoint(struct list_head *list, int *idx,
 				void *ptr, char *type, u64 len);
 int parse_events_add_pmu(struct parse_events_evlist *data,

diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 58c5831..1477fbc 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l

@@ -9,8 +9,8 @@
 %{
 #include <errno.h>
 #include "../perf.h"
-#include "parse-events-bison.h"
 #include "parse-events.h"
+#include "parse-events-bison.h"
 
 char *parse_events_get_text(yyscan_t yyscanner);
 YYSTYPE *parse_events_get_lval(yyscan_t yyscanner);
@@ -111,6 +111,7 @@
 %x mem
 %s config
 %x event
+%x array
 
 group		[^,{}/]*[{][^}]*[}][^,{}/]*
 event_pmu	[^,{}/]+[/][^/]*[/][^,{}/]*
@@ -122,7 +123,7 @@
 num_hex		0x[a-fA-F0-9]+
 num_raw_hex	[a-fA-F0-9]+
 name		[a-zA-Z_*?][a-zA-Z0-9_*?.]*
-name_minus	[a-zA-Z_*?][a-zA-Z0-9\-_*?.]*
+name_minus	[a-zA-Z_*?][a-zA-Z0-9\-_*?.:]*
 /* If you add a modifier you need to update check_modifier() */
 modifier_event	[ukhpPGHSDI]+
 modifier_bp	[rwx]{1,3}
@@ -176,10 +177,17 @@
 
 }
 
+<array>{
+"]"			{ BEGIN(config); return ']'; }
+{num_dec}		{ return value(yyscanner, 10); }
+{num_hex}		{ return value(yyscanner, 16); }
+,			{ return ','; }
+"\.\.\."		{ return PE_ARRAY_RANGE; }
+}
+
 <config>{
 	/*
-	 * Please update parse_events_formats_error_string any time
-	 * new static term is added.
+	 * Please update config_term_names when new static term is added.
 	 */
 config			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG); }
 config1			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG1); }
@@ -196,6 +204,8 @@
 ,			{ return ','; }
 "/"			{ BEGIN(INITIAL); return '/'; }
 {name_minus}		{ return str(yyscanner, PE_NAME); }
+\[all\]			{ return PE_ARRAY_ALL; }
+"["			{ BEGIN(array); return '['; }
 }
 
 <mem>{
@@ -238,6 +248,7 @@
 alignment-faults				{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_ALIGNMENT_FAULTS); }
 emulation-faults				{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS); }
 dummy						{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); }
+bpf-output					{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUTPUT); }
 
 	/*
 	 * We have to handle the kernel PMU event cycles-ct/cycles-t/mem-loads/mem-stores separately.

diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index ad37996..5be4a5f 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y

@@ -28,7 +28,7 @@
 	INIT_LIST_HEAD(list);         \
 } while (0)
 
-static inc_group_count(struct list_head *list,
+static void inc_group_count(struct list_head *list,
 		       struct parse_events_evlist *data)
 {
 	/* Count groups only have more than 1 members */
@@ -48,6 +48,7 @@
 %token PE_PREFIX_MEM PE_PREFIX_RAW PE_PREFIX_GROUP
 %token PE_ERROR
 %token PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT
+%token PE_ARRAY_ALL PE_ARRAY_RANGE
 %type <num> PE_VALUE
 %type <num> PE_VALUE_SYM_HW
 %type <num> PE_VALUE_SYM_SW
@@ -64,6 +65,7 @@
 %type <str> PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT
 %type <num> value_sym
 %type <head> event_config
+%type <head> opt_event_config
 %type <term> event_term
 %type <head> event_pmu
 %type <head> event_legacy_symbol
@@ -82,6 +84,9 @@
 %type <head> group_def
 %type <head> group
 %type <head> groups
+%type <array> array
+%type <array> array_term
+%type <array> array_terms
 
 %union
 {
@@ -93,6 +98,7 @@
 		char *sys;
 		char *event;
 	} tracepoint_name;
+	struct parse_events_array array;
 }
 %%
 
@@ -211,24 +217,14 @@
 	   event_bpf_file
 
 event_pmu:
-PE_NAME '/' event_config '/'
+PE_NAME opt_event_config
 {
 	struct parse_events_evlist *data = _data;
 	struct list_head *list;
 
 	ALLOC_LIST(list);
-	ABORT_ON(parse_events_add_pmu(data, list, $1, $3));
-	parse_events__free_terms($3);
-	$$ = list;
-}
-|
-PE_NAME '/' '/'
-{
-	struct parse_events_evlist *data = _data;
-	struct list_head *list;
-
-	ALLOC_LIST(list);
-	ABORT_ON(parse_events_add_pmu(data, list, $1, NULL));
+	ABORT_ON(parse_events_add_pmu(data, list, $1, $2));
+	parse_events_terms__delete($2);
 	$$ = list;
 }
 |
@@ -246,7 +242,7 @@
 
 	ALLOC_LIST(list);
 	ABORT_ON(parse_events_add_pmu(data, list, "cpu", head));
-	parse_events__free_terms(head);
+	parse_events_terms__delete(head);
 	$$ = list;
 }
 |
@@ -266,7 +262,7 @@
 
 	ALLOC_LIST(list);
 	ABORT_ON(parse_events_add_pmu(data, list, "cpu", head));
-	parse_events__free_terms(head);
+	parse_events_terms__delete(head);
 	$$ = list;
 }
 
@@ -285,7 +281,7 @@
 
 	ALLOC_LIST(list);
 	ABORT_ON(parse_events_add_numeric(data, list, type, config, $3));
-	parse_events__free_terms($3);
+	parse_events_terms__delete($3);
 	$$ = list;
 }
 |
@@ -302,33 +298,39 @@
 }
 
 event_legacy_cache:
-PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT '-' PE_NAME_CACHE_OP_RESULT
+PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT '-' PE_NAME_CACHE_OP_RESULT opt_event_config
 {
 	struct parse_events_evlist *data = _data;
+	struct parse_events_error *error = data->error;
 	struct list_head *list;
 
 	ALLOC_LIST(list);
-	ABORT_ON(parse_events_add_cache(list, &data->idx, $1, $3, $5));
+	ABORT_ON(parse_events_add_cache(list, &data->idx, $1, $3, $5, error, $6));
+	parse_events_terms__delete($6);
 	$$ = list;
 }
 |
-PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT
+PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT opt_event_config
 {
 	struct parse_events_evlist *data = _data;
+	struct parse_events_error *error = data->error;
 	struct list_head *list;
 
 	ALLOC_LIST(list);
-	ABORT_ON(parse_events_add_cache(list, &data->idx, $1, $3, NULL));
+	ABORT_ON(parse_events_add_cache(list, &data->idx, $1, $3, NULL, error, $4));
+	parse_events_terms__delete($4);
 	$$ = list;
 }
 |
-PE_NAME_CACHE_TYPE
+PE_NAME_CACHE_TYPE opt_event_config
 {
 	struct parse_events_evlist *data = _data;
+	struct parse_events_error *error = data->error;
 	struct list_head *list;
 
 	ALLOC_LIST(list);
-	ABORT_ON(parse_events_add_cache(list, &data->idx, $1, NULL, NULL));
+	ABORT_ON(parse_events_add_cache(list, &data->idx, $1, NULL, NULL, error, $2));
+	parse_events_terms__delete($2);
 	$$ = list;
 }
 
@@ -378,7 +380,7 @@
 }
 
 event_legacy_tracepoint:
-tracepoint_name
+tracepoint_name opt_event_config
 {
 	struct parse_events_evlist *data = _data;
 	struct parse_events_error *error = data->error;
@@ -389,24 +391,7 @@
 		error->idx = @1.first_column;
 
 	if (parse_events_add_tracepoint(list, &data->idx, $1.sys, $1.event,
-					error, NULL))
-		return -1;
-
-	$$ = list;
-}
-|
-tracepoint_name '/' event_config '/'
-{
-	struct parse_events_evlist *data = _data;
-	struct parse_events_error *error = data->error;
-	struct list_head *list;
-
-	ALLOC_LIST(list);
-	if (error)
-		error->idx = @1.first_column;
-
-	if (parse_events_add_tracepoint(list, &data->idx, $1.sys, $1.event,
-					error, $3))
+					error, $2))
 		return -1;
 
 	$$ = list;
@@ -433,49 +418,68 @@
 }
 
 event_legacy_numeric:
-PE_VALUE ':' PE_VALUE
+PE_VALUE ':' PE_VALUE opt_event_config
 {
 	struct parse_events_evlist *data = _data;
 	struct list_head *list;
 
 	ALLOC_LIST(list);
-	ABORT_ON(parse_events_add_numeric(data, list, (u32)$1, $3, NULL));
+	ABORT_ON(parse_events_add_numeric(data, list, (u32)$1, $3, $4));
+	parse_events_terms__delete($4);
 	$$ = list;
 }
 
 event_legacy_raw:
-PE_RAW
+PE_RAW opt_event_config
 {
 	struct parse_events_evlist *data = _data;
 	struct list_head *list;
 
 	ALLOC_LIST(list);
-	ABORT_ON(parse_events_add_numeric(data, list, PERF_TYPE_RAW, $1, NULL));
+	ABORT_ON(parse_events_add_numeric(data, list, PERF_TYPE_RAW, $1, $2));
+	parse_events_terms__delete($2);
 	$$ = list;
 }
 
 event_bpf_file:
-PE_BPF_OBJECT
+PE_BPF_OBJECT opt_event_config
 {
 	struct parse_events_evlist *data = _data;
 	struct parse_events_error *error = data->error;
 	struct list_head *list;
 
 	ALLOC_LIST(list);
-	ABORT_ON(parse_events_load_bpf(data, list, $1, false));
+	ABORT_ON(parse_events_load_bpf(data, list, $1, false, $2));
+	parse_events_terms__delete($2);
 	$$ = list;
 }
 |
-PE_BPF_SOURCE
+PE_BPF_SOURCE opt_event_config
 {
 	struct parse_events_evlist *data = _data;
 	struct list_head *list;
 
 	ALLOC_LIST(list);
-	ABORT_ON(parse_events_load_bpf(data, list, $1, true));
+	ABORT_ON(parse_events_load_bpf(data, list, $1, true, $2));
+	parse_events_terms__delete($2);
 	$$ = list;
 }
 
+opt_event_config:
+'/' event_config '/'
+{
+	$$ = $2;
+}
+|
+'/' '/'
+{
+	$$ = NULL;
+}
+|
+{
+	$$ = NULL;
+}
+
 start_terms: event_config
 {
 	struct parse_events_terms *data = _data;
@@ -573,6 +577,86 @@
 	ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, 1, &@1, NULL));
 	$$ = term;
 }
+|
+PE_NAME array '=' PE_NAME
+{
+	struct parse_events_term *term;
+	int i;
+
+	ABORT_ON(parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER,
+					$1, $4, &@1, &@4));
+
+	term->array = $2;
+	$$ = term;
+}
+|
+PE_NAME array '=' PE_VALUE
+{
+	struct parse_events_term *term;
+
+	ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
+					$1, $4, &@1, &@4));
+	term->array = $2;
+	$$ = term;
+}
+
+array:
+'[' array_terms ']'
+{
+	$$ = $2;
+}
+|
+PE_ARRAY_ALL
+{
+	$$.nr_ranges = 0;
+	$$.ranges = NULL;
+}
+
+array_terms:
+array_terms ',' array_term
+{
+	struct parse_events_array new_array;
+
+	new_array.nr_ranges = $1.nr_ranges + $3.nr_ranges;
+	new_array.ranges = malloc(sizeof(new_array.ranges[0]) *
+				  new_array.nr_ranges);
+	ABORT_ON(!new_array.ranges);
+	memcpy(&new_array.ranges[0], $1.ranges,
+	       $1.nr_ranges * sizeof(new_array.ranges[0]));
+	memcpy(&new_array.ranges[$1.nr_ranges], $3.ranges,
+	       $3.nr_ranges * sizeof(new_array.ranges[0]));
+	free($1.ranges);
+	free($3.ranges);
+	$$ = new_array;
+}
+|
+array_term
+
+array_term:
+PE_VALUE
+{
+	struct parse_events_array array;
+
+	array.nr_ranges = 1;
+	array.ranges = malloc(sizeof(array.ranges[0]));
+	ABORT_ON(!array.ranges);
+	array.ranges[0].start = $1;
+	array.ranges[0].length = 1;
+	$$ = array;
+}
+|
+PE_VALUE PE_ARRAY_RANGE PE_VALUE
+{
+	struct parse_events_array array;
+
+	ABORT_ON($3 < $1);
+	array.nr_ranges = 1;
+	array.ranges = malloc(sizeof(array.ranges[0]));
+	ABORT_ON(!array.ranges);
+	array.ranges[0].start = $1;
+	array.ranges[0].length = $3 - $1 + 1;
+	$$ = array;
+}
 
 sep_dc: ':' |
 

diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index b597bcc..adef23b 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c

@@ -98,7 +98,7 @@
 	char scale[128];
 	int fd, ret = -1;
 	char path[PATH_MAX];
-	const char *lc;
+	char *lc;
 
 	snprintf(path, PATH_MAX, "%s/%s.scale", dir, name);
 
@@ -124,6 +124,17 @@
 	lc = setlocale(LC_NUMERIC, NULL);
 
 	/*
+	 * The lc string may be allocated in static storage,
+	 * so get a dynamic copy to make it survive setlocale
+	 * call below.
+	 */
+	lc = strdup(lc);
+	if (!lc) {
+		ret = -ENOMEM;
+		goto error;
+	}
+
+	/*
 	 * force to C locale to ensure kernel
 	 * scale string is converted correctly.
 	 * kernel uses default C locale.
@@ -135,6 +146,8 @@
 	/* restore locale */
 	setlocale(LC_NUMERIC, lc);
 
+	free(lc);
+
 	ret = 0;
 error:
 	close(fd);
@@ -153,7 +166,7 @@
 	if (fd == -1)
 		return -1;
 
-		sret = read(fd, alias->unit, UNIT_MAX_LEN);
+	sret = read(fd, alias->unit, UNIT_MAX_LEN);
 	if (sret < 0)
 		goto error;
 
@@ -284,13 +297,12 @@
 {
 	struct dirent *evt_ent;
 	DIR *event_dir;
-	int ret = 0;
 
 	event_dir = opendir(dir);
 	if (!event_dir)
 		return -EINVAL;
 
-	while (!ret && (evt_ent = readdir(event_dir))) {
+	while ((evt_ent = readdir(event_dir))) {
 		char path[PATH_MAX];
 		char *name = evt_ent->d_name;
 		FILE *file;
@@ -306,17 +318,19 @@
 
 		snprintf(path, PATH_MAX, "%s/%s", dir, name);
 
-		ret = -EINVAL;
 		file = fopen(path, "r");
-		if (!file)
-			break;
+		if (!file) {
+			pr_debug("Cannot open %s\n", path);
+			continue;
+		}
 
-		ret = perf_pmu__new_alias(head, dir, name, file);
+		if (perf_pmu__new_alias(head, dir, name, file) < 0)
+			pr_debug("Cannot set up %s\n", name);
 		fclose(file);
 	}
 
 	closedir(event_dir);
-	return ret;
+	return 0;
 }
 
 /*
@@ -354,7 +368,7 @@
 	list_for_each_entry(term, &alias->terms, list) {
 		ret = parse_events_term__clone(&cloned, term);
 		if (ret) {
-			parse_events__free_terms(&list);
+			parse_events_terms__purge(&list);
 			return ret;
 		}
 		list_add_tail(&cloned->list, &list);

diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c
index 544509c..b3aabc0 100644
--- a/tools/perf/util/scripting-engines/trace-event-perl.c
+++ b/tools/perf/util/scripting-engines/trace-event-perl.c

@@ -187,6 +187,9 @@
 				 const char *ev_name,
 				 struct print_arg *args)
 {
+	if (args == NULL)
+		return;
+
 	switch (args->type) {
 	case PRINT_NULL:
 		break;

diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index d72fafc..fbd0524 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c

@@ -205,6 +205,9 @@
 				 const char *ev_name,
 				 struct print_arg *args)
 {
+	if (args == NULL)
+		return;
+
 	switch (args->type) {
 	case PRINT_NULL:
 		break;
@@ -1091,8 +1094,6 @@
 		goto error;
 	}
 
-	free(command_line);
-
 	set_table_handlers(tables);
 
 	if (tables->db_export_mode) {
@@ -1101,6 +1102,8 @@
 			goto error;
 	}
 
+	free(command_line);
+
 	return err;
 error:
 	Py_Finalize();

diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 40b7a0d..60b3593 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c

@@ -240,14 +240,6 @@
 	return 0;
 }
 
-static int process_build_id_stub(struct perf_tool *tool __maybe_unused,
-				 union perf_event *event __maybe_unused,
-				 struct perf_session *session __maybe_unused)
-{
-	dump_printf(": unhandled!\n");
-	return 0;
-}
-
 static int process_finished_round_stub(struct perf_tool *tool __maybe_unused,
 				       union perf_event *event __maybe_unused,
 				       struct ordered_events *oe __maybe_unused)
@@ -260,23 +252,6 @@
 				  union perf_event *event,
 				  struct ordered_events *oe);
 
-static int process_id_index_stub(struct perf_tool *tool __maybe_unused,
-				 union perf_event *event __maybe_unused,
-				 struct perf_session *perf_session
-				 __maybe_unused)
-{
-	dump_printf(": unhandled!\n");
-	return 0;
-}
-
-static int process_event_auxtrace_info_stub(struct perf_tool *tool __maybe_unused,
-				union perf_event *event __maybe_unused,
-				struct perf_session *session __maybe_unused)
-{
-	dump_printf(": unhandled!\n");
-	return 0;
-}
-
 static int skipn(int fd, off_t n)
 {
 	char buf[4096];
@@ -303,10 +278,9 @@
 	return event->auxtrace.size;
 }
 
-static
-int process_event_auxtrace_error_stub(struct perf_tool *tool __maybe_unused,
-				      union perf_event *event __maybe_unused,
-				      struct perf_session *session __maybe_unused)
+static int process_event_op2_stub(struct perf_tool *tool __maybe_unused,
+				  union perf_event *event __maybe_unused,
+				  struct perf_session *session __maybe_unused)
 {
 	dump_printf(": unhandled!\n");
 	return 0;
@@ -410,7 +384,7 @@
 	if (tool->tracing_data == NULL)
 		tool->tracing_data = process_event_synth_tracing_data_stub;
 	if (tool->build_id == NULL)
-		tool->build_id = process_build_id_stub;
+		tool->build_id = process_event_op2_stub;
 	if (tool->finished_round == NULL) {
 		if (tool->ordered_events)
 			tool->finished_round = process_finished_round;
@@ -418,13 +392,13 @@
 			tool->finished_round = process_finished_round_stub;
 	}
 	if (tool->id_index == NULL)
-		tool->id_index = process_id_index_stub;
+		tool->id_index = process_event_op2_stub;
 	if (tool->auxtrace_info == NULL)
-		tool->auxtrace_info = process_event_auxtrace_info_stub;
+		tool->auxtrace_info = process_event_op2_stub;
 	if (tool->auxtrace == NULL)
 		tool->auxtrace = process_event_auxtrace_stub;
 	if (tool->auxtrace_error == NULL)
-		tool->auxtrace_error = process_event_auxtrace_error_stub;
+		tool->auxtrace_error = process_event_op2_stub;
 	if (tool->thread_map == NULL)
 		tool->thread_map = process_event_thread_map_stub;
 	if (tool->cpu_map == NULL)

diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py
index 1833103..c8680984 100644
--- a/tools/perf/util/setup.py
+++ b/tools/perf/util/setup.py

@@ -22,6 +22,7 @@
 # switch off several checks (need to be at the end of cflags list)
 cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter' ]
 
+src_perf  = getenv('srctree') + '/tools/perf'
 build_lib = getenv('PYTHON_EXTBUILD_LIB')
 build_tmp = getenv('PYTHON_EXTBUILD_TMP')
 libtraceevent = getenv('LIBTRACEEVENT')
@@ -30,6 +31,9 @@
 ext_sources = [f.strip() for f in file('util/python-ext-sources')
 				if len(f.strip()) > 0 and f[0] != '#']
 
+# use full paths with source files
+ext_sources = map(lambda x: '%s/%s' % (src_perf, x) , ext_sources)
+
 perf = Extension('perf',
 		  sources = ext_sources,
 		  include_dirs = ['util/include'],

diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index ec72234..93fa136 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c

@@ -6,6 +6,7 @@
 #include "evsel.h"
 #include "evlist.h"
 #include <traceevent/event-parse.h>
+#include "mem-events.h"
 
 regex_t		parent_regex;
 const char	default_parent_pattern[] = "^sys_|^do_page_fault";
@@ -25,9 +26,19 @@
 int		sort__has_sym = 0;
 int		sort__has_dso = 0;
 int		sort__has_socket = 0;
+int		sort__has_thread = 0;
+int		sort__has_comm = 0;
 enum sort_mode	sort__mode = SORT_MODE__NORMAL;
 
-
+/*
+ * Replaces all occurrences of a char used with the:
+ *
+ * -t, --field-separator
+ *
+ * option, that uses a special separator character and don't pad with spaces,
+ * replacing all occurances of this separator in symbol names (and other
+ * output) with a '.' character, that thus it's the only non valid separator.
+*/
 static int repsep_snprintf(char *bf, size_t size, const char *fmt, ...)
 {
 	int n;
@@ -80,10 +91,21 @@
 			       width, width, comm ?: "");
 }
 
+static int hist_entry__thread_filter(struct hist_entry *he, int type, const void *arg)
+{
+	const struct thread *th = arg;
+
+	if (type != HIST_FILTER__THREAD)
+		return -1;
+
+	return th && he->thread != th;
+}
+
 struct sort_entry sort_thread = {
 	.se_header	= "  Pid:Command",
 	.se_cmp		= sort__thread_cmp,
 	.se_snprintf	= hist_entry__thread_snprintf,
+	.se_filter	= hist_entry__thread_filter,
 	.se_width_idx	= HISTC_THREAD,
 };
 
@@ -121,6 +143,7 @@
 	.se_collapse	= sort__comm_collapse,
 	.se_sort	= sort__comm_sort,
 	.se_snprintf	= hist_entry__comm_snprintf,
+	.se_filter	= hist_entry__thread_filter,
 	.se_width_idx	= HISTC_COMM,
 };
 
@@ -170,10 +193,21 @@
 	return _hist_entry__dso_snprintf(he->ms.map, bf, size, width);
 }
 
+static int hist_entry__dso_filter(struct hist_entry *he, int type, const void *arg)
+{
+	const struct dso *dso = arg;
+
+	if (type != HIST_FILTER__DSO)
+		return -1;
+
+	return dso && (!he->ms.map || he->ms.map->dso != dso);
+}
+
 struct sort_entry sort_dso = {
 	.se_header	= "Shared Object",
 	.se_cmp		= sort__dso_cmp,
 	.se_snprintf	= hist_entry__dso_snprintf,
+	.se_filter	= hist_entry__dso_filter,
 	.se_width_idx	= HISTC_DSO,
 };
 
@@ -246,10 +280,8 @@
 			ret += repsep_snprintf(bf + ret, size - ret, "%s", sym->name);
 			ret += repsep_snprintf(bf + ret, size - ret, "+0x%llx",
 					ip - map->unmap_ip(map, sym->start));
-			ret += repsep_snprintf(bf + ret, size - ret, "%-*s",
-				       width - ret, "");
 		} else {
-			ret += repsep_snprintf(bf + ret, size - ret, "%-*s",
+			ret += repsep_snprintf(bf + ret, size - ret, "%.*s",
 					       width - ret,
 					       sym->name);
 		}
@@ -257,14 +289,9 @@
 		size_t len = BITS_PER_LONG / 4;
 		ret += repsep_snprintf(bf + ret, size - ret, "%-#.*llx",
 				       len, ip);
-		ret += repsep_snprintf(bf + ret, size - ret, "%-*s",
-				       width - ret, "");
 	}
 
-	if (ret > width)
-		bf[width] = '\0';
-
-	return width;
+	return ret;
 }
 
 static int hist_entry__sym_snprintf(struct hist_entry *he, char *bf,
@@ -274,46 +301,56 @@
 					 he->level, bf, size, width);
 }
 
+static int hist_entry__sym_filter(struct hist_entry *he, int type, const void *arg)
+{
+	const char *sym = arg;
+
+	if (type != HIST_FILTER__SYMBOL)
+		return -1;
+
+	return sym && (!he->ms.sym || !strstr(he->ms.sym->name, sym));
+}
+
 struct sort_entry sort_sym = {
 	.se_header	= "Symbol",
 	.se_cmp		= sort__sym_cmp,
 	.se_sort	= sort__sym_sort,
 	.se_snprintf	= hist_entry__sym_snprintf,
+	.se_filter	= hist_entry__sym_filter,
 	.se_width_idx	= HISTC_SYMBOL,
 };
 
 /* --sort srcline */
 
+static char *hist_entry__get_srcline(struct hist_entry *he)
+{
+	struct map *map = he->ms.map;
+
+	if (!map)
+		return SRCLINE_UNKNOWN;
+
+	return get_srcline(map->dso, map__rip_2objdump(map, he->ip),
+			   he->ms.sym, true);
+}
+
 static int64_t
 sort__srcline_cmp(struct hist_entry *left, struct hist_entry *right)
 {
-	if (!left->srcline) {
-		if (!left->ms.map)
-			left->srcline = SRCLINE_UNKNOWN;
-		else {
-			struct map *map = left->ms.map;
-			left->srcline = get_srcline(map->dso,
-					   map__rip_2objdump(map, left->ip),
-						    left->ms.sym, true);
-		}
-	}
-	if (!right->srcline) {
-		if (!right->ms.map)
-			right->srcline = SRCLINE_UNKNOWN;
-		else {
-			struct map *map = right->ms.map;
-			right->srcline = get_srcline(map->dso,
-					     map__rip_2objdump(map, right->ip),
-						     right->ms.sym, true);
-		}
-	}
+	if (!left->srcline)
+		left->srcline = hist_entry__get_srcline(left);
+	if (!right->srcline)
+		right->srcline = hist_entry__get_srcline(right);
+
 	return strcmp(right->srcline, left->srcline);
 }
 
 static int hist_entry__srcline_snprintf(struct hist_entry *he, char *bf,
 					size_t size, unsigned int width)
 {
-	return repsep_snprintf(bf, size, "%-*.*s", width, width, he->srcline);
+	if (!he->srcline)
+		he->srcline = hist_entry__get_srcline(he);
+
+	return repsep_snprintf(bf, size, "%-.*s", width, he->srcline);
 }
 
 struct sort_entry sort_srcline = {
@@ -327,11 +364,14 @@
 
 static char no_srcfile[1];
 
-static char *get_srcfile(struct hist_entry *e)
+static char *hist_entry__get_srcfile(struct hist_entry *e)
 {
 	char *sf, *p;
 	struct map *map = e->ms.map;
 
+	if (!map)
+		return no_srcfile;
+
 	sf = __get_srcline(map->dso, map__rip_2objdump(map, e->ip),
 			 e->ms.sym, false, true);
 	if (!strcmp(sf, SRCLINE_UNKNOWN))
@@ -348,25 +388,21 @@
 static int64_t
 sort__srcfile_cmp(struct hist_entry *left, struct hist_entry *right)
 {
-	if (!left->srcfile) {
-		if (!left->ms.map)
-			left->srcfile = no_srcfile;
-		else
-			left->srcfile = get_srcfile(left);
-	}
-	if (!right->srcfile) {
-		if (!right->ms.map)
-			right->srcfile = no_srcfile;
-		else
-			right->srcfile = get_srcfile(right);
-	}
+	if (!left->srcfile)
+		left->srcfile = hist_entry__get_srcfile(left);
+	if (!right->srcfile)
+		right->srcfile = hist_entry__get_srcfile(right);
+
 	return strcmp(right->srcfile, left->srcfile);
 }
 
 static int hist_entry__srcfile_snprintf(struct hist_entry *he, char *bf,
 					size_t size, unsigned int width)
 {
-	return repsep_snprintf(bf, size, "%-*.*s", width, width, he->srcfile);
+	if (!he->srcfile)
+		he->srcfile = hist_entry__get_srcfile(he);
+
+	return repsep_snprintf(bf, size, "%-.*s", width, he->srcfile);
 }
 
 struct sort_entry sort_srcfile = {
@@ -439,10 +475,21 @@
 	return repsep_snprintf(bf, size, "%*.*d", width, width-3, he->socket);
 }
 
+static int hist_entry__socket_filter(struct hist_entry *he, int type, const void *arg)
+{
+	int sk = *(const int *)arg;
+
+	if (type != HIST_FILTER__SOCKET)
+		return -1;
+
+	return sk >= 0 && he->socket != sk;
+}
+
 struct sort_entry sort_socket = {
 	.se_header      = "Socket",
 	.se_cmp	        = sort__socket_cmp,
 	.se_snprintf    = hist_entry__socket_snprintf,
+	.se_filter      = hist_entry__socket_filter,
 	.se_width_idx	= HISTC_SOCKET,
 };
 
@@ -483,9 +530,6 @@
 	if (right->trace_output == NULL)
 		right->trace_output = get_trace_output(right);
 
-	hists__new_col_len(left->hists, HISTC_TRACE, strlen(left->trace_output));
-	hists__new_col_len(right->hists, HISTC_TRACE, strlen(right->trace_output));
-
 	return strcmp(right->trace_output, left->trace_output);
 }
 
@@ -496,11 +540,11 @@
 
 	evsel = hists_to_evsel(he->hists);
 	if (evsel->attr.type != PERF_TYPE_TRACEPOINT)
-		return scnprintf(bf, size, "%-*.*s", width, width, "N/A");
+		return scnprintf(bf, size, "%-.*s", width, "N/A");
 
 	if (he->trace_output == NULL)
 		he->trace_output = get_trace_output(he);
-	return repsep_snprintf(bf, size, "%-*.*s", width, width, he->trace_output);
+	return repsep_snprintf(bf, size, "%-.*s", width, he->trace_output);
 }
 
 struct sort_entry sort_trace = {
@@ -532,6 +576,18 @@
 		return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
 }
 
+static int hist_entry__dso_from_filter(struct hist_entry *he, int type,
+				       const void *arg)
+{
+	const struct dso *dso = arg;
+
+	if (type != HIST_FILTER__DSO)
+		return -1;
+
+	return dso && (!he->branch_info || !he->branch_info->from.map ||
+		       he->branch_info->from.map->dso != dso);
+}
+
 static int64_t
 sort__dso_to_cmp(struct hist_entry *left, struct hist_entry *right)
 {
@@ -552,6 +608,18 @@
 		return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
 }
 
+static int hist_entry__dso_to_filter(struct hist_entry *he, int type,
+				     const void *arg)
+{
+	const struct dso *dso = arg;
+
+	if (type != HIST_FILTER__DSO)
+		return -1;
+
+	return dso && (!he->branch_info || !he->branch_info->to.map ||
+		       he->branch_info->to.map->dso != dso);
+}
+
 static int64_t
 sort__sym_from_cmp(struct hist_entry *left, struct hist_entry *right)
 {
@@ -613,10 +681,35 @@
 	return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
 }
 
+static int hist_entry__sym_from_filter(struct hist_entry *he, int type,
+				       const void *arg)
+{
+	const char *sym = arg;
+
+	if (type != HIST_FILTER__SYMBOL)
+		return -1;
+
+	return sym && !(he->branch_info && he->branch_info->from.sym &&
+			strstr(he->branch_info->from.sym->name, sym));
+}
+
+static int hist_entry__sym_to_filter(struct hist_entry *he, int type,
+				       const void *arg)
+{
+	const char *sym = arg;
+
+	if (type != HIST_FILTER__SYMBOL)
+		return -1;
+
+	return sym && !(he->branch_info && he->branch_info->to.sym &&
+		        strstr(he->branch_info->to.sym->name, sym));
+}
+
 struct sort_entry sort_dso_from = {
 	.se_header	= "Source Shared Object",
 	.se_cmp		= sort__dso_from_cmp,
 	.se_snprintf	= hist_entry__dso_from_snprintf,
+	.se_filter	= hist_entry__dso_from_filter,
 	.se_width_idx	= HISTC_DSO_FROM,
 };
 
@@ -624,6 +717,7 @@
 	.se_header	= "Target Shared Object",
 	.se_cmp		= sort__dso_to_cmp,
 	.se_snprintf	= hist_entry__dso_to_snprintf,
+	.se_filter	= hist_entry__dso_to_filter,
 	.se_width_idx	= HISTC_DSO_TO,
 };
 
@@ -631,6 +725,7 @@
 	.se_header	= "Source Symbol",
 	.se_cmp		= sort__sym_from_cmp,
 	.se_snprintf	= hist_entry__sym_from_snprintf,
+	.se_filter	= hist_entry__sym_from_filter,
 	.se_width_idx	= HISTC_SYMBOL_FROM,
 };
 
@@ -638,6 +733,7 @@
 	.se_header	= "Target Symbol",
 	.se_cmp		= sort__sym_to_cmp,
 	.se_snprintf	= hist_entry__sym_to_snprintf,
+	.se_filter	= hist_entry__sym_to_filter,
 	.se_width_idx	= HISTC_SYMBOL_TO,
 };
 
@@ -797,20 +893,10 @@
 static int hist_entry__locked_snprintf(struct hist_entry *he, char *bf,
 				    size_t size, unsigned int width)
 {
-	const char *out;
-	u64 mask = PERF_MEM_LOCK_NA;
+	char out[10];
 
-	if (he->mem_info)
-		mask = he->mem_info->data_src.mem_lock;
-
-	if (mask & PERF_MEM_LOCK_NA)
-		out = "N/A";
-	else if (mask & PERF_MEM_LOCK_LOCKED)
-		out = "Yes";
-	else
-		out = "No";
-
-	return repsep_snprintf(bf, size, "%-*s", width, out);
+	perf_mem__lck_scnprintf(out, sizeof(out), he->mem_info);
+	return repsep_snprintf(bf, size, "%.*s", width, out);
 }
 
 static int64_t
@@ -832,54 +918,12 @@
 	return (int64_t)(data_src_r.mem_dtlb - data_src_l.mem_dtlb);
 }
 
-static const char * const tlb_access[] = {
-	"N/A",
-	"HIT",
-	"MISS",
-	"L1",
-	"L2",
-	"Walker",
-	"Fault",
-};
-#define NUM_TLB_ACCESS (sizeof(tlb_access)/sizeof(const char *))
-
 static int hist_entry__tlb_snprintf(struct hist_entry *he, char *bf,
 				    size_t size, unsigned int width)
 {
 	char out[64];
-	size_t sz = sizeof(out) - 1; /* -1 for null termination */
-	size_t l = 0, i;
-	u64 m = PERF_MEM_TLB_NA;
-	u64 hit, miss;
 
-	out[0] = '\0';
-
-	if (he->mem_info)
-		m = he->mem_info->data_src.mem_dtlb;
-
-	hit = m & PERF_MEM_TLB_HIT;
-	miss = m & PERF_MEM_TLB_MISS;
-
-	/* already taken care of */
-	m &= ~(PERF_MEM_TLB_HIT|PERF_MEM_TLB_MISS);
-
-	for (i = 0; m && i < NUM_TLB_ACCESS; i++, m >>= 1) {
-		if (!(m & 0x1))
-			continue;
-		if (l) {
-			strcat(out, " or ");
-			l += 4;
-		}
-		strncat(out, tlb_access[i], sz - l);
-		l += strlen(tlb_access[i]);
-	}
-	if (*out == '\0')
-		strcpy(out, "N/A");
-	if (hit)
-		strncat(out, " hit", sz - l);
-	if (miss)
-		strncat(out, " miss", sz - l);
-
+	perf_mem__tlb_scnprintf(out, sizeof(out), he->mem_info);
 	return repsep_snprintf(bf, size, "%-*s", width, out);
 }
 
@@ -902,61 +946,12 @@
 	return (int64_t)(data_src_r.mem_lvl - data_src_l.mem_lvl);
 }
 
-static const char * const mem_lvl[] = {
-	"N/A",
-	"HIT",
-	"MISS",
-	"L1",
-	"LFB",
-	"L2",
-	"L3",
-	"Local RAM",
-	"Remote RAM (1 hop)",
-	"Remote RAM (2 hops)",
-	"Remote Cache (1 hop)",
-	"Remote Cache (2 hops)",
-	"I/O",
-	"Uncached",
-};
-#define NUM_MEM_LVL (sizeof(mem_lvl)/sizeof(const char *))
-
 static int hist_entry__lvl_snprintf(struct hist_entry *he, char *bf,
 				    size_t size, unsigned int width)
 {
 	char out[64];
-	size_t sz = sizeof(out) - 1; /* -1 for null termination */
-	size_t i, l = 0;
-	u64 m =  PERF_MEM_LVL_NA;
-	u64 hit, miss;
 
-	if (he->mem_info)
-		m  = he->mem_info->data_src.mem_lvl;
-
-	out[0] = '\0';
-
-	hit = m & PERF_MEM_LVL_HIT;
-	miss = m & PERF_MEM_LVL_MISS;
-
-	/* already taken care of */
-	m &= ~(PERF_MEM_LVL_HIT|PERF_MEM_LVL_MISS);
-
-	for (i = 0; m && i < NUM_MEM_LVL; i++, m >>= 1) {
-		if (!(m & 0x1))
-			continue;
-		if (l) {
-			strcat(out, " or ");
-			l += 4;
-		}
-		strncat(out, mem_lvl[i], sz - l);
-		l += strlen(mem_lvl[i]);
-	}
-	if (*out == '\0')
-		strcpy(out, "N/A");
-	if (hit)
-		strncat(out, " hit", sz - l);
-	if (miss)
-		strncat(out, " miss", sz - l);
-
+	perf_mem__lvl_scnprintf(out, sizeof(out), he->mem_info);
 	return repsep_snprintf(bf, size, "%-*s", width, out);
 }
 
@@ -979,51 +974,15 @@
 	return (int64_t)(data_src_r.mem_snoop - data_src_l.mem_snoop);
 }
 
-static const char * const snoop_access[] = {
-	"N/A",
-	"None",
-	"Miss",
-	"Hit",
-	"HitM",
-};
-#define NUM_SNOOP_ACCESS (sizeof(snoop_access)/sizeof(const char *))
-
 static int hist_entry__snoop_snprintf(struct hist_entry *he, char *bf,
 				    size_t size, unsigned int width)
 {
 	char out[64];
-	size_t sz = sizeof(out) - 1; /* -1 for null termination */
-	size_t i, l = 0;
-	u64 m = PERF_MEM_SNOOP_NA;
 
-	out[0] = '\0';
-
-	if (he->mem_info)
-		m = he->mem_info->data_src.mem_snoop;
-
-	for (i = 0; m && i < NUM_SNOOP_ACCESS; i++, m >>= 1) {
-		if (!(m & 0x1))
-			continue;
-		if (l) {
-			strcat(out, " or ");
-			l += 4;
-		}
-		strncat(out, snoop_access[i], sz - l);
-		l += strlen(snoop_access[i]);
-	}
-
-	if (*out == '\0')
-		strcpy(out, "N/A");
-
+	perf_mem__snp_scnprintf(out, sizeof(out), he->mem_info);
 	return repsep_snprintf(bf, size, "%-*s", width, out);
 }
 
-static inline  u64 cl_address(u64 address)
-{
-	/* return the cacheline of the address */
-	return (address & ~(cacheline_size - 1));
-}
-
 static int64_t
 sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right)
 {
@@ -1440,20 +1399,6 @@
 	struct sort_entry *se;
 };
 
-bool perf_hpp__same_sort_entry(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b)
-{
-	struct hpp_sort_entry *hse_a;
-	struct hpp_sort_entry *hse_b;
-
-	if (!perf_hpp__is_sort_entry(a) || !perf_hpp__is_sort_entry(b))
-		return false;
-
-	hse_a = container_of(a, struct hpp_sort_entry, hpp);
-	hse_b = container_of(b, struct hpp_sort_entry, hpp);
-
-	return hse_a->se == hse_b->se;
-}
-
 void perf_hpp__reset_sort_width(struct perf_hpp_fmt *fmt, struct hists *hists)
 {
 	struct hpp_sort_entry *hse;
@@ -1539,8 +1484,56 @@
 	return sort_fn(a, b);
 }
 
+bool perf_hpp__is_sort_entry(struct perf_hpp_fmt *format)
+{
+	return format->header == __sort__hpp_header;
+}
+
+#define MK_SORT_ENTRY_CHK(key)					\
+bool perf_hpp__is_ ## key ## _entry(struct perf_hpp_fmt *fmt)	\
+{								\
+	struct hpp_sort_entry *hse;				\
+								\
+	if (!perf_hpp__is_sort_entry(fmt))			\
+		return false;					\
+								\
+	hse = container_of(fmt, struct hpp_sort_entry, hpp);	\
+	return hse->se == &sort_ ## key ;			\
+}
+
+MK_SORT_ENTRY_CHK(trace)
+MK_SORT_ENTRY_CHK(srcline)
+MK_SORT_ENTRY_CHK(srcfile)
+MK_SORT_ENTRY_CHK(thread)
+MK_SORT_ENTRY_CHK(comm)
+MK_SORT_ENTRY_CHK(dso)
+MK_SORT_ENTRY_CHK(sym)
+
+
+static bool __sort__hpp_equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b)
+{
+	struct hpp_sort_entry *hse_a;
+	struct hpp_sort_entry *hse_b;
+
+	if (!perf_hpp__is_sort_entry(a) || !perf_hpp__is_sort_entry(b))
+		return false;
+
+	hse_a = container_of(a, struct hpp_sort_entry, hpp);
+	hse_b = container_of(b, struct hpp_sort_entry, hpp);
+
+	return hse_a->se == hse_b->se;
+}
+
+static void hse_free(struct perf_hpp_fmt *fmt)
+{
+	struct hpp_sort_entry *hse;
+
+	hse = container_of(fmt, struct hpp_sort_entry, hpp);
+	free(hse);
+}
+
 static struct hpp_sort_entry *
-__sort_dimension__alloc_hpp(struct sort_dimension *sd)
+__sort_dimension__alloc_hpp(struct sort_dimension *sd, int level)
 {
 	struct hpp_sort_entry *hse;
 
@@ -1560,40 +1553,92 @@
 	hse->hpp.cmp = __sort__hpp_cmp;
 	hse->hpp.collapse = __sort__hpp_collapse;
 	hse->hpp.sort = __sort__hpp_sort;
+	hse->hpp.equal = __sort__hpp_equal;
+	hse->hpp.free = hse_free;
 
 	INIT_LIST_HEAD(&hse->hpp.list);
 	INIT_LIST_HEAD(&hse->hpp.sort_list);
 	hse->hpp.elide = false;
 	hse->hpp.len = 0;
 	hse->hpp.user_len = 0;
+	hse->hpp.level = level;
 
 	return hse;
 }
 
-bool perf_hpp__is_sort_entry(struct perf_hpp_fmt *format)
+static void hpp_free(struct perf_hpp_fmt *fmt)
 {
-	return format->header == __sort__hpp_header;
+	free(fmt);
 }
 
-static int __sort_dimension__add_hpp_sort(struct sort_dimension *sd)
+static struct perf_hpp_fmt *__hpp_dimension__alloc_hpp(struct hpp_dimension *hd,
+						       int level)
 {
-	struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd);
+	struct perf_hpp_fmt *fmt;
+
+	fmt = memdup(hd->fmt, sizeof(*fmt));
+	if (fmt) {
+		INIT_LIST_HEAD(&fmt->list);
+		INIT_LIST_HEAD(&fmt->sort_list);
+		fmt->free = hpp_free;
+		fmt->level = level;
+	}
+
+	return fmt;
+}
+
+int hist_entry__filter(struct hist_entry *he, int type, const void *arg)
+{
+	struct perf_hpp_fmt *fmt;
+	struct hpp_sort_entry *hse;
+	int ret = -1;
+	int r;
+
+	perf_hpp_list__for_each_format(he->hpp_list, fmt) {
+		if (!perf_hpp__is_sort_entry(fmt))
+			continue;
+
+		hse = container_of(fmt, struct hpp_sort_entry, hpp);
+		if (hse->se->se_filter == NULL)
+			continue;
+
+		/*
+		 * hist entry is filtered if any of sort key in the hpp list
+		 * is applied.  But it should skip non-matched filter types.
+		 */
+		r = hse->se->se_filter(he, type, arg);
+		if (r >= 0) {
+			if (ret < 0)
+				ret = 0;
+			ret |= r;
+		}
+	}
+
+	return ret;
+}
+
+static int __sort_dimension__add_hpp_sort(struct sort_dimension *sd,
+					  struct perf_hpp_list *list,
+					  int level)
+{
+	struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd, level);
 
 	if (hse == NULL)
 		return -1;
 
-	perf_hpp__register_sort_field(&hse->hpp);
+	perf_hpp_list__register_sort_field(list, &hse->hpp);
 	return 0;
 }
 
-static int __sort_dimension__add_hpp_output(struct sort_dimension *sd)
+static int __sort_dimension__add_hpp_output(struct sort_dimension *sd,
+					    struct perf_hpp_list *list)
 {
-	struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd);
+	struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd, 0);
 
 	if (hse == NULL)
 		return -1;
 
-	perf_hpp__column_register(&hse->hpp);
+	perf_hpp_list__column_register(list, &hse->hpp);
 	return 0;
 }
 
@@ -1727,6 +1772,9 @@
 	if (hde->raw_trace)
 		goto raw_field;
 
+	if (!he->trace_output)
+		he->trace_output = get_trace_output(he);
+
 	field = hde->field;
 	namelen = strlen(field->name);
 	str = he->trace_output;
@@ -1776,6 +1824,11 @@
 
 	hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
 
+	if (b == NULL) {
+		update_dynamic_len(hde, a);
+		return 0;
+	}
+
 	field = hde->field;
 	if (field->flags & FIELD_IS_DYNAMIC) {
 		unsigned long long dyn;
@@ -1790,9 +1843,6 @@
 	} else {
 		offset = field->offset;
 		size = field->size;
-
-		update_dynamic_len(hde, a);
-		update_dynamic_len(hde, b);
 	}
 
 	return memcmp(a->raw_data + offset, b->raw_data + offset, size);
@@ -1803,8 +1853,31 @@
 	return fmt->cmp == __sort__hde_cmp;
 }
 
+static bool __sort__hde_equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b)
+{
+	struct hpp_dynamic_entry *hde_a;
+	struct hpp_dynamic_entry *hde_b;
+
+	if (!perf_hpp__is_dynamic_entry(a) || !perf_hpp__is_dynamic_entry(b))
+		return false;
+
+	hde_a = container_of(a, struct hpp_dynamic_entry, hpp);
+	hde_b = container_of(b, struct hpp_dynamic_entry, hpp);
+
+	return hde_a->field == hde_b->field;
+}
+
+static void hde_free(struct perf_hpp_fmt *fmt)
+{
+	struct hpp_dynamic_entry *hde;
+
+	hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
+	free(hde);
+}
+
 static struct hpp_dynamic_entry *
-__alloc_dynamic_entry(struct perf_evsel *evsel, struct format_field *field)
+__alloc_dynamic_entry(struct perf_evsel *evsel, struct format_field *field,
+		      int level)
 {
 	struct hpp_dynamic_entry *hde;
 
@@ -1827,16 +1900,47 @@
 	hde->hpp.cmp = __sort__hde_cmp;
 	hde->hpp.collapse = __sort__hde_cmp;
 	hde->hpp.sort = __sort__hde_cmp;
+	hde->hpp.equal = __sort__hde_equal;
+	hde->hpp.free = hde_free;
 
 	INIT_LIST_HEAD(&hde->hpp.list);
 	INIT_LIST_HEAD(&hde->hpp.sort_list);
 	hde->hpp.elide = false;
 	hde->hpp.len = 0;
 	hde->hpp.user_len = 0;
+	hde->hpp.level = level;
 
 	return hde;
 }
 
+struct perf_hpp_fmt *perf_hpp_fmt__dup(struct perf_hpp_fmt *fmt)
+{
+	struct perf_hpp_fmt *new_fmt = NULL;
+
+	if (perf_hpp__is_sort_entry(fmt)) {
+		struct hpp_sort_entry *hse, *new_hse;
+
+		hse = container_of(fmt, struct hpp_sort_entry, hpp);
+		new_hse = memdup(hse, sizeof(*hse));
+		if (new_hse)
+			new_fmt = &new_hse->hpp;
+	} else if (perf_hpp__is_dynamic_entry(fmt)) {
+		struct hpp_dynamic_entry *hde, *new_hde;
+
+		hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
+		new_hde = memdup(hde, sizeof(*hde));
+		if (new_hde)
+			new_fmt = &new_hde->hpp;
+	} else {
+		new_fmt = memdup(fmt, sizeof(*fmt));
+	}
+
+	INIT_LIST_HEAD(&new_fmt->list);
+	INIT_LIST_HEAD(&new_fmt->sort_list);
+
+	return new_fmt;
+}
+
 static int parse_field_name(char *str, char **event, char **field, char **opt)
 {
 	char *event_name, *field_name, *opt_name;
@@ -1908,11 +2012,11 @@
 
 static int __dynamic_dimension__add(struct perf_evsel *evsel,
 				    struct format_field *field,
-				    bool raw_trace)
+				    bool raw_trace, int level)
 {
 	struct hpp_dynamic_entry *hde;
 
-	hde = __alloc_dynamic_entry(evsel, field);
+	hde = __alloc_dynamic_entry(evsel, field, level);
 	if (hde == NULL)
 		return -ENOMEM;
 
@@ -1922,14 +2026,14 @@
 	return 0;
 }
 
-static int add_evsel_fields(struct perf_evsel *evsel, bool raw_trace)
+static int add_evsel_fields(struct perf_evsel *evsel, bool raw_trace, int level)
 {
 	int ret;
 	struct format_field *field;
 
 	field = evsel->tp_format->format.fields;
 	while (field) {
-		ret = __dynamic_dimension__add(evsel, field, raw_trace);
+		ret = __dynamic_dimension__add(evsel, field, raw_trace, level);
 		if (ret < 0)
 			return ret;
 
@@ -1938,7 +2042,8 @@
 	return 0;
 }
 
-static int add_all_dynamic_fields(struct perf_evlist *evlist, bool raw_trace)
+static int add_all_dynamic_fields(struct perf_evlist *evlist, bool raw_trace,
+				  int level)
 {
 	int ret;
 	struct perf_evsel *evsel;
@@ -1947,7 +2052,7 @@
 		if (evsel->attr.type != PERF_TYPE_TRACEPOINT)
 			continue;
 
-		ret = add_evsel_fields(evsel, raw_trace);
+		ret = add_evsel_fields(evsel, raw_trace, level);
 		if (ret < 0)
 			return ret;
 	}
@@ -1955,7 +2060,7 @@
 }
 
 static int add_all_matching_fields(struct perf_evlist *evlist,
-				   char *field_name, bool raw_trace)
+				   char *field_name, bool raw_trace, int level)
 {
 	int ret = -ESRCH;
 	struct perf_evsel *evsel;
@@ -1969,14 +2074,15 @@
 		if (field == NULL)
 			continue;
 
-		ret = __dynamic_dimension__add(evsel, field, raw_trace);
+		ret = __dynamic_dimension__add(evsel, field, raw_trace, level);
 		if (ret < 0)
 			break;
 	}
 	return ret;
 }
 
-static int add_dynamic_entry(struct perf_evlist *evlist, const char *tok)
+static int add_dynamic_entry(struct perf_evlist *evlist, const char *tok,
+			     int level)
 {
 	char *str, *event_name, *field_name, *opt_name;
 	struct perf_evsel *evsel;
@@ -2006,12 +2112,12 @@
 	}
 
 	if (!strcmp(field_name, "trace_fields")) {
-		ret = add_all_dynamic_fields(evlist, raw_trace);
+		ret = add_all_dynamic_fields(evlist, raw_trace, level);
 		goto out;
 	}
 
 	if (event_name == NULL) {
-		ret = add_all_matching_fields(evlist, field_name, raw_trace);
+		ret = add_all_matching_fields(evlist, field_name, raw_trace, level);
 		goto out;
 	}
 
@@ -2029,7 +2135,7 @@
 	}
 
 	if (!strcmp(field_name, "*")) {
-		ret = add_evsel_fields(evsel, raw_trace);
+		ret = add_evsel_fields(evsel, raw_trace, level);
 	} else {
 		field = pevent_find_any_field(evsel->tp_format, field_name);
 		if (field == NULL) {
@@ -2038,7 +2144,7 @@
 			return -ENOENT;
 		}
 
-		ret = __dynamic_dimension__add(evsel, field, raw_trace);
+		ret = __dynamic_dimension__add(evsel, field, raw_trace, level);
 	}
 
 out:
@@ -2046,12 +2152,14 @@
 	return ret;
 }
 
-static int __sort_dimension__add(struct sort_dimension *sd)
+static int __sort_dimension__add(struct sort_dimension *sd,
+				 struct perf_hpp_list *list,
+				 int level)
 {
 	if (sd->taken)
 		return 0;
 
-	if (__sort_dimension__add_hpp_sort(sd) < 0)
+	if (__sort_dimension__add_hpp_sort(sd, list, level) < 0)
 		return -1;
 
 	if (sd->entry->se_collapse)
@@ -2062,46 +2170,63 @@
 	return 0;
 }
 
-static int __hpp_dimension__add(struct hpp_dimension *hd)
+static int __hpp_dimension__add(struct hpp_dimension *hd,
+				struct perf_hpp_list *list,
+				int level)
 {
-	if (!hd->taken) {
-		hd->taken = 1;
+	struct perf_hpp_fmt *fmt;
 
-		perf_hpp__register_sort_field(hd->fmt);
-	}
+	if (hd->taken)
+		return 0;
+
+	fmt = __hpp_dimension__alloc_hpp(hd, level);
+	if (!fmt)
+		return -1;
+
+	hd->taken = 1;
+	perf_hpp_list__register_sort_field(list, fmt);
 	return 0;
 }
 
-static int __sort_dimension__add_output(struct sort_dimension *sd)
+static int __sort_dimension__add_output(struct perf_hpp_list *list,
+					struct sort_dimension *sd)
 {
 	if (sd->taken)
 		return 0;
 
-	if (__sort_dimension__add_hpp_output(sd) < 0)
+	if (__sort_dimension__add_hpp_output(sd, list) < 0)
 		return -1;
 
 	sd->taken = 1;
 	return 0;
 }
 
-static int __hpp_dimension__add_output(struct hpp_dimension *hd)
+static int __hpp_dimension__add_output(struct perf_hpp_list *list,
+				       struct hpp_dimension *hd)
 {
-	if (!hd->taken) {
-		hd->taken = 1;
+	struct perf_hpp_fmt *fmt;
 
-		perf_hpp__column_register(hd->fmt);
-	}
+	if (hd->taken)
+		return 0;
+
+	fmt = __hpp_dimension__alloc_hpp(hd, 0);
+	if (!fmt)
+		return -1;
+
+	hd->taken = 1;
+	perf_hpp_list__column_register(list, fmt);
 	return 0;
 }
 
 int hpp_dimension__add_output(unsigned col)
 {
 	BUG_ON(col >= PERF_HPP__MAX_INDEX);
-	return __hpp_dimension__add_output(&hpp_sort_dimensions[col]);
+	return __hpp_dimension__add_output(&perf_hpp_list, &hpp_sort_dimensions[col]);
 }
 
-static int sort_dimension__add(const char *tok,
-			       struct perf_evlist *evlist __maybe_unused)
+static int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
+			       struct perf_evlist *evlist __maybe_unused,
+			       int level)
 {
 	unsigned int i;
 
@@ -2136,9 +2261,13 @@
 			sort__has_dso = 1;
 		} else if (sd->entry == &sort_socket) {
 			sort__has_socket = 1;
+		} else if (sd->entry == &sort_thread) {
+			sort__has_thread = 1;
+		} else if (sd->entry == &sort_comm) {
+			sort__has_comm = 1;
 		}
 
-		return __sort_dimension__add(sd);
+		return __sort_dimension__add(sd, list, level);
 	}
 
 	for (i = 0; i < ARRAY_SIZE(hpp_sort_dimensions); i++) {
@@ -2147,7 +2276,7 @@
 		if (strncasecmp(tok, hd->name, strlen(tok)))
 			continue;
 
-		return __hpp_dimension__add(hd);
+		return __hpp_dimension__add(hd, list, level);
 	}
 
 	for (i = 0; i < ARRAY_SIZE(bstack_sort_dimensions); i++) {
@@ -2162,7 +2291,7 @@
 		if (sd->entry == &sort_sym_from || sd->entry == &sort_sym_to)
 			sort__has_sym = 1;
 
-		__sort_dimension__add(sd);
+		__sort_dimension__add(sd, list, level);
 		return 0;
 	}
 
@@ -2178,16 +2307,60 @@
 		if (sd->entry == &sort_mem_daddr_sym)
 			sort__has_sym = 1;
 
-		__sort_dimension__add(sd);
+		__sort_dimension__add(sd, list, level);
 		return 0;
 	}
 
-	if (!add_dynamic_entry(evlist, tok))
+	if (!add_dynamic_entry(evlist, tok, level))
 		return 0;
 
 	return -ESRCH;
 }
 
+static int setup_sort_list(struct perf_hpp_list *list, char *str,
+			   struct perf_evlist *evlist)
+{
+	char *tmp, *tok;
+	int ret = 0;
+	int level = 0;
+	int next_level = 1;
+	bool in_group = false;
+
+	do {
+		tok = str;
+		tmp = strpbrk(str, "{}, ");
+		if (tmp) {
+			if (in_group)
+				next_level = level;
+			else
+				next_level = level + 1;
+
+			if (*tmp == '{')
+				in_group = true;
+			else if (*tmp == '}')
+				in_group = false;
+
+			*tmp = '\0';
+			str = tmp + 1;
+		}
+
+		if (*tok) {
+			ret = sort_dimension__add(list, tok, evlist, level);
+			if (ret == -EINVAL) {
+				error("Invalid --sort key: `%s'", tok);
+				break;
+			} else if (ret == -ESRCH) {
+				error("Unknown --sort key: `%s'", tok);
+				break;
+			}
+		}
+
+		level = next_level;
+	} while (tmp);
+
+	return ret;
+}
+
 static const char *get_default_sort_order(struct perf_evlist *evlist)
 {
 	const char *default_sort_orders[] = {
@@ -2282,7 +2455,7 @@
 
 static int __setup_sorting(struct perf_evlist *evlist)
 {
-	char *tmp, *tok, *str;
+	char *str;
 	const char *sort_keys;
 	int ret = 0;
 
@@ -2320,17 +2493,7 @@
 		}
 	}
 
-	for (tok = strtok_r(str, ", ", &tmp);
-			tok; tok = strtok_r(NULL, ", ", &tmp)) {
-		ret = sort_dimension__add(tok, evlist);
-		if (ret == -EINVAL) {
-			error("Invalid --sort key: `%s'", tok);
-			break;
-		} else if (ret == -ESRCH) {
-			error("Unknown --sort key: `%s'", tok);
-			break;
-		}
-	}
+	ret = setup_sort_list(&perf_hpp_list, str, evlist);
 
 	free(str);
 	return ret;
@@ -2341,7 +2504,7 @@
 	struct perf_hpp_fmt *fmt;
 	struct hpp_sort_entry *hse;
 
-	perf_hpp__for_each_format(fmt) {
+	perf_hpp_list__for_each_format(&perf_hpp_list, fmt) {
 		if (!perf_hpp__is_sort_entry(fmt))
 			continue;
 
@@ -2401,7 +2564,7 @@
 	struct perf_hpp_fmt *fmt;
 	struct hpp_sort_entry *hse;
 
-	perf_hpp__for_each_format(fmt) {
+	perf_hpp_list__for_each_format(&perf_hpp_list, fmt) {
 		if (!perf_hpp__is_sort_entry(fmt))
 			continue;
 
@@ -2413,7 +2576,7 @@
 	 * It makes no sense to elide all of sort entries.
 	 * Just revert them to show up again.
 	 */
-	perf_hpp__for_each_format(fmt) {
+	perf_hpp_list__for_each_format(&perf_hpp_list, fmt) {
 		if (!perf_hpp__is_sort_entry(fmt))
 			continue;
 
@@ -2421,7 +2584,7 @@
 			return;
 	}
 
-	perf_hpp__for_each_format(fmt) {
+	perf_hpp_list__for_each_format(&perf_hpp_list, fmt) {
 		if (!perf_hpp__is_sort_entry(fmt))
 			continue;
 
@@ -2429,7 +2592,7 @@
 	}
 }
 
-static int output_field_add(char *tok)
+static int output_field_add(struct perf_hpp_list *list, char *tok)
 {
 	unsigned int i;
 
@@ -2439,7 +2602,7 @@
 		if (strncasecmp(tok, sd->name, strlen(tok)))
 			continue;
 
-		return __sort_dimension__add_output(sd);
+		return __sort_dimension__add_output(list, sd);
 	}
 
 	for (i = 0; i < ARRAY_SIZE(hpp_sort_dimensions); i++) {
@@ -2448,7 +2611,7 @@
 		if (strncasecmp(tok, hd->name, strlen(tok)))
 			continue;
 
-		return __hpp_dimension__add_output(hd);
+		return __hpp_dimension__add_output(list, hd);
 	}
 
 	for (i = 0; i < ARRAY_SIZE(bstack_sort_dimensions); i++) {
@@ -2457,7 +2620,7 @@
 		if (strncasecmp(tok, sd->name, strlen(tok)))
 			continue;
 
-		return __sort_dimension__add_output(sd);
+		return __sort_dimension__add_output(list, sd);
 	}
 
 	for (i = 0; i < ARRAY_SIZE(memory_sort_dimensions); i++) {
@@ -2466,12 +2629,32 @@
 		if (strncasecmp(tok, sd->name, strlen(tok)))
 			continue;
 
-		return __sort_dimension__add_output(sd);
+		return __sort_dimension__add_output(list, sd);
 	}
 
 	return -ESRCH;
 }
 
+static int setup_output_list(struct perf_hpp_list *list, char *str)
+{
+	char *tmp, *tok;
+	int ret = 0;
+
+	for (tok = strtok_r(str, ", ", &tmp);
+			tok; tok = strtok_r(NULL, ", ", &tmp)) {
+		ret = output_field_add(list, tok);
+		if (ret == -EINVAL) {
+			error("Invalid --fields key: `%s'", tok);
+			break;
+		} else if (ret == -ESRCH) {
+			error("Unknown --fields key: `%s'", tok);
+			break;
+		}
+	}
+
+	return ret;
+}
+
 static void reset_dimensions(void)
 {
 	unsigned int i;
@@ -2496,7 +2679,7 @@
 
 static int __setup_output_field(void)
 {
-	char *tmp, *tok, *str, *strp;
+	char *str, *strp;
 	int ret = -EINVAL;
 
 	if (field_order == NULL)
@@ -2516,17 +2699,7 @@
 		goto out;
 	}
 
-	for (tok = strtok_r(strp, ", ", &tmp);
-			tok; tok = strtok_r(NULL, ", ", &tmp)) {
-		ret = output_field_add(tok);
-		if (ret == -EINVAL) {
-			error("Invalid --fields key: `%s'", tok);
-			break;
-		} else if (ret == -ESRCH) {
-			error("Unknown --fields key: `%s'", tok);
-			break;
-		}
-	}
+	ret = setup_output_list(&perf_hpp_list, strp);
 
 out:
 	free(str);
@@ -2542,7 +2715,7 @@
 		return err;
 
 	if (parent_pattern != default_parent_pattern) {
-		err = sort_dimension__add("parent", evlist);
+		err = sort_dimension__add(&perf_hpp_list, "parent", evlist, -1);
 		if (err < 0)
 			return err;
 	}
@@ -2560,9 +2733,13 @@
 		return err;
 
 	/* copy sort keys to output fields */
-	perf_hpp__setup_output_field();
+	perf_hpp__setup_output_field(&perf_hpp_list);
 	/* and then copy output fields to sort keys */
-	perf_hpp__append_sort_keys();
+	perf_hpp__append_sort_keys(&perf_hpp_list);
+
+	/* setup hists-specific output fields */
+	if (perf_hpp__setup_hists_formats(&perf_hpp_list, evlist) < 0)
+		return -1;
 
 	return 0;
 }
@@ -2578,5 +2755,5 @@
 	sort_order = NULL;
 
 	reset_dimensions();
-	perf_hpp__reset_output_field();
+	perf_hpp__reset_output_field(&perf_hpp_list);
 }

diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 687bbb12..3f4e359 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h

@@ -32,9 +32,12 @@
 extern regex_t ignore_callees_regex;
 extern int have_ignore_callees;
 extern int sort__need_collapse;
+extern int sort__has_dso;
 extern int sort__has_parent;
 extern int sort__has_sym;
 extern int sort__has_socket;
+extern int sort__has_thread;
+extern int sort__has_comm;
 extern enum sort_mode sort__mode;
 extern struct sort_entry sort_comm;
 extern struct sort_entry sort_dso;
@@ -94,9 +97,11 @@
 	s32			socket;
 	s32			cpu;
 	u8			cpumode;
+	u8			depth;
 
 	/* We are added by hists__add_dummy_entry. */
 	bool			dummy;
+	bool			leaf;
 
 	char			level;
 	u8			filtered;
@@ -113,18 +118,28 @@
 			bool	init_have_children;
 			bool	unfolded;
 			bool	has_children;
+			bool	has_no_entry;
 		};
 	};
 	char			*srcline;
 	char			*srcfile;
 	struct symbol		*parent;
-	struct rb_root		sorted_chain;
 	struct branch_info	*branch_info;
 	struct hists		*hists;
 	struct mem_info		*mem_info;
 	void			*raw_data;
 	u32			raw_size;
 	void			*trace_output;
+	struct perf_hpp_list	*hpp_list;
+	struct hist_entry	*parent_he;
+	union {
+		/* this is for hierarchical entry structure */
+		struct {
+			struct rb_root	hroot_in;
+			struct rb_root  hroot_out;
+		};				/* non-leaf entries */
+		struct rb_root	sorted_chain;	/* leaf entry has callchains */
+	};
 	struct callchain_root	callchain[0]; /* must be last member */
 };
 
@@ -160,6 +175,17 @@
 	return period * 100.0 / total_period;
 }
 
+static inline u64 cl_address(u64 address)
+{
+	/* return the cacheline of the address */
+	return (address & ~(cacheline_size - 1));
+}
+
+static inline u64 cl_offset(u64 address)
+{
+	/* return the cacheline of the address */
+	return (address & (cacheline_size - 1));
+}
 
 enum sort_mode {
 	SORT_MODE__NORMAL,
@@ -221,6 +247,7 @@
 	int64_t	(*se_sort)(struct hist_entry *, struct hist_entry *);
 	int	(*se_snprintf)(struct hist_entry *he, char *bf, size_t size,
 			       unsigned int width);
+	int	(*se_filter)(struct hist_entry *he, int type, const void *arg);
 	u8	se_width_idx;
 };
 

diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index 6ac0314..b33ffb2 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c

@@ -2,6 +2,7 @@
 #include "evsel.h"
 #include "stat.h"
 #include "color.h"
+#include "pmu.h"
 
 enum {
 	CTX_BIT_USER	= 1 << 0,
@@ -14,6 +15,13 @@
 
 #define NUM_CTX CTX_BIT_MAX
 
+/*
+ * AGGR_GLOBAL: Use CPU 0
+ * AGGR_SOCKET: Use first CPU of socket
+ * AGGR_CORE: Use first CPU of core
+ * AGGR_NONE: Use matching CPU
+ * AGGR_THREAD: Not supported?
+ */
 static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
 static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS];
 static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS];
@@ -28,9 +36,15 @@
 static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS];
 static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS];
 static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS];
+static bool have_frontend_stalled;
 
 struct stats walltime_nsecs_stats;
 
+void perf_stat__init_shadow_stats(void)
+{
+	have_frontend_stalled = pmu_have_event("cpu", "stalled-cycles-frontend");
+}
+
 static int evsel_context(struct perf_evsel *evsel)
 {
 	int ctx = 0;
@@ -137,9 +151,10 @@
 	return color;
 }
 
-static void print_stalled_cycles_frontend(FILE *out, int cpu,
+static void print_stalled_cycles_frontend(int cpu,
 					  struct perf_evsel *evsel
-					  __maybe_unused, double avg)
+					  __maybe_unused, double avg,
+					  struct perf_stat_output_ctx *out)
 {
 	double total, ratio = 0.0;
 	const char *color;
@@ -152,14 +167,17 @@
 
 	color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);
 
-	fprintf(out, " #  ");
-	color_fprintf(out, color, "%6.2f%%", ratio);
-	fprintf(out, " frontend cycles idle   ");
+	if (ratio)
+		out->print_metric(out->ctx, color, "%7.2f%%", "frontend cycles idle",
+				  ratio);
+	else
+		out->print_metric(out->ctx, NULL, NULL, "frontend cycles idle", 0);
 }
 
-static void print_stalled_cycles_backend(FILE *out, int cpu,
+static void print_stalled_cycles_backend(int cpu,
 					 struct perf_evsel *evsel
-					 __maybe_unused, double avg)
+					 __maybe_unused, double avg,
+					 struct perf_stat_output_ctx *out)
 {
 	double total, ratio = 0.0;
 	const char *color;
@@ -172,14 +190,13 @@
 
 	color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);
 
-	fprintf(out, " #  ");
-	color_fprintf(out, color, "%6.2f%%", ratio);
-	fprintf(out, " backend  cycles idle   ");
+	out->print_metric(out->ctx, color, "%6.2f%%", "backend cycles idle", ratio);
 }
 
-static void print_branch_misses(FILE *out, int cpu,
+static void print_branch_misses(int cpu,
 				struct perf_evsel *evsel __maybe_unused,
-				double avg)
+				double avg,
+				struct perf_stat_output_ctx *out)
 {
 	double total, ratio = 0.0;
 	const char *color;
@@ -192,14 +209,13 @@
 
 	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
 
-	fprintf(out, " #  ");
-	color_fprintf(out, color, "%6.2f%%", ratio);
-	fprintf(out, " of all branches        ");
+	out->print_metric(out->ctx, color, "%7.2f%%", "of all branches", ratio);
 }
 
-static void print_l1_dcache_misses(FILE *out, int cpu,
+static void print_l1_dcache_misses(int cpu,
 				   struct perf_evsel *evsel __maybe_unused,
-				   double avg)
+				   double avg,
+				   struct perf_stat_output_ctx *out)
 {
 	double total, ratio = 0.0;
 	const char *color;
@@ -212,14 +228,13 @@
 
 	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
 
-	fprintf(out, " #  ");
-	color_fprintf(out, color, "%6.2f%%", ratio);
-	fprintf(out, " of all L1-dcache hits  ");
+	out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-dcache hits", ratio);
 }
 
-static void print_l1_icache_misses(FILE *out, int cpu,
+static void print_l1_icache_misses(int cpu,
 				   struct perf_evsel *evsel __maybe_unused,
-				   double avg)
+				   double avg,
+				   struct perf_stat_output_ctx *out)
 {
 	double total, ratio = 0.0;
 	const char *color;
@@ -231,15 +246,13 @@
 		ratio = avg / total * 100.0;
 
 	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
-
-	fprintf(out, " #  ");
-	color_fprintf(out, color, "%6.2f%%", ratio);
-	fprintf(out, " of all L1-icache hits  ");
+	out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-icache hits", ratio);
 }
 
-static void print_dtlb_cache_misses(FILE *out, int cpu,
+static void print_dtlb_cache_misses(int cpu,
 				    struct perf_evsel *evsel __maybe_unused,
-				    double avg)
+				    double avg,
+				    struct perf_stat_output_ctx *out)
 {
 	double total, ratio = 0.0;
 	const char *color;
@@ -251,15 +264,13 @@
 		ratio = avg / total * 100.0;
 
 	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
-
-	fprintf(out, " #  ");
-	color_fprintf(out, color, "%6.2f%%", ratio);
-	fprintf(out, " of all dTLB cache hits ");
+	out->print_metric(out->ctx, color, "%7.2f%%", "of all dTLB cache hits", ratio);
 }
 
-static void print_itlb_cache_misses(FILE *out, int cpu,
+static void print_itlb_cache_misses(int cpu,
 				    struct perf_evsel *evsel __maybe_unused,
-				    double avg)
+				    double avg,
+				    struct perf_stat_output_ctx *out)
 {
 	double total, ratio = 0.0;
 	const char *color;
@@ -271,15 +282,13 @@
 		ratio = avg / total * 100.0;
 
 	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
-
-	fprintf(out, " #  ");
-	color_fprintf(out, color, "%6.2f%%", ratio);
-	fprintf(out, " of all iTLB cache hits ");
+	out->print_metric(out->ctx, color, "%7.2f%%", "of all iTLB cache hits", ratio);
 }
 
-static void print_ll_cache_misses(FILE *out, int cpu,
+static void print_ll_cache_misses(int cpu,
 				  struct perf_evsel *evsel __maybe_unused,
-				  double avg)
+				  double avg,
+				  struct perf_stat_output_ctx *out)
 {
 	double total, ratio = 0.0;
 	const char *color;
@@ -291,15 +300,15 @@
 		ratio = avg / total * 100.0;
 
 	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
-
-	fprintf(out, " #  ");
-	color_fprintf(out, color, "%6.2f%%", ratio);
-	fprintf(out, " of all LL-cache hits   ");
+	out->print_metric(out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio);
 }
 
-void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel,
-				   double avg, int cpu, enum aggr_mode aggr)
+void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
+				   double avg, int cpu,
+				   struct perf_stat_output_ctx *out)
 {
+	void *ctxp = out->ctx;
+	print_metric_t print_metric = out->print_metric;
 	double total, ratio = 0.0, total2;
 	int ctx = evsel_context(evsel);
 
@@ -307,119 +316,145 @@
 		total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
 		if (total) {
 			ratio = avg / total;
-			fprintf(out, " #   %5.2f  insns per cycle        ", ratio);
+			print_metric(ctxp, NULL, "%7.2f ",
+					"insn per cycle", ratio);
 		} else {
-			fprintf(out, "                                   ");
+			print_metric(ctxp, NULL, NULL, "insn per cycle", 0);
 		}
 		total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]);
 		total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu]));
 
 		if (total && avg) {
+			out->new_line(ctxp);
 			ratio = total / avg;
-			fprintf(out, "\n");
-			if (aggr == AGGR_NONE)
-				fprintf(out, "        ");
-			fprintf(out, "                                                  #   %5.2f  stalled cycles per insn", ratio);
+			print_metric(ctxp, NULL, "%7.2f ",
+					"stalled cycles per insn",
+					ratio);
+		} else if (have_frontend_stalled) {
+			print_metric(ctxp, NULL, NULL,
+				     "stalled cycles per insn", 0);
 		}
-
-	} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
-			runtime_branches_stats[ctx][cpu].n != 0) {
-		print_branch_misses(out, cpu, evsel, avg);
+	} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
+		if (runtime_branches_stats[ctx][cpu].n != 0)
+			print_branch_misses(cpu, evsel, avg, out);
+		else
+			print_metric(ctxp, NULL, NULL, "of all branches", 0);
 	} else if (
 		evsel->attr.type == PERF_TYPE_HW_CACHE &&
 		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1D |
 					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
-					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
-			runtime_l1_dcache_stats[ctx][cpu].n != 0) {
-		print_l1_dcache_misses(out, cpu, evsel, avg);
+					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
+		if (runtime_l1_dcache_stats[ctx][cpu].n != 0)
+			print_l1_dcache_misses(cpu, evsel, avg, out);
+		else
+			print_metric(ctxp, NULL, NULL, "of all L1-dcache hits", 0);
 	} else if (
 		evsel->attr.type == PERF_TYPE_HW_CACHE &&
 		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1I |
 					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
-					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
-			runtime_l1_icache_stats[ctx][cpu].n != 0) {
-		print_l1_icache_misses(out, cpu, evsel, avg);
+					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
+		if (runtime_l1_icache_stats[ctx][cpu].n != 0)
+			print_l1_icache_misses(cpu, evsel, avg, out);
+		else
+			print_metric(ctxp, NULL, NULL, "of all L1-icache hits", 0);
 	} else if (
 		evsel->attr.type == PERF_TYPE_HW_CACHE &&
 		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_DTLB |
 					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
-					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
-			runtime_dtlb_cache_stats[ctx][cpu].n != 0) {
-		print_dtlb_cache_misses(out, cpu, evsel, avg);
+					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
+		if (runtime_dtlb_cache_stats[ctx][cpu].n != 0)
+			print_dtlb_cache_misses(cpu, evsel, avg, out);
+		else
+			print_metric(ctxp, NULL, NULL, "of all dTLB cache hits", 0);
 	} else if (
 		evsel->attr.type == PERF_TYPE_HW_CACHE &&
 		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_ITLB |
 					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
-					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
-			runtime_itlb_cache_stats[ctx][cpu].n != 0) {
-		print_itlb_cache_misses(out, cpu, evsel, avg);
+					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
+		if (runtime_itlb_cache_stats[ctx][cpu].n != 0)
+			print_itlb_cache_misses(cpu, evsel, avg, out);
+		else
+			print_metric(ctxp, NULL, NULL, "of all iTLB cache hits", 0);
 	} else if (
 		evsel->attr.type == PERF_TYPE_HW_CACHE &&
 		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_LL |
 					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
-					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
-			runtime_ll_cache_stats[ctx][cpu].n != 0) {
-		print_ll_cache_misses(out, cpu, evsel, avg);
-	} else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
-			runtime_cacherefs_stats[ctx][cpu].n != 0) {
+					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
+		if (runtime_ll_cache_stats[ctx][cpu].n != 0)
+			print_ll_cache_misses(cpu, evsel, avg, out);
+		else
+			print_metric(ctxp, NULL, NULL, "of all LL-cache hits", 0);
+	} else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
 		total = avg_stats(&runtime_cacherefs_stats[ctx][cpu]);
 
 		if (total)
 			ratio = avg * 100 / total;
 
-		fprintf(out, " # %8.3f %% of all cache refs    ", ratio);
-
+		if (runtime_cacherefs_stats[ctx][cpu].n != 0)
+			print_metric(ctxp, NULL, "%8.3f %%",
+				     "of all cache refs", ratio);
+		else
+			print_metric(ctxp, NULL, NULL, "of all cache refs", 0);
 	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
-		print_stalled_cycles_frontend(out, cpu, evsel, avg);
+		print_stalled_cycles_frontend(cpu, evsel, avg, out);
 	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
-		print_stalled_cycles_backend(out, cpu, evsel, avg);
+		print_stalled_cycles_backend(cpu, evsel, avg, out);
 	} else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
 		total = avg_stats(&runtime_nsecs_stats[cpu]);
 
 		if (total) {
 			ratio = avg / total;
-			fprintf(out, " # %8.3f GHz                    ", ratio);
+			print_metric(ctxp, NULL, "%8.3f", "GHz", ratio);
 		} else {
-			fprintf(out, "                                   ");
+			print_metric(ctxp, NULL, NULL, "Ghz", 0);
 		}
 	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
 		total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
 		if (total)
-			fprintf(out,
-				" #   %5.2f%% transactional cycles   ",
-				100.0 * (avg / total));
+			print_metric(ctxp, NULL,
+					"%7.2f%%", "transactional cycles",
+					100.0 * (avg / total));
+		else
+			print_metric(ctxp, NULL, NULL, "transactional cycles",
+				     0);
 	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
 		total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
 		total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
 		if (total2 < avg)
 			total2 = avg;
 		if (total)
-			fprintf(out,
-				" #   %5.2f%% aborted cycles         ",
+			print_metric(ctxp, NULL, "%7.2f%%", "aborted cycles",
 				100.0 * ((total2-avg) / total));
-	} else if (perf_stat_evsel__is(evsel, TRANSACTION_START) &&
-		   runtime_cycles_in_tx_stats[ctx][cpu].n != 0) {
+		else
+			print_metric(ctxp, NULL, NULL, "aborted cycles", 0);
+	} else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) {
 		total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
 
 		if (avg)
 			ratio = total / avg;
 
-		fprintf(out, " # %8.0f cycles / transaction   ", ratio);
-	} else if (perf_stat_evsel__is(evsel, ELISION_START) &&
-		   runtime_cycles_in_tx_stats[ctx][cpu].n != 0) {
+		if (runtime_cycles_in_tx_stats[ctx][cpu].n != 0)
+			print_metric(ctxp, NULL, "%8.0f",
+				     "cycles / transaction", ratio);
+		else
+			print_metric(ctxp, NULL, NULL, "cycles / transaction",
+				     0);
+	} else if (perf_stat_evsel__is(evsel, ELISION_START)) {
 		total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
 
 		if (avg)
 			ratio = total / avg;
 
-		fprintf(out, " # %8.0f cycles / elision       ", ratio);
+		print_metric(ctxp, NULL, "%8.0f", "cycles / elision", ratio);
 	} else if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) {
 		if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0)
-			fprintf(out, " # %8.3f CPUs utilized          ", avg / ratio);
+			print_metric(ctxp, NULL, "%8.3f", "CPUs utilized",
+				     avg / ratio);
 		else
-			fprintf(out, "                                   ");
+			print_metric(ctxp, NULL, NULL, "CPUs utilized", 0);
 	} else if (runtime_nsecs_stats[cpu].n != 0) {
 		char unit = 'M';
+		char unit_buf[10];
 
 		total = avg_stats(&runtime_nsecs_stats[cpu]);
 
@@ -429,9 +464,9 @@
 			ratio *= 1000;
 			unit = 'K';
 		}
-
-		fprintf(out, " # %8.3f %c/sec                  ", ratio, unit);
+		snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
+		print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio);
 	} else {
-		fprintf(out, "                                   ");
+		print_metric(ctxp, NULL, NULL, NULL, 0);
 	}
 }

diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index afb0c45..4d9b481 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c

@@ -97,7 +97,7 @@
 	}
 }
 
-void perf_evsel__reset_stat_priv(struct perf_evsel *evsel)
+static void perf_evsel__reset_stat_priv(struct perf_evsel *evsel)
 {
 	int i;
 	struct perf_stat_evsel *ps = evsel->priv;
@@ -108,7 +108,7 @@
 	perf_stat_evsel_id_init(evsel);
 }
 
-int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
+static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
 {
 	evsel->priv = zalloc(sizeof(struct perf_stat_evsel));
 	if (evsel->priv == NULL)
@@ -117,13 +117,13 @@
 	return 0;
 }
 
-void perf_evsel__free_stat_priv(struct perf_evsel *evsel)
+static void perf_evsel__free_stat_priv(struct perf_evsel *evsel)
 {
 	zfree(&evsel->priv);
 }
 
-int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel,
-				      int ncpus, int nthreads)
+static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel,
+					     int ncpus, int nthreads)
 {
 	struct perf_counts *counts;
 
@@ -134,13 +134,13 @@
 	return counts ? 0 : -ENOMEM;
 }
 
-void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel)
+static void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel)
 {
 	perf_counts__delete(evsel->prev_raw_counts);
 	evsel->prev_raw_counts = NULL;
 }
 
-int perf_evsel__alloc_stats(struct perf_evsel *evsel, bool alloc_raw)
+static int perf_evsel__alloc_stats(struct perf_evsel *evsel, bool alloc_raw)
 {
 	int ncpus = perf_evsel__nr_cpus(evsel);
 	int nthreads = thread_map__nr(evsel->threads);

diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 086f4e1..0150e78 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h

@@ -68,21 +68,23 @@
 
 extern struct stats walltime_nsecs_stats;
 
+typedef void (*print_metric_t)(void *ctx, const char *color, const char *unit,
+			       const char *fmt, double val);
+typedef void (*new_line_t )(void *ctx);
+
+void perf_stat__init_shadow_stats(void);
 void perf_stat__reset_shadow_stats(void);
 void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
 				    int cpu);
-void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel,
-				   double avg, int cpu, enum aggr_mode aggr);
+struct perf_stat_output_ctx {
+	void *ctx;
+	print_metric_t print_metric;
+	new_line_t new_line;
+};
 
-void perf_evsel__reset_stat_priv(struct perf_evsel *evsel);
-int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel);
-void perf_evsel__free_stat_priv(struct perf_evsel *evsel);
-
-int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel,
-				      int ncpus, int nthreads);
-void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel);
-
-int perf_evsel__alloc_stats(struct perf_evsel *evsel, bool alloc_raw);
+void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
+				   double avg, int cpu,
+				   struct perf_stat_output_ctx *out);
 
 int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw);
 void perf_evlist__free_stats(struct perf_evlist *evlist);

diff --git a/tools/perf/util/strbuf.c b/tools/perf/util/strbuf.c
index 25671fa..d3d2792 100644
--- a/tools/perf/util/strbuf.c
+++ b/tools/perf/util/strbuf.c

@@ -51,30 +51,6 @@
 	ALLOC_GROW(sb->buf, sb->len + extra + 1, sb->alloc);
 }
 
-static void strbuf_splice(struct strbuf *sb, size_t pos, size_t len,
-				   const void *data, size_t dlen)
-{
-	if (pos + len < pos)
-		die("you want to use way too much memory");
-	if (pos > sb->len)
-		die("`pos' is too far after the end of the buffer");
-	if (pos + len > sb->len)
-		die("`pos + len' is too far after the end of the buffer");
-
-	if (dlen >= len)
-		strbuf_grow(sb, dlen - len);
-	memmove(sb->buf + pos + dlen,
-			sb->buf + pos + len,
-			sb->len - pos - len);
-	memcpy(sb->buf + pos, data, dlen);
-	strbuf_setlen(sb, sb->len + dlen - len);
-}
-
-void strbuf_remove(struct strbuf *sb, size_t pos, size_t len)
-{
-	strbuf_splice(sb, pos, len, NULL, 0);
-}
-
 void strbuf_add(struct strbuf *sb, const void *data, size_t len)
 {
 	strbuf_grow(sb, len);

diff --git a/tools/perf/util/strbuf.h b/tools/perf/util/strbuf.h
index 529f2f0..7a32c83 100644
--- a/tools/perf/util/strbuf.h
+++ b/tools/perf/util/strbuf.h

@@ -77,8 +77,6 @@
 	sb->buf[sb->len] = '\0';
 }
 
-extern void strbuf_remove(struct strbuf *, size_t pos, size_t len);
-
 extern void strbuf_add(struct strbuf *, const void *, size_t);
 static inline void strbuf_addstr(struct strbuf *sb, const char *s) {
 	strbuf_add(sb, s, strlen(s));

diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 562b8eb..b1dd68f 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c

@@ -6,6 +6,7 @@
 #include <inttypes.h>
 
 #include "symbol.h"
+#include "demangle-java.h"
 #include "machine.h"
 #include "vdso.h"
 #include <symbol/kallsyms.h>
@@ -1077,6 +1078,8 @@
 				demangle_flags = DMGL_PARAMS | DMGL_ANSI;
 
 			demangled = bfd_demangle(NULL, elf_name, demangle_flags);
+			if (demangled == NULL)
+				demangled = java_demangle_sym(elf_name, JAVA_DEMANGLE_NORET);
 			if (demangled != NULL)
 				elf_name = demangled;
 		}

diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index ab02209..e7588dc 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c

@@ -1466,7 +1466,8 @@
 	 * Read the build id if possible. This is required for
 	 * DSO_BINARY_TYPE__BUILDID_DEBUGINFO to work
 	 */
-	if (filename__read_build_id(dso->long_name, build_id, BUILD_ID_SIZE) > 0)
+	if (is_regular_file(name) &&
+	    filename__read_build_id(dso->long_name, build_id, BUILD_ID_SIZE) > 0)
 		dso__set_build_id(dso, build_id);
 
 	/*
@@ -1487,6 +1488,9 @@
 						   root_dir, name, PATH_MAX))
 			continue;
 
+		if (!is_regular_file(name))
+			continue;
+
 		/* Name is now the name of the next image to try */
 		if (symsrc__init(ss, dso, name, symtab_type) < 0)
 			continue;
@@ -1525,6 +1529,10 @@
 	if (!runtime_ss && syms_ss)
 		runtime_ss = syms_ss;
 
+	if (syms_ss && syms_ss->type == DSO_BINARY_TYPE__BUILD_ID_CACHE)
+		if (dso__build_id_is_kmod(dso, name, PATH_MAX))
+			kmod = true;
+
 	if (syms_ss)
 		ret = dso__load_sym(dso, map, syms_ss, runtime_ss, filter, kmod);
 	else

diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index ccd1caa..a937053 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h

@@ -110,7 +110,8 @@
 			has_filter,
 			show_ref_callgraph,
 			hide_unresolved,
-			raw_trace;
+			raw_trace,
+			report_hierarchy;
 	const char	*vmlinux_name,
 			*kallsyms_name,
 			*source_prefix,

diff --git a/tools/perf/util/trace-event.c b/tools/perf/util/trace-event.c
index 802bb86..8ae051e 100644
--- a/tools/perf/util/trace-event.c
+++ b/tools/perf/util/trace-event.c

@@ -10,6 +10,7 @@
 #include <linux/err.h>
 #include <traceevent/event-parse.h>
 #include <api/fs/tracing_path.h>
+#include <api/fs/fs.h>
 #include "trace-event.h"
 #include "machine.h"
 #include "util.h"

diff --git a/tools/perf/util/tsc.c b/tools/perf/util/tsc.c
index 4d4210d..1b74164 100644
--- a/tools/perf/util/tsc.c
+++ b/tools/perf/util/tsc.c

@@ -19,7 +19,7 @@
 	u64 quot, rem;
 
 	quot = cyc >> tc->time_shift;
-	rem  = cyc & ((1 << tc->time_shift) - 1);
+	rem  = cyc & (((u64)1 << tc->time_shift) - 1);
 	return tc->time_zero + quot * tc->time_mult +
 	       ((rem * tc->time_mult) >> tc->time_shift);
 }

diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index ead9509..b7766c5 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c

@@ -14,6 +14,7 @@
 #include <limits.h>
 #include <byteswap.h>
 #include <linux/kernel.h>
+#include <linux/log2.h>
 #include <unistd.h>
 #include "callchain.h"
 #include "strlist.h"
@@ -507,54 +508,6 @@
 	return ret;
 }
 
-int filename__read_str(const char *filename, char **buf, size_t *sizep)
-{
-	size_t size = 0, alloc_size = 0;
-	void *bf = NULL, *nbf;
-	int fd, n, err = 0;
-	char sbuf[STRERR_BUFSIZE];
-
-	fd = open(filename, O_RDONLY);
-	if (fd < 0)
-		return -errno;
-
-	do {
-		if (size == alloc_size) {
-			alloc_size += BUFSIZ;
-			nbf = realloc(bf, alloc_size);
-			if (!nbf) {
-				err = -ENOMEM;
-				break;
-			}
-
-			bf = nbf;
-		}
-
-		n = read(fd, bf + size, alloc_size - size);
-		if (n < 0) {
-			if (size) {
-				pr_warning("read failed %d: %s\n", errno,
-					 strerror_r(errno, sbuf, sizeof(sbuf)));
-				err = 0;
-			} else
-				err = -errno;
-
-			break;
-		}
-
-		size += n;
-	} while (n > 0);
-
-	if (!err) {
-		*sizep = size;
-		*buf   = bf;
-	} else
-		free(bf);
-
-	close(fd);
-	return err;
-}
-
 const char *get_filename_for_perf_kvm(void)
 {
 	const char *filename;
@@ -691,3 +644,66 @@
 
 	return tip;
 }
+
+bool is_regular_file(const char *file)
+{
+	struct stat st;
+
+	if (stat(file, &st))
+		return false;
+
+	return S_ISREG(st.st_mode);
+}
+
+int fetch_current_timestamp(char *buf, size_t sz)
+{
+	struct timeval tv;
+	struct tm tm;
+	char dt[32];
+
+	if (gettimeofday(&tv, NULL) || !localtime_r(&tv.tv_sec, &tm))
+		return -1;
+
+	if (!strftime(dt, sizeof(dt), "%Y%m%d%H%M%S", &tm))
+		return -1;
+
+	scnprintf(buf, sz, "%s%02u", dt, (unsigned)tv.tv_usec / 10000);
+
+	return 0;
+}
+
+void print_binary(unsigned char *data, size_t len,
+		  size_t bytes_per_line, print_binary_t printer,
+		  void *extra)
+{
+	size_t i, j, mask;
+
+	if (!printer)
+		return;
+
+	bytes_per_line = roundup_pow_of_two(bytes_per_line);
+	mask = bytes_per_line - 1;
+
+	printer(BINARY_PRINT_DATA_BEGIN, 0, extra);
+	for (i = 0; i < len; i++) {
+		if ((i & mask) == 0) {
+			printer(BINARY_PRINT_LINE_BEGIN, -1, extra);
+			printer(BINARY_PRINT_ADDR, i, extra);
+		}
+
+		printer(BINARY_PRINT_NUM_DATA, data[i], extra);
+
+		if (((i & mask) == mask) || i == len - 1) {
+			for (j = 0; j < mask-(i & mask); j++)
+				printer(BINARY_PRINT_NUM_PAD, -1, extra);
+
+			printer(BINARY_PRINT_SEP, i, extra);
+			for (j = i & ~mask; j <= i; j++)
+				printer(BINARY_PRINT_CHAR_DATA, data[j], extra);
+			for (j = 0; j < mask-(i & mask); j++)
+				printer(BINARY_PRINT_CHAR_PAD, i, extra);
+			printer(BINARY_PRINT_LINE_END, -1, extra);
+		}
+	}
+	printer(BINARY_PRINT_DATA_END, -1, extra);
+}

diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index fe915e6..d0d50ce 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h

@@ -82,6 +82,8 @@
 
 extern const char *graph_line;
 extern const char *graph_dotted_line;
+extern const char *spaces;
+extern const char *dots;
 extern char buildid_dir[];
 
 /* On most systems <limits.h> would have given us this, but
@@ -303,7 +305,6 @@
 		  bool show_sym, bool unwind_inlines);
 void free_srcline(char *srcline);
 
-int filename__read_str(const char *filename, char **buf, size_t *sizep);
 int perf_event_paranoid(void);
 
 void mem_bswap_64(void *src, int byte_size);
@@ -343,5 +344,27 @@
 #define KVER_PARAM(x)	KVER_VERSION(x), KVER_PATCHLEVEL(x), KVER_SUBLEVEL(x)
 
 const char *perf_tip(const char *dirpath);
+bool is_regular_file(const char *file);
+int fetch_current_timestamp(char *buf, size_t sz);
 
+enum binary_printer_ops {
+	BINARY_PRINT_DATA_BEGIN,
+	BINARY_PRINT_LINE_BEGIN,
+	BINARY_PRINT_ADDR,
+	BINARY_PRINT_NUM_DATA,
+	BINARY_PRINT_NUM_PAD,
+	BINARY_PRINT_SEP,
+	BINARY_PRINT_CHAR_DATA,
+	BINARY_PRINT_CHAR_PAD,
+	BINARY_PRINT_LINE_END,
+	BINARY_PRINT_DATA_END,
+};
+
+typedef void (*print_binary_t)(enum binary_printer_ops,
+			       unsigned int val,
+			       void *extra);
+
+void print_binary(unsigned char *data, size_t len,
+		  size_t bytes_per_line, print_binary_t printer,
+		  void *extra);
 #endif /* GIT_COMPAT_UTIL_H */

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 0dac7e0..3fa94e2 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c

@@ -1970,7 +1970,7 @@
 }
 
 static void
-dump_cstate_pstate_config_info(family, model)
+dump_cstate_pstate_config_info(unsigned int family, unsigned int model)
 {
 	if (!do_nhm_platform_info)
 		return;
@@ -2142,7 +2142,7 @@
 #define	RAPL_POWER_GRANULARITY	0x7FFF	/* 15 bit power granularity */
 #define	RAPL_TIME_GRANULARITY	0x3F /* 6 bit time granularity */
 
-double get_tdp(model)
+double get_tdp(unsigned int model)
 {
 	unsigned long long msr;
 
@@ -2256,7 +2256,7 @@
 	return;
 }
 
-void perf_limit_reasons_probe(family, model)
+void perf_limit_reasons_probe(unsigned int family, unsigned int model)
 {
 	if (!genuine_intel)
 		return;
@@ -2792,7 +2792,7 @@
 	perf_limit_reasons_probe(family, model);
 
 	if (debug)
-		dump_cstate_pstate_config_info();
+		dump_cstate_pstate_config_info(family, model);
 
 	if (has_skl_msrs(family, model))
 		calculate_tsc_tweak();

diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index 90bd2ea..b3281dc 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c

@@ -217,13 +217,16 @@
 	return rc;
 }
 
+#define NFIT_TEST_ARS_RECORDS 4
+
 static int nfit_test_cmd_ars_cap(struct nd_cmd_ars_cap *nd_cmd,
 		unsigned int buf_len)
 {
 	if (buf_len < sizeof(*nd_cmd))
 		return -EINVAL;
 
-	nd_cmd->max_ars_out = 256;
+	nd_cmd->max_ars_out = sizeof(struct nd_cmd_ars_status)
+		+ NFIT_TEST_ARS_RECORDS * sizeof(struct nd_ars_record);
 	nd_cmd->status = (ND_ARS_PERSISTENT | ND_ARS_VOLATILE) << 16;
 
 	return 0;
@@ -246,7 +249,8 @@
 	if (buf_len < sizeof(*nd_cmd))
 		return -EINVAL;
 
-	nd_cmd->out_length = 256;
+	nd_cmd->out_length = sizeof(struct nd_cmd_ars_status);
+	/* TODO: emit error records */
 	nd_cmd->num_records = 0;
 	nd_cmd->address = 0;
 	nd_cmd->length = -1ULL;

diff --git a/tools/testing/selftests/ftrace/test.d/instances/instance.tc b/tools/testing/selftests/ftrace/test.d/instances/instance.tc
index 773e276..1e1abe0 100644
--- a/tools/testing/selftests/ftrace/test.d/instances/instance.tc
+++ b/tools/testing/selftests/ftrace/test.d/instances/instance.tc

@@ -39,28 +39,23 @@
 }
 
 instance_slam &
-x=`jobs -l`
-p1=`echo $x | cut -d' ' -f2`
+p1=$!
 echo $p1
 
 instance_slam &
-x=`jobs -l | tail -1`
-p2=`echo $x | cut -d' ' -f2`
+p2=$!
 echo $p2
 
 instance_slam &
-x=`jobs -l | tail -1`
-p3=`echo $x | cut -d' ' -f2`
+p3=$!
 echo $p3
 
 instance_slam &
-x=`jobs -l | tail -1`
-p4=`echo $x | cut -d' ' -f2`
+p4=$!
 echo $p4
 
 instance_slam &
-x=`jobs -l | tail -1`
-p5=`echo $x | cut -d' ' -f2`
+p5=$!
 echo $p5
 
 ls -lR >/dev/null

diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh
index 844787a..5eb49b7 100755
--- a/tools/testing/selftests/rcutorture/bin/parse-console.sh
+++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh

@@ -33,7 +33,7 @@
 then
 	print_warning Console output contains nul bytes, old qemu still running?
 fi
-egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|Stall ended before state dump start' < $file | grep -v 'ODEBUG: ' | grep -v 'Warning: unable to open an initial console' > $1.diags
+egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state' < $file | grep -v 'ODEBUG: ' | grep -v 'Warning: unable to open an initial console' > $1.diags
 if test -s $1.diags
 then
 	print_warning Assertion failure in $file $title
@@ -64,10 +64,12 @@
 	then
 		summary="$summary  lockdep: $n_badness"
 	fi
-	n_stalls=`egrep -c 'detected stalls on CPUs/tasks:|Stall ended before state dump start' $1`
+	n_stalls=`egrep -c 'detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state' $1`
 	if test "$n_stalls" -ne 0
 	then
 		summary="$summary  Stalls: $n_stalls"
 	fi
 	print_warning Summary: $summary
+else
+	rm $1.diags
 fi

diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index d0c473f..d5ce7d7 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile

@@ -4,15 +4,16 @@
 
 .PHONY: all all_32 all_64 warn_32bit_failure clean
 
-TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall
-TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault sigreturn test_syscall_vdso unwind_vdso \
+TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall \
+			check_initial_reg_state sigreturn ldt_gdt
+TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
 			test_FCMOV test_FCOMI test_FISTTP \
-			ldt_gdt \
 			vdso_restorer
 
 TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY)
+TARGETS_C_64BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_64BIT_ONLY)
 BINARIES_32 := $(TARGETS_C_32BIT_ALL:%=%_32)
-BINARIES_64 := $(TARGETS_C_BOTHBITS:%=%_64)
+BINARIES_64 := $(TARGETS_C_64BIT_ALL:%=%_64)
 
 CFLAGS := -O2 -g -std=gnu99 -pthread -Wall
 
@@ -40,7 +41,7 @@
 $(TARGETS_C_32BIT_ALL:%=%_32): %_32: %.c
 	$(CC) -m32 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl -lm
 
-$(TARGETS_C_BOTHBITS:%=%_64): %_64: %.c
+$(TARGETS_C_64BIT_ALL:%=%_64): %_64: %.c
 	$(CC) -m64 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl
 
 # x86_64 users should be encouraged to install 32-bit libraries
@@ -65,3 +66,9 @@
 sysret_ss_attrs_64: thunks.S
 ptrace_syscall_32: raw_syscall_helper_32.S
 test_syscall_vdso_32: thunks_32.S
+
+# check_initial_reg_state is special: it needs a custom entry, and it
+# needs to be static so that its interpreter doesn't destroy its initial
+# state.
+check_initial_reg_state_32: CFLAGS += -Wl,-ereal_start -static
+check_initial_reg_state_64: CFLAGS += -Wl,-ereal_start -static

diff --git a/tools/testing/selftests/x86/check_initial_reg_state.c b/tools/testing/selftests/x86/check_initial_reg_state.c
new file mode 100644
index 0000000..6aaed9b8
--- /dev/null
+++ b/tools/testing/selftests/x86/check_initial_reg_state.c

@@ -0,0 +1,109 @@
+/*
+ * check_initial_reg_state.c - check that execve sets the correct state
+ * Copyright (c) 2014-2016 Andrew Lutomirski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+
+unsigned long ax, bx, cx, dx, si, di, bp, sp, flags;
+unsigned long r8, r9, r10, r11, r12, r13, r14, r15;
+
+asm (
+	".pushsection .text\n\t"
+	".type real_start, @function\n\t"
+	".global real_start\n\t"
+	"real_start:\n\t"
+#ifdef __x86_64__
+	"mov %rax, ax\n\t"
+	"mov %rbx, bx\n\t"
+	"mov %rcx, cx\n\t"
+	"mov %rdx, dx\n\t"
+	"mov %rsi, si\n\t"
+	"mov %rdi, di\n\t"
+	"mov %rbp, bp\n\t"
+	"mov %rsp, sp\n\t"
+	"mov %r8, r8\n\t"
+	"mov %r9, r9\n\t"
+	"mov %r10, r10\n\t"
+	"mov %r11, r11\n\t"
+	"mov %r12, r12\n\t"
+	"mov %r13, r13\n\t"
+	"mov %r14, r14\n\t"
+	"mov %r15, r15\n\t"
+	"pushfq\n\t"
+	"popq flags\n\t"
+#else
+	"mov %eax, ax\n\t"
+	"mov %ebx, bx\n\t"
+	"mov %ecx, cx\n\t"
+	"mov %edx, dx\n\t"
+	"mov %esi, si\n\t"
+	"mov %edi, di\n\t"
+	"mov %ebp, bp\n\t"
+	"mov %esp, sp\n\t"
+	"pushfl\n\t"
+	"popl flags\n\t"
+#endif
+	"jmp _start\n\t"
+	".size real_start, . - real_start\n\t"
+	".popsection");
+
+int main()
+{
+	int nerrs = 0;
+
+	if (sp == 0) {
+		printf("[FAIL]\tTest was built incorrectly\n");
+		return 1;
+	}
+
+	if (ax || bx || cx || dx || si || di || bp
+#ifdef __x86_64__
+	    || r8 || r9 || r10 || r11 || r12 || r13 || r14 || r15
+#endif
+		) {
+		printf("[FAIL]\tAll GPRs except SP should be 0\n");
+#define SHOW(x) printf("\t" #x " = 0x%lx\n", x);
+		SHOW(ax);
+		SHOW(bx);
+		SHOW(cx);
+		SHOW(dx);
+		SHOW(si);
+		SHOW(di);
+		SHOW(bp);
+		SHOW(sp);
+#ifdef __x86_64__
+		SHOW(r8);
+		SHOW(r9);
+		SHOW(r10);
+		SHOW(r11);
+		SHOW(r12);
+		SHOW(r13);
+		SHOW(r14);
+		SHOW(r15);
+#endif
+		nerrs++;
+	} else {
+		printf("[OK]\tAll GPRs except SP are 0\n");
+	}
+
+	if (flags != 0x202) {
+		printf("[FAIL]\tFLAGS is 0x%lx, but it should be 0x202\n", flags);
+		nerrs++;
+	} else {
+		printf("[OK]\tFLAGS is 0x202\n");
+	}
+
+	return nerrs ? 1 : 0;
+}

diff --git a/tools/testing/selftests/x86/ptrace_syscall.c b/tools/testing/selftests/x86/ptrace_syscall.c
index 5105b49..4214567 100644
--- a/tools/testing/selftests/x86/ptrace_syscall.c
+++ b/tools/testing/selftests/x86/ptrace_syscall.c

@@ -103,6 +103,17 @@
 		err(1, "sigaction");
 }
 
+static void setsigign(int sig, int flags)
+{
+	struct sigaction sa;
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_sigaction = (void *)SIG_IGN;
+	sa.sa_flags = flags;
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(sig, &sa, 0))
+		err(1, "sigaction");
+}
+
 static void clearhandler(int sig)
 {
 	struct sigaction sa;
@@ -187,7 +198,7 @@
 
 	printf("[RUN]\tSYSEMU\n");
 	if (ptrace(PTRACE_SYSEMU, chld, 0, 0) != 0)
-		err(1, "PTRACE_SYSCALL");
+		err(1, "PTRACE_SYSEMU");
 	wait_trap(chld);
 
 	if (ptrace(PTRACE_GETREGS, chld, 0, &regs) != 0)
@@ -218,7 +229,7 @@
 		err(1, "PTRACE_SETREGS");
 
 	if (ptrace(PTRACE_SYSEMU, chld, 0, 0) != 0)
-		err(1, "PTRACE_SYSCALL");
+		err(1, "PTRACE_SYSEMU");
 	wait_trap(chld);
 
 	if (ptrace(PTRACE_GETREGS, chld, 0, &regs) != 0)
@@ -250,7 +261,7 @@
 		err(1, "PTRACE_SETREGS");
 
 	if (ptrace(PTRACE_SYSEMU, chld, 0, 0) != 0)
-		err(1, "PTRACE_SYSCALL");
+		err(1, "PTRACE_SYSEMU");
 	wait_trap(chld);
 
 	if (ptrace(PTRACE_GETREGS, chld, 0, &regs) != 0)
@@ -277,6 +288,119 @@
 	}
 }
 
+static void test_restart_under_ptrace(void)
+{
+	printf("[RUN]\tkernel syscall restart under ptrace\n");
+	pid_t chld = fork();
+	if (chld < 0)
+		err(1, "fork");
+
+	if (chld == 0) {
+		if (ptrace(PTRACE_TRACEME, 0, 0, 0) != 0)
+			err(1, "PTRACE_TRACEME");
+
+		printf("\tChild will take a nap until signaled\n");
+		setsigign(SIGUSR1, SA_RESTART);
+		raise(SIGSTOP);
+
+		syscall(SYS_pause, 0, 0, 0, 0, 0, 0);
+		_exit(0);
+	}
+
+	int status;
+
+	/* Wait for SIGSTOP. */
+	if (waitpid(chld, &status, 0) != chld || !WIFSTOPPED(status))
+		err(1, "waitpid");
+
+	struct user_regs_struct regs;
+
+	printf("[RUN]\tSYSCALL\n");
+	if (ptrace(PTRACE_SYSCALL, chld, 0, 0) != 0)
+		err(1, "PTRACE_SYSCALL");
+	wait_trap(chld);
+
+	/* We should be stopped at pause(2) entry. */
+
+	if (ptrace(PTRACE_GETREGS, chld, 0, &regs) != 0)
+		err(1, "PTRACE_GETREGS");
+
+	if (regs.user_syscall_nr != SYS_pause ||
+	    regs.user_arg0 != 0 || regs.user_arg1 != 0 ||
+	    regs.user_arg2 != 0 || regs.user_arg3 != 0 ||
+	    regs.user_arg4 != 0 || regs.user_arg5 != 0) {
+		printf("[FAIL]\tInitial args are wrong (nr=%lu, args=%lu %lu %lu %lu %lu %lu)\n", (unsigned long)regs.user_syscall_nr, (unsigned long)regs.user_arg0, (unsigned long)regs.user_arg1, (unsigned long)regs.user_arg2, (unsigned long)regs.user_arg3, (unsigned long)regs.user_arg4, (unsigned long)regs.user_arg5);
+		nerrs++;
+	} else {
+		printf("[OK]\tInitial nr and args are correct\n");
+	}
+
+	/* Interrupt it. */
+	kill(chld, SIGUSR1);
+
+	/* Advance.  We should be stopped at exit. */
+	printf("[RUN]\tSYSCALL\n");
+	if (ptrace(PTRACE_SYSCALL, chld, 0, 0) != 0)
+		err(1, "PTRACE_SYSCALL");
+	wait_trap(chld);
+
+	if (ptrace(PTRACE_GETREGS, chld, 0, &regs) != 0)
+		err(1, "PTRACE_GETREGS");
+
+	if (regs.user_syscall_nr != SYS_pause ||
+	    regs.user_arg0 != 0 || regs.user_arg1 != 0 ||
+	    regs.user_arg2 != 0 || regs.user_arg3 != 0 ||
+	    regs.user_arg4 != 0 || regs.user_arg5 != 0) {
+		printf("[FAIL]\tArgs after SIGUSR1 are wrong (nr=%lu, args=%lu %lu %lu %lu %lu %lu)\n", (unsigned long)regs.user_syscall_nr, (unsigned long)regs.user_arg0, (unsigned long)regs.user_arg1, (unsigned long)regs.user_arg2, (unsigned long)regs.user_arg3, (unsigned long)regs.user_arg4, (unsigned long)regs.user_arg5);
+		nerrs++;
+	} else {
+		printf("[OK]\tArgs after SIGUSR1 are correct (ax = %ld)\n",
+		       (long)regs.user_ax);
+	}
+
+	/* Poke the regs back in.  This must not break anything. */
+	if (ptrace(PTRACE_SETREGS, chld, 0, &regs) != 0)
+		err(1, "PTRACE_SETREGS");
+
+	/* Catch the (ignored) SIGUSR1. */
+	if (ptrace(PTRACE_CONT, chld, 0, 0) != 0)
+		err(1, "PTRACE_CONT");
+	if (waitpid(chld, &status, 0) != chld)
+		err(1, "waitpid");
+	if (!WIFSTOPPED(status)) {
+		printf("[FAIL]\tChild was stopped for SIGUSR1 (status = 0x%x)\n", status);
+		nerrs++;
+	} else {
+		printf("[OK]\tChild got SIGUSR1\n");
+	}
+
+	/* The next event should be pause(2) again. */
+	printf("[RUN]\tStep again\n");
+	if (ptrace(PTRACE_SYSCALL, chld, 0, 0) != 0)
+		err(1, "PTRACE_SYSCALL");
+	wait_trap(chld);
+
+	/* We should be stopped at pause(2) entry. */
+
+	if (ptrace(PTRACE_GETREGS, chld, 0, &regs) != 0)
+		err(1, "PTRACE_GETREGS");
+
+	if (regs.user_syscall_nr != SYS_pause ||
+	    regs.user_arg0 != 0 || regs.user_arg1 != 0 ||
+	    regs.user_arg2 != 0 || regs.user_arg3 != 0 ||
+	    regs.user_arg4 != 0 || regs.user_arg5 != 0) {
+		printf("[FAIL]\tpause did not restart (nr=%lu, args=%lu %lu %lu %lu %lu %lu)\n", (unsigned long)regs.user_syscall_nr, (unsigned long)regs.user_arg0, (unsigned long)regs.user_arg1, (unsigned long)regs.user_arg2, (unsigned long)regs.user_arg3, (unsigned long)regs.user_arg4, (unsigned long)regs.user_arg5);
+		nerrs++;
+	} else {
+		printf("[OK]\tpause(2) restarted correctly\n");
+	}
+
+	/* Kill it. */
+	kill(chld, SIGKILL);
+	if (waitpid(chld, &status, 0) != chld)
+		err(1, "waitpid");
+}
+
 int main()
 {
 	printf("[RUN]\tCheck int80 return regs\n");
@@ -290,5 +414,7 @@
 
 	test_ptrace_syscall_restart();
 
+	test_restart_under_ptrace();
+
 	return 0;
 }

diff --git a/tools/testing/selftests/x86/sigreturn.c b/tools/testing/selftests/x86/sigreturn.c
index b5aa1ba..8a577e7 100644
--- a/tools/testing/selftests/x86/sigreturn.c
+++ b/tools/testing/selftests/x86/sigreturn.c

@@ -54,6 +54,37 @@
 #include <sys/ptrace.h>
 #include <sys/user.h>
 
+/* Pull in AR_xyz defines. */
+typedef unsigned int u32;
+typedef unsigned short u16;
+#include "../../../../arch/x86/include/asm/desc_defs.h"
+
+/*
+ * Copied from asm/ucontext.h, as asm/ucontext.h conflicts badly with the glibc
+ * headers.
+ */
+#ifdef __x86_64__
+/*
+ * UC_SIGCONTEXT_SS will be set when delivering 64-bit or x32 signals on
+ * kernels that save SS in the sigcontext.  All kernels that set
+ * UC_SIGCONTEXT_SS will correctly restore at least the low 32 bits of esp
+ * regardless of SS (i.e. they implement espfix).
+ *
+ * Kernels that set UC_SIGCONTEXT_SS will also set UC_STRICT_RESTORE_SS
+ * when delivering a signal that came from 64-bit code.
+ *
+ * Sigreturn restores SS as follows:
+ *
+ * if (saved SS is valid || UC_STRICT_RESTORE_SS is set ||
+ *     saved CS is not 64-bit)
+ *         new SS = saved SS  (will fail IRET and signal if invalid)
+ * else
+ *         new SS = a flat 32-bit data segment
+ */
+#define UC_SIGCONTEXT_SS       0x2
+#define UC_STRICT_RESTORE_SS   0x4
+#endif
+
 /*
  * In principle, this test can run on Linux emulation layers (e.g.
  * Illumos "LX branded zones").  Solaris-based kernels reserve LDT
@@ -267,6 +298,9 @@
 /* Instructions for the SIGUSR1 handler. */
 static volatile unsigned short sig_cs, sig_ss;
 static volatile sig_atomic_t sig_trapped, sig_err, sig_trapno;
+#ifdef __x86_64__
+static volatile sig_atomic_t sig_corrupt_final_ss;
+#endif
 
 /* Abstractions for some 32-bit vs 64-bit differences. */
 #ifdef __x86_64__
@@ -305,62 +339,6 @@
 }
 #endif
 
-/* Number of errors in the current test case. */
-static volatile sig_atomic_t nerrs;
-
-/*
- * SIGUSR1 handler.  Sets CS and SS as requested and points IP to the
- * int3 trampoline.  Sets SP to a large known value so that we can see
- * whether the value round-trips back to user mode correctly.
- */
-static void sigusr1(int sig, siginfo_t *info, void *ctx_void)
-{
-	ucontext_t *ctx = (ucontext_t*)ctx_void;
-
-	memcpy(&initial_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
-
-	*csptr(ctx) = sig_cs;
-	*ssptr(ctx) = sig_ss;
-
-	ctx->uc_mcontext.gregs[REG_IP] =
-		sig_cs == code16_sel ? 0 : (unsigned long)&int3;
-	ctx->uc_mcontext.gregs[REG_SP] = (unsigned long)0x8badf00d5aadc0deULL;
-	ctx->uc_mcontext.gregs[REG_AX] = 0;
-
-	memcpy(&requested_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
-	requested_regs[REG_AX] = *ssptr(ctx);	/* The asm code does this. */
-
-	return;
-}
-
-/*
- * Called after a successful sigreturn.  Restores our state so that
- * the original raise(SIGUSR1) returns.
- */
-static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
-{
-	ucontext_t *ctx = (ucontext_t*)ctx_void;
-
-	sig_err = ctx->uc_mcontext.gregs[REG_ERR];
-	sig_trapno = ctx->uc_mcontext.gregs[REG_TRAPNO];
-
-	unsigned short ss;
-	asm ("mov %%ss,%0" : "=r" (ss));
-
-	greg_t asm_ss = ctx->uc_mcontext.gregs[REG_AX];
-	if (asm_ss != sig_ss && sig == SIGTRAP) {
-		/* Sanity check failure. */
-		printf("[FAIL]\tSIGTRAP: ss = %hx, frame ss = %hx, ax = %llx\n",
-		       ss, *ssptr(ctx), (unsigned long long)asm_ss);
-		nerrs++;
-	}
-
-	memcpy(&resulting_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
-	memcpy(&ctx->uc_mcontext.gregs, &initial_regs, sizeof(gregset_t));
-
-	sig_trapped = sig;
-}
-
 /*
  * Checks a given selector for its code bitness or returns -1 if it's not
  * a usable code segment selector.
@@ -394,6 +372,184 @@
 		return -1;	/* Unknown bitness. */
 }
 
+/*
+ * Checks a given selector for its code bitness or returns -1 if it's not
+ * a usable code segment selector.
+ */
+bool is_valid_ss(unsigned short cs)
+{
+	uint32_t valid = 0, ar;
+	asm ("lar %[cs], %[ar]\n\t"
+	     "jnz 1f\n\t"
+	     "mov $1, %[valid]\n\t"
+	     "1:"
+	     : [ar] "=r" (ar), [valid] "+rm" (valid)
+	     : [cs] "r" (cs));
+
+	if (!valid)
+		return false;
+
+	if ((ar & AR_TYPE_MASK) != AR_TYPE_RWDATA &&
+	    (ar & AR_TYPE_MASK) != AR_TYPE_RWDATA_EXPDOWN)
+		return false;
+
+	return (ar & AR_P);
+}
+
+/* Number of errors in the current test case. */
+static volatile sig_atomic_t nerrs;
+
+static void validate_signal_ss(int sig, ucontext_t *ctx)
+{
+#ifdef __x86_64__
+	bool was_64bit = (cs_bitness(*csptr(ctx)) == 64);
+
+	if (!(ctx->uc_flags & UC_SIGCONTEXT_SS)) {
+		printf("[FAIL]\tUC_SIGCONTEXT_SS was not set\n");
+		nerrs++;
+
+		/*
+		 * This happens on Linux 4.1.  The rest will fail, too, so
+		 * return now to reduce the noise.
+		 */
+		return;
+	}
+
+	/* UC_STRICT_RESTORE_SS is set iff we came from 64-bit mode. */
+	if (!!(ctx->uc_flags & UC_STRICT_RESTORE_SS) != was_64bit) {
+		printf("[FAIL]\tUC_STRICT_RESTORE_SS was wrong in signal %d\n",
+		       sig);
+		nerrs++;
+	}
+
+	if (is_valid_ss(*ssptr(ctx))) {
+		/*
+		 * DOSEMU was written before 64-bit sigcontext had SS, and
+		 * it tries to figure out the signal source SS by looking at
+		 * the physical register.  Make sure that keeps working.
+		 */
+		unsigned short hw_ss;
+		asm ("mov %%ss, %0" : "=rm" (hw_ss));
+		if (hw_ss != *ssptr(ctx)) {
+			printf("[FAIL]\tHW SS didn't match saved SS\n");
+			nerrs++;
+		}
+	}
+#endif
+}
+
+/*
+ * SIGUSR1 handler.  Sets CS and SS as requested and points IP to the
+ * int3 trampoline.  Sets SP to a large known value so that we can see
+ * whether the value round-trips back to user mode correctly.
+ */
+static void sigusr1(int sig, siginfo_t *info, void *ctx_void)
+{
+	ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+	validate_signal_ss(sig, ctx);
+
+	memcpy(&initial_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
+
+	*csptr(ctx) = sig_cs;
+	*ssptr(ctx) = sig_ss;
+
+	ctx->uc_mcontext.gregs[REG_IP] =
+		sig_cs == code16_sel ? 0 : (unsigned long)&int3;
+	ctx->uc_mcontext.gregs[REG_SP] = (unsigned long)0x8badf00d5aadc0deULL;
+	ctx->uc_mcontext.gregs[REG_AX] = 0;
+
+	memcpy(&requested_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
+	requested_regs[REG_AX] = *ssptr(ctx);	/* The asm code does this. */
+
+	return;
+}
+
+/*
+ * Called after a successful sigreturn (via int3) or from a failed
+ * sigreturn (directly by kernel).  Restores our state so that the
+ * original raise(SIGUSR1) returns.
+ */
+static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
+{
+	ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+	validate_signal_ss(sig, ctx);
+
+	sig_err = ctx->uc_mcontext.gregs[REG_ERR];
+	sig_trapno = ctx->uc_mcontext.gregs[REG_TRAPNO];
+
+	unsigned short ss;
+	asm ("mov %%ss,%0" : "=r" (ss));
+
+	greg_t asm_ss = ctx->uc_mcontext.gregs[REG_AX];
+	if (asm_ss != sig_ss && sig == SIGTRAP) {
+		/* Sanity check failure. */
+		printf("[FAIL]\tSIGTRAP: ss = %hx, frame ss = %hx, ax = %llx\n",
+		       ss, *ssptr(ctx), (unsigned long long)asm_ss);
+		nerrs++;
+	}
+
+	memcpy(&resulting_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
+	memcpy(&ctx->uc_mcontext.gregs, &initial_regs, sizeof(gregset_t));
+
+#ifdef __x86_64__
+	if (sig_corrupt_final_ss) {
+		if (ctx->uc_flags & UC_STRICT_RESTORE_SS) {
+			printf("[FAIL]\tUC_STRICT_RESTORE_SS was set inappropriately\n");
+			nerrs++;
+		} else {
+			/*
+			 * DOSEMU transitions from 32-bit to 64-bit mode by
+			 * adjusting sigcontext, and it requires that this work
+			 * even if the saved SS is bogus.
+			 */
+			printf("\tCorrupting SS on return to 64-bit mode\n");
+			*ssptr(ctx) = 0;
+		}
+	}
+#endif
+
+	sig_trapped = sig;
+}
+
+#ifdef __x86_64__
+/* Tests recovery if !UC_STRICT_RESTORE_SS */
+static void sigusr2(int sig, siginfo_t *info, void *ctx_void)
+{
+	ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+	if (!(ctx->uc_flags & UC_STRICT_RESTORE_SS)) {
+		printf("[FAIL]\traise(2) didn't set UC_STRICT_RESTORE_SS\n");
+		nerrs++;
+		return;  /* We can't do the rest. */
+	}
+
+	ctx->uc_flags &= ~UC_STRICT_RESTORE_SS;
+	*ssptr(ctx) = 0;
+
+	/* Return.  The kernel should recover without sending another signal. */
+}
+
+static int test_nonstrict_ss(void)
+{
+	clearhandler(SIGUSR1);
+	clearhandler(SIGTRAP);
+	clearhandler(SIGSEGV);
+	clearhandler(SIGILL);
+	sethandler(SIGUSR2, sigusr2, 0);
+
+	nerrs = 0;
+
+	printf("[RUN]\tClear UC_STRICT_RESTORE_SS and corrupt SS\n");
+	raise(SIGUSR2);
+	if (!nerrs)
+		printf("[OK]\tIt worked\n");
+
+	return nerrs;
+}
+#endif
+
 /* Finds a usable code segment of the requested bitness. */
 int find_cs(int bitness)
 {
@@ -576,6 +732,12 @@
 		       errdesc, strsignal(sig_trapped));
 		return 0;
 	} else {
+		/*
+		 * This also implicitly tests UC_STRICT_RESTORE_SS:
+		 * We check that these signals set UC_STRICT_RESTORE_SS and,
+		 * if UC_STRICT_RESTORE_SS doesn't cause strict behavior,
+		 * then we won't get SIGSEGV.
+		 */
 		printf("[FAIL]\tDid not get SIGSEGV\n");
 		return 1;
 	}
@@ -632,6 +794,14 @@
 						    GDT3(gdt_data16_idx));
 	}
 
+#ifdef __x86_64__
+	/* Nasty ABI case: check SS corruption handling. */
+	sig_corrupt_final_ss = 1;
+	total_nerrs += test_valid_sigreturn(32, false, -1);
+	total_nerrs += test_valid_sigreturn(32, true, -1);
+	sig_corrupt_final_ss = 0;
+#endif
+
 	/*
 	 * We're done testing valid sigreturn cases.  Now we test states
 	 * for which sigreturn itself will succeed but the subsequent
@@ -680,5 +850,9 @@
 	if (gdt_npdata32_idx)
 		test_bad_iret(32, GDT3(gdt_npdata32_idx), -1);
 
+#ifdef __x86_64__
+	total_nerrs += test_nonstrict_ss();
+#endif
+
 	return total_nerrs ? 1 : 0;
 }

diff --git a/tools/testing/selftests/x86/syscall_nt.c b/tools/testing/selftests/x86/syscall_nt.c
index 60c06af4..43fcab36 100644
--- a/tools/testing/selftests/x86/syscall_nt.c
+++ b/tools/testing/selftests/x86/syscall_nt.c

@@ -17,6 +17,9 @@
 
 #include <stdio.h>
 #include <unistd.h>
+#include <string.h>
+#include <signal.h>
+#include <err.h>
 #include <sys/syscall.h>
 #include <asm/processor-flags.h>
 
@@ -26,6 +29,8 @@
 # define WIDTH "l"
 #endif
 
+static unsigned int nerrs;
+
 static unsigned long get_eflags(void)
 {
 	unsigned long eflags;
@@ -39,16 +44,52 @@
 		      : : "rm" (eflags) : "flags");
 }
 
-int main()
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+		       int flags)
+{
+	struct sigaction sa;
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_sigaction = handler;
+	sa.sa_flags = SA_SIGINFO | flags;
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(sig, &sa, 0))
+		err(1, "sigaction");
+}
+
+static void sigtrap(int sig, siginfo_t *si, void *ctx_void)
+{
+}
+
+static void do_it(unsigned long extraflags)
+{
+	unsigned long flags;
+
+	set_eflags(get_eflags() | extraflags);
+	syscall(SYS_getpid);
+	flags = get_eflags();
+	if ((flags & extraflags) == extraflags) {
+		printf("[OK]\tThe syscall worked and flags are still set\n");
+	} else {
+		printf("[FAIL]\tThe syscall worked but flags were cleared (flags = 0x%lx but expected 0x%lx set)\n",
+		       flags, extraflags);
+		nerrs++;
+	}
+}
+
+int main(void)
 {
 	printf("[RUN]\tSet NT and issue a syscall\n");
-	set_eflags(get_eflags() | X86_EFLAGS_NT);
-	syscall(SYS_getpid);
-	if (get_eflags() & X86_EFLAGS_NT) {
-		printf("[OK]\tThe syscall worked and NT is still set\n");
-		return 0;
-	} else {
-		printf("[FAIL]\tThe syscall worked but NT was cleared\n");
-		return 1;
-	}
+	do_it(X86_EFLAGS_NT);
+
+	/*
+	 * Now try it again with TF set -- TF forces returns via IRET in all
+	 * cases except non-ptregs-using 64-bit full fast path syscalls.
+	 */
+
+	sethandler(SIGTRAP, sigtrap, 0);
+
+	printf("[RUN]\tSet NT|TF and issue a syscall\n");
+	do_it(X86_EFLAGS_NT | X86_EFLAGS_TF);
+
+	return nerrs == 0 ? 0 : 1;
 }

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 043032c..00429b3 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c

@@ -1875,8 +1875,8 @@
 static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs)
 {
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
-
-	int sz = (nr_irqs - VGIC_NR_PRIVATE_IRQS) / 8;
+	int nr_longs = BITS_TO_LONGS(nr_irqs - VGIC_NR_PRIVATE_IRQS);
+	int sz = nr_longs * sizeof(unsigned long);
 	vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL);
 	vgic_cpu->active_shared = kzalloc(sz, GFP_KERNEL);
 	vgic_cpu->pend_act_shared = kzalloc(sz, GFP_KERNEL);

diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 3531599..65da997 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c

@@ -97,8 +97,8 @@
 	 * This memory barrier pairs with prepare_to_wait's set_current_state()
 	 */
 	smp_mb();
-	if (waitqueue_active(&vcpu->wq))
-		wake_up_interruptible(&vcpu->wq);
+	if (swait_active(&vcpu->wq))
+		swake_up(&vcpu->wq);
 
 	mmput(mm);
 	kvm_put_kvm(vcpu->kvm);
@@ -172,7 +172,7 @@
 	 * do alloc nowait since if we are going to sleep anyway we
 	 * may as well sleep faulting in page
 	 */
-	work = kmem_cache_zalloc(async_pf_cache, GFP_NOWAIT);
+	work = kmem_cache_zalloc(async_pf_cache, GFP_NOWAIT | __GFP_NOWARN);
 	if (!work)
 		return 0;
 

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index a11cfd2..5af50c3 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c

@@ -216,8 +216,7 @@
 	vcpu->kvm = kvm;
 	vcpu->vcpu_id = id;
 	vcpu->pid = NULL;
-	vcpu->halt_poll_ns = 0;
-	init_waitqueue_head(&vcpu->wq);
+	init_swait_queue_head(&vcpu->wq);
 	kvm_async_pf_vcpu_init(vcpu);
 
 	vcpu->pre_pcpu = -1;
@@ -1952,6 +1951,9 @@
 	else
 		val *= halt_poll_ns_grow;
 
+	if (val > halt_poll_ns)
+		val = halt_poll_ns;
+
 	vcpu->halt_poll_ns = val;
 	trace_kvm_halt_poll_ns_grow(vcpu->vcpu_id, val, old);
 }
@@ -1990,7 +1992,7 @@
 void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 {
 	ktime_t start, cur;
-	DEFINE_WAIT(wait);
+	DECLARE_SWAITQUEUE(wait);
 	bool waited = false;
 	u64 block_ns;
 
@@ -2015,7 +2017,7 @@
 	kvm_arch_vcpu_blocking(vcpu);
 
 	for (;;) {
-		prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
+		prepare_to_swait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
 
 		if (kvm_vcpu_check_block(vcpu) < 0)
 			break;
@@ -2024,7 +2026,7 @@
 		schedule();
 	}
 
-	finish_wait(&vcpu->wq, &wait);
+	finish_swait(&vcpu->wq, &wait);
 	cur = ktime_get();
 
 	kvm_arch_vcpu_unblocking(vcpu);
@@ -2056,11 +2058,11 @@
 {
 	int me;
 	int cpu = vcpu->cpu;
-	wait_queue_head_t *wqp;
+	struct swait_queue_head *wqp;
 
 	wqp = kvm_arch_vcpu_wq(vcpu);
-	if (waitqueue_active(wqp)) {
-		wake_up_interruptible(wqp);
+	if (swait_active(wqp)) {
+		swake_up(wqp);
 		++vcpu->stat.halt_wakeup;
 	}
 
@@ -2161,7 +2163,7 @@
 				continue;
 			if (vcpu == me)
 				continue;
-			if (waitqueue_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu))
+			if (swait_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu))
 				continue;
 			if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
 				continue;
commit	5ca5446ec5ba5e79a6f271cd026bb153d6850fcc	[log] [tgz]
author	Linus Torvalds <torvalds@linux-foundation.org>	Tue Mar 15 20:23:13 2016 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	Tue Mar 15 20:23:13 2016 -0700
tree	ba6b9a309d5f8730a01002db389e05ea7f784f9c
parent	710d60cbf1b312a8075a2158cbfbbd9c66132dcc [diff]
parent	3c177a166253653bf9c377eb28a5155ea2d9b631 [diff]