Merge branch 'i2c/for-current' of git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux

Pull i2c fixes from Wolfram Sang:
 "Here are two I2C driver regression fixes.  piix4 gets a larger
  overhaul fixing the latest refactoring and also an older known issue
  as well.  designware-pci gets a fix for a bad merge conflict
  resolution"

* 'i2c/for-current' of git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux:
  i2c: piix4: don't regress on bus names
  i2c: designware-pci: use IRQF_COND_SUSPEND flag
  i2c: piix4: Fully initialize SB800 before it is registered
  i2c: piix4: Fix SB800 locking
diff --git a/.mailmap b/.mailmap
index b1e9a97..7e6c533 100644
--- a/.mailmap
+++ b/.mailmap
@@ -21,6 +21,7 @@
 Andrew Morton <akpm@linux-foundation.org>
 Andrew Vasquez <andrew.vasquez@qlogic.com>
 Andy Adamson <andros@citi.umich.edu>
+Antonio Ospite <ao2@ao2.it> <ao2@amarulasolutions.com>
 Archit Taneja <archit@ti.com>
 Arnaud Patard <arnaud.patard@rtp-net.org>
 Arnd Bergmann <arnd@arndb.de>
diff --git a/Documentation/Intel-IOMMU.txt b/Documentation/Intel-IOMMU.txt
index 7b57fc0..49585b6 100644
--- a/Documentation/Intel-IOMMU.txt
+++ b/Documentation/Intel-IOMMU.txt
@@ -3,7 +3,7 @@
 
 The architecture spec can be obtained from the below location.
 
-http://www.intel.com/technology/virtualization/
+http://www.intel.com/content/dam/www/public/us/en/documents/product-specifications/vt-directed-io-spec.pdf
 
 This guide gives a quick cheat sheet for some basic understanding.
 
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 053f613..07e4cdf 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -3025,7 +3025,7 @@
 and it must not exceed (max_vcpus + 32) * sizeof(struct kvm_s390_irq),
 which is the maximum number of possibly pending cpu-local interrupts.
 
-4.90 KVM_SMI
+4.96 KVM_SMI
 
 Capability: KVM_CAP_X86_SMM
 Architectures: x86
diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
index 4c23a68..7a6a58e 100644
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -106,6 +106,15 @@
 KBUILD_CFLAGS = $(subst -pg, , $(ORIG_CFLAGS))
 endif
 
+# -fstack-protector-strong triggers protection checks in this code,
+# but it is being used too early to link to meaningful stack_chk logic.
+nossp_flags := $(call cc-option, -fno-stack-protector)
+CFLAGS_atags_to_fdt.o := $(nossp_flags)
+CFLAGS_fdt.o := $(nossp_flags)
+CFLAGS_fdt_ro.o := $(nossp_flags)
+CFLAGS_fdt_rw.o := $(nossp_flags)
+CFLAGS_fdt_wip.o := $(nossp_flags)
+
 ccflags-y := -fpic -mno-single-pic-base -fno-builtin -I$(obj)
 asflags-y := -DZIMAGE
 
diff --git a/arch/arm/include/uapi/asm/unistd.h b/arch/arm/include/uapi/asm/unistd.h
index ede692f..5dd2528 100644
--- a/arch/arm/include/uapi/asm/unistd.h
+++ b/arch/arm/include/uapi/asm/unistd.h
@@ -417,6 +417,7 @@
 #define __NR_userfaultfd		(__NR_SYSCALL_BASE+388)
 #define __NR_membarrier			(__NR_SYSCALL_BASE+389)
 #define __NR_mlock2			(__NR_SYSCALL_BASE+390)
+#define __NR_copy_file_range		(__NR_SYSCALL_BASE+391)
 
 /*
  * The following SWIs are ARM private.
diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S
index ac368bb..dfc7cd6 100644
--- a/arch/arm/kernel/calls.S
+++ b/arch/arm/kernel/calls.S
@@ -400,6 +400,7 @@
 		CALL(sys_userfaultfd)
 		CALL(sys_membarrier)
 		CALL(sys_mlock2)
+		CALL(sys_copy_file_range)
 #ifndef syscalls_counted
 .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
 #define syscalls_counted
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index cd822d8..307237c 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -27,6 +27,8 @@
 endif
 
 KBUILD_CFLAGS	+= -mgeneral-regs-only $(lseinstr)
+KBUILD_CFLAGS	+= -fno-asynchronous-unwind-tables
+KBUILD_CFLAGS	+= $(call cc-option, -mpc-relative-literal-loads)
 KBUILD_AFLAGS	+= $(lseinstr)
 
 ifeq ($(CONFIG_CPU_BIG_ENDIAN), y)
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 18ca9fb..86581f7 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -16,7 +16,6 @@
 CONFIG_LOG_BUF_SHIFT=14
 CONFIG_MEMCG=y
 CONFIG_MEMCG_SWAP=y
-CONFIG_MEMCG_KMEM=y
 CONFIG_CGROUP_HUGETLB=y
 # CONFIG_UTS_NS is not set
 # CONFIG_IPC_NS is not set
@@ -37,15 +36,13 @@
 CONFIG_ARCH_LAYERSCAPE=y
 CONFIG_ARCH_HISI=y
 CONFIG_ARCH_MEDIATEK=y
+CONFIG_ARCH_QCOM=y
 CONFIG_ARCH_ROCKCHIP=y
 CONFIG_ARCH_SEATTLE=y
 CONFIG_ARCH_RENESAS=y
 CONFIG_ARCH_R8A7795=y
 CONFIG_ARCH_STRATIX10=y
 CONFIG_ARCH_TEGRA=y
-CONFIG_ARCH_TEGRA_132_SOC=y
-CONFIG_ARCH_TEGRA_210_SOC=y
-CONFIG_ARCH_QCOM=y
 CONFIG_ARCH_SPRD=y
 CONFIG_ARCH_THUNDER=y
 CONFIG_ARCH_UNIPHIER=y
@@ -54,14 +51,19 @@
 CONFIG_ARCH_ZYNQMP=y
 CONFIG_PCI=y
 CONFIG_PCI_MSI=y
+CONFIG_PCI_IOV=y
+CONFIG_PCI_RCAR_GEN2_PCIE=y
 CONFIG_PCI_HOST_GENERIC=y
 CONFIG_PCI_XGENE=y
-CONFIG_SMP=y
+CONFIG_PCI_LAYERSCAPE=y
+CONFIG_PCI_HISI=y
+CONFIG_PCIE_QCOM=y
 CONFIG_SCHED_MC=y
 CONFIG_PREEMPT=y
 CONFIG_KSM=y
 CONFIG_TRANSPARENT_HUGEPAGE=y
 CONFIG_CMA=y
+CONFIG_XEN=y
 CONFIG_CMDLINE="console=ttyAMA0"
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
 CONFIG_COMPAT=y
@@ -100,7 +102,11 @@
 CONFIG_NETDEVICES=y
 CONFIG_TUN=y
 CONFIG_VIRTIO_NET=y
+CONFIG_AMD_XGBE=y
 CONFIG_NET_XGENE=y
+CONFIG_E1000E=y
+CONFIG_IGB=y
+CONFIG_IGBVF=y
 CONFIG_SKY2=y
 CONFIG_RAVB=y
 CONFIG_SMC91X=y
@@ -117,25 +123,23 @@
 CONFIG_SERIAL_8250_DW=y
 CONFIG_SERIAL_8250_MT6577=y
 CONFIG_SERIAL_8250_UNIPHIER=y
+CONFIG_SERIAL_OF_PLATFORM=y
 CONFIG_SERIAL_AMBA_PL011=y
 CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
 CONFIG_SERIAL_SAMSUNG=y
-CONFIG_SERIAL_SAMSUNG_UARTS_4=y
-CONFIG_SERIAL_SAMSUNG_UARTS=4
 CONFIG_SERIAL_SAMSUNG_CONSOLE=y
+CONFIG_SERIAL_TEGRA=y
 CONFIG_SERIAL_SH_SCI=y
 CONFIG_SERIAL_SH_SCI_NR_UARTS=11
 CONFIG_SERIAL_SH_SCI_CONSOLE=y
-CONFIG_SERIAL_TEGRA=y
 CONFIG_SERIAL_MSM=y
 CONFIG_SERIAL_MSM_CONSOLE=y
-CONFIG_SERIAL_OF_PLATFORM=y
 CONFIG_SERIAL_XILINX_PS_UART=y
 CONFIG_SERIAL_XILINX_PS_UART_CONSOLE=y
 CONFIG_VIRTIO_CONSOLE=y
 # CONFIG_HW_RANDOM is not set
-CONFIG_I2C=y
 CONFIG_I2C_QUP=y
+CONFIG_I2C_UNIPHIER_F=y
 CONFIG_I2C_RCAR=y
 CONFIG_SPI=y
 CONFIG_SPI_PL022=y
@@ -176,8 +180,6 @@
 CONFIG_MMC_SDHCI_TEGRA=y
 CONFIG_MMC_SPI=y
 CONFIG_MMC_DW=y
-CONFIG_MMC_DW_IDMAC=y
-CONFIG_MMC_DW_PLTFM=y
 CONFIG_MMC_DW_EXYNOS=y
 CONFIG_NEW_LEDS=y
 CONFIG_LEDS_CLASS=y
@@ -187,28 +189,33 @@
 CONFIG_LEDS_TRIGGER_CPU=y
 CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_EFI=y
+CONFIG_RTC_DRV_PL031=y
 CONFIG_RTC_DRV_XGENE=y
 CONFIG_DMADEVICES=y
-CONFIG_RCAR_DMAC=y
 CONFIG_QCOM_BAM_DMA=y
 CONFIG_TEGRA20_APB_DMA=y
+CONFIG_RCAR_DMAC=y
+CONFIG_VFIO=y
+CONFIG_VFIO_PCI=y
 CONFIG_VIRTIO_PCI=y
 CONFIG_VIRTIO_BALLOON=y
 CONFIG_VIRTIO_MMIO=y
+CONFIG_XEN_GNTDEV=y
+CONFIG_XEN_GRANT_DEV_ALLOC=y
 CONFIG_COMMON_CLK_CS2000_CP=y
 CONFIG_COMMON_CLK_QCOM=y
 CONFIG_MSM_GCC_8916=y
 CONFIG_HWSPINLOCK_QCOM=y
-# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_ARM_SMMU=y
 CONFIG_QCOM_SMEM=y
 CONFIG_QCOM_SMD=y
 CONFIG_QCOM_SMD_RPM=y
+CONFIG_ARCH_TEGRA_132_SOC=y
+CONFIG_ARCH_TEGRA_210_SOC=y
+CONFIG_HISILICON_IRQ_MBIGEN=y
 CONFIG_PHY_XGENE=y
 CONFIG_EXT2_FS=y
 CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-# CONFIG_EXT3_FS_XATTR is not set
-CONFIG_EXT4_FS=y
 CONFIG_FANOTIFY=y
 CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
 CONFIG_QUOTA=y
@@ -239,6 +246,7 @@
 # CONFIG_FTRACE is not set
 CONFIG_MEMTEST=y
 CONFIG_SECURITY=y
+CONFIG_CRYPTO_ECHAINIV=y
 CONFIG_CRYPTO_ANSI_CPRNG=y
 CONFIG_ARM64_CRYPTO=y
 CONFIG_CRYPTO_SHA1_ARM64_CE=y
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 2d545d7a..bf464de 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -67,11 +67,11 @@
 #define PROT_DEFAULT		(PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
 #define PROT_SECT_DEFAULT	(PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
 
-#define PROT_DEVICE_nGnRnE	(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRnE))
-#define PROT_DEVICE_nGnRE	(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRE))
-#define PROT_NORMAL_NC		(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL_NC))
-#define PROT_NORMAL_WT		(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL_WT))
-#define PROT_NORMAL		(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL))
+#define PROT_DEVICE_nGnRnE	(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE))
+#define PROT_DEVICE_nGnRE	(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE))
+#define PROT_NORMAL_NC		(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC))
+#define PROT_NORMAL_WT		(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_WT))
+#define PROT_NORMAL		(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL))
 
 #define PROT_SECT_DEVICE_nGnRE	(PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE))
 #define PROT_SECT_NORMAL	(PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
@@ -81,7 +81,7 @@
 
 #define PAGE_KERNEL		__pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE)
 #define PAGE_KERNEL_RO		__pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
-#define PAGE_KERNEL_ROX	__pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
+#define PAGE_KERNEL_ROX		__pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
 #define PAGE_KERNEL_EXEC	__pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE)
 #define PAGE_KERNEL_EXEC_CONT	__pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT)
 
@@ -153,6 +153,7 @@
 #define pte_write(pte)		(!!(pte_val(pte) & PTE_WRITE))
 #define pte_exec(pte)		(!(pte_val(pte) & PTE_UXN))
 #define pte_cont(pte)		(!!(pte_val(pte) & PTE_CONT))
+#define pte_user(pte)		(!!(pte_val(pte) & PTE_USER))
 
 #ifdef CONFIG_ARM64_HW_AFDBM
 #define pte_hw_dirty(pte)	(pte_write(pte) && !(pte_val(pte) & PTE_RDONLY))
@@ -163,8 +164,6 @@
 #define pte_dirty(pte)		(pte_sw_dirty(pte) || pte_hw_dirty(pte))
 
 #define pte_valid(pte)		(!!(pte_val(pte) & PTE_VALID))
-#define pte_valid_user(pte) \
-	((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER))
 #define pte_valid_not_user(pte) \
 	((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID)
 #define pte_valid_young(pte) \
@@ -278,13 +277,13 @@
 static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
 			      pte_t *ptep, pte_t pte)
 {
-	if (pte_valid_user(pte)) {
-		if (!pte_special(pte) && pte_exec(pte))
-			__sync_icache_dcache(pte, addr);
+	if (pte_valid(pte)) {
 		if (pte_sw_dirty(pte) && pte_write(pte))
 			pte_val(pte) &= ~PTE_RDONLY;
 		else
 			pte_val(pte) |= PTE_RDONLY;
+		if (pte_user(pte) && pte_exec(pte) && !pte_special(pte))
+			__sync_icache_dcache(pte, addr);
 	}
 
 	/*
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index ffe9c2b6..917d981 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -514,9 +514,14 @@
 #endif
 
 	/* EL2 debug */
+	mrs	x0, id_aa64dfr0_el1		// Check ID_AA64DFR0_EL1 PMUVer
+	sbfx	x0, x0, #8, #4
+	cmp	x0, #1
+	b.lt	4f				// Skip if no PMU present
 	mrs	x0, pmcr_el0			// Disable debug access traps
 	ubfx	x0, x0, #11, #5			// to EL2 and allow access to
 	msr	mdcr_el2, x0			// all PMU counters from EL1
+4:
 
 	/* Stage-2 translation */
 	msr	vttbr_el2, xzr
diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h
index bc2abb8..999633b 100644
--- a/arch/arm64/kernel/image.h
+++ b/arch/arm64/kernel/image.h
@@ -65,6 +65,16 @@
 #ifdef CONFIG_EFI
 
 /*
+ * Prevent the symbol aliases below from being emitted into the kallsyms
+ * table, by forcing them to be absolute symbols (which are conveniently
+ * ignored by scripts/kallsyms) rather than section relative symbols.
+ * The distinction is only relevant for partial linking, and only for symbols
+ * that are defined within a section declaration (which is not the case for
+ * the definitions below) so the resulting values will be identical.
+ */
+#define KALLSYMS_HIDE(sym)	ABSOLUTE(sym)
+
+/*
  * The EFI stub has its own symbol namespace prefixed by __efistub_, to
  * isolate it from the kernel proper. The following symbols are legally
  * accessed by the stub, so provide some aliases to make them accessible.
@@ -73,25 +83,25 @@
  * linked at. The routines below are all implemented in assembler in a
  * position independent manner
  */
-__efistub_memcmp		= __pi_memcmp;
-__efistub_memchr		= __pi_memchr;
-__efistub_memcpy		= __pi_memcpy;
-__efistub_memmove		= __pi_memmove;
-__efistub_memset		= __pi_memset;
-__efistub_strlen		= __pi_strlen;
-__efistub_strcmp		= __pi_strcmp;
-__efistub_strncmp		= __pi_strncmp;
-__efistub___flush_dcache_area	= __pi___flush_dcache_area;
+__efistub_memcmp		= KALLSYMS_HIDE(__pi_memcmp);
+__efistub_memchr		= KALLSYMS_HIDE(__pi_memchr);
+__efistub_memcpy		= KALLSYMS_HIDE(__pi_memcpy);
+__efistub_memmove		= KALLSYMS_HIDE(__pi_memmove);
+__efistub_memset		= KALLSYMS_HIDE(__pi_memset);
+__efistub_strlen		= KALLSYMS_HIDE(__pi_strlen);
+__efistub_strcmp		= KALLSYMS_HIDE(__pi_strcmp);
+__efistub_strncmp		= KALLSYMS_HIDE(__pi_strncmp);
+__efistub___flush_dcache_area	= KALLSYMS_HIDE(__pi___flush_dcache_area);
 
 #ifdef CONFIG_KASAN
-__efistub___memcpy		= __pi_memcpy;
-__efistub___memmove		= __pi_memmove;
-__efistub___memset		= __pi_memset;
+__efistub___memcpy		= KALLSYMS_HIDE(__pi_memcpy);
+__efistub___memmove		= KALLSYMS_HIDE(__pi_memmove);
+__efistub___memset		= KALLSYMS_HIDE(__pi_memset);
 #endif
 
-__efistub__text			= _text;
-__efistub__end			= _end;
-__efistub__edata		= _edata;
+__efistub__text			= KALLSYMS_HIDE(_text);
+__efistub__end			= KALLSYMS_HIDE(_end);
+__efistub__edata		= KALLSYMS_HIDE(_edata);
 
 #endif
 
diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c
index 5a22a11..0adbebb 100644
--- a/arch/arm64/mm/dump.c
+++ b/arch/arm64/mm/dump.c
@@ -46,7 +46,7 @@
 	PCI_START_NR,
 	PCI_END_NR,
 	MODULES_START_NR,
-	MODUELS_END_NR,
+	MODULES_END_NR,
 	KERNEL_SPACE_NR,
 };
 
diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
index cf038c7..cab7a5b 100644
--- a/arch/arm64/mm/kasan_init.c
+++ b/arch/arm64/mm/kasan_init.c
@@ -120,6 +120,7 @@
 void __init kasan_init(void)
 {
 	struct memblock_region *reg;
+	int i;
 
 	/*
 	 * We are going to perform proper setup of shadow memory.
@@ -155,6 +156,14 @@
 				pfn_to_nid(virt_to_pfn(start)));
 	}
 
+	/*
+	 * KAsan may reuse the contents of kasan_zero_pte directly, so we
+	 * should make sure that it maps the zero page read-only.
+	 */
+	for (i = 0; i < PTRS_PER_PTE; i++)
+		set_pte(&kasan_zero_pte[i],
+			pfn_pte(virt_to_pfn(kasan_zero_page), PAGE_KERNEL_RO));
+
 	memset(kasan_zero_page, 0, PAGE_SIZE);
 	cpu_set_ttbr1(__pa(swapper_pg_dir));
 	flush_tlb_all();
diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
index 3571c73..cf62407 100644
--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -57,6 +57,9 @@
 	if (end < MODULES_VADDR || end >= MODULES_END)
 		return -EINVAL;
 
+	if (!numpages)
+		return 0;
+
 	data.set_mask = set_mask;
 	data.clear_mask = clear_mask;
 
diff --git a/arch/arm64/mm/proc-macros.S b/arch/arm64/mm/proc-macros.S
index 146bd99..e6a30e1 100644
--- a/arch/arm64/mm/proc-macros.S
+++ b/arch/arm64/mm/proc-macros.S
@@ -84,3 +84,15 @@
 	b.lo	9998b
 	dsb	\domain
 	.endm
+
+/*
+ * reset_pmuserenr_el0 - reset PMUSERENR_EL0 if PMUv3 present
+ */
+	.macro	reset_pmuserenr_el0, tmpreg
+	mrs	\tmpreg, id_aa64dfr0_el1	// Check ID_AA64DFR0_EL1 PMUVer
+	sbfx	\tmpreg, \tmpreg, #8, #4
+	cmp	\tmpreg, #1			// Skip if no PMU present
+	b.lt	9000f
+	msr	pmuserenr_el0, xzr		// Disable PMU access from EL0
+9000:
+	.endm
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index a3d867e..c164d2c 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -117,7 +117,7 @@
 	 */
 	ubfx	x11, x11, #1, #1
 	msr	oslar_el1, x11
-	msr	pmuserenr_el0, xzr		// Disable PMU access from EL0
+	reset_pmuserenr_el0 x0			// Disable PMU access from EL0
 	mov	x0, x12
 	dsb	nsh		// Make sure local tlb invalidation completed
 	isb
@@ -154,7 +154,7 @@
 	msr	cpacr_el1, x0			// Enable FP/ASIMD
 	mov	x0, #1 << 12			// Reset mdscr_el1 and disable
 	msr	mdscr_el1, x0			// access to the DCC from EL0
-	msr	pmuserenr_el0, xzr		// Disable PMU access from EL0
+	reset_pmuserenr_el0 x0			// Disable PMU access from EL0
 	/*
 	 * Memory region attributes for LPAE:
 	 *
diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index 06f17e7..8d1c816 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -50,7 +50,9 @@
  * set of bits not changed in pmd_modify.
  */
 #define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \
-			 _PAGE_ACCESSED | _PAGE_THP_HUGE)
+			 _PAGE_ACCESSED | _PAGE_THP_HUGE | _PAGE_PTE | \
+			 _PAGE_SOFT_DIRTY)
+
 
 #ifdef CONFIG_PPC_64K_PAGES
 #include <asm/book3s/64/hash-64k.h>
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 8204b0c..8d1c41d 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -223,7 +223,6 @@
 #define pmd_pfn(pmd)		pte_pfn(pmd_pte(pmd))
 #define pmd_dirty(pmd)		pte_dirty(pmd_pte(pmd))
 #define pmd_young(pmd)		pte_young(pmd_pte(pmd))
-#define pmd_dirty(pmd)		pte_dirty(pmd_pte(pmd))
 #define pmd_mkold(pmd)		pte_pmd(pte_mkold(pmd_pte(pmd)))
 #define pmd_wrprotect(pmd)	pte_pmd(pte_wrprotect(pmd_pte(pmd)))
 #define pmd_mkdirty(pmd)	pte_pmd(pte_mkdirty(pmd_pte(pmd)))
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 271fefb..9d08d8c 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -38,8 +38,7 @@
 
 #define KVM_MAX_VCPUS		NR_CPUS
 #define KVM_MAX_VCORES		NR_CPUS
-#define KVM_USER_MEM_SLOTS 32
-#define KVM_MEM_SLOTS_NUM KVM_USER_MEM_SLOTS
+#define KVM_USER_MEM_SLOTS	512
 
 #ifdef CONFIG_KVM_MMIO
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h
index 5654ece..3fa9df7 100644
--- a/arch/powerpc/include/asm/systbl.h
+++ b/arch/powerpc/include/asm/systbl.h
@@ -383,3 +383,4 @@
 SYSCALL(ni_syscall)
 SYSCALL(ni_syscall)
 SYSCALL(mlock2)
+SYSCALL(copy_file_range)
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index 6a5ace5..1f2594d 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -12,7 +12,7 @@
 #include <uapi/asm/unistd.h>
 
 
-#define NR_syscalls		379
+#define NR_syscalls		380
 
 #define __NR__exit __NR_exit
 
diff --git a/arch/powerpc/include/uapi/asm/unistd.h b/arch/powerpc/include/uapi/asm/unistd.h
index 12a0565..940290d 100644
--- a/arch/powerpc/include/uapi/asm/unistd.h
+++ b/arch/powerpc/include/uapi/asm/unistd.h
@@ -389,5 +389,6 @@
 #define __NR_userfaultfd	364
 #define __NR_membarrier		365
 #define __NR_mlock2		378
+#define __NR_copy_file_range	379
 
 #endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index 8654cb1..ca9e537 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -883,32 +883,29 @@
 const char *eeh_pe_loc_get(struct eeh_pe *pe)
 {
 	struct pci_bus *bus = eeh_pe_bus_get(pe);
-	struct device_node *dn = pci_bus_to_OF_node(bus);
+	struct device_node *dn;
 	const char *loc = NULL;
 
-	if (!dn)
-		goto out;
+	while (bus) {
+		dn = pci_bus_to_OF_node(bus);
+		if (!dn) {
+			bus = bus->parent;
+			continue;
+		}
 
-	/* PHB PE or root PE ? */
-	if (pci_is_root_bus(bus)) {
-		loc = of_get_property(dn, "ibm,loc-code", NULL);
-		if (!loc)
+		if (pci_is_root_bus(bus))
 			loc = of_get_property(dn, "ibm,io-base-loc-code", NULL);
-		if (loc)
-			goto out;
+		else
+			loc = of_get_property(dn, "ibm,slot-location-code",
+					      NULL);
 
-		/* Check the root port */
-		dn = dn->child;
-		if (!dn)
-			goto out;
+		if (loc)
+			return loc;
+
+		bus = bus->parent;
 	}
 
-	loc = of_get_property(dn, "ibm,loc-code", NULL);
-	if (!loc)
-		loc = of_get_property(dn, "ibm,slot-location-code", NULL);
-
-out:
-	return loc ? loc : "N/A";
+	return "N/A";
 }
 
 /**
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index db475d4..f28754c 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -701,31 +701,3 @@
 	li	r5,0
 	blr	/* image->start(physid, image->start, 0); */
 #endif /* CONFIG_KEXEC */
-
-#ifdef CONFIG_MODULES
-#if defined(_CALL_ELF) && _CALL_ELF == 2
-
-#ifdef CONFIG_MODVERSIONS
-.weak __crc_TOC.
-.section "___kcrctab+TOC.","a"
-.globl __kcrctab_TOC.
-__kcrctab_TOC.:
-	.llong	__crc_TOC.
-#endif
-
-/*
- * Export a fake .TOC. since both modpost and depmod will complain otherwise.
- * Both modpost and depmod strip the leading . so we do the same here.
- */
-.section "__ksymtab_strings","a"
-__kstrtab_TOC.:
-	.asciz "TOC."
-
-.section "___ksymtab+TOC.","a"
-/* This symbol name is important: it's used by modpost to find exported syms */
-.globl __ksymtab_TOC.
-__ksymtab_TOC.:
-	.llong 0 /* .value */
-	.llong __kstrtab_TOC.
-#endif /* ELFv2 */
-#endif /* MODULES */
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index 59663af..ac64ffd 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -326,7 +326,10 @@
 		}
 }
 
-/* Undefined symbols which refer to .funcname, hack to funcname (or .TOC.) */
+/*
+ * Undefined symbols which refer to .funcname, hack to funcname. Make .TOC.
+ * seem to be defined (value set later).
+ */
 static void dedotify(Elf64_Sym *syms, unsigned int numsyms, char *strtab)
 {
 	unsigned int i;
@@ -334,8 +337,11 @@
 	for (i = 1; i < numsyms; i++) {
 		if (syms[i].st_shndx == SHN_UNDEF) {
 			char *name = strtab + syms[i].st_name;
-			if (name[0] == '.')
+			if (name[0] == '.') {
+				if (strcmp(name+1, "TOC.") == 0)
+					syms[i].st_shndx = SHN_ABS;
 				memmove(name, name+1, strlen(name));
+			}
 		}
 	}
 }
@@ -351,7 +357,7 @@
 	numsyms = sechdrs[symindex].sh_size / sizeof(Elf64_Sym);
 
 	for (i = 1; i < numsyms; i++) {
-		if (syms[i].st_shndx == SHN_UNDEF
+		if (syms[i].st_shndx == SHN_ABS
 		    && strcmp(strtab + syms[i].st_name, "TOC.") == 0)
 			return &syms[i];
 	}
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index 774a253..9bf7031 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -377,15 +377,12 @@
 
 static void kvmppc_mmu_book3s_64_slbmte(struct kvm_vcpu *vcpu, u64 rs, u64 rb)
 {
-	struct kvmppc_vcpu_book3s *vcpu_book3s;
 	u64 esid, esid_1t;
 	int slb_nr;
 	struct kvmppc_slb *slbe;
 
 	dprintk("KVM MMU: slbmte(0x%llx, 0x%llx)\n", rs, rb);
 
-	vcpu_book3s = to_book3s(vcpu);
-
 	esid = GET_ESID(rb);
 	esid_1t = GET_ESID_1T(rb);
 	slb_nr = rb & 0xfff;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index cff207b..baeddb0 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -833,6 +833,24 @@
 
 	vcpu->stat.sum_exits++;
 
+	/*
+	 * This can happen if an interrupt occurs in the last stages
+	 * of guest entry or the first stages of guest exit (i.e. after
+	 * setting paca->kvm_hstate.in_guest to KVM_GUEST_MODE_GUEST_HV
+	 * and before setting it to KVM_GUEST_MODE_HOST_HV).
+	 * That can happen due to a bug, or due to a machine check
+	 * occurring at just the wrong time.
+	 */
+	if (vcpu->arch.shregs.msr & MSR_HV) {
+		printk(KERN_EMERG "KVM trap in HV mode!\n");
+		printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n",
+			vcpu->arch.trap, kvmppc_get_pc(vcpu),
+			vcpu->arch.shregs.msr);
+		kvmppc_dump_regs(vcpu);
+		run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+		run->hw.hardware_exit_reason = vcpu->arch.trap;
+		return RESUME_HOST;
+	}
 	run->exit_reason = KVM_EXIT_UNKNOWN;
 	run->ready_for_interrupt_injection = 1;
 	switch (vcpu->arch.trap) {
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 3c6badc..6ee26de 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -2153,7 +2153,7 @@
 
 	/* Emulate H_SET_DABR/X on P8 for the sake of compat mode guests */
 2:	rlwimi	r5, r4, 5, DAWRX_DR | DAWRX_DW
-	rlwimi	r5, r4, 1, DAWRX_WT
+	rlwimi	r5, r4, 2, DAWRX_WT
 	clrrdi	r4, r4, 3
 	std	r4, VCPU_DAWR(r3)
 	std	r5, VCPU_DAWRX(r3)
@@ -2404,6 +2404,8 @@
 	 * guest as machine check causing guest to crash.
 	 */
 	ld	r11, VCPU_MSR(r9)
+	rldicl.	r0, r11, 64-MSR_HV_LG, 63 /* check if it happened in HV mode */
+	bne	mc_cont			/* if so, exit to host */
 	andi.	r10, r11, MSR_RI	/* check for unrecoverable exception */
 	beq	1f			/* Deliver a machine check to guest */
 	ld	r10, VCPU_PC(r9)
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 6fd2405..a3b182d 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -919,21 +919,17 @@
 				r = -ENXIO;
 				break;
 			}
-			vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0] = val.vval;
+			val.vval = vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0];
 			break;
 		case KVM_REG_PPC_VSCR:
 			if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
 				r = -ENXIO;
 				break;
 			}
-			vcpu->arch.vr.vscr.u[3] = set_reg_val(reg->id, val);
+			val = get_reg_val(reg->id, vcpu->arch.vr.vscr.u[3]);
 			break;
 		case KVM_REG_PPC_VRSAVE:
-			if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
-				r = -ENXIO;
-				break;
-			}
-			vcpu->arch.vrsave = set_reg_val(reg->id, val);
+			val = get_reg_val(reg->id, vcpu->arch.vrsave);
 			break;
 #endif /* CONFIG_ALTIVEC */
 		default:
@@ -974,17 +970,21 @@
 				r = -ENXIO;
 				break;
 			}
-			val.vval = vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0];
+			vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0] = val.vval;
 			break;
 		case KVM_REG_PPC_VSCR:
 			if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
 				r = -ENXIO;
 				break;
 			}
-			val = get_reg_val(reg->id, vcpu->arch.vr.vscr.u[3]);
+			vcpu->arch.vr.vscr.u[3] = set_reg_val(reg->id, val);
 			break;
 		case KVM_REG_PPC_VRSAVE:
-			val = get_reg_val(reg->id, vcpu->arch.vrsave);
+			if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
+				r = -ENXIO;
+				break;
+			}
+			vcpu->arch.vrsave = set_reg_val(reg->id, val);
 			break;
 #endif /* CONFIG_ALTIVEC */
 		default:
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 22d94c3..d0f0a51 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -560,12 +560,12 @@
  */
 int devmem_is_allowed(unsigned long pfn)
 {
+	if (page_is_rtas_user_buf(pfn))
+		return 1;
 	if (iomem_is_exclusive(PFN_PHYS(pfn)))
 		return 0;
 	if (!page_is_ram(pfn))
 		return 1;
-	if (page_is_rtas_user_buf(pfn))
-		return 1;
 	return 0;
 }
 #endif /* CONFIG_STRICT_DEVMEM */
diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c
index 7d5e295..9958ba8 100644
--- a/arch/powerpc/perf/power8-pmu.c
+++ b/arch/powerpc/perf/power8-pmu.c
@@ -816,7 +816,7 @@
 	.get_constraint		= power8_get_constraint,
 	.get_alternatives	= power8_get_alternatives,
 	.disable_pmc		= power8_disable_pmc,
-	.flags			= PPMU_HAS_SSLOT | PPMU_HAS_SIER | PPMU_ARCH_207S,
+	.flags			= PPMU_HAS_SIER | PPMU_ARCH_207S,
 	.n_generic		= ARRAY_SIZE(power8_generic_events),
 	.generic_events		= power8_generic_events,
 	.cache_events		= &power8_cache_events,
diff --git a/arch/s390/include/asm/irqflags.h b/arch/s390/include/asm/irqflags.h
index 16aa0c7..595a275 100644
--- a/arch/s390/include/asm/irqflags.h
+++ b/arch/s390/include/asm/irqflags.h
@@ -8,6 +8,8 @@
 
 #include <linux/types.h>
 
+#define ARCH_IRQ_ENABLED	(3UL << (BITS_PER_LONG - 8))
+
 /* store then OR system mask. */
 #define __arch_local_irq_stosm(__or)					\
 ({									\
@@ -54,14 +56,17 @@
 	__arch_local_irq_stosm(0x03);
 }
 
+/* This only restores external and I/O interrupt state */
 static inline notrace void arch_local_irq_restore(unsigned long flags)
 {
-	__arch_local_irq_ssm(flags);
+	/* only disabled->disabled and disabled->enabled is valid */
+	if (flags & ARCH_IRQ_ENABLED)
+		arch_local_irq_enable();
 }
 
 static inline notrace bool arch_irqs_disabled_flags(unsigned long flags)
 {
-	return !(flags & (3UL << (BITS_PER_LONG - 8)));
+	return !(flags & ARCH_IRQ_ENABLED);
 }
 
 static inline notrace bool arch_irqs_disabled(void)
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 6742414..8959ebb 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -546,7 +546,6 @@
 	struct kvm_s390_sie_block *sie_block;
 	unsigned int      host_acrs[NUM_ACRS];
 	struct fpu	  host_fpregs;
-	struct fpu	  guest_fpregs;
 	struct kvm_s390_local_interrupt local_int;
 	struct hrtimer    ckc_timer;
 	struct kvm_s390_pgm_info pgm;
diff --git a/arch/s390/include/asm/pci_io.h b/arch/s390/include/asm/pci_io.h
index 1a9a98d..69aa18b 100644
--- a/arch/s390/include/asm/pci_io.h
+++ b/arch/s390/include/asm/pci_io.h
@@ -8,10 +8,13 @@
 #include <asm/pci_insn.h>
 
 /* I/O Map */
-#define ZPCI_IOMAP_MAX_ENTRIES		0x7fff
-#define ZPCI_IOMAP_ADDR_BASE		0x8000000000000000ULL
-#define ZPCI_IOMAP_ADDR_IDX_MASK	0x7fff000000000000ULL
-#define ZPCI_IOMAP_ADDR_OFF_MASK	0x0000ffffffffffffULL
+#define ZPCI_IOMAP_SHIFT		48
+#define ZPCI_IOMAP_ADDR_BASE		0x8000000000000000UL
+#define ZPCI_IOMAP_ADDR_OFF_MASK	((1UL << ZPCI_IOMAP_SHIFT) - 1)
+#define ZPCI_IOMAP_MAX_ENTRIES							\
+	((ULONG_MAX - ZPCI_IOMAP_ADDR_BASE + 1) / (1UL << ZPCI_IOMAP_SHIFT))
+#define ZPCI_IOMAP_ADDR_IDX_MASK						\
+	(~ZPCI_IOMAP_ADDR_OFF_MASK - ZPCI_IOMAP_ADDR_BASE)
 
 struct zpci_iomap_entry {
 	u32 fh;
@@ -21,8 +24,9 @@
 
 extern struct zpci_iomap_entry *zpci_iomap_start;
 
+#define ZPCI_ADDR(idx) (ZPCI_IOMAP_ADDR_BASE | ((u64) idx << ZPCI_IOMAP_SHIFT))
 #define ZPCI_IDX(addr)								\
-	(((__force u64) addr & ZPCI_IOMAP_ADDR_IDX_MASK) >> 48)
+	(((__force u64) addr & ZPCI_IOMAP_ADDR_IDX_MASK) >> ZPCI_IOMAP_SHIFT)
 #define ZPCI_OFFSET(addr)							\
 	((__force u64) addr & ZPCI_IOMAP_ADDR_OFF_MASK)
 
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index f16debf..1c4fe12 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -166,14 +166,14 @@
  */
 #define start_thread(regs, new_psw, new_stackp) do {			\
 	regs->psw.mask	= PSW_USER_BITS | PSW_MASK_EA | PSW_MASK_BA;	\
-	regs->psw.addr	= new_psw | PSW_ADDR_AMODE;			\
+	regs->psw.addr	= new_psw;					\
 	regs->gprs[15]	= new_stackp;					\
 	execve_tail();							\
 } while (0)
 
 #define start_thread31(regs, new_psw, new_stackp) do {			\
 	regs->psw.mask	= PSW_USER_BITS | PSW_MASK_BA;			\
-	regs->psw.addr	= new_psw | PSW_ADDR_AMODE;			\
+	regs->psw.addr	= new_psw;					\
 	regs->gprs[15]	= new_stackp;					\
 	crst_table_downgrade(current->mm, 1UL << 31);			\
 	execve_tail();							\
diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
index f00cd35..99bc456 100644
--- a/arch/s390/include/asm/ptrace.h
+++ b/arch/s390/include/asm/ptrace.h
@@ -149,7 +149,7 @@
 #define arch_has_block_step()	(1)
 
 #define user_mode(regs) (((regs)->psw.mask & PSW_MASK_PSTATE) != 0)
-#define instruction_pointer(regs) ((regs)->psw.addr & PSW_ADDR_INSN)
+#define instruction_pointer(regs) ((regs)->psw.addr)
 #define user_stack_pointer(regs)((regs)->gprs[15])
 #define profile_pc(regs) instruction_pointer(regs)
 
@@ -161,7 +161,7 @@
 static inline void instruction_pointer_set(struct pt_regs *regs,
 					   unsigned long val)
 {
-	regs->psw.addr = val | PSW_ADDR_AMODE;
+	regs->psw.addr = val;
 }
 
 int regs_query_register_offset(const char *name);
@@ -171,7 +171,7 @@
 
 static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
 {
-	return regs->gprs[15] & PSW_ADDR_INSN;
+	return regs->gprs[15];
 }
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h
index 34ec202..ab3aa68 100644
--- a/arch/s390/include/uapi/asm/unistd.h
+++ b/arch/s390/include/uapi/asm/unistd.h
@@ -310,7 +310,8 @@
 #define __NR_recvmsg		372
 #define __NR_shutdown		373
 #define __NR_mlock2		374
-#define NR_syscalls 375
+#define __NR_copy_file_range	375
+#define NR_syscalls 376
 
 /* 
  * There are some system calls that are not present on 64 bit, some
diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c
index fac4eed..ae2cda5 100644
--- a/arch/s390/kernel/compat_wrapper.c
+++ b/arch/s390/kernel/compat_wrapper.c
@@ -177,3 +177,4 @@
 COMPAT_SYSCALL_WRAP3(getpeername, int, fd, struct sockaddr __user *, usockaddr, int __user *, usockaddr_len);
 COMPAT_SYSCALL_WRAP6(sendto, int, fd, void __user *, buff, size_t, len, unsigned int, flags, struct sockaddr __user *, addr, int, addr_len);
 COMPAT_SYSCALL_WRAP3(mlock2, unsigned long, start, size_t, len, int, flags);
+COMPAT_SYSCALL_WRAP6(copy_file_range, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags);
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index a92b39f..3986c9f 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -59,8 +59,6 @@
 	struct save_area *sa;
 
 	sa = (void *) memblock_alloc(sizeof(*sa), 8);
-	if (!sa)
-		return NULL;
 	if (is_boot_cpu)
 		list_add(&sa->list, &dump_save_areas);
 	else
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index 6fca0e4..c890a55 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -1470,7 +1470,7 @@
 		except_str = "*";
 	else
 		except_str = "-";
-	caller = ((unsigned long) entry->caller) & PSW_ADDR_INSN;
+	caller = (unsigned long) entry->caller;
 	rc += sprintf(out_buf, "%02i %011lld:%06lu %1u %1s %02i %p  ",
 		      area, (long long)time_spec.tv_sec,
 		      time_spec.tv_nsec / 1000, level, except_str,
diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c
index dc8e204..02bd02f 100644
--- a/arch/s390/kernel/dumpstack.c
+++ b/arch/s390/kernel/dumpstack.c
@@ -34,22 +34,21 @@
 	unsigned long addr;
 
 	while (1) {
-		sp = sp & PSW_ADDR_INSN;
 		if (sp < low || sp > high - sizeof(*sf))
 			return sp;
 		sf = (struct stack_frame *) sp;
-		addr = sf->gprs[8] & PSW_ADDR_INSN;
+		addr = sf->gprs[8];
 		printk("([<%016lx>] %pSR)\n", addr, (void *)addr);
 		/* Follow the backchain. */
 		while (1) {
 			low = sp;
-			sp = sf->back_chain & PSW_ADDR_INSN;
+			sp = sf->back_chain;
 			if (!sp)
 				break;
 			if (sp <= low || sp > high - sizeof(*sf))
 				return sp;
 			sf = (struct stack_frame *) sp;
-			addr = sf->gprs[8] & PSW_ADDR_INSN;
+			addr = sf->gprs[8];
 			printk(" [<%016lx>] %pSR\n", addr, (void *)addr);
 		}
 		/* Zero backchain detected, check for interrupt frame. */
@@ -57,7 +56,7 @@
 		if (sp <= low || sp > high - sizeof(*regs))
 			return sp;
 		regs = (struct pt_regs *) sp;
-		addr = regs->psw.addr & PSW_ADDR_INSN;
+		addr = regs->psw.addr;
 		printk(" [<%016lx>] %pSR\n", addr, (void *)addr);
 		low = sp;
 		sp = regs->gprs[15];
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 20a5caf..c55576b 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -252,14 +252,14 @@
 	unsigned long addr;
 
 	addr = S390_lowcore.program_old_psw.addr;
-	fixup = search_exception_tables(addr & PSW_ADDR_INSN);
+	fixup = search_exception_tables(addr);
 	if (!fixup)
 		disabled_wait(0);
 	/* Disable low address protection before storing into lowcore. */
 	__ctl_store(cr0, 0, 0);
 	cr0_new = cr0 & ~(1UL << 28);
 	__ctl_load(cr0_new, 0, 0);
-	S390_lowcore.program_old_psw.addr = extable_fixup(fixup)|PSW_ADDR_AMODE;
+	S390_lowcore.program_old_psw.addr = extable_fixup(fixup);
 	__ctl_load(cr0, 0, 0);
 }
 
@@ -268,9 +268,9 @@
 	psw_t psw;
 
 	psw.mask = PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA;
-	psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_ext_handler;
+	psw.addr = (unsigned long) s390_base_ext_handler;
 	S390_lowcore.external_new_psw = psw;
-	psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_pgm_handler;
+	psw.addr = (unsigned long) s390_base_pgm_handler;
 	S390_lowcore.program_new_psw = psw;
 	s390_base_pgm_handler_fn = early_pgm_check_handler;
 }
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index e0eaf11..0f7bfeb 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -203,7 +203,7 @@
 		goto out;
 	if (unlikely(atomic_read(&current->tracing_graph_pause)))
 		goto out;
-	ip = (ip & PSW_ADDR_INSN) - MCOUNT_INSN_SIZE;
+	ip -= MCOUNT_INSN_SIZE;
 	trace.func = ip;
 	trace.depth = current->curr_ret_stack + 1;
 	/* Only trace if the calling function expects to. */
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 0a5a6b6..f20abdb 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -2057,12 +2057,12 @@
 	/* Set new machine check handler */
 	S390_lowcore.mcck_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT;
 	S390_lowcore.mcck_new_psw.addr =
-		PSW_ADDR_AMODE | (unsigned long) s390_base_mcck_handler;
+		(unsigned long) s390_base_mcck_handler;
 
 	/* Set new program check handler */
 	S390_lowcore.program_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT;
 	S390_lowcore.program_new_psw.addr =
-		PSW_ADDR_AMODE | (unsigned long) s390_base_pgm_handler;
+		(unsigned long) s390_base_pgm_handler;
 
 	/*
 	 * Clear subchannel ID and number to signal new kernel that no CCW or
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index 389db56..250f597 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -226,7 +226,7 @@
 	__ctl_load(per_kprobe, 9, 11);
 	regs->psw.mask |= PSW_MASK_PER;
 	regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT);
-	regs->psw.addr = ip | PSW_ADDR_AMODE;
+	regs->psw.addr = ip;
 }
 NOKPROBE_SYMBOL(enable_singlestep);
 
@@ -238,7 +238,7 @@
 	__ctl_load(kcb->kprobe_saved_ctl, 9, 11);
 	regs->psw.mask &= ~PSW_MASK_PER;
 	regs->psw.mask |= kcb->kprobe_saved_imask;
-	regs->psw.addr = ip | PSW_ADDR_AMODE;
+	regs->psw.addr = ip;
 }
 NOKPROBE_SYMBOL(disable_singlestep);
 
@@ -310,7 +310,7 @@
 	 */
 	preempt_disable();
 	kcb = get_kprobe_ctlblk();
-	p = get_kprobe((void *)((regs->psw.addr & PSW_ADDR_INSN) - 2));
+	p = get_kprobe((void *)(regs->psw.addr - 2));
 
 	if (p) {
 		if (kprobe_running()) {
@@ -460,7 +460,7 @@
 			break;
 	}
 
-	regs->psw.addr = orig_ret_address | PSW_ADDR_AMODE;
+	regs->psw.addr = orig_ret_address;
 
 	pop_kprobe(get_kprobe_ctlblk());
 	kretprobe_hash_unlock(current, &flags);
@@ -490,7 +490,7 @@
 static void resume_execution(struct kprobe *p, struct pt_regs *regs)
 {
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
-	unsigned long ip = regs->psw.addr & PSW_ADDR_INSN;
+	unsigned long ip = regs->psw.addr;
 	int fixup = probe_get_fixup_type(p->ainsn.insn);
 
 	/* Check if the kprobes location is an enabled ftrace caller */
@@ -605,9 +605,9 @@
 		 * In case the user-specified fault handler returned
 		 * zero, try to fix up.
 		 */
-		entry = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN);
+		entry = search_exception_tables(regs->psw.addr);
 		if (entry) {
-			regs->psw.addr = extable_fixup(entry) | PSW_ADDR_AMODE;
+			regs->psw.addr = extable_fixup(entry);
 			return 1;
 		}
 
@@ -683,7 +683,7 @@
 	memcpy(&kcb->jprobe_saved_regs, regs, sizeof(struct pt_regs));
 
 	/* setup return addr to the jprobe handler routine */
-	regs->psw.addr = (unsigned long) jp->entry | PSW_ADDR_AMODE;
+	regs->psw.addr = (unsigned long) jp->entry;
 	regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT);
 
 	/* r15 is the stack pointer */
diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
index 61595c1..cfcba2d 100644
--- a/arch/s390/kernel/perf_event.c
+++ b/arch/s390/kernel/perf_event.c
@@ -74,7 +74,7 @@
 
 static unsigned long instruction_pointer_guest(struct pt_regs *regs)
 {
-	return sie_block(regs)->gpsw.addr & PSW_ADDR_INSN;
+	return sie_block(regs)->gpsw.addr;
 }
 
 unsigned long perf_instruction_pointer(struct pt_regs *regs)
@@ -231,29 +231,27 @@
 	struct pt_regs *regs;
 
 	while (1) {
-		sp = sp & PSW_ADDR_INSN;
 		if (sp < low || sp > high - sizeof(*sf))
 			return sp;
 		sf = (struct stack_frame *) sp;
-		perf_callchain_store(entry, sf->gprs[8] & PSW_ADDR_INSN);
+		perf_callchain_store(entry, sf->gprs[8]);
 		/* Follow the backchain. */
 		while (1) {
 			low = sp;
-			sp = sf->back_chain & PSW_ADDR_INSN;
+			sp = sf->back_chain;
 			if (!sp)
 				break;
 			if (sp <= low || sp > high - sizeof(*sf))
 				return sp;
 			sf = (struct stack_frame *) sp;
-			perf_callchain_store(entry,
-					     sf->gprs[8] & PSW_ADDR_INSN);
+			perf_callchain_store(entry, sf->gprs[8]);
 		}
 		/* Zero backchain detected, check for interrupt frame. */
 		sp = (unsigned long) (sf + 1);
 		if (sp <= low || sp > high - sizeof(*regs))
 			return sp;
 		regs = (struct pt_regs *) sp;
-		perf_callchain_store(entry, sf->gprs[8] & PSW_ADDR_INSN);
+		perf_callchain_store(entry, sf->gprs[8]);
 		low = sp;
 		sp = regs->gprs[15];
 	}
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 114ee8b..2bba7df 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -56,10 +56,10 @@
 		return 0;
 	low = task_stack_page(tsk);
 	high = (struct stack_frame *) task_pt_regs(tsk);
-	sf = (struct stack_frame *) (tsk->thread.ksp & PSW_ADDR_INSN);
+	sf = (struct stack_frame *) tsk->thread.ksp;
 	if (sf <= low || sf > high)
 		return 0;
-	sf = (struct stack_frame *) (sf->back_chain & PSW_ADDR_INSN);
+	sf = (struct stack_frame *) sf->back_chain;
 	if (sf <= low || sf > high)
 		return 0;
 	return sf->gprs[8];
@@ -154,7 +154,7 @@
 		memset(&frame->childregs, 0, sizeof(struct pt_regs));
 		frame->childregs.psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT |
 				PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
-		frame->childregs.psw.addr = PSW_ADDR_AMODE |
+		frame->childregs.psw.addr =
 				(unsigned long) kernel_thread_starter;
 		frame->childregs.gprs[9] = new_stackp; /* function */
 		frame->childregs.gprs[10] = arg;
@@ -220,14 +220,14 @@
 		return 0;
 	low = task_stack_page(p);
 	high = (struct stack_frame *) task_pt_regs(p);
-	sf = (struct stack_frame *) (p->thread.ksp & PSW_ADDR_INSN);
+	sf = (struct stack_frame *) p->thread.ksp;
 	if (sf <= low || sf > high)
 		return 0;
 	for (count = 0; count < 16; count++) {
-		sf = (struct stack_frame *) (sf->back_chain & PSW_ADDR_INSN);
+		sf = (struct stack_frame *) sf->back_chain;
 		if (sf <= low || sf > high)
 			return 0;
-		return_address = sf->gprs[8] & PSW_ADDR_INSN;
+		return_address = sf->gprs[8];
 		if (!in_sched_functions(return_address))
 			return return_address;
 	}
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 01c37b3..49b1c13 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -84,7 +84,7 @@
 		if (test_tsk_thread_flag(task, TIF_UPROBE_SINGLESTEP))
 			new.control |= PER_EVENT_IFETCH;
 		new.start = 0;
-		new.end = PSW_ADDR_INSN;
+		new.end = -1UL;
 	}
 
 	/* Take care of the PER enablement bit in the PSW. */
@@ -148,7 +148,7 @@
 	else if (addr == (addr_t) &dummy->cr11)
 		/* End address of the active per set. */
 		return test_thread_flag(TIF_SINGLE_STEP) ?
-			PSW_ADDR_INSN : child->thread.per_user.end;
+			-1UL : child->thread.per_user.end;
 	else if (addr == (addr_t) &dummy->bits)
 		/* Single-step bit. */
 		return test_thread_flag(TIF_SINGLE_STEP) ?
@@ -495,8 +495,6 @@
 		}
 		return 0;
 	default:
-		/* Removing high order bit from addr (only for 31 bit). */
-		addr &= PSW_ADDR_INSN;
 		return ptrace_request(child, request, addr, data);
 	}
 }
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index c6878fb..9220db5 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -301,25 +301,21 @@
 	BUILD_BUG_ON(sizeof(struct lowcore) != LC_PAGES * 4096);
 	lc = __alloc_bootmem_low(LC_PAGES * PAGE_SIZE, LC_PAGES * PAGE_SIZE, 0);
 	lc->restart_psw.mask = PSW_KERNEL_BITS;
-	lc->restart_psw.addr =
-		PSW_ADDR_AMODE | (unsigned long) restart_int_handler;
+	lc->restart_psw.addr = (unsigned long) restart_int_handler;
 	lc->external_new_psw.mask = PSW_KERNEL_BITS |
 		PSW_MASK_DAT | PSW_MASK_MCHECK;
-	lc->external_new_psw.addr =
-		PSW_ADDR_AMODE | (unsigned long) ext_int_handler;
+	lc->external_new_psw.addr = (unsigned long) ext_int_handler;
 	lc->svc_new_psw.mask = PSW_KERNEL_BITS |
 		PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
-	lc->svc_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) system_call;
+	lc->svc_new_psw.addr = (unsigned long) system_call;
 	lc->program_new_psw.mask = PSW_KERNEL_BITS |
 		PSW_MASK_DAT | PSW_MASK_MCHECK;
-	lc->program_new_psw.addr =
-		PSW_ADDR_AMODE | (unsigned long) pgm_check_handler;
+	lc->program_new_psw.addr = (unsigned long) pgm_check_handler;
 	lc->mcck_new_psw.mask = PSW_KERNEL_BITS;
-	lc->mcck_new_psw.addr =
-		PSW_ADDR_AMODE | (unsigned long) mcck_int_handler;
+	lc->mcck_new_psw.addr = (unsigned long) mcck_int_handler;
 	lc->io_new_psw.mask = PSW_KERNEL_BITS |
 		PSW_MASK_DAT | PSW_MASK_MCHECK;
-	lc->io_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) io_int_handler;
+	lc->io_new_psw.addr = (unsigned long) io_int_handler;
 	lc->clock_comparator = -1ULL;
 	lc->kernel_stack = ((unsigned long) &init_thread_union)
 		+ THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index 028cc46..d82562c 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -331,13 +331,13 @@
 	/* Set up to return from userspace.  If provided, use a stub
 	   already in userspace.  */
 	if (ka->sa.sa_flags & SA_RESTORER) {
-		restorer = (unsigned long) ka->sa.sa_restorer | PSW_ADDR_AMODE;
+		restorer = (unsigned long) ka->sa.sa_restorer;
 	} else {
 		/* Signal frame without vector registers are short ! */
 		__u16 __user *svc = (void __user *) frame + frame_size - 2;
 		if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, svc))
 			return -EFAULT;
-		restorer = (unsigned long) svc | PSW_ADDR_AMODE;
+		restorer = (unsigned long) svc;
 	}
 
 	/* Set up registers for signal handler */
@@ -347,7 +347,7 @@
 	regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA |
 		(PSW_USER_BITS & PSW_MASK_ASC) |
 		(regs->psw.mask & ~PSW_MASK_ASC);
-	regs->psw.addr = (unsigned long) ka->sa.sa_handler | PSW_ADDR_AMODE;
+	regs->psw.addr = (unsigned long) ka->sa.sa_handler;
 
 	regs->gprs[2] = sig;
 	regs->gprs[3] = (unsigned long) &frame->sc;
@@ -394,13 +394,12 @@
 	/* Set up to return from userspace.  If provided, use a stub
 	   already in userspace.  */
 	if (ksig->ka.sa.sa_flags & SA_RESTORER) {
-		restorer = (unsigned long)
-			ksig->ka.sa.sa_restorer | PSW_ADDR_AMODE;
+		restorer = (unsigned long) ksig->ka.sa.sa_restorer;
 	} else {
 		__u16 __user *svc = &frame->svc_insn;
 		if (__put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, svc))
 			return -EFAULT;
-		restorer = (unsigned long) svc | PSW_ADDR_AMODE;
+		restorer = (unsigned long) svc;
 	}
 
 	/* Create siginfo on the signal stack */
@@ -426,7 +425,7 @@
 	regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA |
 		(PSW_USER_BITS & PSW_MASK_ASC) |
 		(regs->psw.mask & ~PSW_MASK_ASC);
-	regs->psw.addr = (unsigned long) ksig->ka.sa.sa_handler | PSW_ADDR_AMODE;
+	regs->psw.addr = (unsigned long) ksig->ka.sa.sa_handler;
 
 	regs->gprs[2] = ksig->sig;
 	regs->gprs[3] = (unsigned long) &frame->info;
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index a13468b..3c65a8e 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -623,8 +623,6 @@
 		return;
 	/* Allocate a page as dumping area for the store status sigps */
 	page = memblock_alloc_base(PAGE_SIZE, PAGE_SIZE, 1UL << 31);
-	if (!page)
-		panic("could not allocate memory for save area\n");
 	/* Set multi-threading state to the previous system. */
 	pcpu_set_smt(sclp.mtid_prev);
 	boot_cpu_addr = stap();
diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c
index 1785cd8..5acba3c 100644
--- a/arch/s390/kernel/stacktrace.c
+++ b/arch/s390/kernel/stacktrace.c
@@ -21,12 +21,11 @@
 	unsigned long addr;
 
 	while(1) {
-		sp &= PSW_ADDR_INSN;
 		if (sp < low || sp > high)
 			return sp;
 		sf = (struct stack_frame *)sp;
 		while(1) {
-			addr = sf->gprs[8] & PSW_ADDR_INSN;
+			addr = sf->gprs[8];
 			if (!trace->skip)
 				trace->entries[trace->nr_entries++] = addr;
 			else
@@ -34,7 +33,7 @@
 			if (trace->nr_entries >= trace->max_entries)
 				return sp;
 			low = sp;
-			sp = sf->back_chain & PSW_ADDR_INSN;
+			sp = sf->back_chain;
 			if (!sp)
 				break;
 			if (sp <= low || sp > high - sizeof(*sf))
@@ -46,7 +45,7 @@
 		if (sp <= low || sp > high - sizeof(*regs))
 			return sp;
 		regs = (struct pt_regs *)sp;
-		addr = regs->psw.addr & PSW_ADDR_INSN;
+		addr = regs->psw.addr;
 		if (savesched || !in_sched_functions(addr)) {
 			if (!trace->skip)
 				trace->entries[trace->nr_entries++] = addr;
@@ -65,7 +64,7 @@
 	register unsigned long sp asm ("15");
 	unsigned long orig_sp, new_sp;
 
-	orig_sp = sp & PSW_ADDR_INSN;
+	orig_sp = sp;
 	new_sp = save_context_stack(trace, orig_sp,
 				    S390_lowcore.panic_stack - PAGE_SIZE,
 				    S390_lowcore.panic_stack, 1);
@@ -86,7 +85,7 @@
 {
 	unsigned long sp, low, high;
 
-	sp = tsk->thread.ksp & PSW_ADDR_INSN;
+	sp = tsk->thread.ksp;
 	low = (unsigned long) task_stack_page(tsk);
 	high = (unsigned long) task_pt_regs(tsk);
 	save_context_stack(trace, sp, low, high, 0);
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index 5378c3e..293d8b9 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -383,3 +383,4 @@
 SYSCALL(sys_recvmsg,compat_sys_recvmsg)
 SYSCALL(sys_shutdown,sys_shutdown)
 SYSCALL(sys_mlock2,compat_sys_mlock2)
+SYSCALL(sys_copy_file_range,compat_sys_copy_file_range) /* 375 */
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index d69d648..017eb03d 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -32,8 +32,7 @@
 		address = *(unsigned long *)(current->thread.trap_tdb + 24);
 	else
 		address = regs->psw.addr;
-	return (void __user *)
-		((address - (regs->int_code >> 16)) & PSW_ADDR_INSN);
+	return (void __user *) (address - (regs->int_code >> 16));
 }
 
 static inline void report_user_fault(struct pt_regs *regs, int signr)
@@ -46,7 +45,7 @@
 		return;
 	printk("User process fault: interruption code %04x ilc:%d ",
 	       regs->int_code & 0xffff, regs->int_code >> 17);
-	print_vma_addr("in ", regs->psw.addr & PSW_ADDR_INSN);
+	print_vma_addr("in ", regs->psw.addr);
 	printk("\n");
 	show_regs(regs);
 }
@@ -69,13 +68,13 @@
 		report_user_fault(regs, si_signo);
         } else {
                 const struct exception_table_entry *fixup;
-                fixup = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN);
+		fixup = search_exception_tables(regs->psw.addr);
                 if (fixup)
-			regs->psw.addr = extable_fixup(fixup) | PSW_ADDR_AMODE;
+			regs->psw.addr = extable_fixup(fixup);
 		else {
 			enum bug_trap_type btt;
 
-			btt = report_bug(regs->psw.addr & PSW_ADDR_INSN, regs);
+			btt = report_bug(regs->psw.addr, regs);
 			if (btt == BUG_TRAP_TYPE_WARN)
 				return;
 			die(regs, str);
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index 5fce52c..5ea5af3 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -29,6 +29,7 @@
 	select HAVE_KVM_IRQFD
 	select HAVE_KVM_IRQ_ROUTING
 	select SRCU
+	select KVM_VFIO
 	---help---
 	  Support hosting paravirtualized guest machines using the SIE
 	  virtualization capability on the mainframe. This should work
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index b3b5534..d42fa38 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -7,7 +7,7 @@
 # as published by the Free Software Foundation.
 
 KVM := ../../../virt/kvm
-common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o  $(KVM)/async_pf.o $(KVM)/irqchip.o
+common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o  $(KVM)/async_pf.o $(KVM)/irqchip.o $(KVM)/vfio.o
 
 ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
 
diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c
index 47518a3..d697312 100644
--- a/arch/s390/kvm/guestdbg.c
+++ b/arch/s390/kvm/guestdbg.c
@@ -116,7 +116,7 @@
 	if (*cr9 & PER_EVENT_STORE && *cr9 & PER_CONTROL_ALTERATION) {
 		*cr9 &= ~PER_CONTROL_ALTERATION;
 		*cr10 = 0;
-		*cr11 = PSW_ADDR_INSN;
+		*cr11 = -1UL;
 	} else {
 		*cr9 &= ~PER_CONTROL_ALTERATION;
 		*cr9 |= PER_EVENT_STORE;
@@ -159,7 +159,7 @@
 		vcpu->arch.sie_block->gcr[0] &= ~0x800ul;
 		vcpu->arch.sie_block->gcr[9] |= PER_EVENT_IFETCH;
 		vcpu->arch.sie_block->gcr[10] = 0;
-		vcpu->arch.sie_block->gcr[11] = PSW_ADDR_INSN;
+		vcpu->arch.sie_block->gcr[11] = -1UL;
 	}
 
 	if (guestdbg_hw_bp_enabled(vcpu)) {
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 835d60b..4af21c7 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -1423,44 +1423,18 @@
 	return 0;
 }
 
-/*
- * Backs up the current FP/VX register save area on a particular
- * destination.  Used to switch between different register save
- * areas.
- */
-static inline void save_fpu_to(struct fpu *dst)
-{
-	dst->fpc = current->thread.fpu.fpc;
-	dst->regs = current->thread.fpu.regs;
-}
-
-/*
- * Switches the FP/VX register save area from which to lazy
- * restore register contents.
- */
-static inline void load_fpu_from(struct fpu *from)
-{
-	current->thread.fpu.fpc = from->fpc;
-	current->thread.fpu.regs = from->regs;
-}
-
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	/* Save host register state */
 	save_fpu_regs();
-	save_fpu_to(&vcpu->arch.host_fpregs);
+	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
+	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
 
-	if (test_kvm_facility(vcpu->kvm, 129)) {
-		current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
-		/*
-		 * Use the register save area in the SIE-control block
-		 * for register restore and save in kvm_arch_vcpu_put()
-		 */
-		current->thread.fpu.vxrs =
-			(__vector128 *)&vcpu->run->s.regs.vrs;
-	} else
-		load_fpu_from(&vcpu->arch.guest_fpregs);
-
+	/* Depending on MACHINE_HAS_VX, data stored to vrs either
+	 * has vector register or floating point register format.
+	 */
+	current->thread.fpu.regs = vcpu->run->s.regs.vrs;
+	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
 	if (test_fp_ctl(current->thread.fpu.fpc))
 		/* User space provided an invalid FPC, let's clear it */
 		current->thread.fpu.fpc = 0;
@@ -1476,19 +1450,13 @@
 	atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
 	gmap_disable(vcpu->arch.gmap);
 
+	/* Save guest register state */
 	save_fpu_regs();
+	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
 
-	if (test_kvm_facility(vcpu->kvm, 129))
-		/*
-		 * kvm_arch_vcpu_load() set up the register save area to
-		 * the &vcpu->run->s.regs.vrs and, thus, the vector registers
-		 * are already saved.  Only the floating-point control must be
-		 * copied.
-		 */
-		vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
-	else
-		save_fpu_to(&vcpu->arch.guest_fpregs);
-	load_fpu_from(&vcpu->arch.host_fpregs);
+	/* Restore host register state */
+	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
+	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
 
 	save_access_regs(vcpu->run->s.regs.acrs);
 	restore_access_regs(vcpu->arch.host_acrs);
@@ -1506,8 +1474,9 @@
 	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
 	vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
 	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
-	vcpu->arch.guest_fpregs.fpc = 0;
-	asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
+	/* make sure the new fpc will be lazily loaded */
+	save_fpu_regs();
+	current->thread.fpu.fpc = 0;
 	vcpu->arch.sie_block->gbea = 1;
 	vcpu->arch.sie_block->pp = 0;
 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
@@ -1648,17 +1617,6 @@
 	vcpu->arch.local_int.wq = &vcpu->wq;
 	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
 
-	/*
-	 * Allocate a save area for floating-point registers.  If the vector
-	 * extension is available, register contents are saved in the SIE
-	 * control block.  The allocated save area is still required in
-	 * particular places, for example, in kvm_s390_vcpu_store_status().
-	 */
-	vcpu->arch.guest_fpregs.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS,
-					       GFP_KERNEL);
-	if (!vcpu->arch.guest_fpregs.fprs)
-		goto out_free_sie_block;
-
 	rc = kvm_vcpu_init(vcpu, kvm, id);
 	if (rc)
 		goto out_free_sie_block;
@@ -1879,19 +1837,27 @@
 
 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
+	/* make sure the new values will be lazily loaded */
+	save_fpu_regs();
 	if (test_fp_ctl(fpu->fpc))
 		return -EINVAL;
-	memcpy(vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
-	vcpu->arch.guest_fpregs.fpc = fpu->fpc;
-	save_fpu_regs();
-	load_fpu_from(&vcpu->arch.guest_fpregs);
+	current->thread.fpu.fpc = fpu->fpc;
+	if (MACHINE_HAS_VX)
+		convert_fp_to_vx(current->thread.fpu.vxrs, (freg_t *)fpu->fprs);
+	else
+		memcpy(current->thread.fpu.fprs, &fpu->fprs, sizeof(fpu->fprs));
 	return 0;
 }
 
 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
-	memcpy(&fpu->fprs, vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
-	fpu->fpc = vcpu->arch.guest_fpregs.fpc;
+	/* make sure we have the latest values */
+	save_fpu_regs();
+	if (MACHINE_HAS_VX)
+		convert_vx_to_fp((freg_t *)fpu->fprs, current->thread.fpu.vxrs);
+	else
+		memcpy(fpu->fprs, current->thread.fpu.fprs, sizeof(fpu->fprs));
+	fpu->fpc = current->thread.fpu.fpc;
 	return 0;
 }
 
@@ -2396,6 +2362,7 @@
 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
 {
 	unsigned char archmode = 1;
+	freg_t fprs[NUM_FPRS];
 	unsigned int px;
 	u64 clkcomp;
 	int rc;
@@ -2411,8 +2378,16 @@
 		gpa = px;
 	} else
 		gpa -= __LC_FPREGS_SAVE_AREA;
-	rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
-			     vcpu->arch.guest_fpregs.fprs, 128);
+
+	/* manually convert vector registers if necessary */
+	if (MACHINE_HAS_VX) {
+		convert_vx_to_fp(fprs, current->thread.fpu.vxrs);
+		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
+				     fprs, 128);
+	} else {
+		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
+				     vcpu->run->s.regs.vrs, 128);
+	}
 	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
 			      vcpu->run->s.regs.gprs, 128);
 	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
@@ -2420,7 +2395,7 @@
 	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
 			      &px, 4);
 	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
-			      &vcpu->arch.guest_fpregs.fpc, 4);
+			      &vcpu->run->s.regs.fpc, 4);
 	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
 			      &vcpu->arch.sie_block->todpr, 4);
 	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
@@ -2443,19 +2418,7 @@
 	 * it into the save area
 	 */
 	save_fpu_regs();
-	if (test_kvm_facility(vcpu->kvm, 129)) {
-		/*
-		 * If the vector extension is available, the vector registers
-		 * which overlaps with floating-point registers are saved in
-		 * the SIE-control block.  Hence, extract the floating-point
-		 * registers and the FPC value and store them in the
-		 * guest_fpregs structure.
-		 */
-		vcpu->arch.guest_fpregs.fpc = current->thread.fpu.fpc;
-		convert_vx_to_fp(vcpu->arch.guest_fpregs.fprs,
-				 current->thread.fpu.vxrs);
-	} else
-		save_fpu_to(&vcpu->arch.guest_fpregs);
+	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
 	save_access_regs(vcpu->run->s.regs.acrs);
 
 	return kvm_s390_store_status_unloaded(vcpu, addr);
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 1b903f6..791a414 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -228,7 +228,7 @@
 		return;
 	printk(KERN_ALERT "User process fault: interruption code %04x ilc:%d ",
 	       regs->int_code & 0xffff, regs->int_code >> 17);
-	print_vma_addr(KERN_CONT "in ", regs->psw.addr & PSW_ADDR_INSN);
+	print_vma_addr(KERN_CONT "in ", regs->psw.addr);
 	printk(KERN_CONT "\n");
 	printk(KERN_ALERT "failing address: %016lx TEID: %016lx\n",
 	       regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long);
@@ -256,9 +256,9 @@
 	const struct exception_table_entry *fixup;
 
 	/* Are we prepared to handle this kernel fault?  */
-	fixup = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN);
+	fixup = search_exception_tables(regs->psw.addr);
 	if (fixup) {
-		regs->psw.addr = extable_fixup(fixup) | PSW_ADDR_AMODE;
+		regs->psw.addr = extable_fixup(fixup);
 		return;
 	}
 
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index c722400..73e2903 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -98,7 +98,7 @@
 	__ctl_load(S390_lowcore.kernel_asce, 1, 1);
 	__ctl_load(S390_lowcore.kernel_asce, 7, 7);
 	__ctl_load(S390_lowcore.kernel_asce, 13, 13);
-	arch_local_irq_restore(4UL << (BITS_PER_LONG - 8));
+	__arch_local_irq_stosm(0x04);
 
 	sparse_memory_present_with_active_regions(MAX_NUMNODES);
 	sparse_init();
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index ea01477..45c4daa 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -169,12 +169,12 @@
 
 int s390_mmap_check(unsigned long addr, unsigned long len, unsigned long flags)
 {
-	if (is_compat_task() || (TASK_SIZE >= (1UL << 53)))
+	if (is_compat_task() || TASK_SIZE >= TASK_MAX_SIZE)
 		return 0;
 	if (!(flags & MAP_FIXED))
 		addr = 0;
 	if ((addr + len) >= TASK_SIZE)
-		return crst_table_upgrade(current->mm, 1UL << 53);
+		return crst_table_upgrade(current->mm, TASK_MAX_SIZE);
 	return 0;
 }
 
@@ -189,9 +189,9 @@
 	area = arch_get_unmapped_area(filp, addr, len, pgoff, flags);
 	if (!(area & ~PAGE_MASK))
 		return area;
-	if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < (1UL << 53)) {
+	if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < TASK_MAX_SIZE) {
 		/* Upgrade the page table to 4 levels and retry. */
-		rc = crst_table_upgrade(mm, 1UL << 53);
+		rc = crst_table_upgrade(mm, TASK_MAX_SIZE);
 		if (rc)
 			return (unsigned long) rc;
 		area = arch_get_unmapped_area(filp, addr, len, pgoff, flags);
@@ -211,9 +211,9 @@
 	area = arch_get_unmapped_area_topdown(filp, addr, len, pgoff, flags);
 	if (!(area & ~PAGE_MASK))
 		return area;
-	if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < (1UL << 53)) {
+	if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < TASK_MAX_SIZE) {
 		/* Upgrade the page table to 4 levels and retry. */
-		rc = crst_table_upgrade(mm, 1UL << 53);
+		rc = crst_table_upgrade(mm, TASK_MAX_SIZE);
 		if (rc)
 			return (unsigned long) rc;
 		area = arch_get_unmapped_area_topdown(filp, addr, len,
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index a809fa8..5109827 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -55,7 +55,7 @@
 	unsigned long entry;
 	int flush;
 
-	BUG_ON(limit > (1UL << 53));
+	BUG_ON(limit > TASK_MAX_SIZE);
 	flush = 0;
 repeat:
 	table = crst_table_alloc(mm);
diff --git a/arch/s390/numa/numa.c b/arch/s390/numa/numa.c
index 43f32ce..2794845 100644
--- a/arch/s390/numa/numa.c
+++ b/arch/s390/numa/numa.c
@@ -57,9 +57,7 @@
 {
 	pg_data_t *res;
 
-	res = (pg_data_t *) memblock_alloc(sizeof(pg_data_t), 1);
-	if (!res)
-		panic("Could not allocate memory for node data!\n");
+	res = (pg_data_t *) memblock_alloc(sizeof(pg_data_t), 8);
 	memset(res, 0, sizeof(pg_data_t));
 	return res;
 }
@@ -162,7 +160,7 @@
 		register_one_node(nid);
 	return 0;
 }
-device_initcall(numa_init_late);
+arch_initcall(numa_init_late);
 
 static int __init parse_debug(char *parm)
 {
diff --git a/arch/s390/oprofile/backtrace.c b/arch/s390/oprofile/backtrace.c
index 8a6811b..fe0bfe3 100644
--- a/arch/s390/oprofile/backtrace.c
+++ b/arch/s390/oprofile/backtrace.c
@@ -16,24 +16,23 @@
 	struct pt_regs *regs;
 
 	while (*depth) {
-		sp = sp & PSW_ADDR_INSN;
 		if (sp < low || sp > high - sizeof(*sf))
 			return sp;
 		sf = (struct stack_frame *) sp;
 		(*depth)--;
-		oprofile_add_trace(sf->gprs[8] & PSW_ADDR_INSN);
+		oprofile_add_trace(sf->gprs[8]);
 
 		/* Follow the backchain.  */
 		while (*depth) {
 			low = sp;
-			sp = sf->back_chain & PSW_ADDR_INSN;
+			sp = sf->back_chain;
 			if (!sp)
 				break;
 			if (sp <= low || sp > high - sizeof(*sf))
 				return sp;
 			sf = (struct stack_frame *) sp;
 			(*depth)--;
-			oprofile_add_trace(sf->gprs[8] & PSW_ADDR_INSN);
+			oprofile_add_trace(sf->gprs[8]);
 
 		}
 
@@ -46,7 +45,7 @@
 			return sp;
 		regs = (struct pt_regs *) sp;
 		(*depth)--;
-		oprofile_add_trace(sf->gprs[8] & PSW_ADDR_INSN);
+		oprofile_add_trace(sf->gprs[8]);
 		low = sp;
 		sp = regs->gprs[15];
 	}
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 11d4f27..8f19c8f 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -68,9 +68,12 @@
 	.isc = PCI_ISC,
 };
 
-/* I/O Map */
+#define ZPCI_IOMAP_ENTRIES						\
+	min(((unsigned long) CONFIG_PCI_NR_FUNCTIONS * PCI_BAR_COUNT),	\
+	    ZPCI_IOMAP_MAX_ENTRIES)
+
 static DEFINE_SPINLOCK(zpci_iomap_lock);
-static DECLARE_BITMAP(zpci_iomap, ZPCI_IOMAP_MAX_ENTRIES);
+static unsigned long *zpci_iomap_bitmap;
 struct zpci_iomap_entry *zpci_iomap_start;
 EXPORT_SYMBOL_GPL(zpci_iomap_start);
 
@@ -265,27 +268,20 @@
 			      unsigned long max)
 {
 	struct zpci_dev *zdev =	to_zpci(pdev);
-	u64 addr;
 	int idx;
 
-	if ((bar & 7) != bar)
+	if (!pci_resource_len(pdev, bar))
 		return NULL;
 
 	idx = zdev->bars[bar].map_idx;
 	spin_lock(&zpci_iomap_lock);
-	if (zpci_iomap_start[idx].count++) {
-		BUG_ON(zpci_iomap_start[idx].fh != zdev->fh ||
-		       zpci_iomap_start[idx].bar != bar);
-	} else {
-		zpci_iomap_start[idx].fh = zdev->fh;
-		zpci_iomap_start[idx].bar = bar;
-	}
 	/* Detect overrun */
-	BUG_ON(!zpci_iomap_start[idx].count);
+	WARN_ON(!++zpci_iomap_start[idx].count);
+	zpci_iomap_start[idx].fh = zdev->fh;
+	zpci_iomap_start[idx].bar = bar;
 	spin_unlock(&zpci_iomap_lock);
 
-	addr = ZPCI_IOMAP_ADDR_BASE | ((u64) idx << 48);
-	return (void __iomem *) addr + offset;
+	return (void __iomem *) ZPCI_ADDR(idx) + offset;
 }
 EXPORT_SYMBOL(pci_iomap_range);
 
@@ -297,12 +293,11 @@
 
 void pci_iounmap(struct pci_dev *pdev, void __iomem *addr)
 {
-	unsigned int idx;
+	unsigned int idx = ZPCI_IDX(addr);
 
-	idx = (((__force u64) addr) & ~ZPCI_IOMAP_ADDR_BASE) >> 48;
 	spin_lock(&zpci_iomap_lock);
 	/* Detect underrun */
-	BUG_ON(!zpci_iomap_start[idx].count);
+	WARN_ON(!zpci_iomap_start[idx].count);
 	if (!--zpci_iomap_start[idx].count) {
 		zpci_iomap_start[idx].fh = 0;
 		zpci_iomap_start[idx].bar = 0;
@@ -544,15 +539,15 @@
 
 static int zpci_alloc_iomap(struct zpci_dev *zdev)
 {
-	int entry;
+	unsigned long entry;
 
 	spin_lock(&zpci_iomap_lock);
-	entry = find_first_zero_bit(zpci_iomap, ZPCI_IOMAP_MAX_ENTRIES);
-	if (entry == ZPCI_IOMAP_MAX_ENTRIES) {
+	entry = find_first_zero_bit(zpci_iomap_bitmap, ZPCI_IOMAP_ENTRIES);
+	if (entry == ZPCI_IOMAP_ENTRIES) {
 		spin_unlock(&zpci_iomap_lock);
 		return -ENOSPC;
 	}
-	set_bit(entry, zpci_iomap);
+	set_bit(entry, zpci_iomap_bitmap);
 	spin_unlock(&zpci_iomap_lock);
 	return entry;
 }
@@ -561,7 +556,7 @@
 {
 	spin_lock(&zpci_iomap_lock);
 	memset(&zpci_iomap_start[entry], 0, sizeof(struct zpci_iomap_entry));
-	clear_bit(entry, zpci_iomap);
+	clear_bit(entry, zpci_iomap_bitmap);
 	spin_unlock(&zpci_iomap_lock);
 }
 
@@ -611,8 +606,7 @@
 		if (zdev->bars[i].val & 4)
 			flags |= IORESOURCE_MEM_64;
 
-		addr = ZPCI_IOMAP_ADDR_BASE + ((u64) entry << 48);
-
+		addr = ZPCI_ADDR(entry);
 		size = 1UL << zdev->bars[i].size;
 
 		res = __alloc_res(zdev, addr, size, flags);
@@ -873,23 +867,30 @@
 	zdev_fmb_cache = kmem_cache_create("PCI_FMB_cache", sizeof(struct zpci_fmb),
 				16, 0, NULL);
 	if (!zdev_fmb_cache)
-		goto error_zdev;
+		goto error_fmb;
 
-	/* TODO: use realloc */
-	zpci_iomap_start = kzalloc(ZPCI_IOMAP_MAX_ENTRIES * sizeof(*zpci_iomap_start),
-				   GFP_KERNEL);
+	zpci_iomap_start = kcalloc(ZPCI_IOMAP_ENTRIES,
+				   sizeof(*zpci_iomap_start), GFP_KERNEL);
 	if (!zpci_iomap_start)
 		goto error_iomap;
-	return 0;
 
+	zpci_iomap_bitmap = kcalloc(BITS_TO_LONGS(ZPCI_IOMAP_ENTRIES),
+				    sizeof(*zpci_iomap_bitmap), GFP_KERNEL);
+	if (!zpci_iomap_bitmap)
+		goto error_iomap_bitmap;
+
+	return 0;
+error_iomap_bitmap:
+	kfree(zpci_iomap_start);
 error_iomap:
 	kmem_cache_destroy(zdev_fmb_cache);
-error_zdev:
+error_fmb:
 	return -ENOMEM;
 }
 
 static void zpci_mem_exit(void)
 {
+	kfree(zpci_iomap_bitmap);
 	kfree(zpci_iomap_start);
 	kmem_cache_destroy(zdev_fmb_cache);
 }
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index 369a3e0..b0e0475 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -53,6 +53,11 @@
 
 	pr_err("%s: Event 0x%x reports an error for PCI function 0x%x\n",
 	       pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid);
+
+	if (!pdev)
+		return;
+
+	pdev->error_state = pci_channel_io_perm_failure;
 }
 
 void zpci_event_error(void *data)
diff --git a/arch/sh/include/asm/barrier.h b/arch/sh/include/asm/barrier.h
index f887c64..8a84e05 100644
--- a/arch/sh/include/asm/barrier.h
+++ b/arch/sh/include/asm/barrier.h
@@ -33,7 +33,6 @@
 #endif
 
 #define __smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0)
-#define smp_store_mb(var, value) __smp_store_mb(var, value)
 
 #include <asm-generic/barrier.h>
 
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 1699df5..888a7fe 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -70,6 +70,18 @@
 	return bio_split(bio, q->limits.max_write_same_sectors, GFP_NOIO, bs);
 }
 
+static inline unsigned get_max_io_size(struct request_queue *q,
+				       struct bio *bio)
+{
+	unsigned sectors = blk_max_size_offset(q, bio->bi_iter.bi_sector);
+	unsigned mask = queue_logical_block_size(q) - 1;
+
+	/* aligned to logical block size */
+	sectors &= ~(mask >> 9);
+
+	return sectors;
+}
+
 static struct bio *blk_bio_segment_split(struct request_queue *q,
 					 struct bio *bio,
 					 struct bio_set *bs,
@@ -81,6 +93,7 @@
 	unsigned front_seg_size = bio->bi_seg_front_size;
 	bool do_split = true;
 	struct bio *new = NULL;
+	const unsigned max_sectors = get_max_io_size(q, bio);
 
 	bio_for_each_segment(bv, bio, iter) {
 		/*
@@ -90,20 +103,19 @@
 		if (bvprvp && bvec_gap_to_prev(q, bvprvp, bv.bv_offset))
 			goto split;
 
-		if (sectors + (bv.bv_len >> 9) >
-				blk_max_size_offset(q, bio->bi_iter.bi_sector)) {
+		if (sectors + (bv.bv_len >> 9) > max_sectors) {
 			/*
 			 * Consider this a new segment if we're splitting in
 			 * the middle of this vector.
 			 */
 			if (nsegs < queue_max_segments(q) &&
-			    sectors < blk_max_size_offset(q,
-						bio->bi_iter.bi_sector)) {
+			    sectors < max_sectors) {
 				nsegs++;
-				sectors = blk_max_size_offset(q,
-						bio->bi_iter.bi_sector);
+				sectors = max_sectors;
 			}
-			goto split;
+			if (sectors)
+				goto split;
+			/* Make this single bvec as the 1st segment */
 		}
 
 		if (bvprvp && blk_queue_cluster(q)) {
diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c
index 90e2d54..1316ddd 100644
--- a/drivers/acpi/video_detect.c
+++ b/drivers/acpi/video_detect.c
@@ -135,14 +135,6 @@
 		DMI_MATCH(DMI_PRODUCT_NAME, "UL30A"),
 		},
 	},
-	{
-	.callback = video_detect_force_vendor,
-	.ident = "Dell Inspiron 5737",
-	.matches = {
-		DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
-		DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 5737"),
-		},
-	},
 
 	/*
 	 * These models have a working acpi_video backlight control, and using
diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 6ac9a7f..784dbe8 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -162,7 +162,7 @@
 
 /**
  * genpd_queue_power_off_work - Queue up the execution of genpd_poweroff().
- * @genpd: PM domait to power off.
+ * @genpd: PM domain to power off.
  *
  * Queue up the execution of genpd_poweroff() unless it's already been done
  * before.
@@ -172,16 +172,15 @@
 	queue_work(pm_wq, &genpd->power_off_work);
 }
 
-static int genpd_poweron(struct generic_pm_domain *genpd);
-
 /**
  * __genpd_poweron - Restore power to a given PM domain and its masters.
  * @genpd: PM domain to power up.
+ * @depth: nesting count for lockdep.
  *
  * Restore power to @genpd and all of its masters so that it is possible to
  * resume a device belonging to it.
  */
-static int __genpd_poweron(struct generic_pm_domain *genpd)
+static int __genpd_poweron(struct generic_pm_domain *genpd, unsigned int depth)
 {
 	struct gpd_link *link;
 	int ret = 0;
@@ -196,11 +195,16 @@
 	 * with it.
 	 */
 	list_for_each_entry(link, &genpd->slave_links, slave_node) {
-		genpd_sd_counter_inc(link->master);
+		struct generic_pm_domain *master = link->master;
 
-		ret = genpd_poweron(link->master);
+		genpd_sd_counter_inc(master);
+
+		mutex_lock_nested(&master->lock, depth + 1);
+		ret = __genpd_poweron(master, depth + 1);
+		mutex_unlock(&master->lock);
+
 		if (ret) {
-			genpd_sd_counter_dec(link->master);
+			genpd_sd_counter_dec(master);
 			goto err;
 		}
 	}
@@ -232,11 +236,12 @@
 	int ret;
 
 	mutex_lock(&genpd->lock);
-	ret = __genpd_poweron(genpd);
+	ret = __genpd_poweron(genpd, 0);
 	mutex_unlock(&genpd->lock);
 	return ret;
 }
 
+
 static int genpd_save_dev(struct generic_pm_domain *genpd, struct device *dev)
 {
 	return GENPD_DEV_CALLBACK(genpd, int, save_state, dev);
@@ -484,7 +489,7 @@
 	}
 
 	mutex_lock(&genpd->lock);
-	ret = __genpd_poweron(genpd);
+	ret = __genpd_poweron(genpd, 0);
 	mutex_unlock(&genpd->lock);
 
 	if (ret)
@@ -1339,8 +1344,8 @@
 	if (!link)
 		return -ENOMEM;
 
-	mutex_lock(&genpd->lock);
-	mutex_lock_nested(&subdomain->lock, SINGLE_DEPTH_NESTING);
+	mutex_lock(&subdomain->lock);
+	mutex_lock_nested(&genpd->lock, SINGLE_DEPTH_NESTING);
 
 	if (genpd->status == GPD_STATE_POWER_OFF
 	    &&  subdomain->status != GPD_STATE_POWER_OFF) {
@@ -1363,8 +1368,8 @@
 		genpd_sd_counter_inc(genpd);
 
  out:
-	mutex_unlock(&subdomain->lock);
 	mutex_unlock(&genpd->lock);
+	mutex_unlock(&subdomain->lock);
 	if (ret)
 		kfree(link);
 	return ret;
@@ -1385,7 +1390,8 @@
 	if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(subdomain))
 		return -EINVAL;
 
-	mutex_lock(&genpd->lock);
+	mutex_lock(&subdomain->lock);
+	mutex_lock_nested(&genpd->lock, SINGLE_DEPTH_NESTING);
 
 	if (!list_empty(&subdomain->slave_links) || subdomain->device_count) {
 		pr_warn("%s: unable to remove subdomain %s\n", genpd->name,
@@ -1398,22 +1404,19 @@
 		if (link->slave != subdomain)
 			continue;
 
-		mutex_lock_nested(&subdomain->lock, SINGLE_DEPTH_NESTING);
-
 		list_del(&link->master_node);
 		list_del(&link->slave_node);
 		kfree(link);
 		if (subdomain->status != GPD_STATE_POWER_OFF)
 			genpd_sd_counter_dec(genpd);
 
-		mutex_unlock(&subdomain->lock);
-
 		ret = 0;
 		break;
 	}
 
 out:
 	mutex_unlock(&genpd->lock);
+	mutex_unlock(&subdomain->lock);
 
 	return ret;
 }
diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c
index 9bc37c4..0ca74d0 100644
--- a/drivers/cpufreq/cpufreq-dt.c
+++ b/drivers/cpufreq/cpufreq-dt.c
@@ -142,15 +142,16 @@
 
 try_again:
 	cpu_reg = regulator_get_optional(cpu_dev, reg);
-	if (IS_ERR(cpu_reg)) {
+	ret = PTR_ERR_OR_ZERO(cpu_reg);
+	if (ret) {
 		/*
 		 * If cpu's regulator supply node is present, but regulator is
 		 * not yet registered, we should try defering probe.
 		 */
-		if (PTR_ERR(cpu_reg) == -EPROBE_DEFER) {
+		if (ret == -EPROBE_DEFER) {
 			dev_dbg(cpu_dev, "cpu%d regulator not ready, retry\n",
 				cpu);
-			return -EPROBE_DEFER;
+			return ret;
 		}
 
 		/* Try with "cpu-supply" */
@@ -159,18 +160,16 @@
 			goto try_again;
 		}
 
-		dev_dbg(cpu_dev, "no regulator for cpu%d: %ld\n",
-			cpu, PTR_ERR(cpu_reg));
+		dev_dbg(cpu_dev, "no regulator for cpu%d: %d\n", cpu, ret);
 	}
 
 	cpu_clk = clk_get(cpu_dev, NULL);
-	if (IS_ERR(cpu_clk)) {
+	ret = PTR_ERR_OR_ZERO(cpu_clk);
+	if (ret) {
 		/* put regulator */
 		if (!IS_ERR(cpu_reg))
 			regulator_put(cpu_reg);
 
-		ret = PTR_ERR(cpu_clk);
-
 		/*
 		 * If cpu's clk node is present, but clock is not yet
 		 * registered, we should try defering probe.
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index c35e7da..e979ec7 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -48,11 +48,11 @@
 					  bool active)
 {
 	do {
-		policy = list_next_entry(policy, policy_list);
-
 		/* No more policies in the list */
-		if (&policy->policy_list == &cpufreq_policy_list)
+		if (list_is_last(&policy->policy_list, &cpufreq_policy_list))
 			return NULL;
+
+		policy = list_next_entry(policy, policy_list);
 	} while (!suitable_policy(policy, active));
 
 	return policy;
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index bab3a51..e0d1110 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -387,16 +387,18 @@
 	if (!have_governor_per_policy())
 		cdata->gdbs_data = dbs_data;
 
+	policy->governor_data = dbs_data;
+
 	ret = sysfs_create_group(get_governor_parent_kobj(policy),
 				 get_sysfs_attr(dbs_data));
 	if (ret)
 		goto reset_gdbs_data;
 
-	policy->governor_data = dbs_data;
-
 	return 0;
 
 reset_gdbs_data:
+	policy->governor_data = NULL;
+
 	if (!have_governor_per_policy())
 		cdata->gdbs_data = NULL;
 	cdata->exit(dbs_data, !policy->governor->initialized);
@@ -417,16 +419,19 @@
 	if (!cdbs->shared || cdbs->shared->policy)
 		return -EBUSY;
 
-	policy->governor_data = NULL;
 	if (!--dbs_data->usage_count) {
 		sysfs_remove_group(get_governor_parent_kobj(policy),
 				   get_sysfs_attr(dbs_data));
 
+		policy->governor_data = NULL;
+
 		if (!have_governor_per_policy())
 			cdata->gdbs_data = NULL;
 
 		cdata->exit(dbs_data, policy->governor->initialized == 1);
 		kfree(dbs_data);
+	} else {
+		policy->governor_data = NULL;
 	}
 
 	free_common_dbs_info(policy, cdata);
diff --git a/drivers/cpufreq/pxa2xx-cpufreq.c b/drivers/cpufreq/pxa2xx-cpufreq.c
index 1d99c97..0963772 100644
--- a/drivers/cpufreq/pxa2xx-cpufreq.c
+++ b/drivers/cpufreq/pxa2xx-cpufreq.c
@@ -202,7 +202,7 @@
 	}
 }
 #else
-static int pxa_cpufreq_change_voltage(struct pxa_freqs *pxa_freq)
+static int pxa_cpufreq_change_voltage(const struct pxa_freqs *pxa_freq)
 {
 	return 0;
 }
diff --git a/drivers/cpuidle/coupled.c b/drivers/cpuidle/coupled.c
index 344058f..d5657d5 100644
--- a/drivers/cpuidle/coupled.c
+++ b/drivers/cpuidle/coupled.c
@@ -119,7 +119,6 @@
 
 #define CPUIDLE_COUPLED_NOT_IDLE	(-1)
 
-static DEFINE_MUTEX(cpuidle_coupled_lock);
 static DEFINE_PER_CPU(struct call_single_data, cpuidle_coupled_poke_cb);
 
 /*
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 046423b..f996efc 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -153,7 +153,7 @@
 	 * be frozen safely.
 	 */
 	index = find_deepest_state(drv, dev, UINT_MAX, 0, true);
-	if (index >= 0)
+	if (index > 0)
 		enter_freeze_proper(drv, dev, index);
 
 	return index;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 313b0cc..82edf95 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -2278,60 +2278,60 @@
 #define amdgpu_dpm_enable_bapm(adev, e) (adev)->pm.funcs->enable_bapm((adev), (e))
 
 #define amdgpu_dpm_get_temperature(adev) \
-	(adev)->pp_enabled ?						\
+	((adev)->pp_enabled ?						\
 	      (adev)->powerplay.pp_funcs->get_temperature((adev)->powerplay.pp_handle) : \
-	      (adev)->pm.funcs->get_temperature((adev))
+	      (adev)->pm.funcs->get_temperature((adev)))
 
 #define amdgpu_dpm_set_fan_control_mode(adev, m) \
-	(adev)->pp_enabled ?						\
+	((adev)->pp_enabled ?						\
 	      (adev)->powerplay.pp_funcs->set_fan_control_mode((adev)->powerplay.pp_handle, (m)) : \
-	      (adev)->pm.funcs->set_fan_control_mode((adev), (m))
+	      (adev)->pm.funcs->set_fan_control_mode((adev), (m)))
 
 #define amdgpu_dpm_get_fan_control_mode(adev) \
-	(adev)->pp_enabled ?						\
+	((adev)->pp_enabled ?						\
 	      (adev)->powerplay.pp_funcs->get_fan_control_mode((adev)->powerplay.pp_handle) : \
-	      (adev)->pm.funcs->get_fan_control_mode((adev))
+	      (adev)->pm.funcs->get_fan_control_mode((adev)))
 
 #define amdgpu_dpm_set_fan_speed_percent(adev, s) \
-	(adev)->pp_enabled ?						\
+	((adev)->pp_enabled ?						\
 	      (adev)->powerplay.pp_funcs->set_fan_speed_percent((adev)->powerplay.pp_handle, (s)) : \
-	      (adev)->pm.funcs->set_fan_speed_percent((adev), (s))
+	      (adev)->pm.funcs->set_fan_speed_percent((adev), (s)))
 
 #define amdgpu_dpm_get_fan_speed_percent(adev, s) \
-	(adev)->pp_enabled ?						\
+	((adev)->pp_enabled ?						\
 	      (adev)->powerplay.pp_funcs->get_fan_speed_percent((adev)->powerplay.pp_handle, (s)) : \
-	      (adev)->pm.funcs->get_fan_speed_percent((adev), (s))
+	      (adev)->pm.funcs->get_fan_speed_percent((adev), (s)))
 
 #define amdgpu_dpm_get_sclk(adev, l) \
-	(adev)->pp_enabled ?						\
+	((adev)->pp_enabled ?						\
 	      (adev)->powerplay.pp_funcs->get_sclk((adev)->powerplay.pp_handle, (l)) : \
-		(adev)->pm.funcs->get_sclk((adev), (l))
+		(adev)->pm.funcs->get_sclk((adev), (l)))
 
 #define amdgpu_dpm_get_mclk(adev, l)  \
-	(adev)->pp_enabled ?						\
+	((adev)->pp_enabled ?						\
 	      (adev)->powerplay.pp_funcs->get_mclk((adev)->powerplay.pp_handle, (l)) : \
-	      (adev)->pm.funcs->get_mclk((adev), (l))
+	      (adev)->pm.funcs->get_mclk((adev), (l)))
 
 
 #define amdgpu_dpm_force_performance_level(adev, l) \
-	(adev)->pp_enabled ?						\
+	((adev)->pp_enabled ?						\
 	      (adev)->powerplay.pp_funcs->force_performance_level((adev)->powerplay.pp_handle, (l)) : \
-	      (adev)->pm.funcs->force_performance_level((adev), (l))
+	      (adev)->pm.funcs->force_performance_level((adev), (l)))
 
 #define amdgpu_dpm_powergate_uvd(adev, g) \
-	(adev)->pp_enabled ?						\
+	((adev)->pp_enabled ?						\
 	      (adev)->powerplay.pp_funcs->powergate_uvd((adev)->powerplay.pp_handle, (g)) : \
-	      (adev)->pm.funcs->powergate_uvd((adev), (g))
+	      (adev)->pm.funcs->powergate_uvd((adev), (g)))
 
 #define amdgpu_dpm_powergate_vce(adev, g) \
-	(adev)->pp_enabled ?						\
+	((adev)->pp_enabled ?						\
 	      (adev)->powerplay.pp_funcs->powergate_vce((adev)->powerplay.pp_handle, (g)) : \
-	      (adev)->pm.funcs->powergate_vce((adev), (g))
+	      (adev)->pm.funcs->powergate_vce((adev), (g)))
 
 #define amdgpu_dpm_debugfs_print_current_performance_level(adev, m) \
-	(adev)->pp_enabled ?						\
+	((adev)->pp_enabled ?						\
 	      (adev)->powerplay.pp_funcs->print_current_performance_level((adev)->powerplay.pp_handle, (m)) : \
-	      (adev)->pm.funcs->debugfs_print_current_performance_level((adev), (m))
+	      (adev)->pm.funcs->debugfs_print_current_performance_level((adev), (m)))
 
 #define amdgpu_dpm_get_current_power_state(adev) \
 	(adev)->powerplay.pp_funcs->get_current_power_state((adev)->powerplay.pp_handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 6f89f8e..b882e81 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -478,9 +478,9 @@
 	struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
 	unsigned i;
 
-	amdgpu_vm_move_pt_bos_in_lru(parser->adev, &fpriv->vm);
-
 	if (!error) {
+		amdgpu_vm_move_pt_bos_in_lru(parser->adev, &fpriv->vm);
+
 		/* Sort the buffer list from the smallest to largest buffer,
 		 * which affects the order of buffers in the LRU list.
 		 * This assures that the smallest buffers are added first
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
index cfb6caa..9191467 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
@@ -333,6 +333,10 @@
 	if (!adev->mode_info.mode_config_initialized)
 		return 0;
 
+	/* don't init fbdev if there are no connectors */
+	if (list_empty(&adev->ddev->mode_config.connector_list))
+		return 0;
+
 	/* select 8 bpp console on low vram cards */
 	if (adev->mc.real_vram_size <= (32*1024*1024))
 		bpp_sel = 8;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index c3ce103..a2a16ac 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -399,7 +399,8 @@
 		}
 		if (fpfn > bo->placements[i].fpfn)
 			bo->placements[i].fpfn = fpfn;
-		if (lpfn && lpfn < bo->placements[i].lpfn)
+		if (!bo->placements[i].lpfn ||
+		    (lpfn && lpfn < bo->placements[i].lpfn))
 			bo->placements[i].lpfn = lpfn;
 		bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT;
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
index 5ee9a06..b9d0d55 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
@@ -99,13 +99,24 @@
 
 #ifdef CONFIG_DRM_AMD_POWERPLAY
 	switch (adev->asic_type) {
-		case CHIP_TONGA:
-		case CHIP_FIJI:
-			adev->pp_enabled = (amdgpu_powerplay > 0) ? true : false;
-			break;
-		default:
-			adev->pp_enabled = (amdgpu_powerplay > 0) ? true : false;
-			break;
+	case CHIP_TONGA:
+	case CHIP_FIJI:
+		adev->pp_enabled = (amdgpu_powerplay == 0) ? false : true;
+		break;
+	case CHIP_CARRIZO:
+	case CHIP_STONEY:
+		adev->pp_enabled = (amdgpu_powerplay > 0) ? true : false;
+		break;
+	/* These chips don't have powerplay implemenations */
+	case CHIP_BONAIRE:
+	case CHIP_HAWAII:
+	case CHIP_KABINI:
+	case CHIP_MULLINS:
+	case CHIP_KAVERI:
+	case CHIP_TOPAZ:
+	default:
+		adev->pp_enabled = false;
+		break;
 	}
 #else
 	adev->pp_enabled = false;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 78e9b0f..d1f234d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -487,7 +487,7 @@
 	seq_printf(m, "rptr: 0x%08x [%5d]\n",
 		   rptr, rptr);
 
-	rptr_next = ~0;
+	rptr_next = le32_to_cpu(*ring->next_rptr_cpu_addr);
 
 	seq_printf(m, "driver's copy of the wptr: 0x%08x [%5d]\n",
 		   ring->wptr, ring->wptr);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index aefc668..9599f75 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1282,7 +1282,7 @@
 {
 	const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
 		AMDGPU_VM_PTE_COUNT * 8);
-	unsigned pd_size, pd_entries, pts_size;
+	unsigned pd_size, pd_entries;
 	int i, r;
 
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
@@ -1300,8 +1300,7 @@
 	pd_entries = amdgpu_vm_num_pdes(adev);
 
 	/* allocate page table array */
-	pts_size = pd_entries * sizeof(struct amdgpu_vm_pt);
-	vm->page_tables = kzalloc(pts_size, GFP_KERNEL);
+	vm->page_tables = drm_calloc_large(pd_entries, sizeof(struct amdgpu_vm_pt));
 	if (vm->page_tables == NULL) {
 		DRM_ERROR("Cannot allocate memory for page table array\n");
 		return -ENOMEM;
@@ -1361,7 +1360,7 @@
 
 	for (i = 0; i < amdgpu_vm_num_pdes(adev); i++)
 		amdgpu_bo_unref(&vm->page_tables[i].entry.robj);
-	kfree(vm->page_tables);
+	drm_free_large(vm->page_tables);
 
 	amdgpu_bo_unref(&vm->page_directory);
 	fence_put(vm->page_directory_fence);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 13235d8..95c0cdf 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -4186,7 +4186,18 @@
 		gfx_v8_0_cp_gfx_enable(adev, false);
 
 		/* Disable MEC parsing/prefetching */
-		/* XXX todo */
+		gfx_v8_0_cp_compute_enable(adev, false);
+
+		if (grbm_soft_reset || srbm_soft_reset) {
+			tmp = RREG32(mmGMCON_DEBUG);
+			tmp = REG_SET_FIELD(tmp,
+					    GMCON_DEBUG, GFX_STALL, 1);
+			tmp = REG_SET_FIELD(tmp,
+					    GMCON_DEBUG, GFX_CLEAR, 1);
+			WREG32(mmGMCON_DEBUG, tmp);
+
+			udelay(50);
+		}
 
 		if (grbm_soft_reset) {
 			tmp = RREG32(mmGRBM_SOFT_RESET);
@@ -4215,6 +4226,16 @@
 			WREG32(mmSRBM_SOFT_RESET, tmp);
 			tmp = RREG32(mmSRBM_SOFT_RESET);
 		}
+
+		if (grbm_soft_reset || srbm_soft_reset) {
+			tmp = RREG32(mmGMCON_DEBUG);
+			tmp = REG_SET_FIELD(tmp,
+					    GMCON_DEBUG, GFX_STALL, 0);
+			tmp = REG_SET_FIELD(tmp,
+					    GMCON_DEBUG, GFX_CLEAR, 0);
+			WREG32(mmGMCON_DEBUG, tmp);
+		}
+
 		/* Wait a little for things to settle down */
 		udelay(50);
 		gfx_v8_0_print_status((void *)adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_dpm.c b/drivers/gpu/drm/amd/amdgpu/tonga_dpm.c
index f4a1346..0497784 100644
--- a/drivers/gpu/drm/amd/amdgpu/tonga_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/tonga_dpm.c
@@ -122,25 +122,12 @@
 
 static int tonga_dpm_suspend(void *handle)
 {
-	return 0;
+	return tonga_dpm_hw_fini(handle);
 }
 
 static int tonga_dpm_resume(void *handle)
 {
-	int ret;
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
-	mutex_lock(&adev->pm.mutex);
-
-	ret = tonga_smu_start(adev);
-	if (ret) {
-		DRM_ERROR("SMU start failed\n");
-		goto fail;
-	}
-
-fail:
-	mutex_unlock(&adev->pm.mutex);
-	return ret;
+	return tonga_dpm_hw_init(handle);
 }
 
 static int tonga_dpm_set_clockgating_state(void *handle,
diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
index 8f5d5ed..aa67244 100644
--- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
+++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
@@ -64,6 +64,11 @@
 	if (ret == 0)
 		ret = hwmgr->hwmgr_func->backend_init(hwmgr);
 
+	if (ret)
+		printk("amdgpu: powerplay initialization failed\n");
+	else
+		printk("amdgpu: powerplay initialized\n");
+
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c
index 873a8d2..ec222c6 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c
@@ -272,6 +272,9 @@
 				UCODE_ID_CP_MEC_JT1_MASK |
 				UCODE_ID_CP_MEC_JT2_MASK;
 
+	if (smumgr->chip_id == CHIP_STONEY)
+		fw_to_check &= ~(UCODE_ID_SDMA1_MASK | UCODE_ID_CP_MEC_JT2_MASK);
+
 	cz_request_smu_load_fw(smumgr);
 	cz_check_fw_load_finish(smumgr, fw_to_check);
 
@@ -282,7 +285,7 @@
 	return ret;
 }
 
-static uint8_t cz_translate_firmware_enum_to_arg(
+static uint8_t cz_translate_firmware_enum_to_arg(struct pp_smumgr *smumgr,
 			enum cz_scratch_entry firmware_enum)
 {
 	uint8_t ret = 0;
@@ -292,7 +295,10 @@
 		ret = UCODE_ID_SDMA0;
 		break;
 	case CZ_SCRATCH_ENTRY_UCODE_ID_SDMA1:
-		ret = UCODE_ID_SDMA1;
+		if (smumgr->chip_id == CHIP_STONEY)
+			ret = UCODE_ID_SDMA0;
+		else
+			ret = UCODE_ID_SDMA1;
 		break;
 	case CZ_SCRATCH_ENTRY_UCODE_ID_CP_CE:
 		ret = UCODE_ID_CP_CE;
@@ -307,7 +313,10 @@
 		ret = UCODE_ID_CP_MEC_JT1;
 		break;
 	case CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT2:
-		ret = UCODE_ID_CP_MEC_JT2;
+		if (smumgr->chip_id == CHIP_STONEY)
+			ret = UCODE_ID_CP_MEC_JT1;
+		else
+			ret = UCODE_ID_CP_MEC_JT2;
 		break;
 	case CZ_SCRATCH_ENTRY_UCODE_ID_GMCON_RENG:
 		ret = UCODE_ID_GMCON_RENG;
@@ -396,7 +405,7 @@
 	struct SMU_Task *task = &toc->tasks[cz_smu->toc_entry_used_count++];
 
 	task->type = type;
-	task->arg = cz_translate_firmware_enum_to_arg(fw_enum);
+	task->arg = cz_translate_firmware_enum_to_arg(smumgr, fw_enum);
 	task->next = is_last ? END_OF_TASK_LIST : cz_smu->toc_entry_used_count;
 
 	for (i = 0; i < cz_smu->scratch_buffer_length; i++)
@@ -433,7 +442,7 @@
 	struct SMU_Task *task = &toc->tasks[cz_smu->toc_entry_used_count++];
 
 	task->type = TASK_TYPE_UCODE_LOAD;
-	task->arg = cz_translate_firmware_enum_to_arg(fw_enum);
+	task->arg = cz_translate_firmware_enum_to_arg(smumgr, fw_enum);
 	task->next = is_last ? END_OF_TASK_LIST : cz_smu->toc_entry_used_count;
 
 	for (i = 0; i < cz_smu->driver_buffer_length; i++)
@@ -509,8 +518,14 @@
 				CZ_SCRATCH_ENTRY_UCODE_ID_CP_ME, false);
 	cz_smu_populate_single_ucode_load_task(smumgr,
 				CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT1, false);
-	cz_smu_populate_single_ucode_load_task(smumgr,
+
+	if (smumgr->chip_id == CHIP_STONEY)
+		cz_smu_populate_single_ucode_load_task(smumgr,
+				CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT1, false);
+	else
+		cz_smu_populate_single_ucode_load_task(smumgr,
 				CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT2, false);
+
 	cz_smu_populate_single_ucode_load_task(smumgr,
 				CZ_SCRATCH_ENTRY_UCODE_ID_RLC_G, false);
 
@@ -551,7 +566,11 @@
 
 	cz_smu_populate_single_ucode_load_task(smumgr,
 				CZ_SCRATCH_ENTRY_UCODE_ID_SDMA0, false);
-	cz_smu_populate_single_ucode_load_task(smumgr,
+	if (smumgr->chip_id == CHIP_STONEY)
+		cz_smu_populate_single_ucode_load_task(smumgr,
+				CZ_SCRATCH_ENTRY_UCODE_ID_SDMA0, false);
+	else
+		cz_smu_populate_single_ucode_load_task(smumgr,
 				CZ_SCRATCH_ENTRY_UCODE_ID_SDMA1, false);
 	cz_smu_populate_single_ucode_load_task(smumgr,
 				CZ_SCRATCH_ENTRY_UCODE_ID_CP_CE, false);
@@ -561,7 +580,11 @@
 				CZ_SCRATCH_ENTRY_UCODE_ID_CP_ME, false);
 	cz_smu_populate_single_ucode_load_task(smumgr,
 				CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT1, false);
-	cz_smu_populate_single_ucode_load_task(smumgr,
+	if (smumgr->chip_id == CHIP_STONEY)
+		cz_smu_populate_single_ucode_load_task(smumgr,
+				CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT1, false);
+	else
+		cz_smu_populate_single_ucode_load_task(smumgr,
 				CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT2, false);
 	cz_smu_populate_single_ucode_load_task(smumgr,
 				CZ_SCRATCH_ENTRY_UCODE_ID_RLC_G, true);
@@ -618,7 +641,7 @@
 
 	for (i = 0; i < sizeof(firmware_list)/sizeof(*firmware_list); i++) {
 
-		firmware_type = cz_translate_firmware_enum_to_arg(
+		firmware_type = cz_translate_firmware_enum_to_arg(smumgr,
 					firmware_list[i]);
 
 		ucode_id = cz_convert_fw_type_to_cgs(firmware_type);
diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
index 57cccd6..7c52306 100644
--- a/drivers/gpu/drm/drm_atomic_helper.c
+++ b/drivers/gpu/drm/drm_atomic_helper.c
@@ -946,9 +946,23 @@
 	}
 }
 
-static bool framebuffer_changed(struct drm_device *dev,
-				struct drm_atomic_state *old_state,
-				struct drm_crtc *crtc)
+/**
+ * drm_atomic_helper_framebuffer_changed - check if framebuffer has changed
+ * @dev: DRM device
+ * @old_state: atomic state object with old state structures
+ * @crtc: DRM crtc
+ *
+ * Checks whether the framebuffer used for this CRTC changes as a result of
+ * the atomic update.  This is useful for drivers which cannot use
+ * drm_atomic_helper_wait_for_vblanks() and need to reimplement its
+ * functionality.
+ *
+ * Returns:
+ * true if the framebuffer changed.
+ */
+bool drm_atomic_helper_framebuffer_changed(struct drm_device *dev,
+					   struct drm_atomic_state *old_state,
+					   struct drm_crtc *crtc)
 {
 	struct drm_plane *plane;
 	struct drm_plane_state *old_plane_state;
@@ -965,6 +979,7 @@
 
 	return false;
 }
+EXPORT_SYMBOL(drm_atomic_helper_framebuffer_changed);
 
 /**
  * drm_atomic_helper_wait_for_vblanks - wait for vblank on crtcs
@@ -999,7 +1014,8 @@
 		if (old_state->legacy_cursor_update)
 			continue;
 
-		if (!framebuffer_changed(dev, old_state, crtc))
+		if (!drm_atomic_helper_framebuffer_changed(dev,
+				old_state, crtc))
 			continue;
 
 		ret = drm_crtc_vblank_get(crtc);
diff --git a/drivers/gpu/drm/etnaviv/common.xml.h b/drivers/gpu/drm/etnaviv/common.xml.h
index 9e585d5..e881482 100644
--- a/drivers/gpu/drm/etnaviv/common.xml.h
+++ b/drivers/gpu/drm/etnaviv/common.xml.h
@@ -8,8 +8,8 @@
 git clone git://0x04.net/rules-ng-ng
 
 The rules-ng-ng source files this header was generated from are:
-- state_vg.xml (   5973 bytes, from 2015-03-25 11:26:01)
-- common.xml   (  18437 bytes, from 2015-03-25 11:27:41)
+- state_hi.xml (  24309 bytes, from 2015-12-12 09:02:53)
+- common.xml   (  18379 bytes, from 2015-12-12 09:02:53)
 
 Copyright (C) 2015
 */
@@ -30,15 +30,19 @@
 #define ENDIAN_MODE_NO_SWAP					0x00000000
 #define ENDIAN_MODE_SWAP_16					0x00000001
 #define ENDIAN_MODE_SWAP_32					0x00000002
+#define chipModel_GC200						0x00000200
 #define chipModel_GC300						0x00000300
 #define chipModel_GC320						0x00000320
+#define chipModel_GC328						0x00000328
 #define chipModel_GC350						0x00000350
 #define chipModel_GC355						0x00000355
 #define chipModel_GC400						0x00000400
 #define chipModel_GC410						0x00000410
 #define chipModel_GC420						0x00000420
+#define chipModel_GC428						0x00000428
 #define chipModel_GC450						0x00000450
 #define chipModel_GC500						0x00000500
+#define chipModel_GC520						0x00000520
 #define chipModel_GC530						0x00000530
 #define chipModel_GC600						0x00000600
 #define chipModel_GC700						0x00000700
@@ -46,9 +50,16 @@
 #define chipModel_GC860						0x00000860
 #define chipModel_GC880						0x00000880
 #define chipModel_GC1000					0x00001000
+#define chipModel_GC1500					0x00001500
 #define chipModel_GC2000					0x00002000
 #define chipModel_GC2100					0x00002100
+#define chipModel_GC2200					0x00002200
+#define chipModel_GC2500					0x00002500
+#define chipModel_GC3000					0x00003000
 #define chipModel_GC4000					0x00004000
+#define chipModel_GC5000					0x00005000
+#define chipModel_GC5200					0x00005200
+#define chipModel_GC6400					0x00006400
 #define RGBA_BITS_R						0x00000001
 #define RGBA_BITS_G						0x00000002
 #define RGBA_BITS_B						0x00000004
@@ -160,7 +171,7 @@
 #define chipMinorFeatures2_UNK8					0x00000100
 #define chipMinorFeatures2_UNK9					0x00000200
 #define chipMinorFeatures2_UNK10				0x00000400
-#define chipMinorFeatures2_SAMPLERBASE_16			0x00000800
+#define chipMinorFeatures2_HALTI1				0x00000800
 #define chipMinorFeatures2_UNK12				0x00001000
 #define chipMinorFeatures2_UNK13				0x00002000
 #define chipMinorFeatures2_UNK14				0x00004000
@@ -189,7 +200,7 @@
 #define chipMinorFeatures3_UNK5					0x00000020
 #define chipMinorFeatures3_UNK6					0x00000040
 #define chipMinorFeatures3_UNK7					0x00000080
-#define chipMinorFeatures3_UNK8					0x00000100
+#define chipMinorFeatures3_FAST_MSAA				0x00000100
 #define chipMinorFeatures3_UNK9					0x00000200
 #define chipMinorFeatures3_BUG_FIXES10				0x00000400
 #define chipMinorFeatures3_UNK11				0x00000800
@@ -199,7 +210,7 @@
 #define chipMinorFeatures3_UNK15				0x00008000
 #define chipMinorFeatures3_UNK16				0x00010000
 #define chipMinorFeatures3_UNK17				0x00020000
-#define chipMinorFeatures3_UNK18				0x00040000
+#define chipMinorFeatures3_ACE					0x00040000
 #define chipMinorFeatures3_UNK19				0x00080000
 #define chipMinorFeatures3_UNK20				0x00100000
 #define chipMinorFeatures3_UNK21				0x00200000
@@ -207,7 +218,7 @@
 #define chipMinorFeatures3_UNK23				0x00800000
 #define chipMinorFeatures3_UNK24				0x01000000
 #define chipMinorFeatures3_UNK25				0x02000000
-#define chipMinorFeatures3_UNK26				0x04000000
+#define chipMinorFeatures3_NEW_HZ				0x04000000
 #define chipMinorFeatures3_UNK27				0x08000000
 #define chipMinorFeatures3_UNK28				0x10000000
 #define chipMinorFeatures3_UNK29				0x20000000
@@ -229,9 +240,9 @@
 #define chipMinorFeatures4_UNK13				0x00002000
 #define chipMinorFeatures4_UNK14				0x00004000
 #define chipMinorFeatures4_UNK15				0x00008000
-#define chipMinorFeatures4_UNK16				0x00010000
+#define chipMinorFeatures4_HALTI2				0x00010000
 #define chipMinorFeatures4_UNK17				0x00020000
-#define chipMinorFeatures4_UNK18				0x00040000
+#define chipMinorFeatures4_SMALL_MSAA				0x00040000
 #define chipMinorFeatures4_UNK19				0x00080000
 #define chipMinorFeatures4_UNK20				0x00100000
 #define chipMinorFeatures4_UNK21				0x00200000
@@ -245,5 +256,37 @@
 #define chipMinorFeatures4_UNK29				0x20000000
 #define chipMinorFeatures4_UNK30				0x40000000
 #define chipMinorFeatures4_UNK31				0x80000000
+#define chipMinorFeatures5_UNK0					0x00000001
+#define chipMinorFeatures5_UNK1					0x00000002
+#define chipMinorFeatures5_UNK2					0x00000004
+#define chipMinorFeatures5_UNK3					0x00000008
+#define chipMinorFeatures5_UNK4					0x00000010
+#define chipMinorFeatures5_UNK5					0x00000020
+#define chipMinorFeatures5_UNK6					0x00000040
+#define chipMinorFeatures5_UNK7					0x00000080
+#define chipMinorFeatures5_UNK8					0x00000100
+#define chipMinorFeatures5_HALTI3				0x00000200
+#define chipMinorFeatures5_UNK10				0x00000400
+#define chipMinorFeatures5_UNK11				0x00000800
+#define chipMinorFeatures5_UNK12				0x00001000
+#define chipMinorFeatures5_UNK13				0x00002000
+#define chipMinorFeatures5_UNK14				0x00004000
+#define chipMinorFeatures5_UNK15				0x00008000
+#define chipMinorFeatures5_UNK16				0x00010000
+#define chipMinorFeatures5_UNK17				0x00020000
+#define chipMinorFeatures5_UNK18				0x00040000
+#define chipMinorFeatures5_UNK19				0x00080000
+#define chipMinorFeatures5_UNK20				0x00100000
+#define chipMinorFeatures5_UNK21				0x00200000
+#define chipMinorFeatures5_UNK22				0x00400000
+#define chipMinorFeatures5_UNK23				0x00800000
+#define chipMinorFeatures5_UNK24				0x01000000
+#define chipMinorFeatures5_UNK25				0x02000000
+#define chipMinorFeatures5_UNK26				0x04000000
+#define chipMinorFeatures5_UNK27				0x08000000
+#define chipMinorFeatures5_UNK28				0x10000000
+#define chipMinorFeatures5_UNK29				0x20000000
+#define chipMinorFeatures5_UNK30				0x40000000
+#define chipMinorFeatures5_UNK31				0x80000000
 
 #endif /* COMMON_XML */
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
index 5c89ebb..e885898 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
@@ -668,7 +668,6 @@
 	.probe      = etnaviv_pdev_probe,
 	.remove     = etnaviv_pdev_remove,
 	.driver     = {
-		.owner  = THIS_MODULE,
 		.name   = "etnaviv",
 		.of_match_table = dt_match,
 	},
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.h b/drivers/gpu/drm/etnaviv/etnaviv_drv.h
index d6bd438..1cd6046 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_drv.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.h
@@ -85,7 +85,7 @@
 	struct dma_buf_attachment *attach, struct sg_table *sg);
 int etnaviv_gem_prime_pin(struct drm_gem_object *obj);
 void etnaviv_gem_prime_unpin(struct drm_gem_object *obj);
-void *etnaviv_gem_vaddr(struct drm_gem_object *obj);
+void *etnaviv_gem_vmap(struct drm_gem_object *obj);
 int etnaviv_gem_cpu_prep(struct drm_gem_object *obj, u32 op,
 		struct timespec *timeout);
 int etnaviv_gem_cpu_fini(struct drm_gem_object *obj);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_dump.c b/drivers/gpu/drm/etnaviv/etnaviv_dump.c
index bf8fa85..4a29eea 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_dump.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_dump.c
@@ -201,7 +201,9 @@
 
 		obj = vram->object;
 
+		mutex_lock(&obj->lock);
 		pages = etnaviv_gem_get_pages(obj);
+		mutex_unlock(&obj->lock);
 		if (pages) {
 			int j;
 
@@ -213,8 +215,8 @@
 
 		iter.hdr->iova = cpu_to_le64(vram->iova);
 
-		vaddr = etnaviv_gem_vaddr(&obj->base);
-		if (vaddr && !IS_ERR(vaddr))
+		vaddr = etnaviv_gem_vmap(&obj->base);
+		if (vaddr)
 			memcpy(iter.data, vaddr, obj->base.size);
 
 		etnaviv_core_dump_header(&iter, ETDUMP_BUF_BO, iter.data +
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
index 9f77c3b..4b519e4 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
@@ -353,25 +353,39 @@
 	drm_gem_object_unreference_unlocked(obj);
 }
 
-void *etnaviv_gem_vaddr(struct drm_gem_object *obj)
+void *etnaviv_gem_vmap(struct drm_gem_object *obj)
 {
 	struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj);
 
+	if (etnaviv_obj->vaddr)
+		return etnaviv_obj->vaddr;
+
 	mutex_lock(&etnaviv_obj->lock);
-	if (!etnaviv_obj->vaddr) {
-		struct page **pages = etnaviv_gem_get_pages(etnaviv_obj);
-
-		if (IS_ERR(pages))
-			return ERR_CAST(pages);
-
-		etnaviv_obj->vaddr = vmap(pages, obj->size >> PAGE_SHIFT,
-				VM_MAP, pgprot_writecombine(PAGE_KERNEL));
-	}
+	/*
+	 * Need to check again, as we might have raced with another thread
+	 * while waiting for the mutex.
+	 */
+	if (!etnaviv_obj->vaddr)
+		etnaviv_obj->vaddr = etnaviv_obj->ops->vmap(etnaviv_obj);
 	mutex_unlock(&etnaviv_obj->lock);
 
 	return etnaviv_obj->vaddr;
 }
 
+static void *etnaviv_gem_vmap_impl(struct etnaviv_gem_object *obj)
+{
+	struct page **pages;
+
+	lockdep_assert_held(&obj->lock);
+
+	pages = etnaviv_gem_get_pages(obj);
+	if (IS_ERR(pages))
+		return NULL;
+
+	return vmap(pages, obj->base.size >> PAGE_SHIFT,
+			VM_MAP, pgprot_writecombine(PAGE_KERNEL));
+}
+
 static inline enum dma_data_direction etnaviv_op_to_dma_dir(u32 op)
 {
 	if (op & ETNA_PREP_READ)
@@ -522,6 +536,7 @@
 static const struct etnaviv_gem_ops etnaviv_gem_shmem_ops = {
 	.get_pages = etnaviv_gem_shmem_get_pages,
 	.release = etnaviv_gem_shmem_release,
+	.vmap = etnaviv_gem_vmap_impl,
 };
 
 void etnaviv_gem_free_object(struct drm_gem_object *obj)
@@ -866,6 +881,7 @@
 static const struct etnaviv_gem_ops etnaviv_gem_userptr_ops = {
 	.get_pages = etnaviv_gem_userptr_get_pages,
 	.release = etnaviv_gem_userptr_release,
+	.vmap = etnaviv_gem_vmap_impl,
 };
 
 int etnaviv_gem_new_userptr(struct drm_device *dev, struct drm_file *file,
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.h b/drivers/gpu/drm/etnaviv/etnaviv_gem.h
index a300b4b..ab5df81 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.h
@@ -78,6 +78,7 @@
 struct etnaviv_gem_ops {
 	int (*get_pages)(struct etnaviv_gem_object *);
 	void (*release)(struct etnaviv_gem_object *);
+	void *(*vmap)(struct etnaviv_gem_object *);
 };
 
 static inline bool is_active(struct etnaviv_gem_object *etnaviv_obj)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
index e94db4f..4e67395 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
@@ -31,7 +31,7 @@
 
 void *etnaviv_gem_prime_vmap(struct drm_gem_object *obj)
 {
-	return etnaviv_gem_vaddr(obj);
+	return etnaviv_gem_vmap(obj);
 }
 
 void etnaviv_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)
@@ -77,9 +77,17 @@
 	drm_prime_gem_destroy(&etnaviv_obj->base, etnaviv_obj->sgt);
 }
 
+static void *etnaviv_gem_prime_vmap_impl(struct etnaviv_gem_object *etnaviv_obj)
+{
+	lockdep_assert_held(&etnaviv_obj->lock);
+
+	return dma_buf_vmap(etnaviv_obj->base.import_attach->dmabuf);
+}
+
 static const struct etnaviv_gem_ops etnaviv_gem_prime_ops = {
 	/* .get_pages should never be called */
 	.release = etnaviv_gem_prime_release,
+	.vmap = etnaviv_gem_prime_vmap_impl,
 };
 
 struct drm_gem_object *etnaviv_gem_prime_import_sg_table(struct drm_device *dev,
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
index 056a72e..a33162c 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@@ -72,6 +72,14 @@
 		*value = gpu->identity.minor_features3;
 		break;
 
+	case ETNAVIV_PARAM_GPU_FEATURES_5:
+		*value = gpu->identity.minor_features4;
+		break;
+
+	case ETNAVIV_PARAM_GPU_FEATURES_6:
+		*value = gpu->identity.minor_features5;
+		break;
+
 	case ETNAVIV_PARAM_GPU_STREAM_COUNT:
 		*value = gpu->identity.stream_count;
 		break;
@@ -112,6 +120,10 @@
 		*value = gpu->identity.num_constants;
 		break;
 
+	case ETNAVIV_PARAM_GPU_NUM_VARYINGS:
+		*value = gpu->identity.varyings_count;
+		break;
+
 	default:
 		DBG("%s: invalid param: %u", dev_name(gpu->dev), param);
 		return -EINVAL;
@@ -120,46 +132,56 @@
 	return 0;
 }
 
+
+#define etnaviv_is_model_rev(gpu, mod, rev) \
+	((gpu)->identity.model == chipModel_##mod && \
+	 (gpu)->identity.revision == rev)
+#define etnaviv_field(val, field) \
+	(((val) & field##__MASK) >> field##__SHIFT)
+
 static void etnaviv_hw_specs(struct etnaviv_gpu *gpu)
 {
 	if (gpu->identity.minor_features0 &
 	    chipMinorFeatures0_MORE_MINOR_FEATURES) {
-		u32 specs[2];
+		u32 specs[4];
+		unsigned int streams;
 
 		specs[0] = gpu_read(gpu, VIVS_HI_CHIP_SPECS);
 		specs[1] = gpu_read(gpu, VIVS_HI_CHIP_SPECS_2);
+		specs[2] = gpu_read(gpu, VIVS_HI_CHIP_SPECS_3);
+		specs[3] = gpu_read(gpu, VIVS_HI_CHIP_SPECS_4);
 
-		gpu->identity.stream_count =
-			(specs[0] & VIVS_HI_CHIP_SPECS_STREAM_COUNT__MASK)
-				>> VIVS_HI_CHIP_SPECS_STREAM_COUNT__SHIFT;
-		gpu->identity.register_max =
-			(specs[0] & VIVS_HI_CHIP_SPECS_REGISTER_MAX__MASK)
-				>> VIVS_HI_CHIP_SPECS_REGISTER_MAX__SHIFT;
-		gpu->identity.thread_count =
-			(specs[0] & VIVS_HI_CHIP_SPECS_THREAD_COUNT__MASK)
-				>> VIVS_HI_CHIP_SPECS_THREAD_COUNT__SHIFT;
-		gpu->identity.vertex_cache_size =
-			(specs[0] & VIVS_HI_CHIP_SPECS_VERTEX_CACHE_SIZE__MASK)
-				>> VIVS_HI_CHIP_SPECS_VERTEX_CACHE_SIZE__SHIFT;
-		gpu->identity.shader_core_count =
-			(specs[0] & VIVS_HI_CHIP_SPECS_SHADER_CORE_COUNT__MASK)
-				>> VIVS_HI_CHIP_SPECS_SHADER_CORE_COUNT__SHIFT;
-		gpu->identity.pixel_pipes =
-			(specs[0] & VIVS_HI_CHIP_SPECS_PIXEL_PIPES__MASK)
-				>> VIVS_HI_CHIP_SPECS_PIXEL_PIPES__SHIFT;
+		gpu->identity.stream_count = etnaviv_field(specs[0],
+					VIVS_HI_CHIP_SPECS_STREAM_COUNT);
+		gpu->identity.register_max = etnaviv_field(specs[0],
+					VIVS_HI_CHIP_SPECS_REGISTER_MAX);
+		gpu->identity.thread_count = etnaviv_field(specs[0],
+					VIVS_HI_CHIP_SPECS_THREAD_COUNT);
+		gpu->identity.vertex_cache_size = etnaviv_field(specs[0],
+					VIVS_HI_CHIP_SPECS_VERTEX_CACHE_SIZE);
+		gpu->identity.shader_core_count = etnaviv_field(specs[0],
+					VIVS_HI_CHIP_SPECS_SHADER_CORE_COUNT);
+		gpu->identity.pixel_pipes = etnaviv_field(specs[0],
+					VIVS_HI_CHIP_SPECS_PIXEL_PIPES);
 		gpu->identity.vertex_output_buffer_size =
-			(specs[0] & VIVS_HI_CHIP_SPECS_VERTEX_OUTPUT_BUFFER_SIZE__MASK)
-				>> VIVS_HI_CHIP_SPECS_VERTEX_OUTPUT_BUFFER_SIZE__SHIFT;
+			etnaviv_field(specs[0],
+				VIVS_HI_CHIP_SPECS_VERTEX_OUTPUT_BUFFER_SIZE);
 
-		gpu->identity.buffer_size =
-			(specs[1] & VIVS_HI_CHIP_SPECS_2_BUFFER_SIZE__MASK)
-				>> VIVS_HI_CHIP_SPECS_2_BUFFER_SIZE__SHIFT;
-		gpu->identity.instruction_count =
-			(specs[1] & VIVS_HI_CHIP_SPECS_2_INSTRUCTION_COUNT__MASK)
-				>> VIVS_HI_CHIP_SPECS_2_INSTRUCTION_COUNT__SHIFT;
-		gpu->identity.num_constants =
-			(specs[1] & VIVS_HI_CHIP_SPECS_2_NUM_CONSTANTS__MASK)
-				>> VIVS_HI_CHIP_SPECS_2_NUM_CONSTANTS__SHIFT;
+		gpu->identity.buffer_size = etnaviv_field(specs[1],
+					VIVS_HI_CHIP_SPECS_2_BUFFER_SIZE);
+		gpu->identity.instruction_count = etnaviv_field(specs[1],
+					VIVS_HI_CHIP_SPECS_2_INSTRUCTION_COUNT);
+		gpu->identity.num_constants = etnaviv_field(specs[1],
+					VIVS_HI_CHIP_SPECS_2_NUM_CONSTANTS);
+
+		gpu->identity.varyings_count = etnaviv_field(specs[2],
+					VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT);
+
+		/* This overrides the value from older register if non-zero */
+		streams = etnaviv_field(specs[3],
+					VIVS_HI_CHIP_SPECS_4_STREAM_COUNT);
+		if (streams)
+			gpu->identity.stream_count = streams;
 	}
 
 	/* Fill in the stream count if not specified */
@@ -173,7 +195,7 @@
 	/* Convert the register max value */
 	if (gpu->identity.register_max)
 		gpu->identity.register_max = 1 << gpu->identity.register_max;
-	else if (gpu->identity.model == 0x0400)
+	else if (gpu->identity.model == chipModel_GC400)
 		gpu->identity.register_max = 32;
 	else
 		gpu->identity.register_max = 64;
@@ -181,10 +203,10 @@
 	/* Convert thread count */
 	if (gpu->identity.thread_count)
 		gpu->identity.thread_count = 1 << gpu->identity.thread_count;
-	else if (gpu->identity.model == 0x0400)
+	else if (gpu->identity.model == chipModel_GC400)
 		gpu->identity.thread_count = 64;
-	else if (gpu->identity.model == 0x0500 ||
-		 gpu->identity.model == 0x0530)
+	else if (gpu->identity.model == chipModel_GC500 ||
+		 gpu->identity.model == chipModel_GC530)
 		gpu->identity.thread_count = 128;
 	else
 		gpu->identity.thread_count = 256;
@@ -206,7 +228,7 @@
 	if (gpu->identity.vertex_output_buffer_size) {
 		gpu->identity.vertex_output_buffer_size =
 			1 << gpu->identity.vertex_output_buffer_size;
-	} else if (gpu->identity.model == 0x0400) {
+	} else if (gpu->identity.model == chipModel_GC400) {
 		if (gpu->identity.revision < 0x4000)
 			gpu->identity.vertex_output_buffer_size = 512;
 		else if (gpu->identity.revision < 0x4200)
@@ -219,9 +241,8 @@
 
 	switch (gpu->identity.instruction_count) {
 	case 0:
-		if ((gpu->identity.model == 0x2000 &&
-		     gpu->identity.revision == 0x5108) ||
-		    gpu->identity.model == 0x880)
+		if (etnaviv_is_model_rev(gpu, GC2000, 0x5108) ||
+		    gpu->identity.model == chipModel_GC880)
 			gpu->identity.instruction_count = 512;
 		else
 			gpu->identity.instruction_count = 256;
@@ -242,6 +263,30 @@
 
 	if (gpu->identity.num_constants == 0)
 		gpu->identity.num_constants = 168;
+
+	if (gpu->identity.varyings_count == 0) {
+		if (gpu->identity.minor_features1 & chipMinorFeatures1_HALTI0)
+			gpu->identity.varyings_count = 12;
+		else
+			gpu->identity.varyings_count = 8;
+	}
+
+	/*
+	 * For some cores, two varyings are consumed for position, so the
+	 * maximum varying count needs to be reduced by one.
+	 */
+	if (etnaviv_is_model_rev(gpu, GC5000, 0x5434) ||
+	    etnaviv_is_model_rev(gpu, GC4000, 0x5222) ||
+	    etnaviv_is_model_rev(gpu, GC4000, 0x5245) ||
+	    etnaviv_is_model_rev(gpu, GC4000, 0x5208) ||
+	    etnaviv_is_model_rev(gpu, GC3000, 0x5435) ||
+	    etnaviv_is_model_rev(gpu, GC2200, 0x5244) ||
+	    etnaviv_is_model_rev(gpu, GC2100, 0x5108) ||
+	    etnaviv_is_model_rev(gpu, GC2000, 0x5108) ||
+	    etnaviv_is_model_rev(gpu, GC1500, 0x5246) ||
+	    etnaviv_is_model_rev(gpu, GC880, 0x5107) ||
+	    etnaviv_is_model_rev(gpu, GC880, 0x5106))
+		gpu->identity.varyings_count -= 1;
 }
 
 static void etnaviv_hw_identify(struct etnaviv_gpu *gpu)
@@ -251,12 +296,10 @@
 	chipIdentity = gpu_read(gpu, VIVS_HI_CHIP_IDENTITY);
 
 	/* Special case for older graphic cores. */
-	if (((chipIdentity & VIVS_HI_CHIP_IDENTITY_FAMILY__MASK)
-	     >> VIVS_HI_CHIP_IDENTITY_FAMILY__SHIFT) ==  0x01) {
-		gpu->identity.model    = 0x500; /* gc500 */
-		gpu->identity.revision =
-			(chipIdentity & VIVS_HI_CHIP_IDENTITY_REVISION__MASK)
-			>> VIVS_HI_CHIP_IDENTITY_REVISION__SHIFT;
+	if (etnaviv_field(chipIdentity, VIVS_HI_CHIP_IDENTITY_FAMILY) == 0x01) {
+		gpu->identity.model    = chipModel_GC500;
+		gpu->identity.revision = etnaviv_field(chipIdentity,
+					 VIVS_HI_CHIP_IDENTITY_REVISION);
 	} else {
 
 		gpu->identity.model = gpu_read(gpu, VIVS_HI_CHIP_MODEL);
@@ -269,13 +312,12 @@
 		 * same.  Only for GC400 family.
 		 */
 		if ((gpu->identity.model & 0xff00) == 0x0400 &&
-		    gpu->identity.model != 0x0420) {
+		    gpu->identity.model != chipModel_GC420) {
 			gpu->identity.model = gpu->identity.model & 0x0400;
 		}
 
 		/* Another special case */
-		if (gpu->identity.model == 0x300 &&
-		    gpu->identity.revision == 0x2201) {
+		if (etnaviv_is_model_rev(gpu, GC300, 0x2201)) {
 			u32 chipDate = gpu_read(gpu, VIVS_HI_CHIP_DATE);
 			u32 chipTime = gpu_read(gpu, VIVS_HI_CHIP_TIME);
 
@@ -295,11 +337,13 @@
 	gpu->identity.features = gpu_read(gpu, VIVS_HI_CHIP_FEATURE);
 
 	/* Disable fast clear on GC700. */
-	if (gpu->identity.model == 0x700)
+	if (gpu->identity.model == chipModel_GC700)
 		gpu->identity.features &= ~chipFeatures_FAST_CLEAR;
 
-	if ((gpu->identity.model == 0x500 && gpu->identity.revision < 2) ||
-	    (gpu->identity.model == 0x300 && gpu->identity.revision < 0x2000)) {
+	if ((gpu->identity.model == chipModel_GC500 &&
+	     gpu->identity.revision < 2) ||
+	    (gpu->identity.model == chipModel_GC300 &&
+	     gpu->identity.revision < 0x2000)) {
 
 		/*
 		 * GC500 rev 1.x and GC300 rev < 2.0 doesn't have these
@@ -309,6 +353,8 @@
 		gpu->identity.minor_features1 = 0;
 		gpu->identity.minor_features2 = 0;
 		gpu->identity.minor_features3 = 0;
+		gpu->identity.minor_features4 = 0;
+		gpu->identity.minor_features5 = 0;
 	} else
 		gpu->identity.minor_features0 =
 				gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_0);
@@ -321,6 +367,10 @@
 				gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_2);
 		gpu->identity.minor_features3 =
 				gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_3);
+		gpu->identity.minor_features4 =
+				gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_4);
+		gpu->identity.minor_features5 =
+				gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_5);
 	}
 
 	/* GC600 idle register reports zero bits where modules aren't present */
@@ -441,10 +491,9 @@
 {
 	u16 prefetch;
 
-	if (gpu->identity.model == chipModel_GC320 &&
-	    gpu_read(gpu, VIVS_HI_CHIP_TIME) != 0x2062400 &&
-	    (gpu->identity.revision == 0x5007 ||
-	     gpu->identity.revision == 0x5220)) {
+	if ((etnaviv_is_model_rev(gpu, GC320, 0x5007) ||
+	     etnaviv_is_model_rev(gpu, GC320, 0x5220)) &&
+	    gpu_read(gpu, VIVS_HI_CHIP_TIME) != 0x2062400) {
 		u32 mc_memory_debug;
 
 		mc_memory_debug = gpu_read(gpu, VIVS_MC_DEBUG_MEMORY) & ~0xff;
@@ -466,7 +515,7 @@
 		  VIVS_HI_AXI_CONFIG_ARCACHE(2));
 
 	/* GC2000 rev 5108 needs a special bus config */
-	if (gpu->identity.model == 0x2000 && gpu->identity.revision == 0x5108) {
+	if (etnaviv_is_model_rev(gpu, GC2000, 0x5108)) {
 		u32 bus_config = gpu_read(gpu, VIVS_MC_BUS_CONFIG);
 		bus_config &= ~(VIVS_MC_BUS_CONFIG_FE_BUS_CONFIG__MASK |
 				VIVS_MC_BUS_CONFIG_TX_BUS_CONFIG__MASK);
@@ -511,8 +560,16 @@
 
 	if (gpu->identity.model == 0) {
 		dev_err(gpu->dev, "Unknown GPU model\n");
-		pm_runtime_put_autosuspend(gpu->dev);
-		return -ENXIO;
+		ret = -ENXIO;
+		goto fail;
+	}
+
+	/* Exclude VG cores with FE2.0 */
+	if (gpu->identity.features & chipFeatures_PIPE_VG &&
+	    gpu->identity.features & chipFeatures_FE20) {
+		dev_info(gpu->dev, "Ignoring GPU with VG and FE2.0\n");
+		ret = -ENXIO;
+		goto fail;
 	}
 
 	ret = etnaviv_hw_reset(gpu);
@@ -539,10 +596,9 @@
 		goto fail;
 	}
 
-	/* TODO: we will leak here memory - fix it! */
-
 	gpu->mmu = etnaviv_iommu_new(gpu, iommu, version);
 	if (!gpu->mmu) {
+		iommu_domain_free(iommu);
 		ret = -ENOMEM;
 		goto fail;
 	}
@@ -552,7 +608,7 @@
 	if (!gpu->buffer) {
 		ret = -ENOMEM;
 		dev_err(gpu->dev, "could not create command buffer\n");
-		goto fail;
+		goto destroy_iommu;
 	}
 	if (gpu->buffer->paddr - gpu->memory_base > 0x80000000) {
 		ret = -EINVAL;
@@ -582,6 +638,9 @@
 free_buffer:
 	etnaviv_gpu_cmdbuf_free(gpu->buffer);
 	gpu->buffer = NULL;
+destroy_iommu:
+	etnaviv_iommu_destroy(gpu->mmu);
+	gpu->mmu = NULL;
 fail:
 	pm_runtime_mark_last_busy(gpu->dev);
 	pm_runtime_put_autosuspend(gpu->dev);
@@ -642,6 +701,10 @@
 		   gpu->identity.minor_features2);
 	seq_printf(m, "\t minor_features3: 0x%08x\n",
 		   gpu->identity.minor_features3);
+	seq_printf(m, "\t minor_features4: 0x%08x\n",
+		   gpu->identity.minor_features4);
+	seq_printf(m, "\t minor_features5: 0x%08x\n",
+		   gpu->identity.minor_features5);
 
 	seq_puts(m, "\tspecs\n");
 	seq_printf(m, "\t stream_count:  %d\n",
@@ -664,6 +727,8 @@
 			gpu->identity.instruction_count);
 	seq_printf(m, "\t num_constants: %d\n",
 			gpu->identity.num_constants);
+	seq_printf(m, "\t varyings_count: %d\n",
+			gpu->identity.varyings_count);
 
 	seq_printf(m, "\taxi: 0x%08x\n", axi);
 	seq_printf(m, "\tidle: 0x%08x\n", idle);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
index c75d503..f233ac4 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
@@ -46,6 +46,12 @@
 	/* Supported minor feature 3 fields. */
 	u32 minor_features3;
 
+	/* Supported minor feature 4 fields. */
+	u32 minor_features4;
+
+	/* Supported minor feature 5 fields. */
+	u32 minor_features5;
+
 	/* Number of streams supported. */
 	u32 stream_count;
 
@@ -75,6 +81,9 @@
 
 	/* Buffer size */
 	u32 buffer_size;
+
+	/* Number of varyings */
+	u8 varyings_count;
 };
 
 struct etnaviv_event {
diff --git a/drivers/gpu/drm/etnaviv/state_hi.xml.h b/drivers/gpu/drm/etnaviv/state_hi.xml.h
index 0064f26..6a7de5f 100644
--- a/drivers/gpu/drm/etnaviv/state_hi.xml.h
+++ b/drivers/gpu/drm/etnaviv/state_hi.xml.h
@@ -8,8 +8,8 @@
 git clone git://0x04.net/rules-ng-ng
 
 The rules-ng-ng source files this header was generated from are:
-- state_hi.xml (  23420 bytes, from 2015-03-25 11:47:21)
-- common.xml   (  18437 bytes, from 2015-03-25 11:27:41)
+- state_hi.xml (  24309 bytes, from 2015-12-12 09:02:53)
+- common.xml   (  18437 bytes, from 2015-12-12 09:02:53)
 
 Copyright (C) 2015
 */
@@ -182,8 +182,25 @@
 
 #define VIVS_HI_CHIP_MINOR_FEATURE_3				0x00000088
 
+#define VIVS_HI_CHIP_SPECS_3					0x0000008c
+#define VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT__MASK		0x000001f0
+#define VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT__SHIFT		4
+#define VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT(x)			(((x) << VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT__SHIFT) & VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT__MASK)
+#define VIVS_HI_CHIP_SPECS_3_GPU_CORE_COUNT__MASK		0x00000007
+#define VIVS_HI_CHIP_SPECS_3_GPU_CORE_COUNT__SHIFT		0
+#define VIVS_HI_CHIP_SPECS_3_GPU_CORE_COUNT(x)			(((x) << VIVS_HI_CHIP_SPECS_3_GPU_CORE_COUNT__SHIFT) & VIVS_HI_CHIP_SPECS_3_GPU_CORE_COUNT__MASK)
+
 #define VIVS_HI_CHIP_MINOR_FEATURE_4				0x00000094
 
+#define VIVS_HI_CHIP_SPECS_4					0x0000009c
+#define VIVS_HI_CHIP_SPECS_4_STREAM_COUNT__MASK			0x0001f000
+#define VIVS_HI_CHIP_SPECS_4_STREAM_COUNT__SHIFT		12
+#define VIVS_HI_CHIP_SPECS_4_STREAM_COUNT(x)			(((x) << VIVS_HI_CHIP_SPECS_4_STREAM_COUNT__SHIFT) & VIVS_HI_CHIP_SPECS_4_STREAM_COUNT__MASK)
+
+#define VIVS_HI_CHIP_MINOR_FEATURE_5				0x000000a0
+
+#define VIVS_HI_CHIP_PRODUCT_ID					0x000000a8
+
 #define VIVS_PM							0x00000000
 
 #define VIVS_PM_POWER_CONTROLS					0x00000100
@@ -206,6 +223,11 @@
 #define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_FE		0x00000001
 #define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_DE		0x00000002
 #define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_PE		0x00000004
+#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_SH		0x00000008
+#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_PA		0x00000010
+#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_SE		0x00000020
+#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_RA		0x00000040
+#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_TX		0x00000080
 
 #define VIVS_PM_PULSE_EATER					0x0000010c
 
diff --git a/drivers/gpu/drm/radeon/dce6_afmt.c b/drivers/gpu/drm/radeon/dce6_afmt.c
index 6bfc463..367a916 100644
--- a/drivers/gpu/drm/radeon/dce6_afmt.c
+++ b/drivers/gpu/drm/radeon/dce6_afmt.c
@@ -304,18 +304,10 @@
 		unsigned int div = (RREG32(DENTIST_DISPCLK_CNTL) &
 			DENTIST_DPREFCLK_WDIVIDER_MASK) >>
 			DENTIST_DPREFCLK_WDIVIDER_SHIFT;
-
-		if (div < 128 && div >= 96)
-			div -= 64;
-		else if (div >= 64)
-			div = div / 2 - 16;
-		else if (div >= 8)
-			div /= 4;
-		else
-			div = 0;
+		div = radeon_audio_decode_dfs_div(div);
 
 		if (div)
-			clock = rdev->clock.gpupll_outputfreq * 10 / div;
+			clock = clock * 100 / div;
 
 		WREG32(DCE8_DCCG_AUDIO_DTO1_PHASE, 24000);
 		WREG32(DCE8_DCCG_AUDIO_DTO1_MODULE, clock);
diff --git a/drivers/gpu/drm/radeon/evergreen_hdmi.c b/drivers/gpu/drm/radeon/evergreen_hdmi.c
index 9953356..3cf04a2 100644
--- a/drivers/gpu/drm/radeon/evergreen_hdmi.c
+++ b/drivers/gpu/drm/radeon/evergreen_hdmi.c
@@ -289,6 +289,16 @@
 	 * number (coefficient of two integer numbers.  DCCG_AUDIO_DTOx_PHASE
 	 * is the numerator, DCCG_AUDIO_DTOx_MODULE is the denominator
 	 */
+	if (ASIC_IS_DCE41(rdev)) {
+		unsigned int div = (RREG32(DCE41_DENTIST_DISPCLK_CNTL) &
+			DENTIST_DPREFCLK_WDIVIDER_MASK) >>
+			DENTIST_DPREFCLK_WDIVIDER_SHIFT;
+		div = radeon_audio_decode_dfs_div(div);
+
+		if (div)
+			clock = 100 * clock / div;
+	}
+
 	WREG32(DCCG_AUDIO_DTO1_PHASE, 24000);
 	WREG32(DCCG_AUDIO_DTO1_MODULE, clock);
 }
diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h
index 4aa5f75..13b6029 100644
--- a/drivers/gpu/drm/radeon/evergreend.h
+++ b/drivers/gpu/drm/radeon/evergreend.h
@@ -511,6 +511,11 @@
 #define DCCG_AUDIO_DTO1_CNTL              0x05cc
 #       define DCCG_AUDIO_DTO1_USE_512FBR_DTO (1 << 3)
 
+#define DCE41_DENTIST_DISPCLK_CNTL			0x049c
+#       define DENTIST_DPREFCLK_WDIVIDER(x)		(((x) & 0x7f) << 24)
+#       define DENTIST_DPREFCLK_WDIVIDER_MASK		(0x7f << 24)
+#       define DENTIST_DPREFCLK_WDIVIDER_SHIFT		24
+
 /* DCE 4.0 AFMT */
 #define HDMI_CONTROL                         0x7030
 #       define HDMI_KEEPOUT_MODE             (1 << 0)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 5ae6db9..78a51b3 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -268,7 +268,7 @@
 	uint32_t current_dispclk;
 	uint32_t dp_extclk;
 	uint32_t max_pixel_clock;
-	uint32_t gpupll_outputfreq;
+	uint32_t vco_freq;
 };
 
 /*
diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c
index 08fc1b5..de9a2ff 100644
--- a/drivers/gpu/drm/radeon/radeon_atombios.c
+++ b/drivers/gpu/drm/radeon/radeon_atombios.c
@@ -1106,6 +1106,31 @@
 	ATOM_FIRMWARE_INFO_V2_2 info_22;
 };
 
+union igp_info {
+	struct _ATOM_INTEGRATED_SYSTEM_INFO info;
+	struct _ATOM_INTEGRATED_SYSTEM_INFO_V2 info_2;
+	struct _ATOM_INTEGRATED_SYSTEM_INFO_V6 info_6;
+	struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_7 info_7;
+	struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_8 info_8;
+};
+
+static void radeon_atombios_get_dentist_vco_freq(struct radeon_device *rdev)
+{
+	struct radeon_mode_info *mode_info = &rdev->mode_info;
+	int index = GetIndexIntoMasterTable(DATA, IntegratedSystemInfo);
+	union igp_info *igp_info;
+	u8 frev, crev;
+	u16 data_offset;
+
+	if (atom_parse_data_header(mode_info->atom_context, index, NULL,
+			&frev, &crev, &data_offset)) {
+		igp_info = (union igp_info *)(mode_info->atom_context->bios +
+			data_offset);
+		rdev->clock.vco_freq =
+			le32_to_cpu(igp_info->info_6.ulDentistVCOFreq);
+	}
+}
+
 bool radeon_atom_get_clock_info(struct drm_device *dev)
 {
 	struct radeon_device *rdev = dev->dev_private;
@@ -1257,12 +1282,18 @@
 		rdev->mode_info.firmware_flags =
 			le16_to_cpu(firmware_info->info.usFirmwareCapability.susAccess);
 
-		if (ASIC_IS_DCE8(rdev)) {
-			rdev->clock.gpupll_outputfreq =
+		if (ASIC_IS_DCE8(rdev))
+			rdev->clock.vco_freq =
 				le32_to_cpu(firmware_info->info_22.ulGPUPLL_OutputFreq);
-			if (rdev->clock.gpupll_outputfreq == 0)
-				rdev->clock.gpupll_outputfreq = 360000;	/* 3.6 GHz */
-		}
+		else if (ASIC_IS_DCE5(rdev))
+			rdev->clock.vco_freq = rdev->clock.current_dispclk;
+		else if (ASIC_IS_DCE41(rdev))
+			radeon_atombios_get_dentist_vco_freq(rdev);
+		else
+			rdev->clock.vco_freq = rdev->clock.current_dispclk;
+
+		if (rdev->clock.vco_freq == 0)
+			rdev->clock.vco_freq = 360000;	/* 3.6 GHz */
 
 		return true;
 	}
@@ -1270,14 +1301,6 @@
 	return false;
 }
 
-union igp_info {
-	struct _ATOM_INTEGRATED_SYSTEM_INFO info;
-	struct _ATOM_INTEGRATED_SYSTEM_INFO_V2 info_2;
-	struct _ATOM_INTEGRATED_SYSTEM_INFO_V6 info_6;
-	struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_7 info_7;
-	struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_8 info_8;
-};
-
 bool radeon_atombios_sideport_present(struct radeon_device *rdev)
 {
 	struct radeon_mode_info *mode_info = &rdev->mode_info;
diff --git a/drivers/gpu/drm/radeon/radeon_audio.c b/drivers/gpu/drm/radeon/radeon_audio.c
index 2c02e99..b214663 100644
--- a/drivers/gpu/drm/radeon/radeon_audio.c
+++ b/drivers/gpu/drm/radeon/radeon_audio.c
@@ -739,9 +739,6 @@
 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
 	struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv;
 	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
-	struct radeon_connector *radeon_connector = to_radeon_connector(connector);
-	struct radeon_connector_atom_dig *dig_connector =
-		radeon_connector->con_priv;
 
 	if (!dig || !dig->afmt)
 		return;
@@ -753,10 +750,7 @@
 		radeon_audio_write_speaker_allocation(encoder);
 		radeon_audio_write_sad_regs(encoder);
 		radeon_audio_write_latency_fields(encoder, mode);
-		if (rdev->clock.dp_extclk || ASIC_IS_DCE5(rdev))
-			radeon_audio_set_dto(encoder, rdev->clock.default_dispclk * 10);
-		else
-			radeon_audio_set_dto(encoder, dig_connector->dp_clock);
+		radeon_audio_set_dto(encoder, rdev->clock.vco_freq * 10);
 		radeon_audio_set_audio_packet(encoder);
 		radeon_audio_select_pin(encoder);
 
@@ -781,3 +775,15 @@
 	if (radeon_encoder->audio && radeon_encoder->audio->dpms)
 		radeon_encoder->audio->dpms(encoder, mode == DRM_MODE_DPMS_ON);
 }
+
+unsigned int radeon_audio_decode_dfs_div(unsigned int div)
+{
+	if (div >= 8 && div < 64)
+		return (div - 8) * 25 + 200;
+	else if (div >= 64 && div < 96)
+		return (div - 64) * 50 + 1600;
+	else if (div >= 96 && div < 128)
+		return (div - 96) * 100 + 3200;
+	else
+		return 0;
+}
diff --git a/drivers/gpu/drm/radeon/radeon_audio.h b/drivers/gpu/drm/radeon/radeon_audio.h
index 059cc30..5c70cce 100644
--- a/drivers/gpu/drm/radeon/radeon_audio.h
+++ b/drivers/gpu/drm/radeon/radeon_audio.h
@@ -79,5 +79,6 @@
 void radeon_audio_mode_set(struct drm_encoder *encoder,
 	struct drm_display_mode *mode);
 void radeon_audio_dpms(struct drm_encoder *encoder, int mode);
+unsigned int radeon_audio_decode_dfs_div(unsigned int div);
 
 #endif
diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
index b3bb923..298ea1c 100644
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -1670,8 +1670,10 @@
 	/* setup afmt */
 	radeon_afmt_init(rdev);
 
-	radeon_fbdev_init(rdev);
-	drm_kms_helper_poll_init(rdev->ddev);
+	if (!list_empty(&rdev->ddev->mode_config.connector_list)) {
+		radeon_fbdev_init(rdev);
+		drm_kms_helper_poll_init(rdev->ddev);
+	}
 
 	/* do pm late init */
 	ret = radeon_pm_late_init(rdev);
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index 3dcc573..e26c963 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -663,6 +663,7 @@
 	bo_va = radeon_vm_bo_find(&fpriv->vm, rbo);
 	if (!bo_va) {
 		args->operation = RADEON_VA_RESULT_ERROR;
+		radeon_bo_unreserve(rbo);
 		drm_gem_object_unreference_unlocked(gobj);
 		return -ENOENT;
 	}
diff --git a/drivers/gpu/drm/radeon/vce_v1_0.c b/drivers/gpu/drm/radeon/vce_v1_0.c
index 07a0d37..a01efe3 100644
--- a/drivers/gpu/drm/radeon/vce_v1_0.c
+++ b/drivers/gpu/drm/radeon/vce_v1_0.c
@@ -178,12 +178,12 @@
 		return -EINVAL;
 	}
 
-	for (i = 0; i < sign->num; ++i) {
-		if (sign->val[i].chip_id == chip_id)
+	for (i = 0; i < le32_to_cpu(sign->num); ++i) {
+		if (le32_to_cpu(sign->val[i].chip_id) == chip_id)
 			break;
 	}
 
-	if (i == sign->num)
+	if (i == le32_to_cpu(sign->num))
 		return -EINVAL;
 
 	data += (256 - 64) / 4;
@@ -191,18 +191,18 @@
 	data[1] = sign->val[i].nonce[1];
 	data[2] = sign->val[i].nonce[2];
 	data[3] = sign->val[i].nonce[3];
-	data[4] = sign->len + 64;
+	data[4] = cpu_to_le32(le32_to_cpu(sign->len) + 64);
 
 	memset(&data[5], 0, 44);
 	memcpy(&data[16], &sign[1], rdev->vce_fw->size - sizeof(*sign));
 
-	data += data[4] / 4;
+	data += le32_to_cpu(data[4]) / 4;
 	data[0] = sign->val[i].sigval[0];
 	data[1] = sign->val[i].sigval[1];
 	data[2] = sign->val[i].sigval[2];
 	data[3] = sign->val[i].sigval[3];
 
-	rdev->vce.keyselect = sign->val[i].keyselect;
+	rdev->vce.keyselect = le32_to_cpu(sign->val[i].keyselect);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/rockchip/Makefile b/drivers/gpu/drm/rockchip/Makefile
index d1dc0f7..f6a809a 100644
--- a/drivers/gpu/drm/rockchip/Makefile
+++ b/drivers/gpu/drm/rockchip/Makefile
@@ -2,11 +2,11 @@
 # Makefile for the drm device driver.  This driver provides support for the
 # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher.
 
-rockchipdrm-y := rockchip_drm_drv.o rockchip_drm_fb.o rockchip_drm_fbdev.o \
-		rockchip_drm_gem.o
+rockchipdrm-y := rockchip_drm_drv.o rockchip_drm_fb.o \
+		rockchip_drm_gem.o rockchip_drm_vop.o
+rockchipdrm-$(CONFIG_DRM_FBDEV_EMULATION) += rockchip_drm_fbdev.o
 
 obj-$(CONFIG_ROCKCHIP_DW_HDMI) += dw_hdmi-rockchip.o
 obj-$(CONFIG_ROCKCHIP_DW_MIPI_DSI) += dw-mipi-dsi.o
 
-obj-$(CONFIG_DRM_ROCKCHIP) += rockchipdrm.o rockchip_drm_vop.o \
-				rockchip_vop_reg.o
+obj-$(CONFIG_DRM_ROCKCHIP) += rockchipdrm.o rockchip_vop_reg.o
diff --git a/drivers/gpu/drm/rockchip/dw-mipi-dsi.c b/drivers/gpu/drm/rockchip/dw-mipi-dsi.c
index 7bfe243..f8f8f29 100644
--- a/drivers/gpu/drm/rockchip/dw-mipi-dsi.c
+++ b/drivers/gpu/drm/rockchip/dw-mipi-dsi.c
@@ -461,10 +461,11 @@
 
 static int dw_mipi_dsi_get_lane_bps(struct dw_mipi_dsi *dsi)
 {
-	unsigned int bpp, i, pre;
+	unsigned int i, pre;
 	unsigned long mpclk, pllref, tmp;
 	unsigned int m = 1, n = 1, target_mbps = 1000;
 	unsigned int max_mbps = dptdin_map[ARRAY_SIZE(dptdin_map) - 1].max_mbps;
+	int bpp;
 
 	bpp = mipi_dsi_pixel_format_to_bpp(dsi->format);
 	if (bpp < 0) {
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
index 8397d1b..a0d51cc 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
@@ -55,14 +55,12 @@
 
 	return arm_iommu_attach_device(dev, mapping);
 }
-EXPORT_SYMBOL_GPL(rockchip_drm_dma_attach_device);
 
 void rockchip_drm_dma_detach_device(struct drm_device *drm_dev,
 				    struct device *dev)
 {
 	arm_iommu_detach_device(dev);
 }
-EXPORT_SYMBOL_GPL(rockchip_drm_dma_detach_device);
 
 int rockchip_register_crtc_funcs(struct drm_crtc *crtc,
 				 const struct rockchip_crtc_funcs *crtc_funcs)
@@ -77,7 +75,6 @@
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(rockchip_register_crtc_funcs);
 
 void rockchip_unregister_crtc_funcs(struct drm_crtc *crtc)
 {
@@ -89,7 +86,6 @@
 
 	priv->crtc_funcs[pipe] = NULL;
 }
-EXPORT_SYMBOL_GPL(rockchip_unregister_crtc_funcs);
 
 static struct drm_crtc *rockchip_crtc_from_pipe(struct drm_device *drm,
 						int pipe)
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_fb.c b/drivers/gpu/drm/rockchip/rockchip_drm_fb.c
index f784488..3b8f652 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_fb.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_fb.c
@@ -39,7 +39,6 @@
 
 	return rk_fb->obj[plane];
 }
-EXPORT_SYMBOL_GPL(rockchip_fb_get_gem_obj);
 
 static void rockchip_drm_fb_destroy(struct drm_framebuffer *fb)
 {
@@ -177,8 +176,23 @@
 		crtc_funcs->wait_for_update(crtc);
 }
 
+/*
+ * We can't use drm_atomic_helper_wait_for_vblanks() because rk3288 and rk3066
+ * have hardware counters for neither vblanks nor scanlines, which results in
+ * a race where:
+ *				| <-- HW vsync irq and reg take effect
+ *	       plane_commit --> |
+ *	get_vblank and wait --> |
+ *				| <-- handle_vblank, vblank->count + 1
+ *		 cleanup_fb --> |
+ *		iommu crash --> |
+ *				| <-- HW vsync irq and reg take effect
+ *
+ * This function is equivalent but uses rockchip_crtc_wait_for_update() instead
+ * of waiting for vblank_count to change.
+ */
 static void
-rockchip_atomic_wait_for_complete(struct drm_atomic_state *old_state)
+rockchip_atomic_wait_for_complete(struct drm_device *dev, struct drm_atomic_state *old_state)
 {
 	struct drm_crtc_state *old_crtc_state;
 	struct drm_crtc *crtc;
@@ -194,6 +208,10 @@
 		if (!crtc->state->active)
 			continue;
 
+		if (!drm_atomic_helper_framebuffer_changed(dev,
+				old_state, crtc))
+			continue;
+
 		ret = drm_crtc_vblank_get(crtc);
 		if (ret != 0)
 			continue;
@@ -241,7 +259,7 @@
 
 	drm_atomic_helper_commit_planes(dev, state, true);
 
-	rockchip_atomic_wait_for_complete(state);
+	rockchip_atomic_wait_for_complete(dev, state);
 
 	drm_atomic_helper_cleanup_planes(dev, state);
 
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_fbdev.h b/drivers/gpu/drm/rockchip/rockchip_drm_fbdev.h
index 50432e9..73718c5 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_fbdev.h
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_fbdev.h
@@ -15,7 +15,18 @@
 #ifndef _ROCKCHIP_DRM_FBDEV_H
 #define _ROCKCHIP_DRM_FBDEV_H
 
+#ifdef CONFIG_DRM_FBDEV_EMULATION
 int rockchip_drm_fbdev_init(struct drm_device *dev);
 void rockchip_drm_fbdev_fini(struct drm_device *dev);
+#else
+static inline int rockchip_drm_fbdev_init(struct drm_device *dev)
+{
+	return 0;
+}
+
+static inline void rockchip_drm_fbdev_fini(struct drm_device *dev)
+{
+}
+#endif
 
 #endif /* _ROCKCHIP_DRM_FBDEV_H */
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c
index d908321..18e0733 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c
@@ -234,13 +234,8 @@
 	/*
 	 * align to 64 bytes since Mali requires it.
 	 */
-	min_pitch = ALIGN(min_pitch, 64);
-
-	if (args->pitch < min_pitch)
-		args->pitch = min_pitch;
-
-	if (args->size < args->pitch * args->height)
-		args->size = args->pitch * args->height;
+	args->pitch = ALIGN(min_pitch, 64);
+	args->size = args->pitch * args->height;
 
 	rk_obj = rockchip_gem_create_with_handle(file_priv, dev, args->size,
 						 &args->handle);
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
index 46c2a8d..fd37054 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
@@ -43,8 +43,8 @@
 
 #define REG_SET(x, base, reg, v, mode) \
 		__REG_SET_##mode(x, base + reg.offset, reg.mask, reg.shift, v)
-#define REG_SET_MASK(x, base, reg, v, mode) \
-		__REG_SET_##mode(x, base + reg.offset, reg.mask, reg.shift, v)
+#define REG_SET_MASK(x, base, reg, mask, v, mode) \
+		__REG_SET_##mode(x, base + reg.offset, mask, reg.shift, v)
 
 #define VOP_WIN_SET(x, win, name, v) \
 		REG_SET(x, win->base, win->phy->name, v, RELAXED)
@@ -58,16 +58,18 @@
 #define VOP_INTR_GET(vop, name) \
 		vop_read_reg(vop, 0, &vop->data->ctrl->name)
 
-#define VOP_INTR_SET(vop, name, v) \
-		REG_SET(vop, 0, vop->data->intr->name, v, NORMAL)
+#define VOP_INTR_SET(vop, name, mask, v) \
+		REG_SET_MASK(vop, 0, vop->data->intr->name, mask, v, NORMAL)
 #define VOP_INTR_SET_TYPE(vop, name, type, v) \
 	do { \
-		int i, reg = 0; \
+		int i, reg = 0, mask = 0; \
 		for (i = 0; i < vop->data->intr->nintrs; i++) { \
-			if (vop->data->intr->intrs[i] & type) \
+			if (vop->data->intr->intrs[i] & type) { \
 				reg |= (v) << i; \
+				mask |= 1 << i; \
+			} \
 		} \
-		VOP_INTR_SET(vop, name, reg); \
+		VOP_INTR_SET(vop, name, mask, reg); \
 	} while (0)
 #define VOP_INTR_GET_TYPE(vop, name, type) \
 		vop_get_intr_type(vop, &vop->data->intr->name, type)
diff --git a/drivers/gpu/drm/vc4/vc4_v3d.c b/drivers/gpu/drm/vc4/vc4_v3d.c
index 424d515..314ff71 100644
--- a/drivers/gpu/drm/vc4/vc4_v3d.c
+++ b/drivers/gpu/drm/vc4/vc4_v3d.c
@@ -144,19 +144,16 @@
 }
 #endif /* CONFIG_DEBUG_FS */
 
-/*
- * Asks the firmware to turn on power to the V3D engine.
- *
- * This may be doable with just the clocks interface, though this
- * packet does some other register setup from the firmware, too.
- */
 int
 vc4_v3d_set_power(struct vc4_dev *vc4, bool on)
 {
-	if (on)
-		return pm_generic_poweroff(&vc4->v3d->pdev->dev);
-	else
-		return pm_generic_resume(&vc4->v3d->pdev->dev);
+	/* XXX: This interface is needed for GPU reset, and the way to
+	 * do it is to turn our power domain off and back on.  We
+	 * can't just reset from within the driver, because the reset
+	 * bits are in the power domain's register area, and get set
+	 * during the poweron process.
+	 */
+	return 0;
 }
 
 static void vc4_v3d_init_hw(struct drm_device *dev)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index c49812b..24fb348 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -25,6 +25,7 @@
  *
  **************************************************************************/
 #include <linux/module.h>
+#include <linux/console.h>
 
 #include <drm/drmP.h>
 #include "vmwgfx_drv.h"
@@ -1538,6 +1539,12 @@
 static int __init vmwgfx_init(void)
 {
 	int ret;
+
+#ifdef CONFIG_VGA_CONSOLE
+	if (vgacon_text_force())
+		return -EINVAL;
+#endif
+
 	ret = drm_pci_init(&driver, &vmw_pci_driver);
 	if (ret)
 		DRM_ERROR("Failed initializing DRM.\n");
diff --git a/drivers/hwmon/dell-smm-hwmon.c b/drivers/hwmon/dell-smm-hwmon.c
index c848789..c43318d 100644
--- a/drivers/hwmon/dell-smm-hwmon.c
+++ b/drivers/hwmon/dell-smm-hwmon.c
@@ -932,6 +932,17 @@
 static struct dmi_system_id i8k_blacklist_dmi_table[] __initdata = {
 	{
 		/*
+		 * CPU fan speed going up and down on Dell Studio XPS 8000
+		 * for unknown reasons.
+		 */
+		.ident = "Dell Studio XPS 8000",
+		.matches = {
+			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+			DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Studio XPS 8000"),
+		},
+	},
+	{
+		/*
 		 * CPU fan speed going up and down on Dell Studio XPS 8100
 		 * for unknown reasons.
 		 */
diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c
index f77eb97..4f695d8 100644
--- a/drivers/hwmon/fam15h_power.c
+++ b/drivers/hwmon/fam15h_power.c
@@ -90,7 +90,15 @@
 	pci_bus_read_config_dword(f4->bus, PCI_DEVFN(PCI_SLOT(f4->devfn), 5),
 				  REG_TDP_LIMIT3, &val);
 
-	tdp_limit = val >> 16;
+	/*
+	 * On Carrizo and later platforms, ApmTdpLimit bit field
+	 * is extended to 16:31 from 16:28.
+	 */
+	if (boot_cpu_data.x86 == 0x15 && boot_cpu_data.x86_model >= 0x60)
+		tdp_limit = val >> 16;
+	else
+		tdp_limit = (val >> 16) & 0x1fff;
+
 	curr_pwr_watts = ((u64)(tdp_limit +
 				data->base_tdp)) << running_avg_range;
 	curr_pwr_watts -= running_avg_capture;
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 539b0de..e5e2239 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -2049,7 +2049,7 @@
 	/* Update device table */
 	set_dte_entry(dev_data->devid, domain, ats);
 	if (alias != dev_data->devid)
-		set_dte_entry(dev_data->devid, domain, ats);
+		set_dte_entry(alias, domain, ats);
 
 	device_flush_dte(dev_data);
 }
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index ac73876..986a53e 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -1489,7 +1489,7 @@
 {
 	struct pci_dev *pdev;
 
-	if (dev_is_pci(info->dev))
+	if (!dev_is_pci(info->dev))
 		return;
 
 	pdev = to_pci_dev(info->dev);
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 8bbcbfe..381ca5a 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -25,6 +25,7 @@
 #include <linux/sizes.h>
 #include <linux/slab.h>
 #include <linux/types.h>
+#include <linux/dma-mapping.h>
 
 #include <asm/barrier.h>
 
diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c
index 5f2fda1..fa49f91 100644
--- a/drivers/pci/hotplug/acpiphp_glue.c
+++ b/drivers/pci/hotplug/acpiphp_glue.c
@@ -953,8 +953,10 @@
 {
 	pci_lock_rescan_remove();
 
-	if (slot->flags & SLOT_IS_GOING_AWAY)
+	if (slot->flags & SLOT_IS_GOING_AWAY) {
+		pci_unlock_rescan_remove();
 		return -ENODEV;
+	}
 
 	/* configure all functions */
 	if (!(slot->flags & SLOT_ENABLED))
diff --git a/drivers/s390/cio/chp.c b/drivers/s390/cio/chp.c
index c692dfe..50597f9 100644
--- a/drivers/s390/cio/chp.c
+++ b/drivers/s390/cio/chp.c
@@ -139,11 +139,11 @@
 
 	device = container_of(kobj, struct device, kobj);
 	chp = to_channelpath(device);
-	if (!chp->cmg_chars)
+	if (chp->cmg == -1)
 		return 0;
 
-	return memory_read_from_buffer(buf, count, &off,
-				chp->cmg_chars, sizeof(struct cmg_chars));
+	return memory_read_from_buffer(buf, count, &off, &chp->cmg_chars,
+				       sizeof(chp->cmg_chars));
 }
 
 static struct bin_attribute chp_measurement_chars_attr = {
@@ -416,7 +416,8 @@
  * chp_update_desc - update channel-path description
  * @chp - channel-path
  *
- * Update the channel-path description of the specified channel-path.
+ * Update the channel-path description of the specified channel-path
+ * including channel measurement related information.
  * Return zero on success, non-zero otherwise.
  */
 int chp_update_desc(struct channel_path *chp)
@@ -428,8 +429,10 @@
 		return rc;
 
 	rc = chsc_determine_fmt1_channel_path_desc(chp->chpid, &chp->desc_fmt1);
+	if (rc)
+		return rc;
 
-	return rc;
+	return chsc_get_channel_measurement_chars(chp);
 }
 
 /**
@@ -466,14 +469,6 @@
 		ret = -ENODEV;
 		goto out_free;
 	}
-	/* Get channel-measurement characteristics. */
-	if (css_chsc_characteristics.scmc && css_chsc_characteristics.secm) {
-		ret = chsc_get_channel_measurement_chars(chp);
-		if (ret)
-			goto out_free;
-	} else {
-		chp->cmg = -1;
-	}
 	dev_set_name(&chp->dev, "chp%x.%02x", chpid.cssid, chpid.id);
 
 	/* make it known to the system */
diff --git a/drivers/s390/cio/chp.h b/drivers/s390/cio/chp.h
index 4efd5b8..af02322 100644
--- a/drivers/s390/cio/chp.h
+++ b/drivers/s390/cio/chp.h
@@ -48,7 +48,7 @@
 	/* Channel-measurement related stuff: */
 	int cmg;
 	int shared;
-	void *cmg_chars;
+	struct cmg_chars cmg_chars;
 };
 
 /* Return channel_path struct for given chpid. */
diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c
index a831d18..c424c0c 100644
--- a/drivers/s390/cio/chsc.c
+++ b/drivers/s390/cio/chsc.c
@@ -14,6 +14,7 @@
 #include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/device.h>
+#include <linux/mutex.h>
 #include <linux/pci.h>
 
 #include <asm/cio.h>
@@ -224,8 +225,9 @@
 
 void chsc_chp_offline(struct chp_id chpid)
 {
-	char dbf_txt[15];
+	struct channel_path *chp = chpid_to_chp(chpid);
 	struct chp_link link;
+	char dbf_txt[15];
 
 	sprintf(dbf_txt, "chpr%x.%02x", chpid.cssid, chpid.id);
 	CIO_TRACE_EVENT(2, dbf_txt);
@@ -236,6 +238,11 @@
 	link.chpid = chpid;
 	/* Wait until previous actions have settled. */
 	css_wait_for_slow_path();
+
+	mutex_lock(&chp->lock);
+	chp_update_desc(chp);
+	mutex_unlock(&chp->lock);
+
 	for_each_subchannel_staged(s390_subchannel_remove_chpid, NULL, &link);
 }
 
@@ -690,8 +697,9 @@
 
 void chsc_chp_online(struct chp_id chpid)
 {
-	char dbf_txt[15];
+	struct channel_path *chp = chpid_to_chp(chpid);
 	struct chp_link link;
+	char dbf_txt[15];
 
 	sprintf(dbf_txt, "cadd%x.%02x", chpid.cssid, chpid.id);
 	CIO_TRACE_EVENT(2, dbf_txt);
@@ -701,6 +709,11 @@
 		link.chpid = chpid;
 		/* Wait until previous actions have settled. */
 		css_wait_for_slow_path();
+
+		mutex_lock(&chp->lock);
+		chp_update_desc(chp);
+		mutex_unlock(&chp->lock);
+
 		for_each_subchannel_staged(__s390_process_res_acc, NULL,
 					   &link);
 		css_schedule_reprobe();
@@ -967,22 +980,19 @@
 chsc_initialize_cmg_chars(struct channel_path *chp, u8 cmcv,
 			  struct cmg_chars *chars)
 {
-	struct cmg_chars *cmg_chars;
 	int i, mask;
 
-	cmg_chars = chp->cmg_chars;
 	for (i = 0; i < NR_MEASUREMENT_CHARS; i++) {
 		mask = 0x80 >> (i + 3);
 		if (cmcv & mask)
-			cmg_chars->values[i] = chars->values[i];
+			chp->cmg_chars.values[i] = chars->values[i];
 		else
-			cmg_chars->values[i] = 0;
+			chp->cmg_chars.values[i] = 0;
 	}
 }
 
 int chsc_get_channel_measurement_chars(struct channel_path *chp)
 {
-	struct cmg_chars *cmg_chars;
 	int ccode, ret;
 
 	struct {
@@ -1006,10 +1016,11 @@
 		u32 data[NR_MEASUREMENT_CHARS];
 	} __attribute__ ((packed)) *scmc_area;
 
-	chp->cmg_chars = NULL;
-	cmg_chars = kmalloc(sizeof(*cmg_chars), GFP_KERNEL);
-	if (!cmg_chars)
-		return -ENOMEM;
+	chp->shared = -1;
+	chp->cmg = -1;
+
+	if (!css_chsc_characteristics.scmc || !css_chsc_characteristics.secm)
+		return 0;
 
 	spin_lock_irq(&chsc_page_lock);
 	memset(chsc_page, 0, PAGE_SIZE);
@@ -1031,25 +1042,19 @@
 			      scmc_area->response.code);
 		goto out;
 	}
-	if (scmc_area->not_valid) {
-		chp->cmg = -1;
-		chp->shared = -1;
+	if (scmc_area->not_valid)
 		goto out;
-	}
+
 	chp->cmg = scmc_area->cmg;
 	chp->shared = scmc_area->shared;
 	if (chp->cmg != 2 && chp->cmg != 3) {
 		/* No cmg-dependent data. */
 		goto out;
 	}
-	chp->cmg_chars = cmg_chars;
 	chsc_initialize_cmg_chars(chp, scmc_area->cmcv,
 				  (struct cmg_chars *) &scmc_area->data);
 out:
 	spin_unlock_irq(&chsc_page_lock);
-	if (!chp->cmg_chars)
-		kfree(cmg_chars);
-
 	return ret;
 }
 
diff --git a/drivers/s390/crypto/zcrypt_error.h b/drivers/s390/crypto/zcrypt_error.h
index 7b23f43..de1b6c1 100644
--- a/drivers/s390/crypto/zcrypt_error.h
+++ b/drivers/s390/crypto/zcrypt_error.h
@@ -112,9 +112,10 @@
 		atomic_set(&zcrypt_rescan_req, 1);
 		zdev->online = 0;
 		pr_err("Cryptographic device %x failed and was set offline\n",
-		       zdev->ap_dev->qid);
+		       AP_QID_DEVICE(zdev->ap_dev->qid));
 		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d",
-			zdev->ap_dev->qid, zdev->online, ehdr->reply_code);
+			AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online,
+			ehdr->reply_code);
 		return -EAGAIN;
 	case REP82_ERROR_TRANSPORT_FAIL:
 	case REP82_ERROR_MACHINE_FAILURE:
@@ -123,16 +124,18 @@
 		atomic_set(&zcrypt_rescan_req, 1);
 		zdev->online = 0;
 		pr_err("Cryptographic device %x failed and was set offline\n",
-		       zdev->ap_dev->qid);
+		       AP_QID_DEVICE(zdev->ap_dev->qid));
 		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d",
-			zdev->ap_dev->qid, zdev->online, ehdr->reply_code);
+			AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online,
+			ehdr->reply_code);
 		return -EAGAIN;
 	default:
 		zdev->online = 0;
 		pr_err("Cryptographic device %x failed and was set offline\n",
-		       zdev->ap_dev->qid);
+		       AP_QID_DEVICE(zdev->ap_dev->qid));
 		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d",
-			zdev->ap_dev->qid, zdev->online, ehdr->reply_code);
+			AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online,
+			ehdr->reply_code);
 		return -EAGAIN;	/* repeat the request on a different device. */
 	}
 }
diff --git a/drivers/s390/crypto/zcrypt_msgtype50.c b/drivers/s390/crypto/zcrypt_msgtype50.c
index 74edf29..eedfaa2 100644
--- a/drivers/s390/crypto/zcrypt_msgtype50.c
+++ b/drivers/s390/crypto/zcrypt_msgtype50.c
@@ -336,9 +336,10 @@
 		/* The result is too short, the CEX2A card may not do that.. */
 		zdev->online = 0;
 		pr_err("Cryptographic device %x failed and was set offline\n",
-		       zdev->ap_dev->qid);
+		       AP_QID_DEVICE(zdev->ap_dev->qid));
 		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d",
-			       zdev->ap_dev->qid, zdev->online, t80h->code);
+			       AP_QID_DEVICE(zdev->ap_dev->qid),
+			       zdev->online, t80h->code);
 
 		return -EAGAIN;	/* repeat the request on a different device. */
 	}
@@ -368,9 +369,9 @@
 	default: /* Unknown response type, this should NEVER EVER happen */
 		zdev->online = 0;
 		pr_err("Cryptographic device %x failed and was set offline\n",
-		       zdev->ap_dev->qid);
+		       AP_QID_DEVICE(zdev->ap_dev->qid));
 		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail",
-			       zdev->ap_dev->qid, zdev->online);
+			       AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online);
 		return -EAGAIN;	/* repeat the request on a different device. */
 	}
 }
diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c
index 9a2dd47..2195971 100644
--- a/drivers/s390/crypto/zcrypt_msgtype6.c
+++ b/drivers/s390/crypto/zcrypt_msgtype6.c
@@ -572,9 +572,9 @@
 			return -EINVAL;
 		zdev->online = 0;
 		pr_err("Cryptographic device %x failed and was set offline\n",
-		       zdev->ap_dev->qid);
+		       AP_QID_DEVICE(zdev->ap_dev->qid));
 		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d",
-			       zdev->ap_dev->qid, zdev->online,
+			       AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online,
 			       msg->hdr.reply_code);
 		return -EAGAIN;	/* repeat the request on a different device. */
 	}
@@ -715,9 +715,9 @@
 	default: /* Unknown response type, this should NEVER EVER happen */
 		zdev->online = 0;
 		pr_err("Cryptographic device %x failed and was set offline\n",
-		       zdev->ap_dev->qid);
+		       AP_QID_DEVICE(zdev->ap_dev->qid));
 		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail",
-			       zdev->ap_dev->qid, zdev->online);
+			       AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online);
 		return -EAGAIN;	/* repeat the request on a different device. */
 	}
 }
@@ -747,9 +747,9 @@
 		xcRB->status = 0x0008044DL; /* HDD_InvalidParm */
 		zdev->online = 0;
 		pr_err("Cryptographic device %x failed and was set offline\n",
-		       zdev->ap_dev->qid);
+		       AP_QID_DEVICE(zdev->ap_dev->qid));
 		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail",
-			       zdev->ap_dev->qid, zdev->online);
+			       AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online);
 		return -EAGAIN;	/* repeat the request on a different device. */
 	}
 }
@@ -773,9 +773,9 @@
 	default: /* Unknown response type, this should NEVER EVER happen */
 		zdev->online = 0;
 		pr_err("Cryptographic device %x failed and was set offline\n",
-		       zdev->ap_dev->qid);
+		       AP_QID_DEVICE(zdev->ap_dev->qid));
 		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail",
-			       zdev->ap_dev->qid, zdev->online);
+			       AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online);
 		return -EAGAIN; /* repeat the request on a different device. */
 	}
 }
@@ -800,9 +800,9 @@
 	default: /* Unknown response type, this should NEVER EVER happen */
 		zdev->online = 0;
 		pr_err("Cryptographic device %x failed and was set offline\n",
-		       zdev->ap_dev->qid);
+		       AP_QID_DEVICE(zdev->ap_dev->qid));
 		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail",
-			       zdev->ap_dev->qid, zdev->online);
+			       AP_QID_DEVICE(zdev->ap_dev->qid), zdev->online);
 		return -EAGAIN;	/* repeat the request on a different device. */
 	}
 }
diff --git a/drivers/scsi/hisi_sas/Kconfig b/drivers/scsi/hisi_sas/Kconfig
index 37a0c71..b676618 100644
--- a/drivers/scsi/hisi_sas/Kconfig
+++ b/drivers/scsi/hisi_sas/Kconfig
@@ -1,5 +1,7 @@
 config SCSI_HISI_SAS
 	tristate "HiSilicon SAS"
+	depends on HAS_DMA
+	depends on ARM64 || COMPILE_TEST
 	select SCSI_SAS_LIBSAS
 	select BLK_DEV_INTEGRITY
 	help
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 4e08d1cd..bb669d3 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -2893,7 +2893,7 @@
 	    sdkp->opt_xfer_blocks <= SD_DEF_XFER_BLOCKS &&
 	    sdkp->opt_xfer_blocks * sdp->sector_size >= PAGE_CACHE_SIZE)
 		rw_max = q->limits.io_opt =
-			logical_to_sectors(sdp, sdkp->opt_xfer_blocks);
+			sdkp->opt_xfer_blocks * sdp->sector_size;
 	else
 		rw_max = BLK_DEF_MAX_SECTORS;
 
@@ -3268,8 +3268,8 @@
 	struct scsi_disk *sdkp = dev_get_drvdata(dev);
 	int ret = 0;
 
-	if (!sdkp)
-		return 0;	/* this can happen */
+	if (!sdkp)	/* E.g.: runtime suspend following sd_remove() */
+		return 0;
 
 	if (sdkp->WCE && sdkp->media_present) {
 		sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n");
@@ -3308,6 +3308,9 @@
 {
 	struct scsi_disk *sdkp = dev_get_drvdata(dev);
 
+	if (!sdkp)	/* E.g.: runtime resume at the start of sd_probe() */
+		return 0;
+
 	if (!sdkp->device->manage_start_stop)
 		return 0;
 
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index 8bd54a6..64c8674 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -144,6 +144,9 @@
 {
 	struct scsi_cd *cd = dev_get_drvdata(dev);
 
+	if (!cd)	/* E.g.: runtime suspend following sr_remove() */
+		return 0;
+
 	if (cd->media_present)
 		return -EBUSY;
 	else
@@ -985,6 +988,7 @@
 	scsi_autopm_get_device(cd->device);
 
 	del_gendisk(cd->disk);
+	dev_set_drvdata(dev, NULL);
 
 	mutex_lock(&sr_ref_mutex);
 	kref_put(&cd->kref, sr_kref_release);
diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c
index 36205c2..f6bed86 100644
--- a/drivers/virtio/virtio_pci_common.c
+++ b/drivers/virtio/virtio_pci_common.c
@@ -545,6 +545,7 @@
 static void virtio_pci_remove(struct pci_dev *pci_dev)
 {
 	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
+	struct device *dev = get_device(&vp_dev->vdev.dev);
 
 	unregister_virtio_device(&vp_dev->vdev);
 
@@ -554,6 +555,7 @@
 		virtio_pci_modern_remove(vp_dev);
 
 	pci_disable_device(pci_dev);
+	put_device(dev);
 }
 
 static struct pci_driver virtio_pci_driver = {
diff --git a/drivers/xen/tmem.c b/drivers/xen/tmem.c
index 945fc43..4ac2ca8 100644
--- a/drivers/xen/tmem.c
+++ b/drivers/xen/tmem.c
@@ -242,7 +242,7 @@
 	return xen_tmem_new_pool(shared_uuid, TMEM_POOL_SHARED, pagesize);
 }
 
-static struct cleancache_ops tmem_cleancache_ops = {
+static const struct cleancache_ops tmem_cleancache_ops = {
 	.put_page = tmem_cleancache_put_page,
 	.get_page = tmem_cleancache_get_page,
 	.invalidate_page = tmem_cleancache_flush_page,
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 88d9af3..5fb60ea 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -328,8 +328,8 @@
 		list_add_tail(&work->ordered_list, &wq->ordered_list);
 		spin_unlock_irqrestore(&wq->list_lock, flags);
 	}
-	queue_work(wq->normal_wq, &work->normal_work);
 	trace_btrfs_work_queued(work);
+	queue_work(wq->normal_wq, &work->normal_work);
 }
 
 void btrfs_queue_work(struct btrfs_workqueue *wq,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index dd08e29..4545e2e 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -182,6 +182,7 @@
 	{ .id = BTRFS_TREE_RELOC_OBJECTID,	.name_stem = "treloc"	},
 	{ .id = BTRFS_DATA_RELOC_TREE_OBJECTID,	.name_stem = "dreloc"	},
 	{ .id = BTRFS_UUID_TREE_OBJECTID,	.name_stem = "uuid"	},
+	{ .id = BTRFS_FREE_SPACE_TREE_OBJECTID,	.name_stem = "free-space" },
 	{ .id = 0,				.name_stem = "tree"	},
 };
 
@@ -1787,7 +1788,6 @@
 	int again;
 	struct btrfs_trans_handle *trans;
 
-	set_freezable();
 	do {
 		again = 0;
 
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index 393e36b..53dbeaf 100644
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -153,6 +153,20 @@
 
 static unsigned long *alloc_bitmap(u32 bitmap_size)
 {
+	void *mem;
+
+	/*
+	 * The allocation size varies, observed numbers were < 4K up to 16K.
+	 * Using vmalloc unconditionally would be too heavy, we'll try
+	 * contiguous allocations first.
+	 */
+	if  (bitmap_size <= PAGE_SIZE)
+		return kzalloc(bitmap_size, GFP_NOFS);
+
+	mem = kzalloc(bitmap_size, GFP_NOFS | __GFP_NOWARN);
+	if (mem)
+		return mem;
+
 	return __vmalloc(bitmap_size, GFP_NOFS | __GFP_HIGHMEM | __GFP_ZERO,
 			 PAGE_KERNEL);
 }
@@ -289,7 +303,7 @@
 
 	ret = 0;
 out:
-	vfree(bitmap);
+	kvfree(bitmap);
 	if (ret)
 		btrfs_abort_transaction(trans, root, ret);
 	return ret;
@@ -438,7 +452,7 @@
 
 	ret = 0;
 out:
-	vfree(bitmap);
+	kvfree(bitmap);
 	if (ret)
 		btrfs_abort_transaction(trans, root, ret);
 	return ret;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index e28f3d4..5f06eb1 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7116,21 +7116,41 @@
 	if (ret)
 		return ERR_PTR(ret);
 
-	em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
-			      ins.offset, ins.offset, ins.offset, 0);
-	if (IS_ERR(em)) {
-		btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
-		return em;
-	}
-
+	/*
+	 * Create the ordered extent before the extent map. This is to avoid
+	 * races with the fast fsync path that would lead to it logging file
+	 * extent items that point to disk extents that were not yet written to.
+	 * The fast fsync path collects ordered extents into a local list and
+	 * then collects all the new extent maps, so we must create the ordered
+	 * extent first and make sure the fast fsync path collects any new
+	 * ordered extents after collecting new extent maps as well.
+	 * The fsync path simply can not rely on inode_dio_wait() because it
+	 * causes deadlock with AIO.
+	 */
 	ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid,
 					   ins.offset, ins.offset, 0);
 	if (ret) {
 		btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
-		free_extent_map(em);
 		return ERR_PTR(ret);
 	}
 
+	em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
+			      ins.offset, ins.offset, ins.offset, 0);
+	if (IS_ERR(em)) {
+		struct btrfs_ordered_extent *oe;
+
+		btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
+		oe = btrfs_lookup_ordered_extent(inode, start);
+		ASSERT(oe);
+		if (WARN_ON(!oe))
+			return em;
+		set_bit(BTRFS_ORDERED_IOERR, &oe->flags);
+		set_bit(BTRFS_ORDERED_IO_DONE, &oe->flags);
+		btrfs_remove_ordered_extent(inode, oe);
+		/* Once for our lookup and once for the ordered extents tree. */
+		btrfs_put_ordered_extent(oe);
+		btrfs_put_ordered_extent(oe);
+	}
 	return em;
 }
 
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index fd1c4d9..2bd0011 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -575,7 +575,8 @@
 	    root_objectid == BTRFS_TREE_LOG_OBJECTID ||
 	    root_objectid == BTRFS_CSUM_TREE_OBJECTID ||
 	    root_objectid == BTRFS_UUID_TREE_OBJECTID ||
-	    root_objectid == BTRFS_QUOTA_TREE_OBJECTID)
+	    root_objectid == BTRFS_QUOTA_TREE_OBJECTID ||
+	    root_objectid == BTRFS_FREE_SPACE_TREE_OBJECTID)
 		return 1;
 	return 0;
 }
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index e0ac859..539e7b5 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -202,6 +202,7 @@
 BTRFS_FEAT_ATTR_INCOMPAT(raid56, RAID56);
 BTRFS_FEAT_ATTR_INCOMPAT(skinny_metadata, SKINNY_METADATA);
 BTRFS_FEAT_ATTR_INCOMPAT(no_holes, NO_HOLES);
+BTRFS_FEAT_ATTR_COMPAT_RO(free_space_tree, FREE_SPACE_TREE);
 
 static struct attribute *btrfs_supported_feature_attrs[] = {
 	BTRFS_FEAT_ATTR_PTR(mixed_backref),
@@ -213,6 +214,7 @@
 	BTRFS_FEAT_ATTR_PTR(raid56),
 	BTRFS_FEAT_ATTR_PTR(skinny_metadata),
 	BTRFS_FEAT_ATTR_PTR(no_holes),
+	BTRFS_FEAT_ATTR_PTR(free_space_tree),
 	NULL
 };
 
@@ -780,6 +782,39 @@
 	return error;
 }
 
+
+/*
+ * Change per-fs features in /sys/fs/btrfs/UUID/features to match current
+ * values in superblock. Call after any changes to incompat/compat_ro flags
+ */
+void btrfs_sysfs_feature_update(struct btrfs_fs_info *fs_info,
+		u64 bit, enum btrfs_feature_set set)
+{
+	struct btrfs_fs_devices *fs_devs;
+	struct kobject *fsid_kobj;
+	u64 features;
+	int ret;
+
+	if (!fs_info)
+		return;
+
+	features = get_features(fs_info, set);
+	ASSERT(bit & supported_feature_masks[set]);
+
+	fs_devs = fs_info->fs_devices;
+	fsid_kobj = &fs_devs->fsid_kobj;
+
+	if (!fsid_kobj->state_initialized)
+		return;
+
+	/*
+	 * FIXME: this is too heavy to update just one value, ideally we'd like
+	 * to use sysfs_update_group but some refactoring is needed first.
+	 */
+	sysfs_remove_group(fsid_kobj, &btrfs_feature_attr_group);
+	ret = sysfs_create_group(fsid_kobj, &btrfs_feature_attr_group);
+}
+
 static int btrfs_init_debugfs(void)
 {
 #ifdef CONFIG_DEBUG_FS
diff --git a/fs/btrfs/sysfs.h b/fs/btrfs/sysfs.h
index 9c09522..d7da1a4 100644
--- a/fs/btrfs/sysfs.h
+++ b/fs/btrfs/sysfs.h
@@ -56,7 +56,7 @@
 #define BTRFS_FEAT_ATTR_COMPAT(name, feature) \
 	BTRFS_FEAT_ATTR(name, FEAT_COMPAT, BTRFS_FEATURE_COMPAT, feature)
 #define BTRFS_FEAT_ATTR_COMPAT_RO(name, feature) \
-	BTRFS_FEAT_ATTR(name, FEAT_COMPAT_RO, BTRFS_FEATURE_COMPAT, feature)
+	BTRFS_FEAT_ATTR(name, FEAT_COMPAT_RO, BTRFS_FEATURE_COMPAT_RO, feature)
 #define BTRFS_FEAT_ATTR_INCOMPAT(name, feature) \
 	BTRFS_FEAT_ATTR(name, FEAT_INCOMPAT, BTRFS_FEATURE_INCOMPAT, feature)
 
@@ -90,4 +90,7 @@
 				struct kobject *parent);
 int btrfs_sysfs_add_device(struct btrfs_fs_devices *fs_devs);
 void btrfs_sysfs_remove_fsid(struct btrfs_fs_devices *fs_devs);
+void btrfs_sysfs_feature_update(struct btrfs_fs_info *fs_info,
+		u64 bit, enum btrfs_feature_set set);
+
 #endif /* _BTRFS_SYSFS_H_ */
diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c
index b1d920b..0e1e61a 100644
--- a/fs/btrfs/tests/btrfs-tests.c
+++ b/fs/btrfs/tests/btrfs-tests.c
@@ -82,18 +82,18 @@
 struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(void)
 {
 	struct btrfs_fs_info *fs_info = kzalloc(sizeof(struct btrfs_fs_info),
-						GFP_NOFS);
+						GFP_KERNEL);
 
 	if (!fs_info)
 		return fs_info;
 	fs_info->fs_devices = kzalloc(sizeof(struct btrfs_fs_devices),
-				      GFP_NOFS);
+				      GFP_KERNEL);
 	if (!fs_info->fs_devices) {
 		kfree(fs_info);
 		return NULL;
 	}
 	fs_info->super_copy = kzalloc(sizeof(struct btrfs_super_block),
-				      GFP_NOFS);
+				      GFP_KERNEL);
 	if (!fs_info->super_copy) {
 		kfree(fs_info->fs_devices);
 		kfree(fs_info);
@@ -180,11 +180,11 @@
 {
 	struct btrfs_block_group_cache *cache;
 
-	cache = kzalloc(sizeof(*cache), GFP_NOFS);
+	cache = kzalloc(sizeof(*cache), GFP_KERNEL);
 	if (!cache)
 		return NULL;
 	cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
-					GFP_NOFS);
+					GFP_KERNEL);
 	if (!cache->free_space_ctl) {
 		kfree(cache);
 		return NULL;
diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c
index e29fa29..669b582 100644
--- a/fs/btrfs/tests/extent-io-tests.c
+++ b/fs/btrfs/tests/extent-io-tests.c
@@ -94,7 +94,7 @@
 	 * test.
 	 */
 	for (index = 0; index < (total_dirty >> PAGE_CACHE_SHIFT); index++) {
-		page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
+		page = find_or_create_page(inode->i_mapping, index, GFP_KERNEL);
 		if (!page) {
 			test_msg("Failed to allocate test page\n");
 			ret = -ENOMEM;
@@ -113,7 +113,7 @@
 	 * |--- delalloc ---|
 	 * |---  search  ---|
 	 */
-	set_extent_delalloc(&tmp, 0, 4095, NULL, GFP_NOFS);
+	set_extent_delalloc(&tmp, 0, 4095, NULL, GFP_KERNEL);
 	start = 0;
 	end = 0;
 	found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
@@ -144,7 +144,7 @@
 		test_msg("Couldn't find the locked page\n");
 		goto out_bits;
 	}
-	set_extent_delalloc(&tmp, 4096, max_bytes - 1, NULL, GFP_NOFS);
+	set_extent_delalloc(&tmp, 4096, max_bytes - 1, NULL, GFP_KERNEL);
 	start = test_start;
 	end = 0;
 	found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
@@ -199,7 +199,7 @@
 	 *
 	 * We are re-using our test_start from above since it works out well.
 	 */
-	set_extent_delalloc(&tmp, max_bytes, total_dirty - 1, NULL, GFP_NOFS);
+	set_extent_delalloc(&tmp, max_bytes, total_dirty - 1, NULL, GFP_KERNEL);
 	start = test_start;
 	end = 0;
 	found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
@@ -262,7 +262,7 @@
 	}
 	ret = 0;
 out_bits:
-	clear_extent_bits(&tmp, 0, total_dirty - 1, (unsigned)-1, GFP_NOFS);
+	clear_extent_bits(&tmp, 0, total_dirty - 1, (unsigned)-1, GFP_KERNEL);
 out:
 	if (locked_page)
 		page_cache_release(locked_page);
@@ -360,7 +360,7 @@
 
 	test_msg("Running extent buffer bitmap tests\n");
 
-	bitmap = kmalloc(len, GFP_NOFS);
+	bitmap = kmalloc(len, GFP_KERNEL);
 	if (!bitmap) {
 		test_msg("Couldn't allocate test bitmap\n");
 		return -ENOMEM;
diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c
index 5de55fd..e2d3da0 100644
--- a/fs/btrfs/tests/inode-tests.c
+++ b/fs/btrfs/tests/inode-tests.c
@@ -974,7 +974,7 @@
 			       (BTRFS_MAX_EXTENT_SIZE >> 1) + 4095,
 			       EXTENT_DELALLOC | EXTENT_DIRTY |
 			       EXTENT_UPTODATE | EXTENT_DO_ACCOUNTING, 0, 0,
-			       NULL, GFP_NOFS);
+			       NULL, GFP_KERNEL);
 	if (ret) {
 		test_msg("clear_extent_bit returned %d\n", ret);
 		goto out;
@@ -1045,7 +1045,7 @@
 			       BTRFS_MAX_EXTENT_SIZE+8191,
 			       EXTENT_DIRTY | EXTENT_DELALLOC |
 			       EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
-			       NULL, GFP_NOFS);
+			       NULL, GFP_KERNEL);
 	if (ret) {
 		test_msg("clear_extent_bit returned %d\n", ret);
 		goto out;
@@ -1079,7 +1079,7 @@
 	ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
 			       EXTENT_DIRTY | EXTENT_DELALLOC |
 			       EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
-			       NULL, GFP_NOFS);
+			       NULL, GFP_KERNEL);
 	if (ret) {
 		test_msg("clear_extent_bit returned %d\n", ret);
 		goto out;
@@ -1096,7 +1096,7 @@
 		clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
 				 EXTENT_DIRTY | EXTENT_DELALLOC |
 				 EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
-				 NULL, GFP_NOFS);
+				 NULL, GFP_KERNEL);
 	iput(inode);
 	btrfs_free_dummy_root(root);
 	return ret;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 323e12c..978c3a8 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4127,7 +4127,9 @@
 				     struct inode *inode,
 				     struct btrfs_path *path,
 				     struct list_head *logged_list,
-				     struct btrfs_log_ctx *ctx)
+				     struct btrfs_log_ctx *ctx,
+				     const u64 start,
+				     const u64 end)
 {
 	struct extent_map *em, *n;
 	struct list_head extents;
@@ -4166,7 +4168,13 @@
 	}
 
 	list_sort(NULL, &extents, extent_cmp);
-
+	/*
+	 * Collect any new ordered extents within the range. This is to
+	 * prevent logging file extent items without waiting for the disk
+	 * location they point to being written. We do this only to deal
+	 * with races against concurrent lockless direct IO writes.
+	 */
+	btrfs_get_logged_extents(inode, logged_list, start, end);
 process:
 	while (!list_empty(&extents)) {
 		em = list_entry(extents.next, struct extent_map, list);
@@ -4701,7 +4709,7 @@
 			goto out_unlock;
 		}
 		ret = btrfs_log_changed_extents(trans, root, inode, dst_path,
-						&logged_list, ctx);
+						&logged_list, ctx, start, end);
 		if (ret) {
 			err = ret;
 			goto out_unlock;
diff --git a/include/drm/drm_atomic_helper.h b/include/drm/drm_atomic_helper.h
index 89d008d..fe5efad 100644
--- a/include/drm/drm_atomic_helper.h
+++ b/include/drm/drm_atomic_helper.h
@@ -42,6 +42,10 @@
 			     struct drm_atomic_state *state,
 			     bool async);
 
+bool drm_atomic_helper_framebuffer_changed(struct drm_device *dev,
+					   struct drm_atomic_state *old_state,
+					   struct drm_crtc *crtc);
+
 void drm_atomic_helper_wait_for_vblanks(struct drm_device *dev,
 					struct drm_atomic_state *old_state);
 
diff --git a/include/linux/cleancache.h b/include/linux/cleancache.h
index bda5ec0b4..fccf7f4 100644
--- a/include/linux/cleancache.h
+++ b/include/linux/cleancache.h
@@ -37,7 +37,7 @@
 	void (*invalidate_fs)(int);
 };
 
-extern int cleancache_register_ops(struct cleancache_ops *ops);
+extern int cleancache_register_ops(const struct cleancache_ops *ops);
 extern void __cleancache_init_fs(struct super_block *);
 extern void __cleancache_init_shared_fs(struct super_block *);
 extern int  __cleancache_get_page(struct page *);
@@ -48,14 +48,14 @@
 
 #ifdef CONFIG_CLEANCACHE
 #define cleancache_enabled (1)
-static inline bool cleancache_fs_enabled(struct page *page)
-{
-	return page->mapping->host->i_sb->cleancache_poolid >= 0;
-}
 static inline bool cleancache_fs_enabled_mapping(struct address_space *mapping)
 {
 	return mapping->host->i_sb->cleancache_poolid >= 0;
 }
+static inline bool cleancache_fs_enabled(struct page *page)
+{
+	return cleancache_fs_enabled_mapping(page->mapping);
+}
 #else
 #define cleancache_enabled (0)
 #define cleancache_fs_enabled(_page) (0)
@@ -89,11 +89,9 @@
 
 static inline int cleancache_get_page(struct page *page)
 {
-	int ret = -1;
-
 	if (cleancache_enabled && cleancache_fs_enabled(page))
-		ret = __cleancache_get_page(page);
-	return ret;
+		return __cleancache_get_page(page);
+	return -1;
 }
 
 static inline void cleancache_put_page(struct page *page)
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 0639dcc..81de712 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -165,7 +165,6 @@
 	ftrace_func_t			saved_func;
 	int __percpu			*disabled;
 #ifdef CONFIG_DYNAMIC_FTRACE
-	int				nr_trampolines;
 	struct ftrace_ops_hash		local_hash;
 	struct ftrace_ops_hash		*func_hash;
 	struct ftrace_ops_hash		old_hash;
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index f28dff3..a5c539f 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -133,8 +133,9 @@
 
 /**
  * struct iommu_ops - iommu ops and capabilities
- * @domain_init: init iommu domain
- * @domain_destroy: destroy iommu domain
+ * @capable: check capability
+ * @domain_alloc: allocate iommu domain
+ * @domain_free: free iommu domain
  * @attach_dev: attach device to an iommu domain
  * @detach_dev: detach device from an iommu domain
  * @map: map a physically contiguous memory region to an iommu domain
@@ -144,8 +145,15 @@
  * @iova_to_phys: translate iova to physical address
  * @add_device: add device to iommu grouping
  * @remove_device: remove device from iommu grouping
+ * @device_group: find iommu group for a particular device
  * @domain_get_attr: Query domain attributes
  * @domain_set_attr: Change domain attributes
+ * @get_dm_regions: Request list of direct mapping requirements for a device
+ * @put_dm_regions: Free list of direct mapping requirements for a device
+ * @domain_window_enable: Configure and enable a particular window for a domain
+ * @domain_window_disable: Disable a particular window for a domain
+ * @domain_set_windows: Set the number of windows for a domain
+ * @domain_get_windows: Return the number of windows for a domain
  * @of_xlate: add OF master IDs to iommu grouping
  * @pgsize_bitmap: bitmap of supported page sizes
  * @priv: per-instance data private to the iommu driver
@@ -182,9 +190,9 @@
 	int (*domain_window_enable)(struct iommu_domain *domain, u32 wnd_nr,
 				    phys_addr_t paddr, u64 size, int prot);
 	void (*domain_window_disable)(struct iommu_domain *domain, u32 wnd_nr);
-	/* Set the numer of window per domain */
+	/* Set the number of windows per domain */
 	int (*domain_set_windows)(struct iommu_domain *domain, u32 w_count);
-	/* Get the numer of window per domain */
+	/* Get the number of windows per domain */
 	u32 (*domain_get_windows)(struct iommu_domain *domain);
 
 #ifdef CONFIG_OF_IOMMU
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index e7a018e..017fced 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -1,10 +1,13 @@
 #ifndef __LINUX_SWIOTLB_H
 #define __LINUX_SWIOTLB_H
 
+#include <linux/dma-direction.h>
+#include <linux/init.h>
 #include <linux/types.h>
 
 struct device;
 struct dma_attrs;
+struct page;
 struct scatterlist;
 
 extern int swiotlb_force;
diff --git a/include/trace/events/fence.h b/include/trace/events/fence.h
index 98feb1b..d6dfa05 100644
--- a/include/trace/events/fence.h
+++ b/include/trace/events/fence.h
@@ -17,7 +17,7 @@
 
 	TP_STRUCT__entry(
 		__string(driver, fence->ops->get_driver_name(fence))
-		__string(timeline, fence->ops->get_driver_name(fence))
+		__string(timeline, fence->ops->get_timeline_name(fence))
 		__field(unsigned int, context)
 		__field(unsigned int, seqno)
 
diff --git a/include/uapi/drm/etnaviv_drm.h b/include/uapi/drm/etnaviv_drm.h
index 4cc989a..f95e1c4 100644
--- a/include/uapi/drm/etnaviv_drm.h
+++ b/include/uapi/drm/etnaviv_drm.h
@@ -48,6 +48,8 @@
 #define ETNAVIV_PARAM_GPU_FEATURES_2                0x05
 #define ETNAVIV_PARAM_GPU_FEATURES_3                0x06
 #define ETNAVIV_PARAM_GPU_FEATURES_4                0x07
+#define ETNAVIV_PARAM_GPU_FEATURES_5                0x08
+#define ETNAVIV_PARAM_GPU_FEATURES_6                0x09
 
 #define ETNAVIV_PARAM_GPU_STREAM_COUNT              0x10
 #define ETNAVIV_PARAM_GPU_REGISTER_MAX              0x11
@@ -59,6 +61,7 @@
 #define ETNAVIV_PARAM_GPU_BUFFER_SIZE               0x17
 #define ETNAVIV_PARAM_GPU_INSTRUCTION_COUNT         0x18
 #define ETNAVIV_PARAM_GPU_NUM_CONSTANTS             0x19
+#define ETNAVIV_PARAM_GPU_NUM_VARYINGS              0x1a
 
 #define ETNA_MAX_PIPES 4
 
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 02e8dfa..68d3ebc 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -235,7 +235,7 @@
 
 config APM_EMULATION
 	tristate "Advanced Power Management Emulation"
-	depends on PM && SYS_SUPPORTS_APM_EMULATION
+	depends on SYS_SUPPORTS_APM_EMULATION
 	help
 	  APM is a BIOS specification for saving power using several different
 	  techniques. This is mostly useful for battery powered laptops with
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index de0e786..544a713 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -162,7 +162,7 @@
 	 */
 	if (idle_should_freeze()) {
 		entered_state = cpuidle_enter_freeze(drv, dev);
-		if (entered_state >= 0) {
+		if (entered_state > 0) {
 			local_irq_enable();
 			goto exit_idle;
 		}
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 580ac2d..15a1795 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -316,24 +316,24 @@
 		put_seccomp_filter(thread);
 		smp_store_release(&thread->seccomp.filter,
 				  caller->seccomp.filter);
+
+		/*
+		 * Don't let an unprivileged task work around
+		 * the no_new_privs restriction by creating
+		 * a thread that sets it up, enters seccomp,
+		 * then dies.
+		 */
+		if (task_no_new_privs(caller))
+			task_set_no_new_privs(thread);
+
 		/*
 		 * Opt the other thread into seccomp if needed.
 		 * As threads are considered to be trust-realm
 		 * equivalent (see ptrace_may_access), it is safe to
 		 * allow one thread to transition the other.
 		 */
-		if (thread->seccomp.mode == SECCOMP_MODE_DISABLED) {
-			/*
-			 * Don't let an unprivileged task work around
-			 * the no_new_privs restriction by creating
-			 * a thread that sets it up, enters seccomp,
-			 * then dies.
-			 */
-			if (task_no_new_privs(caller))
-				task_set_no_new_privs(thread);
-
+		if (thread->seccomp.mode == SECCOMP_MODE_DISABLED)
 			seccomp_assign_mode(thread, SECCOMP_MODE_FILTER);
-		}
 	}
 }
 
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 87fb980..d929340 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1751,7 +1751,7 @@
 {
 	__buffer_unlock_commit(buffer, event);
 
-	ftrace_trace_stack(tr, buffer, flags, 6, pc, regs);
+	ftrace_trace_stack(tr, buffer, flags, 0, pc, regs);
 	ftrace_trace_userstack(buffer, flags, pc);
 }
 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs);
diff --git a/mm/cleancache.c b/mm/cleancache.c
index 8fc5081..ba5d8f3 100644
--- a/mm/cleancache.c
+++ b/mm/cleancache.c
@@ -22,7 +22,7 @@
  * cleancache_ops is set by cleancache_register_ops to contain the pointers
  * to the cleancache "backend" implementation functions.
  */
-static struct cleancache_ops *cleancache_ops __read_mostly;
+static const struct cleancache_ops *cleancache_ops __read_mostly;
 
 /*
  * Counters available via /sys/kernel/debug/cleancache (if debugfs is
@@ -49,7 +49,7 @@
 /*
  * Register operations for cleancache. Returns 0 on success.
  */
-int cleancache_register_ops(struct cleancache_ops *ops)
+int cleancache_register_ops(const struct cleancache_ops *ops)
 {
 	if (cmpxchg(&cleancache_ops, NULL, ops))
 		return -EBUSY;
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index e080746..48958d3 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -594,7 +594,8 @@
 		if (strncmp(symname, "_restgpr0_", sizeof("_restgpr0_") - 1) == 0 ||
 		    strncmp(symname, "_savegpr0_", sizeof("_savegpr0_") - 1) == 0 ||
 		    strncmp(symname, "_restvr_", sizeof("_restvr_") - 1) == 0 ||
-		    strncmp(symname, "_savevr_", sizeof("_savevr_") - 1) == 0)
+		    strncmp(symname, "_savevr_", sizeof("_savevr_") - 1) == 0 ||
+		    strcmp(symname, ".TOC.") == 0)
 			return 1;
 	/* Do not ignore this symbol */
 	return 0;
diff --git a/security/keys/key.c b/security/keys/key.c
index 07a8731..09ef276 100644
--- a/security/keys/key.c
+++ b/security/keys/key.c
@@ -430,7 +430,8 @@
 
 			/* and link it into the destination keyring */
 			if (keyring) {
-				set_bit(KEY_FLAG_KEEP, &key->flags);
+				if (test_bit(KEY_FLAG_KEEP, &keyring->flags))
+					set_bit(KEY_FLAG_KEEP, &key->flags);
 
 				__key_link(key, _edit);
 			}
diff --git a/sound/core/Kconfig b/sound/core/Kconfig
index e3e9491..a2a1e24 100644
--- a/sound/core/Kconfig
+++ b/sound/core/Kconfig
@@ -97,11 +97,11 @@
 	bool "PCM timer interface" if EXPERT
 	default y
 	help
-	  If you disable this option, pcm timer will be inavailable, so
-	  those stubs used pcm timer (e.g. dmix, dsnoop & co) may work
+	  If you disable this option, pcm timer will be unavailable, so
+	  those stubs that use pcm timer (e.g. dmix, dsnoop & co) may work
 	  incorrectlly.
 
-	  For some embedded device, we may disable it to reduce memory
+	  For some embedded devices, we may disable it to reduce memory
 	  footprint, about 20KB on x86_64 platform.
 
 config SND_SEQUENCER_OSS
diff --git a/sound/core/compress_offload.c b/sound/core/compress_offload.c
index 18b8dc4..7fac3ca 100644
--- a/sound/core/compress_offload.c
+++ b/sound/core/compress_offload.c
@@ -46,6 +46,13 @@
 #include <sound/compress_offload.h>
 #include <sound/compress_driver.h>
 
+/* struct snd_compr_codec_caps overflows the ioctl bit size for some
+ * architectures, so we need to disable the relevant ioctls.
+ */
+#if _IOC_SIZEBITS < 14
+#define COMPR_CODEC_CAPS_OVERFLOW
+#endif
+
 /* TODO:
  * - add substream support for multiple devices in case of
  *	SND_DYNAMIC_MINORS is not used
@@ -440,6 +447,7 @@
 	return retval;
 }
 
+#ifndef COMPR_CODEC_CAPS_OVERFLOW
 static int
 snd_compr_get_codec_caps(struct snd_compr_stream *stream, unsigned long arg)
 {
@@ -463,6 +471,7 @@
 	kfree(caps);
 	return retval;
 }
+#endif /* !COMPR_CODEC_CAPS_OVERFLOW */
 
 /* revisit this with snd_pcm_preallocate_xxx */
 static int snd_compr_allocate_buffer(struct snd_compr_stream *stream,
@@ -801,9 +810,11 @@
 	case _IOC_NR(SNDRV_COMPRESS_GET_CAPS):
 		retval = snd_compr_get_caps(stream, arg);
 		break;
+#ifndef COMPR_CODEC_CAPS_OVERFLOW
 	case _IOC_NR(SNDRV_COMPRESS_GET_CODEC_CAPS):
 		retval = snd_compr_get_codec_caps(stream, arg);
 		break;
+#endif
 	case _IOC_NR(SNDRV_COMPRESS_SET_PARAMS):
 		retval = snd_compr_set_params(stream, arg);
 		break;
diff --git a/sound/core/seq/oss/seq_oss_init.c b/sound/core/seq/oss/seq_oss_init.c
index b1221b2..6779e82b 100644
--- a/sound/core/seq/oss/seq_oss_init.c
+++ b/sound/core/seq/oss/seq_oss_init.c
@@ -202,7 +202,7 @@
 
 	dp->index = i;
 	if (i >= SNDRV_SEQ_OSS_MAX_CLIENTS) {
-		pr_err("ALSA: seq_oss: too many applications\n");
+		pr_debug("ALSA: seq_oss: too many applications\n");
 		rc = -ENOMEM;
 		goto _error;
 	}
diff --git a/sound/core/seq/oss/seq_oss_synth.c b/sound/core/seq/oss/seq_oss_synth.c
index 0f3b381..b16dbef 100644
--- a/sound/core/seq/oss/seq_oss_synth.c
+++ b/sound/core/seq/oss/seq_oss_synth.c
@@ -308,7 +308,7 @@
 	struct seq_oss_synth *rec;
 	struct seq_oss_synthinfo *info;
 
-	if (snd_BUG_ON(dp->max_synthdev >= SNDRV_SEQ_OSS_MAX_SYNTH_DEVS))
+	if (snd_BUG_ON(dp->max_synthdev > SNDRV_SEQ_OSS_MAX_SYNTH_DEVS))
 		return;
 	for (i = 0; i < dp->max_synthdev; i++) {
 		info = &dp->synths[i];
diff --git a/sound/drivers/dummy.c b/sound/drivers/dummy.c
index 75b7485..bde3330 100644
--- a/sound/drivers/dummy.c
+++ b/sound/drivers/dummy.c
@@ -87,7 +87,7 @@
 module_param(fake_buffer, bool, 0444);
 MODULE_PARM_DESC(fake_buffer, "Fake buffer allocations.");
 #ifdef CONFIG_HIGH_RES_TIMERS
-module_param(hrtimer, bool, 0644);
+module_param(hrtimer, bool, 0444);
 MODULE_PARM_DESC(hrtimer, "Use hrtimer as the timer source.");
 #endif
 
diff --git a/sound/firewire/bebob/bebob_stream.c b/sound/firewire/bebob/bebob_stream.c
index 926e5dc..5022c9b 100644
--- a/sound/firewire/bebob/bebob_stream.c
+++ b/sound/firewire/bebob/bebob_stream.c
@@ -47,14 +47,16 @@
 	[6] = 0x07,
 };
 
-static unsigned int
-get_formation_index(unsigned int rate)
+static int
+get_formation_index(unsigned int rate, unsigned int *index)
 {
 	unsigned int i;
 
 	for (i = 0; i < ARRAY_SIZE(snd_bebob_rate_table); i++) {
-		if (snd_bebob_rate_table[i] == rate)
-			return i;
+		if (snd_bebob_rate_table[i] == rate) {
+			*index = i;
+			return 0;
+		}
 	}
 	return -EINVAL;
 }
@@ -425,7 +427,9 @@
 		goto end;
 
 	/* confirm params for both streams */
-	index = get_formation_index(rate);
+	err = get_formation_index(rate, &index);
+	if (err < 0)
+		goto end;
 	pcm_channels = bebob->tx_stream_formations[index].pcm;
 	midi_channels = bebob->tx_stream_formations[index].midi;
 	err = amdtp_am824_set_parameters(&bebob->tx_stream, rate,
diff --git a/sound/isa/Kconfig b/sound/isa/Kconfig
index 0216475..37adcc6 100644
--- a/sound/isa/Kconfig
+++ b/sound/isa/Kconfig
@@ -3,6 +3,7 @@
 config SND_WSS_LIB
         tristate
         select SND_PCM
+	select SND_TIMER
 
 config SND_SB_COMMON
         tristate
@@ -42,6 +43,7 @@
 	select SND_OPL3_LIB
 	select SND_MPU401_UART
 	select SND_PCM
+	select SND_TIMER
 	help
 	  Say Y here to include support for Analog Devices SoundPort
 	  AD1816A or compatible sound chips.
@@ -209,6 +211,7 @@
 	tristate "Gravis UltraSound Classic"
 	select SND_RAWMIDI
 	select SND_PCM
+	select SND_TIMER
 	help
 	  Say Y here to include support for Gravis UltraSound Classic
 	  soundcards.
@@ -221,6 +224,7 @@
 	select SND_OPL3_LIB
 	select SND_MPU401_UART
 	select SND_PCM
+	select SND_TIMER
 	help
 	  Say Y here to include support for Gravis UltraSound Extreme
 	  soundcards.
diff --git a/sound/pci/Kconfig b/sound/pci/Kconfig
index 656ce39..8f6594a 100644
--- a/sound/pci/Kconfig
+++ b/sound/pci/Kconfig
@@ -155,6 +155,7 @@
 	select SND_PCM
 	select SND_RAWMIDI
 	select SND_AC97_CODEC
+	select SND_TIMER
 	depends on ZONE_DMA
 	help
 	  Say Y here to include support for Aztech AZF3328 (PCI168)
@@ -463,6 +464,7 @@
 	select SND_HWDEP
 	select SND_RAWMIDI
 	select SND_AC97_CODEC
+	select SND_TIMER
 	depends on ZONE_DMA
 	help
 	  Say Y to include support for Sound Blaster PCI 512, Live!,
@@ -889,6 +891,7 @@
 	select SND_OPL3_LIB
 	select SND_MPU401_UART
 	select SND_AC97_CODEC
+	select SND_TIMER
 	help
 	  Say Y here to include support for Yamaha PCI audio chips -
 	  YMF724, YMF724F, YMF740, YMF740C, YMF744, YMF754.
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 256e6cd..4045dca 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -90,6 +90,8 @@
 #define NVIDIA_HDA_ENABLE_COHBIT      0x01
 
 /* Defines for Intel SCH HDA snoop control */
+#define INTEL_HDA_CGCTL	 0x48
+#define INTEL_HDA_CGCTL_MISCBDCGE        (0x1 << 6)
 #define INTEL_SCH_HDA_DEVC      0x78
 #define INTEL_SCH_HDA_DEVC_NOSNOOP       (0x1<<11)
 
@@ -534,10 +536,21 @@
 {
 	struct hdac_bus *bus = azx_bus(chip);
 	struct pci_dev *pci = chip->pci;
+	u32 val;
 
 	if (chip->driver_caps & AZX_DCAPS_I915_POWERWELL)
 		snd_hdac_set_codec_wakeup(bus, true);
+	if (IS_BROXTON(pci)) {
+		pci_read_config_dword(pci, INTEL_HDA_CGCTL, &val);
+		val = val & ~INTEL_HDA_CGCTL_MISCBDCGE;
+		pci_write_config_dword(pci, INTEL_HDA_CGCTL, val);
+	}
 	azx_init_chip(chip, full_reset);
+	if (IS_BROXTON(pci)) {
+		pci_read_config_dword(pci, INTEL_HDA_CGCTL, &val);
+		val = val | INTEL_HDA_CGCTL_MISCBDCGE;
+		pci_write_config_dword(pci, INTEL_HDA_CGCTL, val);
+	}
 	if (chip->driver_caps & AZX_DCAPS_I915_POWERWELL)
 		snd_hdac_set_codec_wakeup(bus, false);
 
diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c
index 426a29a..1f52b55 100644
--- a/sound/pci/hda/patch_hdmi.c
+++ b/sound/pci/hda/patch_hdmi.c
@@ -3653,6 +3653,7 @@
 HDA_CODEC_ENTRY(0x10de0071, "GPU 71 HDMI/DP",	patch_nvhdmi),
 HDA_CODEC_ENTRY(0x10de0072, "GPU 72 HDMI/DP",	patch_nvhdmi),
 HDA_CODEC_ENTRY(0x10de007d, "GPU 7d HDMI/DP",	patch_nvhdmi),
+HDA_CODEC_ENTRY(0x10de0083, "GPU 83 HDMI/DP",	patch_nvhdmi),
 HDA_CODEC_ENTRY(0x10de8001, "MCP73 HDMI",	patch_nvhdmi_2ch),
 HDA_CODEC_ENTRY(0x11069f80, "VX900 HDMI/DP",	patch_via_hdmi),
 HDA_CODEC_ENTRY(0x11069f81, "VX900 HDMI/DP",	patch_via_hdmi),
diff --git a/sound/sparc/Kconfig b/sound/sparc/Kconfig
index d75deba..dfcd386 100644
--- a/sound/sparc/Kconfig
+++ b/sound/sparc/Kconfig
@@ -22,6 +22,7 @@
 config SND_SUN_CS4231
 	tristate "Sun CS4231"
 	select SND_PCM
+	select SND_TIMER
 	help
 	  Say Y here to include support for CS4231 sound device on Sun.
 
diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index 23ea6d8..a75d9ce 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -1205,8 +1205,12 @@
 	 * "Playback Design" products need a 50ms delay after setting the
 	 * USB interface.
 	 */
-	if (le16_to_cpu(dev->descriptor.idVendor) == 0x23ba)
+	switch (le16_to_cpu(dev->descriptor.idVendor)) {
+	case 0x23ba: /* Playback Design */
+	case 0x0644: /* TEAC Corp. */
 		mdelay(50);
+		break;
+	}
 }
 
 void snd_usb_ctl_msg_quirk(struct usb_device *dev, unsigned int pipe,
@@ -1221,6 +1225,14 @@
 	    (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS)
 		mdelay(20);
 
+	/*
+	 * "TEAC Corp." products need a 20ms delay after each
+	 * class compliant request
+	 */
+	if ((le16_to_cpu(dev->descriptor.idVendor) == 0x0644) &&
+	    (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS)
+		mdelay(20);
+
 	/* Marantz/Denon devices with USB DAC functionality need a delay
 	 * after each class compliant request
 	 */
diff --git a/tools/virtio/asm/barrier.h b/tools/virtio/asm/barrier.h
index 26b7926..ba34f9e 100644
--- a/tools/virtio/asm/barrier.h
+++ b/tools/virtio/asm/barrier.h
@@ -1,15 +1,19 @@
 #if defined(__i386__) || defined(__x86_64__)
 #define barrier() asm volatile("" ::: "memory")
-#define mb() __sync_synchronize()
-
-#define smp_mb()	mb()
-# define dma_rmb()	barrier()
-# define dma_wmb()	barrier()
-# define smp_rmb()	barrier()
-# define smp_wmb()	barrier()
+#define virt_mb() __sync_synchronize()
+#define virt_rmb() barrier()
+#define virt_wmb() barrier()
+/* Atomic store should be enough, but gcc generates worse code in that case. */
+#define virt_store_mb(var, value)  do { \
+	typeof(var) virt_store_mb_value = (value); \
+	__atomic_exchange(&(var), &virt_store_mb_value, &virt_store_mb_value, \
+			  __ATOMIC_SEQ_CST); \
+	barrier(); \
+} while (0);
 /* Weak barriers should be used. If not - it's a bug */
-# define rmb()	abort()
-# define wmb()	abort()
+# define mb() abort()
+# define rmb() abort()
+# define wmb() abort()
 #else
 #error Please fill in barrier macros
 #endif
diff --git a/tools/virtio/linux/compiler.h b/tools/virtio/linux/compiler.h
new file mode 100644
index 0000000..845960e
--- /dev/null
+++ b/tools/virtio/linux/compiler.h
@@ -0,0 +1,9 @@
+#ifndef LINUX_COMPILER_H
+#define LINUX_COMPILER_H
+
+#define WRITE_ONCE(var, val) \
+	(*((volatile typeof(val) *)(&(var))) = (val))
+
+#define READ_ONCE(var) (*((volatile typeof(val) *)(&(var))))
+
+#endif
diff --git a/tools/virtio/linux/kernel.h b/tools/virtio/linux/kernel.h
index 4db7d56..0338499 100644
--- a/tools/virtio/linux/kernel.h
+++ b/tools/virtio/linux/kernel.h
@@ -8,6 +8,7 @@
 #include <assert.h>
 #include <stdarg.h>
 
+#include <linux/compiler.h>
 #include <linux/types.h>
 #include <linux/printk.h>
 #include <linux/bug.h>
diff --git a/tools/virtio/ringtest/Makefile b/tools/virtio/ringtest/Makefile
new file mode 100644
index 0000000..feaa64a
--- /dev/null
+++ b/tools/virtio/ringtest/Makefile
@@ -0,0 +1,22 @@
+all:
+
+all: ring virtio_ring_0_9 virtio_ring_poll
+
+CFLAGS += -Wall
+CFLAGS += -pthread -O2 -ggdb
+LDFLAGS += -pthread -O2 -ggdb
+
+main.o: main.c main.h
+ring.o: ring.c main.h
+virtio_ring_0_9.o: virtio_ring_0_9.c main.h
+virtio_ring_poll.o: virtio_ring_poll.c virtio_ring_0_9.c main.h
+ring: ring.o main.o
+virtio_ring_0_9: virtio_ring_0_9.o main.o
+virtio_ring_poll: virtio_ring_poll.o main.o
+clean:
+	-rm main.o
+	-rm ring.o ring
+	-rm virtio_ring_0_9.o virtio_ring_0_9
+	-rm virtio_ring_poll.o virtio_ring_poll
+
+.PHONY: all clean
diff --git a/tools/virtio/ringtest/README b/tools/virtio/ringtest/README
new file mode 100644
index 0000000..34e94c4
--- /dev/null
+++ b/tools/virtio/ringtest/README
@@ -0,0 +1,2 @@
+Partial implementation of various ring layouts, useful to tune virtio design.
+Uses shared memory heavily.
diff --git a/tools/virtio/ringtest/main.c b/tools/virtio/ringtest/main.c
new file mode 100644
index 0000000..3a5ff43
--- /dev/null
+++ b/tools/virtio/ringtest/main.c
@@ -0,0 +1,366 @@
+/*
+ * Copyright (C) 2016 Red Hat, Inc.
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Command line processing and common functions for ring benchmarking.
+ */
+#define _GNU_SOURCE
+#include <getopt.h>
+#include <pthread.h>
+#include <assert.h>
+#include <sched.h>
+#include "main.h"
+#include <sys/eventfd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <limits.h>
+
+int runcycles = 10000000;
+int max_outstanding = INT_MAX;
+int batch = 1;
+
+bool do_sleep = false;
+bool do_relax = false;
+bool do_exit = true;
+
+unsigned ring_size = 256;
+
+static int kickfd = -1;
+static int callfd = -1;
+
+void notify(int fd)
+{
+	unsigned long long v = 1;
+	int r;
+
+	vmexit();
+	r = write(fd, &v, sizeof v);
+	assert(r == sizeof v);
+	vmentry();
+}
+
+void wait_for_notify(int fd)
+{
+	unsigned long long v = 1;
+	int r;
+
+	vmexit();
+	r = read(fd, &v, sizeof v);
+	assert(r == sizeof v);
+	vmentry();
+}
+
+void kick(void)
+{
+	notify(kickfd);
+}
+
+void wait_for_kick(void)
+{
+	wait_for_notify(kickfd);
+}
+
+void call(void)
+{
+	notify(callfd);
+}
+
+void wait_for_call(void)
+{
+	wait_for_notify(callfd);
+}
+
+void set_affinity(const char *arg)
+{
+	cpu_set_t cpuset;
+	int ret;
+	pthread_t self;
+	long int cpu;
+	char *endptr;
+
+	if (!arg)
+		return;
+
+	cpu = strtol(arg, &endptr, 0);
+	assert(!*endptr);
+
+	assert(cpu >= 0 || cpu < CPU_SETSIZE);
+
+	self = pthread_self();
+	CPU_ZERO(&cpuset);
+	CPU_SET(cpu, &cpuset);
+
+	ret = pthread_setaffinity_np(self, sizeof(cpu_set_t), &cpuset);
+	assert(!ret);
+}
+
+static void run_guest(void)
+{
+	int completed_before;
+	int completed = 0;
+	int started = 0;
+	int bufs = runcycles;
+	int spurious = 0;
+	int r;
+	unsigned len;
+	void *buf;
+	int tokick = batch;
+
+	for (;;) {
+		if (do_sleep)
+			disable_call();
+		completed_before = completed;
+		do {
+			if (started < bufs &&
+			    started - completed < max_outstanding) {
+				r = add_inbuf(0, NULL, "Hello, world!");
+				if (__builtin_expect(r == 0, true)) {
+					++started;
+					if (!--tokick) {
+						tokick = batch;
+						if (do_sleep)
+							kick_available();
+					}
+
+				}
+			} else
+				r = -1;
+
+			/* Flush out completed bufs if any */
+			if (get_buf(&len, &buf)) {
+				++completed;
+				if (__builtin_expect(completed == bufs, false))
+					return;
+				r = 0;
+			}
+		} while (r == 0);
+		if (completed == completed_before)
+			++spurious;
+		assert(completed <= bufs);
+		assert(started <= bufs);
+		if (do_sleep) {
+			if (enable_call())
+				wait_for_call();
+		} else {
+			poll_used();
+		}
+	}
+}
+
+static void run_host(void)
+{
+	int completed_before;
+	int completed = 0;
+	int spurious = 0;
+	int bufs = runcycles;
+	unsigned len;
+	void *buf;
+
+	for (;;) {
+		if (do_sleep) {
+			if (enable_kick())
+				wait_for_kick();
+		} else {
+			poll_avail();
+		}
+		if (do_sleep)
+			disable_kick();
+		completed_before = completed;
+		while (__builtin_expect(use_buf(&len, &buf), true)) {
+			if (do_sleep)
+				call_used();
+			++completed;
+			if (__builtin_expect(completed == bufs, false))
+				return;
+		}
+		if (completed == completed_before)
+			++spurious;
+		assert(completed <= bufs);
+		if (completed == bufs)
+			break;
+	}
+}
+
+void *start_guest(void *arg)
+{
+	set_affinity(arg);
+	run_guest();
+	pthread_exit(NULL);
+}
+
+void *start_host(void *arg)
+{
+	set_affinity(arg);
+	run_host();
+	pthread_exit(NULL);
+}
+
+static const char optstring[] = "";
+static const struct option longopts[] = {
+	{
+		.name = "help",
+		.has_arg = no_argument,
+		.val = 'h',
+	},
+	{
+		.name = "host-affinity",
+		.has_arg = required_argument,
+		.val = 'H',
+	},
+	{
+		.name = "guest-affinity",
+		.has_arg = required_argument,
+		.val = 'G',
+	},
+	{
+		.name = "ring-size",
+		.has_arg = required_argument,
+		.val = 'R',
+	},
+	{
+		.name = "run-cycles",
+		.has_arg = required_argument,
+		.val = 'C',
+	},
+	{
+		.name = "outstanding",
+		.has_arg = required_argument,
+		.val = 'o',
+	},
+	{
+		.name = "batch",
+		.has_arg = required_argument,
+		.val = 'b',
+	},
+	{
+		.name = "sleep",
+		.has_arg = no_argument,
+		.val = 's',
+	},
+	{
+		.name = "relax",
+		.has_arg = no_argument,
+		.val = 'x',
+	},
+	{
+		.name = "exit",
+		.has_arg = no_argument,
+		.val = 'e',
+	},
+	{
+	}
+};
+
+static void help(void)
+{
+	fprintf(stderr, "Usage: <test> [--help]"
+		" [--host-affinity H]"
+		" [--guest-affinity G]"
+		" [--ring-size R (default: %d)]"
+		" [--run-cycles C (default: %d)]"
+		" [--batch b]"
+		" [--outstanding o]"
+		" [--sleep]"
+		" [--relax]"
+		" [--exit]"
+		"\n",
+		ring_size,
+		runcycles);
+}
+
+int main(int argc, char **argv)
+{
+	int ret;
+	pthread_t host, guest;
+	void *tret;
+	char *host_arg = NULL;
+	char *guest_arg = NULL;
+	char *endptr;
+	long int c;
+
+	kickfd = eventfd(0, 0);
+	assert(kickfd >= 0);
+	callfd = eventfd(0, 0);
+	assert(callfd >= 0);
+
+	for (;;) {
+		int o = getopt_long(argc, argv, optstring, longopts, NULL);
+		switch (o) {
+		case -1:
+			goto done;
+		case '?':
+			help();
+			exit(2);
+		case 'H':
+			host_arg = optarg;
+			break;
+		case 'G':
+			guest_arg = optarg;
+			break;
+		case 'R':
+			ring_size = strtol(optarg, &endptr, 0);
+			assert(ring_size && !(ring_size & (ring_size - 1)));
+			assert(!*endptr);
+			break;
+		case 'C':
+			c = strtol(optarg, &endptr, 0);
+			assert(!*endptr);
+			assert(c > 0 && c < INT_MAX);
+			runcycles = c;
+			break;
+		case 'o':
+			c = strtol(optarg, &endptr, 0);
+			assert(!*endptr);
+			assert(c > 0 && c < INT_MAX);
+			max_outstanding = c;
+			break;
+		case 'b':
+			c = strtol(optarg, &endptr, 0);
+			assert(!*endptr);
+			assert(c > 0 && c < INT_MAX);
+			batch = c;
+			break;
+		case 's':
+			do_sleep = true;
+			break;
+		case 'x':
+			do_relax = true;
+			break;
+		case 'e':
+			do_exit = true;
+			break;
+		default:
+			help();
+			exit(4);
+			break;
+		}
+	}
+
+	/* does nothing here, used to make sure all smp APIs compile */
+	smp_acquire();
+	smp_release();
+	smp_mb();
+done:
+
+	if (batch > max_outstanding)
+		batch = max_outstanding;
+
+	if (optind < argc) {
+		help();
+		exit(4);
+	}
+	alloc_ring();
+
+	ret = pthread_create(&host, NULL, start_host, host_arg);
+	assert(!ret);
+	ret = pthread_create(&guest, NULL, start_guest, guest_arg);
+	assert(!ret);
+
+	ret = pthread_join(guest, &tret);
+	assert(!ret);
+	ret = pthread_join(host, &tret);
+	assert(!ret);
+	return 0;
+}
diff --git a/tools/virtio/ringtest/main.h b/tools/virtio/ringtest/main.h
new file mode 100644
index 0000000..16917ac
--- /dev/null
+++ b/tools/virtio/ringtest/main.h
@@ -0,0 +1,119 @@
+/*
+ * Copyright (C) 2016 Red Hat, Inc.
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Common macros and functions for ring benchmarking.
+ */
+#ifndef MAIN_H
+#define MAIN_H
+
+#include <stdbool.h>
+
+extern bool do_exit;
+
+#if defined(__x86_64__) || defined(__i386__)
+#include "x86intrin.h"
+
+static inline void wait_cycles(unsigned long long cycles)
+{
+	unsigned long long t;
+
+	t = __rdtsc();
+	while (__rdtsc() - t < cycles) {}
+}
+
+#define VMEXIT_CYCLES 500
+#define VMENTRY_CYCLES 500
+
+#else
+static inline void wait_cycles(unsigned long long cycles)
+{
+	_Exit(5);
+}
+#define VMEXIT_CYCLES 0
+#define VMENTRY_CYCLES 0
+#endif
+
+static inline void vmexit(void)
+{
+	if (!do_exit)
+		return;
+	
+	wait_cycles(VMEXIT_CYCLES);
+}
+static inline void vmentry(void)
+{
+	if (!do_exit)
+		return;
+	
+	wait_cycles(VMENTRY_CYCLES);
+}
+
+/* implemented by ring */
+void alloc_ring(void);
+/* guest side */
+int add_inbuf(unsigned, void *, void *);
+void *get_buf(unsigned *, void **);
+void disable_call();
+bool enable_call();
+void kick_available();
+void poll_used();
+/* host side */
+void disable_kick();
+bool enable_kick();
+bool use_buf(unsigned *, void **);
+void call_used();
+void poll_avail();
+
+/* implemented by main */
+extern bool do_sleep;
+void kick(void);
+void wait_for_kick(void);
+void call(void);
+void wait_for_call(void);
+
+extern unsigned ring_size;
+
+/* Compiler barrier - similar to what Linux uses */
+#define barrier() asm volatile("" ::: "memory")
+
+/* Is there a portable way to do this? */
+#if defined(__x86_64__) || defined(__i386__)
+#define cpu_relax() asm ("rep; nop" ::: "memory")
+#else
+#define cpu_relax() assert(0)
+#endif
+
+extern bool do_relax;
+
+static inline void busy_wait(void)
+{
+	if (do_relax)
+		cpu_relax();
+	else
+		/* prevent compiler from removing busy loops */
+		barrier();
+} 
+
+/*
+ * Not using __ATOMIC_SEQ_CST since gcc docs say they are only synchronized
+ * with other __ATOMIC_SEQ_CST calls.
+ */
+#define smp_mb() __sync_synchronize()
+
+/*
+ * This abuses the atomic builtins for thread fences, and
+ * adds a compiler barrier.
+ */
+#define smp_release() do { \
+    barrier(); \
+    __atomic_thread_fence(__ATOMIC_RELEASE); \
+} while (0)
+
+#define smp_acquire() do { \
+    __atomic_thread_fence(__ATOMIC_ACQUIRE); \
+    barrier(); \
+} while (0)
+
+#endif
diff --git a/tools/virtio/ringtest/ring.c b/tools/virtio/ringtest/ring.c
new file mode 100644
index 0000000..c25c8d2
--- /dev/null
+++ b/tools/virtio/ringtest/ring.c
@@ -0,0 +1,272 @@
+/*
+ * Copyright (C) 2016 Red Hat, Inc.
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Simple descriptor-based ring. virtio 0.9 compatible event index is used for
+ * signalling, unconditionally.
+ */
+#define _GNU_SOURCE
+#include "main.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+/* Next - Where next entry will be written.
+ * Prev - "Next" value when event triggered previously.
+ * Event - Peer requested event after writing this entry.
+ */
+static inline bool need_event(unsigned short event,
+			      unsigned short next,
+			      unsigned short prev)
+{
+	return (unsigned short)(next - event - 1) < (unsigned short)(next - prev);
+}
+
+/* Design:
+ * Guest adds descriptors with unique index values and DESC_HW in flags.
+ * Host overwrites used descriptors with correct len, index, and DESC_HW clear.
+ * Flags are always set last.
+ */
+#define DESC_HW 0x1
+
+struct desc {
+	unsigned short flags;
+	unsigned short index;
+	unsigned len;
+	unsigned long long addr;
+};
+
+/* how much padding is needed to avoid false cache sharing */
+#define HOST_GUEST_PADDING 0x80
+
+/* Mostly read */
+struct event {
+	unsigned short kick_index;
+	unsigned char reserved0[HOST_GUEST_PADDING - 2];
+	unsigned short call_index;
+	unsigned char reserved1[HOST_GUEST_PADDING - 2];
+};
+
+struct data {
+	void *buf; /* descriptor is writeable, we can't get buf from there */
+	void *data;
+} *data;
+
+struct desc *ring;
+struct event *event;
+
+struct guest {
+	unsigned avail_idx;
+	unsigned last_used_idx;
+	unsigned num_free;
+	unsigned kicked_avail_idx;
+	unsigned char reserved[HOST_GUEST_PADDING - 12];
+} guest;
+
+struct host {
+	/* we do not need to track last avail index
+	 * unless we have more than one in flight.
+	 */
+	unsigned used_idx;
+	unsigned called_used_idx;
+	unsigned char reserved[HOST_GUEST_PADDING - 4];
+} host;
+
+/* implemented by ring */
+void alloc_ring(void)
+{
+	int ret;
+	int i;
+
+	ret = posix_memalign((void **)&ring, 0x1000, ring_size * sizeof *ring);
+	if (ret) {
+		perror("Unable to allocate ring buffer.\n");
+		exit(3);
+	}
+	event = malloc(sizeof *event);
+	if (!event) {
+		perror("Unable to allocate event buffer.\n");
+		exit(3);
+	}
+	memset(event, 0, sizeof *event);
+	guest.avail_idx = 0;
+	guest.kicked_avail_idx = -1;
+	guest.last_used_idx = 0;
+	host.used_idx = 0;
+	host.called_used_idx = -1;
+	for (i = 0; i < ring_size; ++i) {
+		struct desc desc = {
+			.index = i,
+		};
+		ring[i] = desc;
+	}
+	guest.num_free = ring_size;
+	data = malloc(ring_size * sizeof *data);
+	if (!data) {
+		perror("Unable to allocate data buffer.\n");
+		exit(3);
+	}
+	memset(data, 0, ring_size * sizeof *data);
+}
+
+/* guest side */
+int add_inbuf(unsigned len, void *buf, void *datap)
+{
+	unsigned head, index;
+
+	if (!guest.num_free)
+		return -1;
+
+	guest.num_free--;
+	head = (ring_size - 1) & (guest.avail_idx++);
+
+	/* Start with a write. On MESI architectures this helps
+	 * avoid a shared state with consumer that is polling this descriptor.
+	 */
+	ring[head].addr = (unsigned long)(void*)buf;
+	ring[head].len = len;
+	/* read below might bypass write above. That is OK because it's just an
+	 * optimization. If this happens, we will get the cache line in a
+	 * shared state which is unfortunate, but probably not worth it to
+	 * add an explicit full barrier to avoid this.
+	 */
+	barrier();
+	index = ring[head].index;
+	data[index].buf = buf;
+	data[index].data = datap;
+	/* Barrier A (for pairing) */
+	smp_release();
+	ring[head].flags = DESC_HW;
+
+	return 0;
+}
+
+void *get_buf(unsigned *lenp, void **bufp)
+{
+	unsigned head = (ring_size - 1) & guest.last_used_idx;
+	unsigned index;
+	void *datap;
+
+	if (ring[head].flags & DESC_HW)
+		return NULL;
+	/* Barrier B (for pairing) */
+	smp_acquire();
+	*lenp = ring[head].len;
+	index = ring[head].index & (ring_size - 1);
+	datap = data[index].data;
+	*bufp = data[index].buf;
+	data[index].buf = NULL;
+	data[index].data = NULL;
+	guest.num_free++;
+	guest.last_used_idx++;
+	return datap;
+}
+
+void poll_used(void)
+{
+	unsigned head = (ring_size - 1) & guest.last_used_idx;
+
+	while (ring[head].flags & DESC_HW)
+		busy_wait();
+}
+
+void disable_call()
+{
+	/* Doing nothing to disable calls might cause
+	 * extra interrupts, but reduces the number of cache misses.
+	 */
+}
+
+bool enable_call()
+{
+	unsigned head = (ring_size - 1) & guest.last_used_idx;
+
+	event->call_index = guest.last_used_idx;
+	/* Flush call index write */
+	/* Barrier D (for pairing) */
+	smp_mb();
+	return ring[head].flags & DESC_HW;
+}
+
+void kick_available(void)
+{
+	/* Flush in previous flags write */
+	/* Barrier C (for pairing) */
+	smp_mb();
+	if (!need_event(event->kick_index,
+			guest.avail_idx,
+			guest.kicked_avail_idx))
+		return;
+
+	guest.kicked_avail_idx = guest.avail_idx;
+	kick();
+}
+
+/* host side */
+void disable_kick()
+{
+	/* Doing nothing to disable kicks might cause
+	 * extra interrupts, but reduces the number of cache misses.
+	 */
+}
+
+bool enable_kick()
+{
+	unsigned head = (ring_size - 1) & host.used_idx;
+
+	event->kick_index = host.used_idx;
+	/* Barrier C (for pairing) */
+	smp_mb();
+	return !(ring[head].flags & DESC_HW);
+}
+
+void poll_avail(void)
+{
+	unsigned head = (ring_size - 1) & host.used_idx;
+
+	while (!(ring[head].flags & DESC_HW))
+		busy_wait();
+}
+
+bool use_buf(unsigned *lenp, void **bufp)
+{
+	unsigned head = (ring_size - 1) & host.used_idx;
+
+	if (!(ring[head].flags & DESC_HW))
+		return false;
+
+	/* make sure length read below is not speculated */
+	/* Barrier A (for pairing) */
+	smp_acquire();
+
+	/* simple in-order completion: we don't need
+	 * to touch index at all. This also means we
+	 * can just modify the descriptor in-place.
+	 */
+	ring[head].len--;
+	/* Make sure len is valid before flags.
+	 * Note: alternative is to write len and flags in one access -
+	 * possible on 64 bit architectures but wmb is free on Intel anyway
+	 * so I have no way to test whether it's a gain.
+	 */
+	/* Barrier B (for pairing) */
+	smp_release();
+	ring[head].flags = 0;
+	host.used_idx++;
+	return true;
+}
+
+void call_used(void)
+{
+	/* Flush in previous flags write */
+	/* Barrier D (for pairing) */
+	smp_mb();
+	if (!need_event(event->call_index,
+			host.used_idx,
+			host.called_used_idx))
+		return;
+
+	host.called_used_idx = host.used_idx;
+	call();
+}
diff --git a/tools/virtio/ringtest/run-on-all.sh b/tools/virtio/ringtest/run-on-all.sh
new file mode 100755
index 0000000..52b0f71
--- /dev/null
+++ b/tools/virtio/ringtest/run-on-all.sh
@@ -0,0 +1,24 @@
+#!/bin/sh
+
+#use last CPU for host. Why not the first?
+#many devices tend to use cpu0 by default so
+#it tends to be busier
+HOST_AFFINITY=$(cd /dev/cpu; ls|grep -v '[a-z]'|sort -n|tail -1)
+
+#run command on all cpus
+for cpu in $(cd /dev/cpu; ls|grep -v '[a-z]'|sort -n);
+do
+	#Don't run guest and host on same CPU
+	#It actually works ok if using signalling
+	if
+		(echo "$@" | grep -e "--sleep" > /dev/null) || \
+			test $HOST_AFFINITY '!=' $cpu
+	then
+		echo "GUEST AFFINITY $cpu"
+		"$@" --host-affinity $HOST_AFFINITY --guest-affinity $cpu
+	fi
+done
+echo "NO GUEST AFFINITY"
+"$@" --host-affinity $HOST_AFFINITY
+echo "NO AFFINITY"
+"$@"
diff --git a/tools/virtio/ringtest/virtio_ring_0_9.c b/tools/virtio/ringtest/virtio_ring_0_9.c
new file mode 100644
index 0000000..47c9a1a
--- /dev/null
+++ b/tools/virtio/ringtest/virtio_ring_0_9.c
@@ -0,0 +1,316 @@
+/*
+ * Copyright (C) 2016 Red Hat, Inc.
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Partial implementation of virtio 0.9. event index is used for signalling,
+ * unconditionally. Design roughly follows linux kernel implementation in order
+ * to be able to judge its performance.
+ */
+#define _GNU_SOURCE
+#include "main.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+#include <string.h>
+#include <linux/virtio_ring.h>
+
+struct data {
+	void *data;
+} *data;
+
+struct vring ring;
+
+/* enabling the below activates experimental ring polling code
+ * (which skips index reads on consumer in favor of looking at
+ * high bits of ring id ^ 0x8000).
+ */
+/* #ifdef RING_POLL */
+
+/* how much padding is needed to avoid false cache sharing */
+#define HOST_GUEST_PADDING 0x80
+
+struct guest {
+	unsigned short avail_idx;
+	unsigned short last_used_idx;
+	unsigned short num_free;
+	unsigned short kicked_avail_idx;
+	unsigned short free_head;
+	unsigned char reserved[HOST_GUEST_PADDING - 10];
+} guest;
+
+struct host {
+	/* we do not need to track last avail index
+	 * unless we have more than one in flight.
+	 */
+	unsigned short used_idx;
+	unsigned short called_used_idx;
+	unsigned char reserved[HOST_GUEST_PADDING - 4];
+} host;
+
+/* implemented by ring */
+void alloc_ring(void)
+{
+	int ret;
+	int i;
+	void *p;
+
+	ret = posix_memalign(&p, 0x1000, vring_size(ring_size, 0x1000));
+	if (ret) {
+		perror("Unable to allocate ring buffer.\n");
+		exit(3);
+	}
+	memset(p, 0, vring_size(ring_size, 0x1000));
+	vring_init(&ring, ring_size, p, 0x1000);
+
+	guest.avail_idx = 0;
+	guest.kicked_avail_idx = -1;
+	guest.last_used_idx = 0;
+	/* Put everything in free lists. */
+	guest.free_head = 0;
+	for (i = 0; i < ring_size - 1; i++)
+		ring.desc[i].next = i + 1;
+	host.used_idx = 0;
+	host.called_used_idx = -1;
+	guest.num_free = ring_size;
+	data = malloc(ring_size * sizeof *data);
+	if (!data) {
+		perror("Unable to allocate data buffer.\n");
+		exit(3);
+	}
+	memset(data, 0, ring_size * sizeof *data);
+}
+
+/* guest side */
+int add_inbuf(unsigned len, void *buf, void *datap)
+{
+	unsigned head, avail;
+	struct vring_desc *desc;
+
+	if (!guest.num_free)
+		return -1;
+
+	head = guest.free_head;
+	guest.num_free--;
+
+	desc = ring.desc;
+	desc[head].flags = VRING_DESC_F_NEXT;
+	desc[head].addr = (unsigned long)(void *)buf;
+	desc[head].len = len;
+	/* We do it like this to simulate the way
+	 * we'd have to flip it if we had multiple
+	 * descriptors.
+	 */
+	desc[head].flags &= ~VRING_DESC_F_NEXT;
+	guest.free_head = desc[head].next;
+
+	data[head].data = datap;
+
+#ifdef RING_POLL
+	/* Barrier A (for pairing) */
+	smp_release();
+	avail = guest.avail_idx++;
+	ring.avail->ring[avail & (ring_size - 1)] =
+		(head | (avail & ~(ring_size - 1))) ^ 0x8000;
+#else
+	avail = (ring_size - 1) & (guest.avail_idx++);
+	ring.avail->ring[avail] = head;
+	/* Barrier A (for pairing) */
+	smp_release();
+#endif
+	ring.avail->idx = guest.avail_idx;
+	return 0;
+}
+
+void *get_buf(unsigned *lenp, void **bufp)
+{
+	unsigned head;
+	unsigned index;
+	void *datap;
+
+#ifdef RING_POLL
+	head = (ring_size - 1) & guest.last_used_idx;
+	index = ring.used->ring[head].id;
+	if ((index ^ guest.last_used_idx ^ 0x8000) & ~(ring_size - 1))
+		return NULL;
+	/* Barrier B (for pairing) */
+	smp_acquire();
+	index &= ring_size - 1;
+#else
+	if (ring.used->idx == guest.last_used_idx)
+		return NULL;
+	/* Barrier B (for pairing) */
+	smp_acquire();
+	head = (ring_size - 1) & guest.last_used_idx;
+	index = ring.used->ring[head].id;
+#endif
+	*lenp = ring.used->ring[head].len;
+	datap = data[index].data;
+	*bufp = (void*)(unsigned long)ring.desc[index].addr;
+	data[index].data = NULL;
+	ring.desc[index].next = guest.free_head;
+	guest.free_head = index;
+	guest.num_free++;
+	guest.last_used_idx++;
+	return datap;
+}
+
+void poll_used(void)
+{
+#ifdef RING_POLL
+	unsigned head = (ring_size - 1) & guest.last_used_idx;
+
+	for (;;) {
+		unsigned index = ring.used->ring[head].id;
+
+		if ((index ^ guest.last_used_idx ^ 0x8000) & ~(ring_size - 1))
+			busy_wait();
+		else
+			break;
+	}
+#else
+	unsigned head = guest.last_used_idx;
+
+	while (ring.used->idx == head)
+		busy_wait();
+#endif
+}
+
+void disable_call()
+{
+	/* Doing nothing to disable calls might cause
+	 * extra interrupts, but reduces the number of cache misses.
+	 */
+}
+
+bool enable_call()
+{
+	unsigned short last_used_idx;
+
+	vring_used_event(&ring) = (last_used_idx = guest.last_used_idx);
+	/* Flush call index write */
+	/* Barrier D (for pairing) */
+	smp_mb();
+#ifdef RING_POLL
+	{
+		unsigned short head = last_used_idx & (ring_size - 1);
+		unsigned index = ring.used->ring[head].id;
+
+		return (index ^ last_used_idx ^ 0x8000) & ~(ring_size - 1);
+	}
+#else
+	return ring.used->idx == last_used_idx;
+#endif
+}
+
+void kick_available(void)
+{
+	/* Flush in previous flags write */
+	/* Barrier C (for pairing) */
+	smp_mb();
+	if (!vring_need_event(vring_avail_event(&ring),
+			      guest.avail_idx,
+			      guest.kicked_avail_idx))
+		return;
+
+	guest.kicked_avail_idx = guest.avail_idx;
+	kick();
+}
+
+/* host side */
+void disable_kick()
+{
+	/* Doing nothing to disable kicks might cause
+	 * extra interrupts, but reduces the number of cache misses.
+	 */
+}
+
+bool enable_kick()
+{
+	unsigned head = host.used_idx;
+
+	vring_avail_event(&ring) = head;
+	/* Barrier C (for pairing) */
+	smp_mb();
+#ifdef RING_POLL
+	{
+		unsigned index = ring.avail->ring[head & (ring_size - 1)];
+
+		return (index ^ head ^ 0x8000) & ~(ring_size - 1);
+	}
+#else
+	return head == ring.avail->idx;
+#endif
+}
+
+void poll_avail(void)
+{
+	unsigned head = host.used_idx;
+#ifdef RING_POLL
+	for (;;) {
+		unsigned index = ring.avail->ring[head & (ring_size - 1)];
+		if ((index ^ head ^ 0x8000) & ~(ring_size - 1))
+			busy_wait();
+		else
+			break;
+	}
+#else
+	while (ring.avail->idx == head)
+		busy_wait();
+#endif
+}
+
+bool use_buf(unsigned *lenp, void **bufp)
+{
+	unsigned used_idx = host.used_idx;
+	struct vring_desc *desc;
+	unsigned head;
+
+#ifdef RING_POLL
+	head = ring.avail->ring[used_idx & (ring_size - 1)];
+	if ((used_idx ^ head ^ 0x8000) & ~(ring_size - 1))
+		return false;
+	/* Barrier A (for pairing) */
+	smp_acquire();
+
+	used_idx &= ring_size - 1;
+	desc = &ring.desc[head & (ring_size - 1)];
+#else
+	if (used_idx == ring.avail->idx)
+		return false;
+
+	/* Barrier A (for pairing) */
+	smp_acquire();
+
+	used_idx &= ring_size - 1;
+	head = ring.avail->ring[used_idx];
+	desc = &ring.desc[head];
+#endif
+
+	*lenp = desc->len;
+	*bufp = (void *)(unsigned long)desc->addr;
+
+	/* now update used ring */
+	ring.used->ring[used_idx].id = head;
+	ring.used->ring[used_idx].len = desc->len - 1;
+	/* Barrier B (for pairing) */
+	smp_release();
+	host.used_idx++;
+	ring.used->idx = host.used_idx;
+	
+	return true;
+}
+
+void call_used(void)
+{
+	/* Flush in previous flags write */
+	/* Barrier D (for pairing) */
+	smp_mb();
+	if (!vring_need_event(vring_used_event(&ring),
+			      host.used_idx,
+			      host.called_used_idx))
+		return;
+
+	host.called_used_idx = host.used_idx;
+	call();
+}
diff --git a/tools/virtio/ringtest/virtio_ring_poll.c b/tools/virtio/ringtest/virtio_ring_poll.c
new file mode 100644
index 0000000..84fc2c5
--- /dev/null
+++ b/tools/virtio/ringtest/virtio_ring_poll.c
@@ -0,0 +1,2 @@
+#define RING_POLL 1
+#include "virtio_ring_0_9.c"