Merge branch '4.7-fixes' into mips-for-linux-next
diff --git a/MAINTAINERS b/MAINTAINERS
index 1209323..7b670bb 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9017,6 +9017,16 @@
 S:	Maintained
 F:	drivers/pinctrl/spear/
 
+PISTACHIO SOC SUPPORT
+M:      James Hartley <james.hartley@imgtec.com>
+M:      Ionela Voinescu <ionela.voinescu@imgtec.com>
+L:      linux-mips@linux-mips.org
+S:      Maintained
+F:      arch/mips/pistachio/
+F:      arch/mips/include/asm/mach-pistachio/
+F:      arch/mips/boot/dts/pistachio/
+F:      arch/mips/configs/pistachio*_defconfig
+
 PKTCDVD DRIVER
 M:	Jiri Kosina <jikos@kernel.org>
 S:	Maintained
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index ac91939..b0b0a4d 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -64,6 +64,7 @@
 	select GENERIC_TIME_VSYSCALL
 	select ARCH_CLOCKSOURCE_DATA
 	select HANDLE_DOMAIN_IRQ
+	select HAVE_EXIT_THREAD
 
 menu "Machine selection"
 
@@ -384,7 +385,7 @@
 	select CLKSRC_MIPS_GIC
 	select COMMON_CLK
 	select CSRC_R4K
-	select DMA_MAYBE_COHERENT
+	select DMA_NONCOHERENT
 	select GPIOLIB
 	select IRQ_MIPS_CPU
 	select LIBFDT
@@ -880,7 +881,6 @@
 	select SYS_SUPPORTS_HOTPLUG_CPU if CPU_BIG_ENDIAN
 	select SYS_HAS_EARLY_PRINTK
 	select SYS_HAS_CPU_CAVIUM_OCTEON
-	select SWAP_IO_SPACE
 	select HW_HAS_PCI
 	select ZONE_DMA32
 	select HOLES_IN_ZONE
@@ -1111,16 +1111,6 @@
 config SYS_HAS_EARLY_PRINTK
 	bool
 
-config HOTPLUG_CPU
-	bool "Support for hot-pluggable CPUs"
-	depends on SMP && SYS_SUPPORTS_HOTPLUG_CPU
-	help
-	  Say Y here to allow turning CPUs off and on. CPUs can be
-	  controlled through /sys/devices/system/cpu.
-	  (Note: power management support will enable this option
-	    automatically on SMP systems. )
-	  Say N if you want to disable CPU hotplug.
-
 config SYS_SUPPORTS_HOTPLUG_CPU
 	bool
 
@@ -1406,7 +1396,6 @@
 	bool "Loongson 1B"
 	depends on SYS_HAS_CPU_LOONGSON1B
 	select CPU_LOONGSON1
-	select ARCH_WANT_OPTIONAL_GPIOLIB
 	select LEDS_GPIO_REGISTER
 	help
 	  The Loongson 1B is a 32-bit SoC, which implements the MIPS32
@@ -2634,6 +2623,16 @@
 
 	  If you don't know what to do here, say N.
 
+config HOTPLUG_CPU
+	bool "Support for hot-pluggable CPUs"
+	depends on SMP && SYS_SUPPORTS_HOTPLUG_CPU
+	help
+	  Say Y here to allow turning CPUs off and on. CPUs can be
+	  controlled through /sys/devices/system/cpu.
+	  (Note: power management support will enable this option
+	    automatically on SMP systems. )
+	  Say N if you want to disable CPU hotplug.
+
 config SMP_UP
 	bool
 
@@ -2885,10 +2884,10 @@
 		  the documented boot protocol using a device tree.
 
 	config MIPS_RAW_APPENDED_DTB
-		bool "vmlinux.bin"
+		bool "vmlinux.bin or vmlinuz.bin"
 		help
 		  With this option, the boot code will look for a device tree binary
-		  DTB) appended to raw vmlinux.bin (without decompressor).
+		  DTB) appended to raw vmlinux.bin or vmlinuz.bin.
 		  (e.g. cat vmlinux.bin <filename>.dtb > vmlinux_w_dtb).
 
 		  This is meant as a backward compatibility convenience for those
@@ -2900,24 +2899,6 @@
 		  look like a DTB header after a reboot if no actual DTB is appended
 		  to vmlinux.bin.  Do not leave this option active in a production kernel
 		  if you don't intend to always append a DTB.
-
-	config MIPS_ZBOOT_APPENDED_DTB
-		bool "vmlinuz.bin"
-		depends on SYS_SUPPORTS_ZBOOT
-		help
-		  With this option, the boot code will look for a device tree binary
-		  DTB) appended to raw vmlinuz.bin (with decompressor).
-		  (e.g. cat vmlinuz.bin <filename>.dtb > vmlinuz_w_dtb).
-
-		  This is meant as a backward compatibility convenience for those
-		  systems with a bootloader that can't be upgraded to accommodate
-		  the documented boot protocol using a device tree.
-
-		  Beware that there is very little in terms of protection against
-		  this option being confused by leftover garbage in memory that might
-		  look like a DTB header after a reboot if no actual DTB is appended
-		  to vmlinuz.bin.  Do not leave this option active in a production kernel
-		  if you don't intend to always append a DTB.
 endchoice
 
 choice
diff --git a/arch/mips/ath79/setup.c b/arch/mips/ath79/setup.c
index 7adab18..2ec9100 100644
--- a/arch/mips/ath79/setup.c
+++ b/arch/mips/ath79/setup.c
@@ -204,8 +204,8 @@
 	fdt_start = fw_getenvl("fdt_start");
 	if (fdt_start)
 		__dt_setup_arch((void *)KSEG0ADDR(fdt_start));
-	else if (fw_arg0 == -2)
-		__dt_setup_arch((void *)KSEG0ADDR(fw_arg1));
+	else if (fw_passed_dtb)
+		__dt_setup_arch((void *)KSEG0ADDR(fw_passed_dtb));
 
 	if (mips_machtype != ATH79_MACH_GENERIC_OF) {
 		ath79_reset_base = ioremap_nocache(AR71XX_RESET_BASE,
diff --git a/arch/mips/bmips/setup.c b/arch/mips/bmips/setup.c
index f146d12..6776042 100644
--- a/arch/mips/bmips/setup.c
+++ b/arch/mips/bmips/setup.c
@@ -162,8 +162,8 @@
 	/* intended to somewhat resemble ARM; see Documentation/arm/Booting */
 	if (fw_arg0 == 0 && fw_arg1 == 0xffffffff)
 		dtb = phys_to_virt(fw_arg2);
-	else if (fw_arg0 == -2) /* UHI interface */
-		dtb = (void *)fw_arg1;
+	else if (fw_passed_dtb) /* UHI interface */
+		dtb = (void *)fw_passed_dtb;
 	else if (__dtb_start != __dtb_end)
 		dtb = (void *)__dtb_start;
 	else
diff --git a/arch/mips/boot/compressed/decompress.c b/arch/mips/boot/compressed/decompress.c
index 080cd53..fdf99e9 100644
--- a/arch/mips/boot/compressed/decompress.c
+++ b/arch/mips/boot/compressed/decompress.c
@@ -14,6 +14,7 @@
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
+#include <linux/libfdt.h>
 
 #include <asm/addrspace.h>
 
@@ -36,6 +37,8 @@
 #define puthex(val) do {} while (0)
 #endif
 
+extern char __appended_dtb[];
+
 void error(char *x)
 {
 	puts("\n\n");
@@ -114,6 +117,20 @@
 	__decompress((char *)zimage_start, zimage_size, 0, 0,
 		   (void *)VMLINUX_LOAD_ADDRESS_ULL, 0, 0, error);
 
+	if (IS_ENABLED(CONFIG_MIPS_RAW_APPENDED_DTB) &&
+	    fdt_magic((void *)&__appended_dtb) == FDT_MAGIC) {
+		unsigned int image_size, dtb_size;
+
+		dtb_size = fdt_totalsize((void *)&__appended_dtb);
+
+		/* last four bytes is always image size in little endian */
+		image_size = le32_to_cpup((void *)&__image_end - 4);
+
+		/* copy dtb to where the booted kernel will expect it */
+		memcpy((void *)VMLINUX_LOAD_ADDRESS_ULL + image_size,
+		       __appended_dtb, dtb_size);
+	}
+
 	/* FIXME: should we flush cache here? */
 	puts("Now, booting the kernel...\n");
 }
diff --git a/arch/mips/boot/compressed/head.S b/arch/mips/boot/compressed/head.S
index c580e85..409cb48 100644
--- a/arch/mips/boot/compressed/head.S
+++ b/arch/mips/boot/compressed/head.S
@@ -25,22 +25,6 @@
 	move	s2, a2
 	move	s3, a3
 
-#ifdef CONFIG_MIPS_ZBOOT_APPENDED_DTB
-	PTR_LA	t0, __appended_dtb
-#ifdef CONFIG_CPU_BIG_ENDIAN
-	li	t1, 0xd00dfeed
-#else
-	li	t1, 0xedfe0dd0
-#endif
-	lw	t2, (t0)
-	bne	t1, t2, not_found
-	 nop
-
-	move	s1, t0
-	PTR_LI	s0, -2
-not_found:
-#endif
-
 	/* Clear BSS */
 	PTR_LA	a0, _edata
 	PTR_LA	a2, _end
diff --git a/arch/mips/boot/dts/cavium-octeon/dlink_dsr-1000n.dts b/arch/mips/boot/dts/cavium-octeon/dlink_dsr-1000n.dts
index d6bc994..b134798 100644
--- a/arch/mips/boot/dts/cavium-octeon/dlink_dsr-1000n.dts
+++ b/arch/mips/boot/dts/cavium-octeon/dlink_dsr-1000n.dts
@@ -9,6 +9,7 @@
  */
 
 /include/ "octeon_3xxx.dtsi"
+#include <dt-bindings/gpio/gpio.h>
 
 / {
 	model = "dlink,dsr-1000n";
@@ -63,12 +64,27 @@
 
 		usb1 {
 			label = "usb1";
-			gpios = <&gpio 9 1>; /* Active low */
+			gpios = <&gpio 9 GPIO_ACTIVE_LOW>;
 		};
 
 		usb2 {
 			label = "usb2";
-			gpios = <&gpio 10 1>; /* Active low */
+			gpios = <&gpio 10 GPIO_ACTIVE_LOW>;
+		};
+
+		wps {
+			label = "wps";
+			gpios = <&gpio 11 GPIO_ACTIVE_LOW>;
+		};
+
+		wireless1 {
+			label = "5g";
+			gpios = <&gpio 17 GPIO_ACTIVE_LOW>;
+		};
+
+		wireless2 {
+			label = "2.4g";
+			gpios = <&gpio 18 GPIO_ACTIVE_LOW>;
 		};
 	};
 
diff --git a/arch/mips/boot/dts/cavium-octeon/octeon_3xxx.dts b/arch/mips/boot/dts/cavium-octeon/octeon_3xxx.dts
index de61f02..ca6b446 100644
--- a/arch/mips/boot/dts/cavium-octeon/octeon_3xxx.dts
+++ b/arch/mips/boot/dts/cavium-octeon/octeon_3xxx.dts
@@ -388,16 +388,4 @@
 		usbn = &usbn;
 		led0 = &led0;
 	};
-
-	dsr1000n-leds {
-		compatible = "gpio-leds";
-		usb1 {
-			label = "usb1";
-			gpios = <&gpio 9 1>; /* Active low */
-		};
-		usb2 {
-			label = "usb2";
-			gpios = <&gpio 10 1>; /* Active low */
-		};
-	};
  };
diff --git a/arch/mips/boot/tools/relocs_64.c b/arch/mips/boot/tools/relocs_64.c
index b671b5e..06066e6a 100644
--- a/arch/mips/boot/tools/relocs_64.c
+++ b/arch/mips/boot/tools/relocs_64.c
@@ -9,17 +9,20 @@
 
 typedef uint8_t Elf64_Byte;
 
-typedef struct {
-	Elf64_Word r_sym;	/* Symbol index.  */
-	Elf64_Byte r_ssym;	/* Special symbol.  */
-	Elf64_Byte r_type3;	/* Third relocation.  */
-	Elf64_Byte r_type2;	/* Second relocation.  */
-	Elf64_Byte r_type;	/* First relocation.  */
+typedef union {
+	struct {
+		Elf64_Word r_sym;	/* Symbol index.  */
+		Elf64_Byte r_ssym;	/* Special symbol.  */
+		Elf64_Byte r_type3;	/* Third relocation.  */
+		Elf64_Byte r_type2;	/* Second relocation.  */
+		Elf64_Byte r_type;	/* First relocation.  */
+	} fields;
+	Elf64_Xword unused;
 } Elf64_Mips_Rela;
 
 #define ELF_CLASS               ELFCLASS64
-#define ELF_R_SYM(val)          (((Elf64_Mips_Rela *)(&val))->r_sym)
-#define ELF_R_TYPE(val)         (((Elf64_Mips_Rela *)(&val))->r_type)
+#define ELF_R_SYM(val)          (((Elf64_Mips_Rela *)(&val))->fields.r_sym)
+#define ELF_R_TYPE(val)         (((Elf64_Mips_Rela *)(&val))->fields.r_type)
 #define ELF_ST_TYPE(o)          ELF64_ST_TYPE(o)
 #define ELF_ST_BIND(o)          ELF64_ST_BIND(o)
 #define ELF_ST_VISIBILITY(o)    ELF64_ST_VISIBILITY(o)
diff --git a/arch/mips/cavium-octeon/executive/cvmx-bootmem.c b/arch/mips/cavium-octeon/executive/cvmx-bootmem.c
index 504ed61..b65a6c1 100644
--- a/arch/mips/cavium-octeon/executive/cvmx-bootmem.c
+++ b/arch/mips/cavium-octeon/executive/cvmx-bootmem.c
@@ -668,7 +668,7 @@
 	/*
 	 * Round size up to mult of minimum alignment bytes We need
 	 * the actual size allocated to allow for blocks to be
-	 * coallesced when they are freed.  The alloc routine does the
+	 * coalesced when they are freed. The alloc routine does the
 	 * same rounding up on all allocations.
 	 */
 	size = ALIGN(size, CVMX_BOOTMEM_ALIGNMENT_SIZE);
diff --git a/arch/mips/cavium-octeon/executive/cvmx-helper-board.c b/arch/mips/cavium-octeon/executive/cvmx-helper-board.c
index 36e30d6..ff49fc0 100644
--- a/arch/mips/cavium-octeon/executive/cvmx-helper-board.c
+++ b/arch/mips/cavium-octeon/executive/cvmx-helper-board.c
@@ -186,15 +186,6 @@
 			return 7 - ipd_port;
 		else
 			return -1;
-	case CVMX_BOARD_TYPE_CUST_DSR1000N:
-		/*
-		 * Port 2 connects to Broadcom PHY (B5081). Other ports (0-1)
-		 * connect to a switch (BCM53115).
-		 */
-		if (ipd_port == 2)
-			return 8;
-		else
-			return -1;
 	case CVMX_BOARD_TYPE_KONTRON_S1901:
 		if (ipd_port == CVMX_HELPER_BOARD_MGMT_IPD_PORT)
 			return 1;
@@ -289,18 +280,6 @@
 			return result;
 		}
 		break;
-	case CVMX_BOARD_TYPE_CUST_DSR1000N:
-		if (ipd_port == 0 || ipd_port == 1) {
-			/* Ports 0 and 1 connect to a switch (BCM53115). */
-			result.s.link_up = 1;
-			result.s.full_duplex = 1;
-			result.s.speed = 1000;
-			return result;
-		} else {
-			/* Port 2 uses a Broadcom PHY (B5081). */
-			is_broadcom_phy = 1;
-		}
-		break;
 	}
 
 	phy_addr = cvmx_helper_board_get_mii_address(ipd_port);
@@ -765,7 +744,6 @@
 	case CVMX_BOARD_TYPE_LANAI2_G:
 	case CVMX_BOARD_TYPE_NIC10E_66:
 	case CVMX_BOARD_TYPE_UBNT_E100:
-	case CVMX_BOARD_TYPE_CUST_DSR1000N:
 		return USB_CLOCK_TYPE_CRYSTAL_12;
 	case CVMX_BOARD_TYPE_NIC10E:
 		return USB_CLOCK_TYPE_REF_12;
diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c
index 75a4add..5a9b87b 100644
--- a/arch/mips/cavium-octeon/octeon-irq.c
+++ b/arch/mips/cavium-octeon/octeon-irq.c
@@ -1542,10 +1542,6 @@
 			goto err;
 	}
 
-	r = octeon_irq_force_ciu_mapping(ciu_domain, OCTEON_IRQ_USB0, 0, 56);
-	if (r)
-		goto err;
-
 	r = octeon_irq_force_ciu_mapping(ciu_domain, OCTEON_IRQ_TWSI2, 0, 59);
 	if (r)
 		goto err;
@@ -1559,10 +1555,6 @@
 			goto err;
 	}
 
-	r = octeon_irq_force_ciu_mapping(ciu_domain, OCTEON_IRQ_USB1, 1, 17);
-	if (r)
-		goto err;
-
 	/* Enable the CIU lines */
 	set_c0_status(STATUSF_IP3 | STATUSF_IP2);
 	if (octeon_irq_use_ip4)
@@ -2077,10 +2069,6 @@
 			goto err;
 	}
 
-	r = octeon_irq_force_ciu_mapping(ciu_domain, OCTEON_IRQ_USB0, 3, 44);
-	if (r)
-		goto err;
-
 	for (i = 0; i < 4; i++) {
 		r = octeon_irq_force_ciu_mapping(
 			ciu_domain, i + OCTEON_IRQ_PCI_INT0, 4, i);
diff --git a/arch/mips/cavium-octeon/octeon-platform.c b/arch/mips/cavium-octeon/octeon-platform.c
index 7aeafed..b31fbc9 100644
--- a/arch/mips/cavium-octeon/octeon-platform.c
+++ b/arch/mips/cavium-octeon/octeon-platform.c
@@ -3,33 +3,27 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (C) 2004-2011 Cavium Networks
+ * Copyright (C) 2004-2016 Cavium Networks
  * Copyright (C) 2008 Wind River Systems
  */
 
-#include <linux/delay.h>
 #include <linux/init.h>
-#include <linux/irq.h>
-#include <linux/i2c.h>
-#include <linux/usb.h>
-#include <linux/dma-mapping.h>
+#include <linux/delay.h>
 #include <linux/etherdevice.h>
-#include <linux/module.h>
-#include <linux/mutex.h>
-#include <linux/slab.h>
-#include <linux/platform_device.h>
 #include <linux/of_platform.h>
 #include <linux/of_fdt.h>
 #include <linux/libfdt.h>
+#include <linux/usb/ehci_def.h>
 #include <linux/usb/ehci_pdriver.h>
 #include <linux/usb/ohci_pdriver.h>
 
 #include <asm/octeon/octeon.h>
-#include <asm/octeon/cvmx-rnm-defs.h>
-#include <asm/octeon/cvmx-helper.h>
 #include <asm/octeon/cvmx-helper-board.h>
 #include <asm/octeon/cvmx-uctlx-defs.h>
 
+#define CVMX_UAHCX_EHCI_USBCMD	(CVMX_ADD_IO_SEG(0x00016F0000000010ull))
+#define CVMX_UAHCX_OHCI_USBCMD	(CVMX_ADD_IO_SEG(0x00016F0000000408ull))
+
 /* Octeon Random Number Generator.  */
 static int __init octeon_rng_device_init(void)
 {
@@ -78,12 +72,36 @@
 
 static int octeon2_usb_clock_start_cnt;
 
+static int __init octeon2_usb_reset(void)
+{
+	union cvmx_uctlx_clk_rst_ctl clk_rst_ctl;
+	u32 ucmd;
+
+	if (!OCTEON_IS_OCTEON2())
+		return 0;
+
+	clk_rst_ctl.u64 = cvmx_read_csr(CVMX_UCTLX_CLK_RST_CTL(0));
+	if (clk_rst_ctl.s.hrst) {
+		ucmd = cvmx_read64_uint32(CVMX_UAHCX_EHCI_USBCMD);
+		ucmd &= ~CMD_RUN;
+		cvmx_write64_uint32(CVMX_UAHCX_EHCI_USBCMD, ucmd);
+		mdelay(2);
+		ucmd |= CMD_RESET;
+		cvmx_write64_uint32(CVMX_UAHCX_EHCI_USBCMD, ucmd);
+		ucmd = cvmx_read64_uint32(CVMX_UAHCX_OHCI_USBCMD);
+		ucmd |= CMD_RUN;
+		cvmx_write64_uint32(CVMX_UAHCX_OHCI_USBCMD, ucmd);
+	}
+
+	return 0;
+}
+arch_initcall(octeon2_usb_reset);
+
 static void octeon2_usb_clocks_start(struct device *dev)
 {
 	u64 div;
 	union cvmx_uctlx_if_ena if_ena;
 	union cvmx_uctlx_clk_rst_ctl clk_rst_ctl;
-	union cvmx_uctlx_uphy_ctl_status uphy_ctl_status;
 	union cvmx_uctlx_uphy_portx_ctl_status port_ctl_status;
 	int i;
 	unsigned long io_clk_64_to_ns;
@@ -131,6 +149,17 @@
 	if_ena.s.en = 1;
 	cvmx_write_csr(CVMX_UCTLX_IF_ENA(0), if_ena.u64);
 
+	for (i = 0; i <= 1; i++) {
+		port_ctl_status.u64 =
+			cvmx_read_csr(CVMX_UCTLX_UPHY_PORTX_CTL_STATUS(i, 0));
+		/* Set txvreftune to 15 to obtain compliant 'eye' diagram. */
+		port_ctl_status.s.txvreftune = 15;
+		port_ctl_status.s.txrisetune = 1;
+		port_ctl_status.s.txpreemphasistune = 1;
+		cvmx_write_csr(CVMX_UCTLX_UPHY_PORTX_CTL_STATUS(i, 0),
+			       port_ctl_status.u64);
+	}
+
 	/* Step 3: Configure the reference clock, PHY, and HCLK */
 	clk_rst_ctl.u64 = cvmx_read_csr(CVMX_UCTLX_CLK_RST_CTL(0));
 
@@ -218,29 +247,10 @@
 	clk_rst_ctl.s.p_por = 0;
 	cvmx_write_csr(CVMX_UCTLX_CLK_RST_CTL(0), clk_rst_ctl.u64);
 
-	/* Step 5:    Wait 1 ms for the PHY clock to start. */
-	mdelay(1);
+	/* Step 5:    Wait 3 ms for the PHY clock to start. */
+	mdelay(3);
 
-	/*
-	 * Step 6: Program the reset input from automatic test
-	 * equipment field in the UPHY CSR
-	 */
-	uphy_ctl_status.u64 = cvmx_read_csr(CVMX_UCTLX_UPHY_CTL_STATUS(0));
-	uphy_ctl_status.s.ate_reset = 1;
-	cvmx_write_csr(CVMX_UCTLX_UPHY_CTL_STATUS(0), uphy_ctl_status.u64);
-
-	/* Step 7: Wait for at least 10ns. */
-	ndelay(10);
-
-	/* Step 8: Clear the ATE_RESET field in the UPHY CSR. */
-	uphy_ctl_status.s.ate_reset = 0;
-	cvmx_write_csr(CVMX_UCTLX_UPHY_CTL_STATUS(0), uphy_ctl_status.u64);
-
-	/*
-	 * Step 9: Wait for at least 20ns for UPHY to output PHY clock
-	 * signals and OHCI_CLK48
-	 */
-	ndelay(20);
+	/* Steps 6..9 for ATE only, are skipped. */
 
 	/* Step 10: Configure the OHCI_CLK48 and OHCI_CLK12 clocks. */
 	/* 10a */
@@ -261,6 +271,20 @@
 	clk_rst_ctl.s.p_prst = 1;
 	cvmx_write_csr(CVMX_UCTLX_CLK_RST_CTL(0), clk_rst_ctl.u64);
 
+	/* Step 11b */
+	udelay(1);
+
+	/* Step 11c */
+	clk_rst_ctl.s.p_prst = 0;
+	cvmx_write_csr(CVMX_UCTLX_CLK_RST_CTL(0), clk_rst_ctl.u64);
+
+	/* Step 11d */
+	mdelay(1);
+
+	/* Step 11e */
+	clk_rst_ctl.s.p_prst = 1;
+	cvmx_write_csr(CVMX_UCTLX_CLK_RST_CTL(0), clk_rst_ctl.u64);
+
 	/* Step 12: Wait 1 uS. */
 	udelay(1);
 
@@ -269,21 +293,9 @@
 	cvmx_write_csr(CVMX_UCTLX_CLK_RST_CTL(0), clk_rst_ctl.u64);
 
 end_clock:
-	/* Now we can set some other registers.  */
-
-	for (i = 0; i <= 1; i++) {
-		port_ctl_status.u64 =
-			cvmx_read_csr(CVMX_UCTLX_UPHY_PORTX_CTL_STATUS(i, 0));
-		/* Set txvreftune to 15 to obtain compliant 'eye' diagram. */
-		port_ctl_status.s.txvreftune = 15;
-		port_ctl_status.s.txrisetune = 1;
-		port_ctl_status.s.txpreemphasistune = 1;
-		cvmx_write_csr(CVMX_UCTLX_UPHY_PORTX_CTL_STATUS(i, 0),
-			       port_ctl_status.u64);
-	}
-
 	/* Set uSOF cycle period to 60,000 bits. */
 	cvmx_write_csr(CVMX_UCTLX_EHCI_FLA(0), 0x20ull);
+
 exit:
 	mutex_unlock(&octeon2_usb_clocks_mutex);
 }
@@ -311,7 +323,11 @@
 #ifdef __BIG_ENDIAN
 	.big_endian_mmio	= 1,
 #endif
-	.dma_mask_64	= 1,
+	/*
+	 * We can DMA from anywhere. But the descriptors must be in
+	 * the lower 4GB.
+	 */
+	.dma_mask_64	= 0,
 	.power_on	= octeon_ehci_power_on,
 	.power_off	= octeon_ehci_power_off,
 };
@@ -689,6 +705,10 @@
 	if (fdt_check_header(initial_boot_params))
 		panic("Corrupt Device Tree.");
 
+	WARN(octeon_bootinfo->board_type == CVMX_BOARD_TYPE_CUST_DSR1000N,
+	     "Built-in DTB booting is deprecated on %s. Please switch to use appended DTB.",
+	     cvmx_board_type_to_string(octeon_bootinfo->board_type));
+
 	aliases = fdt_path_offset(initial_boot_params, "/aliases");
 	if (aliases < 0) {
 		pr_err("Error: No /aliases node in device tree.");
@@ -1032,13 +1052,6 @@
 		}
 	}
 
-	if (octeon_bootinfo->board_type != CVMX_BOARD_TYPE_CUST_DSR1000N) {
-		int dsr1000n_leds = fdt_path_offset(initial_boot_params,
-						    "/dsr1000n-leds");
-		if (dsr1000n_leds >= 0)
-			fdt_nop_node(initial_boot_params, dsr1000n_leds);
-	}
-
 	return 0;
 }
 
diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c
index 64f852b..cb16fcc 100644
--- a/arch/mips/cavium-octeon/setup.c
+++ b/arch/mips/cavium-octeon/setup.c
@@ -40,9 +40,27 @@
 
 #include <asm/octeon/octeon.h>
 #include <asm/octeon/pci-octeon.h>
-#include <asm/octeon/cvmx-mio-defs.h>
 #include <asm/octeon/cvmx-rst-defs.h>
 
+/*
+ * TRUE for devices having registers with little-endian byte
+ * order, FALSE for registers with native-endian byte order.
+ * PCI mandates little-endian, USB and SATA are configuraable,
+ * but we chose little-endian for these.
+ */
+const bool octeon_should_swizzle_table[256] = {
+	[0x00] = true,	/* bootbus/CF */
+	[0x1b] = true,	/* PCI mmio window */
+	[0x1c] = true,	/* PCI mmio window */
+	[0x1d] = true,	/* PCI mmio window */
+	[0x1e] = true,	/* PCI mmio window */
+	[0x68] = true,	/* OCTEON III USB */
+	[0x69] = true,	/* OCTEON III USB */
+	[0x6c] = true,	/* OCTEON III SATA */
+	[0x6f] = true,	/* OCTEON II USB */
+};
+EXPORT_SYMBOL(octeon_should_swizzle_table);
+
 #ifdef CONFIG_PCI
 extern void pci_console_init(const char *arg);
 #endif
diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c
index 33aab89..4d457d60 100644
--- a/arch/mips/cavium-octeon/smp.c
+++ b/arch/mips/cavium-octeon/smp.c
@@ -271,6 +271,7 @@
 		return -ENOTSUPP;
 
 	set_cpu_online(cpu, false);
+	calculate_cpu_foreign_map();
 	cpumask_clear_cpu(cpu, &cpu_callin_map);
 	octeon_fixup_irqs();
 
diff --git a/arch/mips/cobalt/setup.c b/arch/mips/cobalt/setup.c
index 9a8c2fe..c136a18 100644
--- a/arch/mips/cobalt/setup.c
+++ b/arch/mips/cobalt/setup.c
@@ -42,8 +42,8 @@
 
 /*
  * Cobalt doesn't have PS/2 keyboard/mouse interfaces,
- * keyboard conntroller is never used.
- * Also PCI-ISA bridge DMA contoroller is never used.
+ * keyboard controller is never used.
+ * Also PCI-ISA bridge DMA controller is never used.
  */
 static struct resource cobalt_reserved_resources[] = {
 	{	/* dma1 */
diff --git a/arch/mips/configs/ath25_defconfig b/arch/mips/configs/ath25_defconfig
new file mode 100644
index 0000000..2c82995
--- /dev/null
+++ b/arch/mips/configs/ath25_defconfig
@@ -0,0 +1,119 @@
+CONFIG_ATH25=y
+# CONFIG_COMPACTION is not set
+CONFIG_HZ_100=y
+# CONFIG_SECCOMP is not set
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_SYSVIPC=y
+# CONFIG_CROSS_MEMORY_ATTACH is not set
+# CONFIG_FHANDLE is not set
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_RD_GZIP is not set
+# CONFIG_RD_BZIP2 is not set
+# CONFIG_RD_XZ is not set
+# CONFIG_RD_LZO is not set
+# CONFIG_RD_LZ4 is not set
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+# CONFIG_AIO is not set
+CONFIG_EMBEDDED=y
+# CONFIG_VM_EVENT_COUNTERS is not set
+# CONFIG_SLUB_DEBUG is not set
+# CONFIG_COMPAT_BRK is not set
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_IOSCHED_CFQ is not set
+# CONFIG_SUSPEND is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_IPV6 is not set
+CONFIG_CFG80211=m
+CONFIG_MAC80211=m
+CONFIG_MAC80211_DEBUGFS=y
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+# CONFIG_FIRMWARE_IN_KERNEL is not set
+CONFIG_MTD=y
+CONFIG_MTD_REDBOOT_PARTS=y
+CONFIG_MTD_REDBOOT_DIRECTORY_BLOCK=-2
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_CFI_ADV_OPTIONS=y
+CONFIG_MTD_CFI_GEOMETRY=y
+# CONFIG_MTD_MAP_BANK_WIDTH_1 is not set
+# CONFIG_MTD_MAP_BANK_WIDTH_4 is not set
+# CONFIG_MTD_CFI_I2 is not set
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_COMPLEX_MAPPINGS=y
+CONFIG_MTD_PHYSMAP=y
+CONFIG_NETDEVICES=y
+# CONFIG_ETHERNET is not set
+# CONFIG_WLAN_VENDOR_ADMTEK is not set
+CONFIG_ATH5K=m
+# CONFIG_WLAN_VENDOR_ATMEL is not set
+# CONFIG_WLAN_VENDOR_BROADCOM is not set
+# CONFIG_WLAN_VENDOR_CISCO is not set
+# CONFIG_WLAN_VENDOR_INTEL is not set
+# CONFIG_WLAN_VENDOR_INTERSIL is not set
+# CONFIG_WLAN_VENDOR_MARVELL is not set
+# CONFIG_WLAN_VENDOR_MEDIATEK is not set
+# CONFIG_WLAN_VENDOR_RALINK is not set
+# CONFIG_WLAN_VENDOR_REALTEK is not set
+# CONFIG_WLAN_VENDOR_RSI is not set
+# CONFIG_WLAN_VENDOR_ST is not set
+# CONFIG_WLAN_VENDOR_TI is not set
+# CONFIG_WLAN_VENDOR_ZYDAS is not set
+CONFIG_INPUT=m
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_DEVKMEM is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+# CONFIG_SERIAL_8250_PCI is not set
+CONFIG_SERIAL_8250_NR_UARTS=1
+CONFIG_SERIAL_8250_RUNTIME_UARTS=1
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+# CONFIG_VGA_ARB is not set
+CONFIG_USB=m
+CONFIG_USB_EHCI_HCD=m
+CONFIG_LEDS_CLASS=y
+# CONFIG_IOMMU_SUPPORT is not set
+# CONFIG_DNOTIFY is not set
+# CONFIG_PROC_PAGE_MONITOR is not set
+CONFIG_TMPFS=y
+CONFIG_TMPFS_XATTR=y
+CONFIG_JFFS2_FS=y
+CONFIG_JFFS2_SUMMARY=y
+CONFIG_JFFS2_FS_XATTR=y
+# CONFIG_JFFS2_FS_POSIX_ACL is not set
+# CONFIG_JFFS2_FS_SECURITY is not set
+CONFIG_JFFS2_COMPRESSION_OPTIONS=y
+# CONFIG_JFFS2_ZLIB is not set
+CONFIG_SQUASHFS=y
+CONFIG_SQUASHFS_FILE_DIRECT=y
+CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU=y
+# CONFIG_SQUASHFS_ZLIB is not set
+CONFIG_SQUASHFS_XZ=y
+CONFIG_PRINTK_TIME=y
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_STRIP_ASM_SYMS=y
+CONFIG_DEBUG_FS=y
+# CONFIG_SCHED_DEBUG is not set
+# CONFIG_FTRACE is not set
+# CONFIG_XZ_DEC_X86 is not set
+# CONFIG_XZ_DEC_POWERPC is not set
+# CONFIG_XZ_DEC_IA64 is not set
+# CONFIG_XZ_DEC_ARM is not set
+# CONFIG_XZ_DEC_ARMTHUMB is not set
+# CONFIG_XZ_DEC_SPARC is not set
diff --git a/arch/mips/configs/cavium_octeon_defconfig b/arch/mips/configs/cavium_octeon_defconfig
index dcac308..d470d08 100644
--- a/arch/mips/configs/cavium_octeon_defconfig
+++ b/arch/mips/configs/cavium_octeon_defconfig
@@ -59,6 +59,8 @@
 CONFIG_BLK_DEV_SD=y
 CONFIG_ATA=y
 CONFIG_SATA_AHCI=y
+CONFIG_SATA_AHCI_PLATFORM=y
+CONFIG_AHCI_OCTEON=y
 CONFIG_PATA_OCTEON_CF=y
 CONFIG_SATA_SIL=y
 CONFIG_NETDEVICES=y
diff --git a/arch/mips/include/asm/bootinfo.h b/arch/mips/include/asm/bootinfo.h
index 9f67033..ee9f5f2 100644
--- a/arch/mips/include/asm/bootinfo.h
+++ b/arch/mips/include/asm/bootinfo.h
@@ -127,6 +127,10 @@
  */
 extern unsigned long fw_arg0, fw_arg1, fw_arg2, fw_arg3;
 
+#ifdef CONFIG_USE_OF
+extern unsigned long fw_passed_dtb;
+#endif
+
 /*
  * Platform memory detection hook called by setup_arch
  */
diff --git a/arch/mips/include/asm/dsemul.h b/arch/mips/include/asm/dsemul.h
new file mode 100644
index 0000000..a6e0678
--- /dev/null
+++ b/arch/mips/include/asm/dsemul.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (C) 2016 Imagination Technologies
+ * Author: Paul Burton <paul.burton@imgtec.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#ifndef __MIPS_ASM_DSEMUL_H__
+#define __MIPS_ASM_DSEMUL_H__
+
+#include <asm/break.h>
+#include <asm/inst.h>
+
+/* Break instruction with special math emu break code set */
+#define BREAK_MATH(micromips)	(((micromips) ? 0x7 : 0xd) | (BRK_MEMU << 16))
+
+/* When used as a frame index, indicates the lack of a frame */
+#define BD_EMUFRAME_NONE	((int)BIT(31))
+
+struct mm_struct;
+struct pt_regs;
+struct task_struct;
+
+/**
+ * mips_dsemul() - 'Emulate' an instruction from a branch delay slot
+ * @regs:	User thread register context.
+ * @ir:		The instruction to be 'emulated'.
+ * @branch_pc:	The PC of the branch instruction.
+ * @cont_pc:	The PC to continue at following 'emulation'.
+ *
+ * Emulate or execute an arbitrary MIPS instruction within the context of
+ * the current user thread. This is used primarily to handle instructions
+ * in the delay slots of emulated branch instructions, for example FP
+ * branch instructions on systems without an FPU.
+ *
+ * Return: Zero on success, negative if ir is a NOP, signal number on failure.
+ */
+extern int mips_dsemul(struct pt_regs *regs, mips_instruction ir,
+		       unsigned long branch_pc, unsigned long cont_pc);
+
+/**
+ * do_dsemulret() - Return from a delay slot 'emulation' frame
+ * @xcp:	User thread register context.
+ *
+ * Call in response to the BRK_MEMU break instruction used to return to
+ * the kernel from branch delay slot 'emulation' frames following a call
+ * to mips_dsemul(). Restores the user thread PC to the value that was
+ * passed as the cpc parameter to mips_dsemul().
+ *
+ * Return: True if an emulation frame was returned from, else false.
+ */
+extern bool do_dsemulret(struct pt_regs *xcp);
+
+/**
+ * dsemul_thread_cleanup() - Cleanup thread 'emulation' frame
+ * @tsk: The task structure associated with the thread
+ *
+ * If the thread @tsk has a branch delay slot 'emulation' frame
+ * allocated to it then free that frame.
+ *
+ * Return: True if a frame was freed, else false.
+ */
+extern bool dsemul_thread_cleanup(struct task_struct *tsk);
+
+/**
+ * dsemul_thread_rollback() - Rollback from an 'emulation' frame
+ * @regs:	User thread register context.
+ *
+ * If the current thread, whose register context is represented by @regs,
+ * is executing within a delay slot 'emulation' frame then exit that
+ * frame. The PC will be rolled back to the branch if the instruction
+ * that was being 'emulated' has not yet executed, or advanced to the
+ * continuation PC if it has.
+ *
+ * Return: True if a frame was exited, else false.
+ */
+extern bool dsemul_thread_rollback(struct pt_regs *regs);
+
+/**
+ * dsemul_mm_cleanup() - Cleanup per-mm delay slot 'emulation' state
+ * @mm:		The struct mm_struct to cleanup state for.
+ *
+ * Cleanup state for the given @mm, ensuring that any memory allocated
+ * for delay slot 'emulation' book-keeping is freed. This is to be called
+ * before @mm is freed in order to avoid memory leaks.
+ */
+extern void dsemul_mm_cleanup(struct mm_struct *mm);
+
+#endif /* __MIPS_ASM_DSEMUL_H__ */
diff --git a/arch/mips/include/asm/elf.h b/arch/mips/include/asm/elf.h
index f5f4571..2b3dc29 100644
--- a/arch/mips/include/asm/elf.h
+++ b/arch/mips/include/asm/elf.h
@@ -458,6 +458,7 @@
 #define ELF_ET_DYN_BASE		(TASK_SIZE / 3 * 2)
 #endif
 
+/* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */
 #define ARCH_DLINFO							\
 do {									\
 	NEW_AUX_ENT(AT_SYSINFO_EHDR,					\
@@ -498,4 +499,7 @@
 extern void mips_set_personality_nan(struct arch_elf_state *state);
 extern void mips_set_personality_fp(struct arch_elf_state *state);
 
+#define elf_read_implies_exec(ex, stk) mips_elf_read_implies_exec(&(ex), stk)
+extern int mips_elf_read_implies_exec(void *elf_ex, int exstack);
+
 #endif /* _ASM_ELF_H */
diff --git a/arch/mips/include/asm/fpu_emulator.h b/arch/mips/include/asm/fpu_emulator.h
index 3225c3c..355dc25 100644
--- a/arch/mips/include/asm/fpu_emulator.h
+++ b/arch/mips/include/asm/fpu_emulator.h
@@ -24,7 +24,7 @@
 #define _ASM_FPU_EMULATOR_H
 
 #include <linux/sched.h>
-#include <asm/break.h>
+#include <asm/dsemul.h>
 #include <asm/thread_info.h>
 #include <asm/inst.h>
 #include <asm/local.h>
@@ -60,27 +60,16 @@
 #define MIPS_FPU_EMU_INC_STATS(M) do { } while (0)
 #endif /* CONFIG_DEBUG_FS */
 
-extern int mips_dsemul(struct pt_regs *regs, mips_instruction ir,
-	unsigned long cpc);
-extern int do_dsemulret(struct pt_regs *xcp);
 extern int fpu_emulator_cop1Handler(struct pt_regs *xcp,
 				    struct mips_fpu_struct *ctx, int has_fpu,
 				    void *__user *fault_addr);
 int process_fpemu_return(int sig, void __user *fault_addr,
 			 unsigned long fcr31);
+int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
+		  unsigned long *contpc);
 int mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
 		     unsigned long *contpc);
 
-/*
- * Instruction inserted following the badinst to further tag the sequence
- */
-#define BD_COOKIE 0x0000bd36	/* tne $0, $0 with baggage */
-
-/*
- * Break instruction with special math emu break code set
- */
-#define BREAK_MATH(micromips) (((micromips) ? 0x7 : 0xd) | (BRK_MEMU << 16))
-
 #define SIGNALLING_NAN 0x7ff800007ff80000LL
 
 static inline void fpu_emulator_init_fpu(void)
diff --git a/arch/mips/include/asm/mach-cavium-octeon/irq.h b/arch/mips/include/asm/mach-cavium-octeon/irq.h
index cceae32..64b86b9 100644
--- a/arch/mips/include/asm/mach-cavium-octeon/irq.h
+++ b/arch/mips/include/asm/mach-cavium-octeon/irq.h
@@ -42,8 +42,6 @@
 	OCTEON_IRQ_TIMER1,
 	OCTEON_IRQ_TIMER2,
 	OCTEON_IRQ_TIMER3,
-	OCTEON_IRQ_USB0,
-	OCTEON_IRQ_USB1,
 #ifndef CONFIG_PCI_MSI
 	OCTEON_IRQ_LAST = 127
 #endif
diff --git a/arch/mips/include/asm/mach-cavium-octeon/mangle-port.h b/arch/mips/include/asm/mach-cavium-octeon/mangle-port.h
index 374eefa..0cf5ac1 100644
--- a/arch/mips/include/asm/mach-cavium-octeon/mangle-port.h
+++ b/arch/mips/include/asm/mach-cavium-octeon/mangle-port.h
@@ -12,6 +12,14 @@
 
 #ifdef __BIG_ENDIAN
 
+static inline bool __should_swizzle_bits(volatile void *a)
+{
+	extern const bool octeon_should_swizzle_table[];
+
+	unsigned long did = ((unsigned long)a >> 40) & 0xff;
+	return octeon_should_swizzle_table[did];
+}
+
 # define __swizzle_addr_b(port)	(port)
 # define __swizzle_addr_w(port)	(port)
 # define __swizzle_addr_l(port)	(port)
@@ -19,6 +27,8 @@
 
 #else /* __LITTLE_ENDIAN */
 
+#define __should_swizzle_bits(a)	false
+
 static inline bool __should_swizzle_addr(unsigned long p)
 {
 	/* boot bus? */
@@ -35,40 +45,14 @@
 
 #endif /* __BIG_ENDIAN */
 
-/*
- * Sane hardware offers swapping of PCI/ISA I/O space accesses in hardware;
- * less sane hardware forces software to fiddle with this...
- *
- * Regardless, if the host bus endianness mismatches that of PCI/ISA, then
- * you can't have the numerical value of data and byte addresses within
- * multibyte quantities both preserved at the same time.  Hence two
- * variations of functions: non-prefixed ones that preserve the value
- * and prefixed ones that preserve byte addresses.  The latters are
- * typically used for moving raw data between a peripheral and memory (cf.
- * string I/O functions), hence the "__mem_" prefix.
- */
-#if defined(CONFIG_SWAP_IO_SPACE)
 
 # define ioswabb(a, x)		(x)
 # define __mem_ioswabb(a, x)	(x)
-# define ioswabw(a, x)		le16_to_cpu(x)
+# define ioswabw(a, x)		(__should_swizzle_bits(a) ? le16_to_cpu(x) : x)
 # define __mem_ioswabw(a, x)	(x)
-# define ioswabl(a, x)		le32_to_cpu(x)
+# define ioswabl(a, x)		(__should_swizzle_bits(a) ? le32_to_cpu(x) : x)
 # define __mem_ioswabl(a, x)	(x)
-# define ioswabq(a, x)		le64_to_cpu(x)
+# define ioswabq(a, x)		(__should_swizzle_bits(a) ? le64_to_cpu(x) : x)
 # define __mem_ioswabq(a, x)	(x)
 
-#else
-
-# define ioswabb(a, x)		(x)
-# define __mem_ioswabb(a, x)	(x)
-# define ioswabw(a, x)		(x)
-# define __mem_ioswabw(a, x)	cpu_to_le16(x)
-# define ioswabl(a, x)		(x)
-# define __mem_ioswabl(a, x)	cpu_to_le32(x)
-# define ioswabq(a, x)		(x)
-# define __mem_ioswabq(a, x)	cpu_to_le32(x)
-
-#endif
-
 #endif /* __ASM_MACH_GENERIC_MANGLE_PORT_H */
diff --git a/arch/mips/include/asm/mmu.h b/arch/mips/include/asm/mmu.h
index 1afa1f9..f6ba08d 100644
--- a/arch/mips/include/asm/mmu.h
+++ b/arch/mips/include/asm/mmu.h
@@ -2,11 +2,20 @@
 #define __ASM_MMU_H
 
 #include <linux/atomic.h>
+#include <linux/spinlock.h>
+#include <linux/wait.h>
 
 typedef struct {
 	unsigned long asid[NR_CPUS];
 	void *vdso;
 	atomic_t fp_mode_switching;
+
+	/* lock to be held whilst modifying fp_bd_emupage_allocmap */
+	spinlock_t bd_emupage_lock;
+	/* bitmap tracking allocation of fp_bd_emupage */
+	unsigned long *bd_emupage_allocmap;
+	/* wait queue for threads requiring an emuframe */
+	wait_queue_head_t bd_emupage_queue;
 } mm_context_t;
 
 #endif /* __ASM_MMU_H */
diff --git a/arch/mips/include/asm/mmu_context.h b/arch/mips/include/asm/mmu_context.h
index fc57e13..ddd57ad 100644
--- a/arch/mips/include/asm/mmu_context.h
+++ b/arch/mips/include/asm/mmu_context.h
@@ -16,6 +16,7 @@
 #include <linux/smp.h>
 #include <linux/slab.h>
 #include <asm/cacheflush.h>
+#include <asm/dsemul.h>
 #include <asm/hazards.h>
 #include <asm/tlbflush.h>
 #include <asm-generic/mm_hooks.h>
@@ -128,6 +129,10 @@
 
 	atomic_set(&mm->context.fp_mode_switching, 0);
 
+	mm->context.bd_emupage_allocmap = NULL;
+	spin_lock_init(&mm->context.bd_emupage_lock);
+	init_waitqueue_head(&mm->context.bd_emupage_queue);
+
 	return 0;
 }
 
@@ -162,6 +167,7 @@
  */
 static inline void destroy_context(struct mm_struct *mm)
 {
+	dsemul_mm_cleanup(mm);
 }
 
 #define deactivate_mm(tsk, mm)	do { } while (0)
diff --git a/arch/mips/include/asm/page.h b/arch/mips/include/asm/page.h
index 21ed715..ea0cd97 100644
--- a/arch/mips/include/asm/page.h
+++ b/arch/mips/include/asm/page.h
@@ -162,16 +162,34 @@
 /*
  * __pa()/__va() should be used only during mem init.
  */
-#ifdef CONFIG_64BIT
-#define __pa(x)								\
-({									\
-    unsigned long __x = (unsigned long)(x);				\
-    __x < CKSEG0 ? XPHYSADDR(__x) : CPHYSADDR(__x);			\
-})
-#else
-#define __pa(x)								\
-    ((unsigned long)(x) - PAGE_OFFSET + PHYS_OFFSET)
-#endif
+static inline unsigned long ___pa(unsigned long x)
+{
+	if (config_enabled(CONFIG_64BIT)) {
+		/*
+		 * For MIPS64 the virtual address may either be in one of
+		 * the compatibility segements ckseg0 or ckseg1, or it may
+		 * be in xkphys.
+		 */
+		return x < CKSEG0 ? XPHYSADDR(x) : CPHYSADDR(x);
+	}
+
+	if (!config_enabled(CONFIG_EVA)) {
+		/*
+		 * We're using the standard MIPS32 legacy memory map, ie.
+		 * the address x is going to be in kseg0 or kseg1. We can
+		 * handle either case by masking out the desired bits using
+		 * CPHYSADDR.
+		 */
+		return CPHYSADDR(x);
+	}
+
+	/*
+	 * EVA is in use so the memory map could be anything, making it not
+	 * safe to just mask out bits.
+	 */
+	return x - PAGE_OFFSET + PHYS_OFFSET;
+}
+#define __pa(x)		___pa((unsigned long)(x))
 #define __va(x)		((void *)((unsigned long)(x) + PAGE_OFFSET - PHYS_OFFSET))
 #include <asm/io.h>
 
@@ -229,8 +247,10 @@
 #define virt_addr_valid(kaddr)						\
 	__virt_addr_valid((const volatile void *) (kaddr))
 
-#define VM_DATA_DEFAULT_FLAGS	(VM_READ | VM_WRITE | VM_EXEC | \
-				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+#define VM_DATA_DEFAULT_FLAGS \
+	(VM_READ | VM_WRITE | \
+	 ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) | \
+	 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
 
 #define UNCAC_ADDR(addr)	((addr) - PAGE_OFFSET + UNCAC_BASE)
 #define CAC_ADDR(addr)		((addr) - UNCAC_BASE + PAGE_OFFSET)
diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h
index 7e78b62..0d36c87 100644
--- a/arch/mips/include/asm/processor.h
+++ b/arch/mips/include/asm/processor.h
@@ -11,12 +11,14 @@
 #ifndef _ASM_PROCESSOR_H
 #define _ASM_PROCESSOR_H
 
+#include <linux/atomic.h>
 #include <linux/cpumask.h>
 #include <linux/threads.h>
 
 #include <asm/cachectl.h>
 #include <asm/cpu.h>
 #include <asm/cpu-info.h>
+#include <asm/dsemul.h>
 #include <asm/mipsregs.h>
 #include <asm/prefetch.h>
 
@@ -78,7 +80,11 @@
 
 #endif
 
-#define STACK_TOP	(TASK_SIZE & PAGE_MASK)
+/*
+ * One page above the stack is used for branch delay slot "emulation".
+ * See dsemul.c for details.
+ */
+#define STACK_TOP	((TASK_SIZE & PAGE_MASK) - PAGE_SIZE)
 
 /*
  * This decides where the kernel will search for a free chunk of vm
@@ -256,6 +262,12 @@
 
 	/* Saved fpu/fpu emulator stuff. */
 	struct mips_fpu_struct fpu FPU_ALIGN;
+	/* Assigned branch delay slot 'emulation' frame */
+	atomic_t bd_emu_frame;
+	/* PC of the branch from a branch delay slot 'emulation' */
+	unsigned long bd_emu_branch_pc;
+	/* PC to continue from following a branch delay slot 'emulation' */
+	unsigned long bd_emu_cont_pc;
 #ifdef CONFIG_MIPS_MT_FPAFF
 	/* Emulated instruction count */
 	unsigned long emulated_fp;
@@ -323,6 +335,10 @@
 	 * FPU affinity state (null if not FPAFF)		\
 	 */							\
 	FPAFF_INIT						\
+	/* Delay slot emulation */				\
+	.bd_emu_frame = ATOMIC_INIT(BD_EMUFRAME_NONE),		\
+	.bd_emu_branch_pc = 0,					\
+	.bd_emu_cont_pc = 0,					\
 	/*							\
 	 * Saved DSP stuff					\
 	 */							\
diff --git a/arch/mips/include/asm/r4kcache.h b/arch/mips/include/asm/r4kcache.h
index 38902bf..667ca3c 100644
--- a/arch/mips/include/asm/r4kcache.h
+++ b/arch/mips/include/asm/r4kcache.h
@@ -210,7 +210,11 @@
 
 static inline void protected_writeback_scache_line(unsigned long addr)
 {
+#ifdef CONFIG_EVA
+	protected_cachee_op(Hit_Writeback_Inv_SD, addr);
+#else
 	protected_cache_op(Hit_Writeback_Inv_SD, addr);
+#endif
 }
 
 /*
diff --git a/arch/mips/include/asm/signal.h b/arch/mips/include/asm/signal.h
index 2292373..77b3b95 100644
--- a/arch/mips/include/asm/signal.h
+++ b/arch/mips/include/asm/signal.h
@@ -11,7 +11,7 @@
 
 #include <uapi/asm/signal.h>
 
-#ifdef CONFIG_MIPS32_COMPAT
+#ifdef CONFIG_MIPS32_O32
 extern struct mips_abi mips_abi_32;
 
 #define sig_uses_siginfo(ka, abi)                               \
diff --git a/arch/mips/include/asm/smp.h b/arch/mips/include/asm/smp.h
index 03722d4..8bc6c70 100644
--- a/arch/mips/include/asm/smp.h
+++ b/arch/mips/include/asm/smp.h
@@ -23,7 +23,7 @@
 extern int smp_num_siblings;
 extern cpumask_t cpu_sibling_map[];
 extern cpumask_t cpu_core_map[];
-extern cpumask_t cpu_foreign_map;
+extern cpumask_t cpu_foreign_map[];
 
 #define raw_smp_processor_id() (current_thread_info()->cpu)
 
@@ -53,6 +53,8 @@
 
 extern void asmlinkage smp_bootstrap(void);
 
+extern void calculate_cpu_foreign_map(void);
+
 /*
  * this function sends a 'reschedule' IPI to another CPU.
  * it goes straight through and wastes no time serializing
diff --git a/arch/mips/include/uapi/asm/auxvec.h b/arch/mips/include/uapi/asm/auxvec.h
index c9c7195..45ba259 100644
--- a/arch/mips/include/uapi/asm/auxvec.h
+++ b/arch/mips/include/uapi/asm/auxvec.h
@@ -14,4 +14,6 @@
 /* Location of VDSO image. */
 #define AT_SYSINFO_EHDR		33
 
+#define AT_VECTOR_SIZE_ARCH 1 /* entries in ARCH_DLINFO */
+
 #endif /* __ASM_AUXVEC_H */
diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile
index e6053d0..4a603a3 100644
--- a/arch/mips/kernel/Makefile
+++ b/arch/mips/kernel/Makefile
@@ -71,7 +71,7 @@
 obj-$(CONFIG_64BIT)		+= scall64-64.o
 obj-$(CONFIG_MIPS32_COMPAT)	+= linux32.o ptrace32.o signal32.o
 obj-$(CONFIG_MIPS32_N32)	+= binfmt_elfn32.o scall64-n32.o signal_n32.o
-obj-$(CONFIG_MIPS32_O32)	+= binfmt_elfo32.o scall64-o32.o
+obj-$(CONFIG_MIPS32_O32)	+= binfmt_elfo32.o scall64-o32.o signal_o32.o
 
 obj-$(CONFIG_KGDB)		+= kgdb.o
 obj-$(CONFIG_PROC_FS)		+= proc.o
diff --git a/arch/mips/kernel/elf.c b/arch/mips/kernel/elf.c
index 891f5ee..824b037 100644
--- a/arch/mips/kernel/elf.c
+++ b/arch/mips/kernel/elf.c
@@ -8,9 +8,12 @@
  * option) any later version.
  */
 
+#include <linux/binfmts.h>
 #include <linux/elf.h>
+#include <linux/export.h>
 #include <linux/sched.h>
 
+#include <asm/cpu-features.h>
 #include <asm/cpu-info.h>
 
 /* Whether to accept legacy-NaN and 2008-NaN user binaries.  */
@@ -326,3 +329,19 @@
 		BUG();
 	}
 }
+
+int mips_elf_read_implies_exec(void *elf_ex, int exstack)
+{
+	if (exstack != EXSTACK_DISABLE_X) {
+		/* The binary doesn't request a non-executable stack */
+		return 1;
+	}
+
+	if (!cpu_has_rixi) {
+		/* The CPU doesn't support non-executable memory */
+		return 1;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(mips_elf_read_implies_exec);
diff --git a/arch/mips/kernel/head.S b/arch/mips/kernel/head.S
index 56e8fed..cf05220 100644
--- a/arch/mips/kernel/head.S
+++ b/arch/mips/kernel/head.S
@@ -93,21 +93,24 @@
 	jr	t0
 0:
 
+#ifdef CONFIG_USE_OF
 #ifdef CONFIG_MIPS_RAW_APPENDED_DTB
-	PTR_LA		t0, __appended_dtb
+	PTR_LA		t2, __appended_dtb
 
 #ifdef CONFIG_CPU_BIG_ENDIAN
 	li		t1, 0xd00dfeed
 #else
 	li		t1, 0xedfe0dd0
 #endif
-	lw		t2, (t0)
-	bne		t1, t2, not_found
-	 nop
+	lw		t0, (t2)
+	beq		t0, t1, dtb_found
+#endif
+	li		t1, -2
+	beq		a0, t1, dtb_found
+	move		t2, a1
 
-	move		a1, t0
-	PTR_LI		a0, -2
-not_found:
+	li		t2, 0
+dtb_found:
 #endif
 	PTR_LA		t0, __bss_start		# clear .bss
 	LONG_S		zero, (t0)
@@ -122,6 +125,10 @@
 	LONG_S		a2, fw_arg2
 	LONG_S		a3, fw_arg3
 
+#ifdef CONFIG_USE_OF
+	LONG_S		t2, fw_passed_dtb
+#endif
+
 	MTC0		zero, CP0_CONTEXT	# clear context register
 	PTR_LA		$28, init_thread_union
 	/* Set the SP after an empty pt_regs.  */
diff --git a/arch/mips/kernel/mips-r2-to-r6-emul.c b/arch/mips/kernel/mips-r2-to-r6-emul.c
index 7ff2a55..ef23c61 100644
--- a/arch/mips/kernel/mips-r2-to-r6-emul.c
+++ b/arch/mips/kernel/mips-r2-to-r6-emul.c
@@ -283,7 +283,7 @@
 		err = mipsr6_emul(regs, nir);
 		if (err > 0) {
 			regs->cp0_epc = nepc;
-			err = mips_dsemul(regs, nir, cepc);
+			err = mips_dsemul(regs, nir, epc, cepc);
 			if (err == SIGILL)
 				err = SIGEMT;
 			MIPS_R2_STATS(dsemul);
@@ -1033,7 +1033,7 @@
 			if (nir) {
 				err = mipsr6_emul(regs, nir);
 				if (err > 0) {
-					err = mips_dsemul(regs, nir, cpc);
+					err = mips_dsemul(regs, nir, epc, cpc);
 					if (err == SIGILL)
 						err = SIGEMT;
 					MIPS_R2_STATS(dsemul);
@@ -1082,7 +1082,7 @@
 			if (nir) {
 				err = mipsr6_emul(regs, nir);
 				if (err > 0) {
-					err = mips_dsemul(regs, nir, cpc);
+					err = mips_dsemul(regs, nir, epc, cpc);
 					if (err == SIGILL)
 						err = SIGEMT;
 					MIPS_R2_STATS(dsemul);
@@ -1149,7 +1149,7 @@
 		if (nir) {
 			err = mipsr6_emul(regs, nir);
 			if (err > 0) {
-				err = mips_dsemul(regs, nir, cpc);
+				err = mips_dsemul(regs, nir, epc, cpc);
 				if (err == SIGILL)
 					err = SIGEMT;
 				MIPS_R2_STATS(dsemul);
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index 813ed78..7429ad0 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -30,6 +30,7 @@
 #include <asm/asm.h>
 #include <asm/bootinfo.h>
 #include <asm/cpu.h>
+#include <asm/dsemul.h>
 #include <asm/dsp.h>
 #include <asm/fpu.h>
 #include <asm/msa.h>
@@ -68,11 +69,22 @@
 	lose_fpu(0);
 	clear_thread_flag(TIF_MSA_CTX_LIVE);
 	clear_used_math();
+	atomic_set(&current->thread.bd_emu_frame, BD_EMUFRAME_NONE);
 	init_dsp();
 	regs->cp0_epc = pc;
 	regs->regs[29] = sp;
 }
 
+void exit_thread(struct task_struct *tsk)
+{
+	/*
+	 * User threads may have allocated a delay slot emulation frame.
+	 * If so, clean up that allocation.
+	 */
+	if (!(current->flags & PF_KTHREAD))
+		dsemul_thread_cleanup(tsk);
+}
+
 int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 {
 	/*
@@ -159,6 +171,8 @@
 	clear_tsk_thread_flag(p, TIF_FPUBOUND);
 #endif /* CONFIG_MIPS_MT_FPAFF */
 
+	atomic_set(&p->thread.bd_emu_frame, BD_EMUFRAME_NONE);
+
 	if (clone_flags & CLONE_SETTLS)
 		ti->tp_value = regs->regs[7];
 
diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
index 9c0b387..51d3988 100644
--- a/arch/mips/kernel/scall64-n32.S
+++ b/arch/mips/kernel/scall64-n32.S
@@ -348,7 +348,7 @@
 	PTR	sys_ni_syscall			/* available, was setaltroot */
 	PTR	sys_add_key
 	PTR	sys_request_key
-	PTR	sys_keyctl			/* 6245 */
+	PTR	compat_sys_keyctl		/* 6245 */
 	PTR	sys_set_thread_area
 	PTR	sys_inotify_init
 	PTR	sys_inotify_add_watch
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index f4f28b1..6efa713 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -504,7 +504,7 @@
 	PTR	sys_ni_syscall			/* available, was setaltroot */
 	PTR	sys_add_key			/* 4280 */
 	PTR	sys_request_key
-	PTR	sys_keyctl
+	PTR	compat_sys_keyctl
 	PTR	sys_set_thread_area
 	PTR	sys_inotify_init
 	PTR	sys_inotify_add_watch		/* 4285 */
diff --git a/arch/mips/kernel/segment.c b/arch/mips/kernel/segment.c
index 87bc74a..2703f21 100644
--- a/arch/mips/kernel/segment.c
+++ b/arch/mips/kernel/segment.c
@@ -26,17 +26,20 @@
 
 	/*
 	 * Access modes MK, MSK and MUSK are mapped segments. Therefore
-	 * there is no direct physical address mapping.
+	 * there is no direct physical address mapping unless it becomes
+	 * unmapped uncached at error level due to EU.
 	 */
-	if ((am == 0) || (am > 3)) {
+	if ((am == 0) || (am > 3) || (cfg & MIPS_SEGCFG_EU))
 		str += sprintf(str, "         %03lx",
 			((cfg & MIPS_SEGCFG_PA) >> MIPS_SEGCFG_PA_SHIFT));
+	else
+		str += sprintf(str, "         UND");
+
+	if ((am == 0) || (am > 3))
 		str += sprintf(str, "         %01ld",
 			((cfg & MIPS_SEGCFG_C) >> MIPS_SEGCFG_C_SHIFT));
-	} else {
-		str += sprintf(str, "         UND");
+	else
 		str += sprintf(str, "         U");
-	}
 
 	/* Exception configuration. */
 	str += sprintf(str, "       %01ld\n",
diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index ef408a0..36cf8d6 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -875,6 +875,10 @@
 unsigned long kernelsp[NR_CPUS];
 unsigned long fw_arg0, fw_arg1, fw_arg2, fw_arg3;
 
+#ifdef CONFIG_USE_OF
+unsigned long fw_passed_dtb;
+#endif
+
 #ifdef CONFIG_DEBUG_FS
 struct dentry *mips_debugfs_dir;
 static int __init debugfs_mips(void)
diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c
index ae42314..9383635 100644
--- a/arch/mips/kernel/signal.c
+++ b/arch/mips/kernel/signal.c
@@ -772,6 +772,14 @@
 	struct mips_abi *abi = current->thread.abi;
 	void *vdso = current->mm->context.vdso;
 
+	/*
+	 * If we were emulating a delay slot instruction, exit that frame such
+	 * that addresses in the sigframe are as expected for userland and we
+	 * don't have a problem if we reuse the thread's frame for an
+	 * instruction within the signal handler.
+	 */
+	dsemul_thread_rollback(regs);
+
 	if (regs->regs[0]) {
 		switch(regs->regs[2]) {
 		case ERESTART_RESTARTBLOCK:
diff --git a/arch/mips/kernel/signal32.c b/arch/mips/kernel/signal32.c
index 78c8349..97b7c51 100644
--- a/arch/mips/kernel/signal32.c
+++ b/arch/mips/kernel/signal32.c
@@ -6,129 +6,26 @@
  * Copyright (C) 1991, 1992  Linus Torvalds
  * Copyright (C) 1994 - 2000, 2006  Ralf Baechle
  * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
+ * Copyright (C) 2016, Imagination Technologies Ltd.
  */
-#include <linux/cache.h>
-#include <linux/compat.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
+#include <linux/compiler.h>
+#include <linux/errno.h>
 #include <linux/kernel.h>
 #include <linux/signal.h>
 #include <linux/syscalls.h>
-#include <linux/errno.h>
-#include <linux/wait.h>
-#include <linux/ptrace.h>
-#include <linux/suspend.h>
-#include <linux/compiler.h>
-#include <linux/uaccess.h>
 
-#include <asm/abi.h>
-#include <asm/asm.h>
+#include <asm/compat.h>
 #include <asm/compat-signal.h>
-#include <linux/bitops.h>
-#include <asm/cacheflush.h>
-#include <asm/sim.h>
-#include <asm/ucontext.h>
-#include <asm/fpu.h>
-#include <asm/war.h>
-#include <asm/dsp.h>
+#include <asm/uaccess.h>
+#include <asm/unistd.h>
 
 #include "signal-common.h"
 
-/*
- * Including <asm/unistd.h> would give use the 64-bit syscall numbers ...
- */
-#define __NR_O32_restart_syscall	4253
-
 /* 32-bit compatibility types */
 
 typedef unsigned int __sighandler32_t;
 typedef void (*vfptr_t)(void);
 
-struct ucontext32 {
-	u32		    uc_flags;
-	s32		    uc_link;
-	compat_stack_t      uc_stack;
-	struct sigcontext32 uc_mcontext;
-	compat_sigset_t	    uc_sigmask;	  /* mask last for extensibility */
-};
-
-struct sigframe32 {
-	u32 sf_ass[4];		/* argument save space for o32 */
-	u32 sf_pad[2];		/* Was: signal trampoline */
-	struct sigcontext32 sf_sc;
-	compat_sigset_t sf_mask;
-};
-
-struct rt_sigframe32 {
-	u32 rs_ass[4];			/* argument save space for o32 */
-	u32 rs_pad[2];			/* Was: signal trampoline */
-	compat_siginfo_t rs_info;
-	struct ucontext32 rs_uc;
-};
-
-static int setup_sigcontext32(struct pt_regs *regs,
-			      struct sigcontext32 __user *sc)
-{
-	int err = 0;
-	int i;
-
-	err |= __put_user(regs->cp0_epc, &sc->sc_pc);
-
-	err |= __put_user(0, &sc->sc_regs[0]);
-	for (i = 1; i < 32; i++)
-		err |= __put_user(regs->regs[i], &sc->sc_regs[i]);
-
-	err |= __put_user(regs->hi, &sc->sc_mdhi);
-	err |= __put_user(regs->lo, &sc->sc_mdlo);
-	if (cpu_has_dsp) {
-		err |= __put_user(rddsp(DSP_MASK), &sc->sc_dsp);
-		err |= __put_user(mfhi1(), &sc->sc_hi1);
-		err |= __put_user(mflo1(), &sc->sc_lo1);
-		err |= __put_user(mfhi2(), &sc->sc_hi2);
-		err |= __put_user(mflo2(), &sc->sc_lo2);
-		err |= __put_user(mfhi3(), &sc->sc_hi3);
-		err |= __put_user(mflo3(), &sc->sc_lo3);
-	}
-
-	/*
-	 * Save FPU state to signal context.  Signal handler
-	 * will "inherit" current FPU state.
-	 */
-	err |= protected_save_fp_context(sc);
-
-	return err;
-}
-
-static int restore_sigcontext32(struct pt_regs *regs,
-				struct sigcontext32 __user *sc)
-{
-	int err = 0;
-	s32 treg;
-	int i;
-
-	/* Always make any pending restarted system calls return -EINTR */
-	current->restart_block.fn = do_no_restart_syscall;
-
-	err |= __get_user(regs->cp0_epc, &sc->sc_pc);
-	err |= __get_user(regs->hi, &sc->sc_mdhi);
-	err |= __get_user(regs->lo, &sc->sc_mdlo);
-	if (cpu_has_dsp) {
-		err |= __get_user(treg, &sc->sc_hi1); mthi1(treg);
-		err |= __get_user(treg, &sc->sc_lo1); mtlo1(treg);
-		err |= __get_user(treg, &sc->sc_hi2); mthi2(treg);
-		err |= __get_user(treg, &sc->sc_lo2); mtlo2(treg);
-		err |= __get_user(treg, &sc->sc_hi3); mthi3(treg);
-		err |= __get_user(treg, &sc->sc_lo3); mtlo3(treg);
-		err |= __get_user(treg, &sc->sc_dsp); wrdsp(treg, DSP_MASK);
-	}
-
-	for (i = 1; i < 32; i++)
-		err |= __get_user(regs->regs[i], &sc->sc_regs[i]);
-
-	return err ?: protected_restore_fp_context(sc);
-}
-
 /*
  * Atomically swap in the new signal mask, and wait for a signal.
  */
@@ -247,176 +144,3 @@
 
 	return 0;
 }
-
-asmlinkage void sys32_sigreturn(nabi_no_regargs struct pt_regs regs)
-{
-	struct sigframe32 __user *frame;
-	sigset_t blocked;
-	int sig;
-
-	frame = (struct sigframe32 __user *) regs.regs[29];
-	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
-		goto badframe;
-	if (__copy_conv_sigset_from_user(&blocked, &frame->sf_mask))
-		goto badframe;
-
-	set_current_blocked(&blocked);
-
-	sig = restore_sigcontext32(&regs, &frame->sf_sc);
-	if (sig < 0)
-		goto badframe;
-	else if (sig)
-		force_sig(sig, current);
-
-	/*
-	 * Don't let your children do this ...
-	 */
-	__asm__ __volatile__(
-		"move\t$29, %0\n\t"
-		"j\tsyscall_exit"
-		:/* no outputs */
-		:"r" (&regs));
-	/* Unreached */
-
-badframe:
-	force_sig(SIGSEGV, current);
-}
-
-asmlinkage void sys32_rt_sigreturn(nabi_no_regargs struct pt_regs regs)
-{
-	struct rt_sigframe32 __user *frame;
-	sigset_t set;
-	int sig;
-
-	frame = (struct rt_sigframe32 __user *) regs.regs[29];
-	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
-		goto badframe;
-	if (__copy_conv_sigset_from_user(&set, &frame->rs_uc.uc_sigmask))
-		goto badframe;
-
-	set_current_blocked(&set);
-
-	sig = restore_sigcontext32(&regs, &frame->rs_uc.uc_mcontext);
-	if (sig < 0)
-		goto badframe;
-	else if (sig)
-		force_sig(sig, current);
-
-	if (compat_restore_altstack(&frame->rs_uc.uc_stack))
-		goto badframe;
-
-	/*
-	 * Don't let your children do this ...
-	 */
-	__asm__ __volatile__(
-		"move\t$29, %0\n\t"
-		"j\tsyscall_exit"
-		:/* no outputs */
-		:"r" (&regs));
-	/* Unreached */
-
-badframe:
-	force_sig(SIGSEGV, current);
-}
-
-static int setup_frame_32(void *sig_return, struct ksignal *ksig,
-			  struct pt_regs *regs, sigset_t *set)
-{
-	struct sigframe32 __user *frame;
-	int err = 0;
-
-	frame = get_sigframe(ksig, regs, sizeof(*frame));
-	if (!access_ok(VERIFY_WRITE, frame, sizeof (*frame)))
-		return -EFAULT;
-
-	err |= setup_sigcontext32(regs, &frame->sf_sc);
-	err |= __copy_conv_sigset_to_user(&frame->sf_mask, set);
-
-	if (err)
-		return -EFAULT;
-
-	/*
-	 * Arguments to signal handler:
-	 *
-	 *   a0 = signal number
-	 *   a1 = 0 (should be cause)
-	 *   a2 = pointer to struct sigcontext
-	 *
-	 * $25 and c0_epc point to the signal handler, $29 points to the
-	 * struct sigframe.
-	 */
-	regs->regs[ 4] = ksig->sig;
-	regs->regs[ 5] = 0;
-	regs->regs[ 6] = (unsigned long) &frame->sf_sc;
-	regs->regs[29] = (unsigned long) frame;
-	regs->regs[31] = (unsigned long) sig_return;
-	regs->cp0_epc = regs->regs[25] = (unsigned long) ksig->ka.sa.sa_handler;
-
-	DEBUGP("SIG deliver (%s:%d): sp=0x%p pc=0x%lx ra=0x%lx\n",
-	       current->comm, current->pid,
-	       frame, regs->cp0_epc, regs->regs[31]);
-
-	return 0;
-}
-
-static int setup_rt_frame_32(void *sig_return, struct ksignal *ksig,
-			     struct pt_regs *regs, sigset_t *set)
-{
-	struct rt_sigframe32 __user *frame;
-	int err = 0;
-
-	frame = get_sigframe(ksig, regs, sizeof(*frame));
-	if (!access_ok(VERIFY_WRITE, frame, sizeof (*frame)))
-		return -EFAULT;
-
-	/* Convert (siginfo_t -> compat_siginfo_t) and copy to user. */
-	err |= copy_siginfo_to_user32(&frame->rs_info, &ksig->info);
-
-	/* Create the ucontext.	 */
-	err |= __put_user(0, &frame->rs_uc.uc_flags);
-	err |= __put_user(0, &frame->rs_uc.uc_link);
-	err |= __compat_save_altstack(&frame->rs_uc.uc_stack, regs->regs[29]);
-	err |= setup_sigcontext32(regs, &frame->rs_uc.uc_mcontext);
-	err |= __copy_conv_sigset_to_user(&frame->rs_uc.uc_sigmask, set);
-
-	if (err)
-		return -EFAULT;
-
-	/*
-	 * Arguments to signal handler:
-	 *
-	 *   a0 = signal number
-	 *   a1 = 0 (should be cause)
-	 *   a2 = pointer to ucontext
-	 *
-	 * $25 and c0_epc point to the signal handler, $29 points to
-	 * the struct rt_sigframe32.
-	 */
-	regs->regs[ 4] = ksig->sig;
-	regs->regs[ 5] = (unsigned long) &frame->rs_info;
-	regs->regs[ 6] = (unsigned long) &frame->rs_uc;
-	regs->regs[29] = (unsigned long) frame;
-	regs->regs[31] = (unsigned long) sig_return;
-	regs->cp0_epc = regs->regs[25] = (unsigned long) ksig->ka.sa.sa_handler;
-
-	DEBUGP("SIG deliver (%s:%d): sp=0x%p pc=0x%lx ra=0x%lx\n",
-	       current->comm, current->pid,
-	       frame, regs->cp0_epc, regs->regs[31]);
-
-	return 0;
-}
-
-/*
- * o32 compatibility on 64-bit kernels, without DSP ASE
- */
-struct mips_abi mips_abi_32 = {
-	.setup_frame	= setup_frame_32,
-	.setup_rt_frame = setup_rt_frame_32,
-	.restart	= __NR_O32_restart_syscall,
-
-	.off_sc_fpregs = offsetof(struct sigcontext32, sc_fpregs),
-	.off_sc_fpc_csr = offsetof(struct sigcontext32, sc_fpc_csr),
-	.off_sc_used_math = offsetof(struct sigcontext32, sc_used_math),
-
-	.vdso		= &vdso_image_o32,
-};
diff --git a/arch/mips/kernel/signal_o32.c b/arch/mips/kernel/signal_o32.c
new file mode 100644
index 0000000..5e169fc
--- /dev/null
+++ b/arch/mips/kernel/signal_o32.c
@@ -0,0 +1,285 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1991, 1992  Linus Torvalds
+ * Copyright (C) 1994 - 2000, 2006  Ralf Baechle
+ * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
+ * Copyright (C) 2016, Imagination Technologies Ltd.
+ */
+#include <linux/compiler.h>
+#include <linux/errno.h>
+#include <linux/signal.h>
+#include <linux/uaccess.h>
+
+#include <asm/abi.h>
+#include <asm/compat-signal.h>
+#include <asm/dsp.h>
+#include <asm/sim.h>
+#include <asm/unistd.h>
+
+#include "signal-common.h"
+
+/*
+ * Including <asm/unistd.h> would give use the 64-bit syscall numbers ...
+ */
+#define __NR_O32_restart_syscall	4253
+
+struct sigframe32 {
+	u32 sf_ass[4];		/* argument save space for o32 */
+	u32 sf_pad[2];		/* Was: signal trampoline */
+	struct sigcontext32 sf_sc;
+	compat_sigset_t sf_mask;
+};
+
+struct ucontext32 {
+	u32		    uc_flags;
+	s32		    uc_link;
+	compat_stack_t      uc_stack;
+	struct sigcontext32 uc_mcontext;
+	compat_sigset_t	    uc_sigmask;	  /* mask last for extensibility */
+};
+
+struct rt_sigframe32 {
+	u32 rs_ass[4];			/* argument save space for o32 */
+	u32 rs_pad[2];			/* Was: signal trampoline */
+	compat_siginfo_t rs_info;
+	struct ucontext32 rs_uc;
+};
+
+static int setup_sigcontext32(struct pt_regs *regs,
+			      struct sigcontext32 __user *sc)
+{
+	int err = 0;
+	int i;
+
+	err |= __put_user(regs->cp0_epc, &sc->sc_pc);
+
+	err |= __put_user(0, &sc->sc_regs[0]);
+	for (i = 1; i < 32; i++)
+		err |= __put_user(regs->regs[i], &sc->sc_regs[i]);
+
+	err |= __put_user(regs->hi, &sc->sc_mdhi);
+	err |= __put_user(regs->lo, &sc->sc_mdlo);
+	if (cpu_has_dsp) {
+		err |= __put_user(rddsp(DSP_MASK), &sc->sc_dsp);
+		err |= __put_user(mfhi1(), &sc->sc_hi1);
+		err |= __put_user(mflo1(), &sc->sc_lo1);
+		err |= __put_user(mfhi2(), &sc->sc_hi2);
+		err |= __put_user(mflo2(), &sc->sc_lo2);
+		err |= __put_user(mfhi3(), &sc->sc_hi3);
+		err |= __put_user(mflo3(), &sc->sc_lo3);
+	}
+
+	/*
+	 * Save FPU state to signal context.  Signal handler
+	 * will "inherit" current FPU state.
+	 */
+	err |= protected_save_fp_context(sc);
+
+	return err;
+}
+
+static int restore_sigcontext32(struct pt_regs *regs,
+				struct sigcontext32 __user *sc)
+{
+	int err = 0;
+	s32 treg;
+	int i;
+
+	/* Always make any pending restarted system calls return -EINTR */
+	current->restart_block.fn = do_no_restart_syscall;
+
+	err |= __get_user(regs->cp0_epc, &sc->sc_pc);
+	err |= __get_user(regs->hi, &sc->sc_mdhi);
+	err |= __get_user(regs->lo, &sc->sc_mdlo);
+	if (cpu_has_dsp) {
+		err |= __get_user(treg, &sc->sc_hi1); mthi1(treg);
+		err |= __get_user(treg, &sc->sc_lo1); mtlo1(treg);
+		err |= __get_user(treg, &sc->sc_hi2); mthi2(treg);
+		err |= __get_user(treg, &sc->sc_lo2); mtlo2(treg);
+		err |= __get_user(treg, &sc->sc_hi3); mthi3(treg);
+		err |= __get_user(treg, &sc->sc_lo3); mtlo3(treg);
+		err |= __get_user(treg, &sc->sc_dsp); wrdsp(treg, DSP_MASK);
+	}
+
+	for (i = 1; i < 32; i++)
+		err |= __get_user(regs->regs[i], &sc->sc_regs[i]);
+
+	return err ?: protected_restore_fp_context(sc);
+}
+
+static int setup_frame_32(void *sig_return, struct ksignal *ksig,
+			  struct pt_regs *regs, sigset_t *set)
+{
+	struct sigframe32 __user *frame;
+	int err = 0;
+
+	frame = get_sigframe(ksig, regs, sizeof(*frame));
+	if (!access_ok(VERIFY_WRITE, frame, sizeof (*frame)))
+		return -EFAULT;
+
+	err |= setup_sigcontext32(regs, &frame->sf_sc);
+	err |= __copy_conv_sigset_to_user(&frame->sf_mask, set);
+
+	if (err)
+		return -EFAULT;
+
+	/*
+	 * Arguments to signal handler:
+	 *
+	 *   a0 = signal number
+	 *   a1 = 0 (should be cause)
+	 *   a2 = pointer to struct sigcontext
+	 *
+	 * $25 and c0_epc point to the signal handler, $29 points to the
+	 * struct sigframe.
+	 */
+	regs->regs[ 4] = ksig->sig;
+	regs->regs[ 5] = 0;
+	regs->regs[ 6] = (unsigned long) &frame->sf_sc;
+	regs->regs[29] = (unsigned long) frame;
+	regs->regs[31] = (unsigned long) sig_return;
+	regs->cp0_epc = regs->regs[25] = (unsigned long) ksig->ka.sa.sa_handler;
+
+	DEBUGP("SIG deliver (%s:%d): sp=0x%p pc=0x%lx ra=0x%lx\n",
+	       current->comm, current->pid,
+	       frame, regs->cp0_epc, regs->regs[31]);
+
+	return 0;
+}
+
+asmlinkage void sys32_rt_sigreturn(nabi_no_regargs struct pt_regs regs)
+{
+	struct rt_sigframe32 __user *frame;
+	sigset_t set;
+	int sig;
+
+	frame = (struct rt_sigframe32 __user *) regs.regs[29];
+	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
+		goto badframe;
+	if (__copy_conv_sigset_from_user(&set, &frame->rs_uc.uc_sigmask))
+		goto badframe;
+
+	set_current_blocked(&set);
+
+	sig = restore_sigcontext32(&regs, &frame->rs_uc.uc_mcontext);
+	if (sig < 0)
+		goto badframe;
+	else if (sig)
+		force_sig(sig, current);
+
+	if (compat_restore_altstack(&frame->rs_uc.uc_stack))
+		goto badframe;
+
+	/*
+	 * Don't let your children do this ...
+	 */
+	__asm__ __volatile__(
+		"move\t$29, %0\n\t"
+		"j\tsyscall_exit"
+		:/* no outputs */
+		:"r" (&regs));
+	/* Unreached */
+
+badframe:
+	force_sig(SIGSEGV, current);
+}
+
+static int setup_rt_frame_32(void *sig_return, struct ksignal *ksig,
+			     struct pt_regs *regs, sigset_t *set)
+{
+	struct rt_sigframe32 __user *frame;
+	int err = 0;
+
+	frame = get_sigframe(ksig, regs, sizeof(*frame));
+	if (!access_ok(VERIFY_WRITE, frame, sizeof (*frame)))
+		return -EFAULT;
+
+	/* Convert (siginfo_t -> compat_siginfo_t) and copy to user. */
+	err |= copy_siginfo_to_user32(&frame->rs_info, &ksig->info);
+
+	/* Create the ucontext.	 */
+	err |= __put_user(0, &frame->rs_uc.uc_flags);
+	err |= __put_user(0, &frame->rs_uc.uc_link);
+	err |= __compat_save_altstack(&frame->rs_uc.uc_stack, regs->regs[29]);
+	err |= setup_sigcontext32(regs, &frame->rs_uc.uc_mcontext);
+	err |= __copy_conv_sigset_to_user(&frame->rs_uc.uc_sigmask, set);
+
+	if (err)
+		return -EFAULT;
+
+	/*
+	 * Arguments to signal handler:
+	 *
+	 *   a0 = signal number
+	 *   a1 = 0 (should be cause)
+	 *   a2 = pointer to ucontext
+	 *
+	 * $25 and c0_epc point to the signal handler, $29 points to
+	 * the struct rt_sigframe32.
+	 */
+	regs->regs[ 4] = ksig->sig;
+	regs->regs[ 5] = (unsigned long) &frame->rs_info;
+	regs->regs[ 6] = (unsigned long) &frame->rs_uc;
+	regs->regs[29] = (unsigned long) frame;
+	regs->regs[31] = (unsigned long) sig_return;
+	regs->cp0_epc = regs->regs[25] = (unsigned long) ksig->ka.sa.sa_handler;
+
+	DEBUGP("SIG deliver (%s:%d): sp=0x%p pc=0x%lx ra=0x%lx\n",
+	       current->comm, current->pid,
+	       frame, regs->cp0_epc, regs->regs[31]);
+
+	return 0;
+}
+
+/*
+ * o32 compatibility on 64-bit kernels, without DSP ASE
+ */
+struct mips_abi mips_abi_32 = {
+	.setup_frame	= setup_frame_32,
+	.setup_rt_frame = setup_rt_frame_32,
+	.restart	= __NR_O32_restart_syscall,
+
+	.off_sc_fpregs = offsetof(struct sigcontext32, sc_fpregs),
+	.off_sc_fpc_csr = offsetof(struct sigcontext32, sc_fpc_csr),
+	.off_sc_used_math = offsetof(struct sigcontext32, sc_used_math),
+
+	.vdso		= &vdso_image_o32,
+};
+
+
+asmlinkage void sys32_sigreturn(nabi_no_regargs struct pt_regs regs)
+{
+	struct sigframe32 __user *frame;
+	sigset_t blocked;
+	int sig;
+
+	frame = (struct sigframe32 __user *) regs.regs[29];
+	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
+		goto badframe;
+	if (__copy_conv_sigset_from_user(&blocked, &frame->sf_mask))
+		goto badframe;
+
+	set_current_blocked(&blocked);
+
+	sig = restore_sigcontext32(&regs, &frame->sf_sc);
+	if (sig < 0)
+		goto badframe;
+	else if (sig)
+		force_sig(sig, current);
+
+	/*
+	 * Don't let your children do this ...
+	 */
+	__asm__ __volatile__(
+		"move\t$29, %0\n\t"
+		"j\tsyscall_exit"
+		:/* no outputs */
+		:"r" (&regs));
+	/* Unreached */
+
+badframe:
+	force_sig(SIGSEGV, current);
+}
diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c
index e02addc..6d0f132 100644
--- a/arch/mips/kernel/smp-bmips.c
+++ b/arch/mips/kernel/smp-bmips.c
@@ -363,6 +363,7 @@
 	pr_info("SMP: CPU%d is offline\n", cpu);
 
 	set_cpu_online(cpu, false);
+	calculate_cpu_foreign_map();
 	cpumask_clear_cpu(cpu, &cpu_callin_map);
 	clear_c0_status(IE_IRQ5);
 
diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c
index 4ed36f2..39ba5b1 100644
--- a/arch/mips/kernel/smp-cps.c
+++ b/arch/mips/kernel/smp-cps.c
@@ -206,7 +206,7 @@
 	}
 }
 
-static void boot_core(unsigned core)
+static void boot_core(unsigned int core, unsigned int vpe_id)
 {
 	u32 access, stat, seq_state;
 	unsigned timeout;
@@ -233,8 +233,9 @@
 		mips_cpc_lock_other(core);
 
 		if (mips_cm_revision() >= CM_REV_CM3) {
-			/* Run VP0 following the reset */
-			write_cpc_co_vp_run(0x1);
+			/* Run only the requested VP following the reset */
+			write_cpc_co_vp_stop(0xf);
+			write_cpc_co_vp_run(1 << vpe_id);
 
 			/*
 			 * Ensure that the VP_RUN register is written before the
@@ -306,7 +307,7 @@
 
 	if (!test_bit(core, core_power)) {
 		/* Boot a VPE on a powered down core */
-		boot_core(core);
+		boot_core(core, vpe_id);
 		goto out;
 	}
 
@@ -397,6 +398,7 @@
 	atomic_sub(1 << cpu_vpe_id(&current_cpu_data), &core_cfg->vpe_mask);
 	smp_mb__after_atomic();
 	set_cpu_online(cpu, false);
+	calculate_cpu_foreign_map();
 	cpumask_clear_cpu(cpu, &cpu_callin_map);
 
 	return 0;
@@ -411,14 +413,16 @@
 
 void play_dead(void)
 {
-	unsigned cpu, core;
+	unsigned int cpu, core, vpe_id;
 
 	local_irq_disable();
 	idle_task_exit();
 	cpu = smp_processor_id();
 	cpu_death = CPU_DEATH_POWER;
 
-	if (cpu_has_mipsmt) {
+	pr_debug("CPU%d going offline\n", cpu);
+
+	if (cpu_has_mipsmt || cpu_has_vp) {
 		core = cpu_data[cpu].core;
 
 		/* Look for another online VPE within the core */
@@ -439,10 +443,21 @@
 	complete(&cpu_death_chosen);
 
 	if (cpu_death == CPU_DEATH_HALT) {
-		/* Halt this TC */
-		write_c0_tchalt(TCHALT_H);
-		instruction_hazard();
+		vpe_id = cpu_vpe_id(&cpu_data[cpu]);
+
+		pr_debug("Halting core %d VP%d\n", core, vpe_id);
+		if (cpu_has_mipsmt) {
+			/* Halt this TC */
+			write_c0_tchalt(TCHALT_H);
+			instruction_hazard();
+		} else if (cpu_has_vp) {
+			write_cpc_cl_vp_stop(1 << vpe_id);
+
+			/* Ensure that the VP_STOP register is written */
+			wmb();
+		}
 	} else {
+		pr_debug("Gating power to core %d\n", core);
 		/* Power down the core */
 		cps_pm_enter_state(CPS_PM_POWER_GATED);
 	}
@@ -469,6 +484,7 @@
 static void cps_cpu_die(unsigned int cpu)
 {
 	unsigned core = cpu_data[cpu].core;
+	unsigned int vpe_id = cpu_vpe_id(&cpu_data[cpu]);
 	unsigned stat;
 	int err;
 
@@ -497,10 +513,12 @@
 		 * in which case the CPC will refuse to power down the core.
 		 */
 		do {
+			mips_cm_lock_other(core, vpe_id);
 			mips_cpc_lock_other(core);
 			stat = read_cpc_co_stat_conf();
 			stat &= CPC_Cx_STAT_CONF_SEQSTATE_MSK;
 			mips_cpc_unlock_other();
+			mips_cm_unlock_other();
 		} while (stat != CPC_Cx_STAT_CONF_SEQSTATE_D0 &&
 			 stat != CPC_Cx_STAT_CONF_SEQSTATE_D2 &&
 			 stat != CPC_Cx_STAT_CONF_SEQSTATE_U2);
@@ -517,6 +535,12 @@
 					       (void *)(unsigned long)cpu, 1);
 		if (err)
 			panic("Failed to call remote sibling CPU\n");
+	} else if (cpu_has_vp) {
+		do {
+			mips_cm_lock_other(core, vpe_id);
+			stat = read_cpc_co_vp_running();
+			mips_cm_unlock_other();
+		} while (stat & (1 << vpe_id));
 	}
 }
 
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index f9d01e9..f95f094 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -72,7 +72,7 @@
  * A logcal cpu mask containing only one VPE per core to
  * reduce the number of IPIs on large MT systems.
  */
-cpumask_t cpu_foreign_map __read_mostly;
+cpumask_t cpu_foreign_map[NR_CPUS] __read_mostly;
 EXPORT_SYMBOL(cpu_foreign_map);
 
 /* representing cpus for which sibling maps can be computed */
@@ -124,7 +124,7 @@
  * Calculate a new cpu_foreign_map mask whenever a
  * new cpu appears or disappears.
  */
-static inline void calculate_cpu_foreign_map(void)
+void calculate_cpu_foreign_map(void)
 {
 	int i, k, core_present;
 	cpumask_t temp_foreign_map;
@@ -141,7 +141,9 @@
 			cpumask_set_cpu(i, &temp_foreign_map);
 	}
 
-	cpumask_copy(&cpu_foreign_map, &temp_foreign_map);
+	for_each_online_cpu(i)
+		cpumask_andnot(&cpu_foreign_map[i],
+			       &temp_foreign_map, &cpu_sibling_map[i]);
 }
 
 struct plat_smp_ops *mp_ops;
@@ -344,16 +346,9 @@
 static void stop_this_cpu(void *dummy)
 {
 	/*
-	 * Remove this CPU. Be a bit slow here and
-	 * set the bits for every online CPU so we don't miss
-	 * any IPI whilst taking this VPE down.
+	 * Remove this CPU:
 	 */
 
-	cpumask_copy(&cpu_foreign_map, cpu_online_mask);
-
-	/* Make it visible to every other CPU */
-	smp_mb();
-
 	set_cpu_online(smp_processor_id(), false);
 	calculate_cpu_foreign_map();
 	local_irq_disable();
@@ -512,10 +507,17 @@
 		smp_on_other_tlbs(flush_tlb_range_ipi, &fd);
 	} else {
 		unsigned int cpu;
+		int exec = vma->vm_flags & VM_EXEC;
 
 		for_each_online_cpu(cpu) {
+			/*
+			 * flush_cache_range() will only fully flush icache if
+			 * the VMA is executable, otherwise we must invalidate
+			 * ASID without it appearing to has_valid_asid() as if
+			 * mm has been completely unused by that CPU.
+			 */
 			if (cpu != smp_processor_id() && cpu_context(cpu, mm))
-				cpu_context(cpu, mm) = 0;
+				cpu_context(cpu, mm) = !exec;
 		}
 	}
 	local_flush_tlb_range(vma, start, end);
@@ -560,8 +562,14 @@
 		unsigned int cpu;
 
 		for_each_online_cpu(cpu) {
+			/*
+			 * flush_cache_page() only does partial flushes, so
+			 * invalidate ASID without it appearing to
+			 * has_valid_asid() as if mm has been completely unused
+			 * by that CPU.
+			 */
 			if (cpu != smp_processor_id() && cpu_context(cpu, vma->vm_mm))
-				cpu_context(cpu, vma->vm_mm) = 0;
+				cpu_context(cpu, vma->vm_mm) = 1;
 		}
 	}
 	local_flush_tlb_page(vma, page);
diff --git a/arch/mips/kernel/vdso.c b/arch/mips/kernel/vdso.c
index 54e1663..9abe447 100644
--- a/arch/mips/kernel/vdso.c
+++ b/arch/mips/kernel/vdso.c
@@ -107,6 +107,16 @@
 	if (down_write_killable(&mm->mmap_sem))
 		return -EINTR;
 
+	/* Map delay slot emulation page */
+	base = mmap_region(NULL, STACK_TOP, PAGE_SIZE,
+			   VM_READ|VM_WRITE|VM_EXEC|
+			   VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
+			   0);
+	if (IS_ERR_VALUE(base)) {
+		ret = base;
+		goto out;
+	}
+
 	/*
 	 * Determine total area size. This includes the VDSO data itself, the
 	 * data page, and the GIC user page if present. Always create a mapping
diff --git a/arch/mips/lantiq/irq.c b/arch/mips/lantiq/irq.c
index 7cce277..8ac0e59 100644
--- a/arch/mips/lantiq/irq.c
+++ b/arch/mips/lantiq/irq.c
@@ -125,7 +125,7 @@
 {
 	int i;
 
-	for (i = 0; i < MAX_EIU; i++) {
+	for (i = 0; i < exin_avail; i++) {
 		if (d->hwirq == ltq_eiu_irq[i]) {
 			int val = 0;
 			int edge = 0;
@@ -173,7 +173,7 @@
 	int i;
 
 	ltq_enable_irq(d);
-	for (i = 0; i < MAX_EIU; i++) {
+	for (i = 0; i < exin_avail; i++) {
 		if (d->hwirq == ltq_eiu_irq[i]) {
 			/* by default we are low level triggered */
 			ltq_eiu_settype(d, IRQF_TRIGGER_LOW);
@@ -195,7 +195,7 @@
 	int i;
 
 	ltq_disable_irq(d);
-	for (i = 0; i < MAX_EIU; i++) {
+	for (i = 0; i < exin_avail; i++) {
 		if (d->hwirq == ltq_eiu_irq[i]) {
 			/* disable */
 			ltq_eiu_w32(ltq_eiu_r32(LTQ_EIU_EXIN_INEN) & ~BIT(i),
@@ -206,7 +206,7 @@
 }
 
 static struct irq_chip ltq_irq_type = {
-	"icu",
+	.name = "icu",
 	.irq_enable = ltq_enable_irq,
 	.irq_disable = ltq_disable_irq,
 	.irq_unmask = ltq_enable_irq,
@@ -216,7 +216,7 @@
 };
 
 static struct irq_chip ltq_eiu_type = {
-	"eiu",
+	.name = "eiu",
 	.irq_startup = ltq_startup_eiu_irq,
 	.irq_shutdown = ltq_shutdown_eiu_irq,
 	.irq_enable = ltq_enable_irq,
diff --git a/arch/mips/lantiq/prom.c b/arch/mips/lantiq/prom.c
index 5f693ac7..4cbb000 100644
--- a/arch/mips/lantiq/prom.c
+++ b/arch/mips/lantiq/prom.c
@@ -74,8 +74,8 @@
 
 	set_io_port_base((unsigned long) KSEG1);
 
-	if (fw_arg0 == -2) /* UHI interface */
-		dtb = (void *)fw_arg1;
+	if (fw_passed_dtb) /* UHI interface */
+		dtb = (void *)fw_passed_dtb;
 	else if (__dtb_start != __dtb_end)
 		dtb = (void *)__dtb_start;
 	else
diff --git a/arch/mips/loongson64/loongson-3/smp.c b/arch/mips/loongson64/loongson-3/smp.c
index e59759a..2fec6f7 100644
--- a/arch/mips/loongson64/loongson-3/smp.c
+++ b/arch/mips/loongson64/loongson-3/smp.c
@@ -417,6 +417,7 @@
 		return -EBUSY;
 
 	set_cpu_online(cpu, false);
+	calculate_cpu_foreign_map();
 	cpumask_clear_cpu(cpu, &cpu_callin_map);
 	local_irq_save(flags);
 	fixup_irqs();
diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c
index d96e912b..8afa090 100644
--- a/arch/mips/math-emu/cp1emu.c
+++ b/arch/mips/math-emu/cp1emu.c
@@ -434,8 +434,8 @@
  * a single subroutine should be used across both
  * modules.
  */
-static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
-			 unsigned long *contpc)
+int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
+		  unsigned long *contpc)
 {
 	union mips_instruction insn = (union mips_instruction)dec_insn.insn;
 	unsigned int fcr31;
@@ -1268,7 +1268,7 @@
 						 * instruction in the dslot.
 						 */
 						sig = mips_dsemul(xcp, ir,
-								  contpc);
+								  bcpc, contpc);
 						if (sig < 0)
 							break;
 						if (sig)
@@ -1323,7 +1323,7 @@
 				 * Single step the non-cp1
 				 * instruction in the dslot
 				 */
-				sig = mips_dsemul(xcp, ir, contpc);
+				sig = mips_dsemul(xcp, ir, bcpc, contpc);
 				if (sig < 0)
 					break;
 				if (sig)
diff --git a/arch/mips/math-emu/dsemul.c b/arch/mips/math-emu/dsemul.c
index 4707488..72a4642 100644
--- a/arch/mips/math-emu/dsemul.c
+++ b/arch/mips/math-emu/dsemul.c
@@ -1,3 +1,6 @@
+#include <linux/err.h>
+#include <linux/slab.h>
+
 #include <asm/branch.h>
 #include <asm/cacheflush.h>
 #include <asm/fpu_emulator.h>
@@ -5,43 +8,211 @@
 #include <asm/mipsregs.h>
 #include <asm/uaccess.h>
 
-#include "ieee754.h"
-
-/*
- * Emulate the arbitrary instruction ir at xcp->cp0_epc.  Required when
- * we have to emulate the instruction in a COP1 branch delay slot.  Do
- * not change cp0_epc due to the instruction
+/**
+ * struct emuframe - The 'emulation' frame structure
+ * @emul:	The instruction to 'emulate'.
+ * @badinst:	A break instruction to cause a return to the kernel.
  *
- * According to the spec:
- * 1) it shouldn't be a branch :-)
- * 2) it can be a COP instruction :-(
- * 3) if we are tring to run a protected memory space we must take
- *    special care on memory access instructions :-(
+ * This structure defines the frames placed within the delay slot emulation
+ * page in response to a call to mips_dsemul(). Each thread may be allocated
+ * only one frame at any given time. The kernel stores within it the
+ * instruction to be 'emulated' followed by a break instruction, then
+ * executes the frame in user mode. The break causes a trap to the kernel
+ * which leads to do_dsemulret() being called unless the instruction in
+ * @emul causes a trap itself, is a branch, or a signal is delivered to
+ * the thread. In these cases the allocated frame will either be reused by
+ * a subsequent delay slot 'emulation', or be freed during signal delivery or
+ * upon thread exit.
+ *
+ * This approach is used because:
+ *
+ * - Actually emulating all instructions isn't feasible. We would need to
+ *   be able to handle instructions from all revisions of the MIPS ISA,
+ *   all ASEs & all vendor instruction set extensions. This would be a
+ *   whole lot of work & continual maintenance burden as new instructions
+ *   are introduced, and in the case of some vendor extensions may not
+ *   even be possible. Thus we need to take the approach of actually
+ *   executing the instruction.
+ *
+ * - We must execute the instruction within user context. If we were to
+ *   execute the instruction in kernel mode then it would have access to
+ *   kernel resources without very careful checks, leaving us with a
+ *   high potential for security or stability issues to arise.
+ *
+ * - We used to place the frame on the users stack, but this requires
+ *   that the stack be executable. This is bad for security so the
+ *   per-process page is now used instead.
+ *
+ * - The instruction in @emul may be something entirely invalid for a
+ *   delay slot. The user may (intentionally or otherwise) place a branch
+ *   in a delay slot, or a kernel mode instruction, or something else
+ *   which generates an exception. Thus we can't rely upon the break in
+ *   @badinst always being hit. For this reason we track the index of the
+ *   frame allocated to each thread, allowing us to clean it up at later
+ *   points such as signal delivery or thread exit.
+ *
+ * - The user may generate a fake struct emuframe if they wish, invoking
+ *   the BRK_MEMU break instruction themselves. We must therefore not
+ *   trust that BRK_MEMU means there's actually a valid frame allocated
+ *   to the thread, and must not allow the user to do anything they
+ *   couldn't already.
  */
-
-/*
- * "Trampoline" return routine to catch exception following
- *  execution of delay-slot instruction execution.
- */
-
 struct emuframe {
 	mips_instruction	emul;
 	mips_instruction	badinst;
-	mips_instruction	cookie;
-	unsigned long		epc;
 };
 
-/*
- * Set up an emulation frame for instruction IR, from a delay slot of
- * a branch jumping to CPC.  Return 0 if successful, -1 if no emulation
- * required, otherwise a signal number causing a frame setup failure.
- */
-int mips_dsemul(struct pt_regs *regs, mips_instruction ir, unsigned long cpc)
+static const int emupage_frame_count = PAGE_SIZE / sizeof(struct emuframe);
+
+static inline __user struct emuframe *dsemul_page(void)
+{
+	return (__user struct emuframe *)STACK_TOP;
+}
+
+static int alloc_emuframe(void)
+{
+	mm_context_t *mm_ctx = &current->mm->context;
+	int idx;
+
+retry:
+	spin_lock(&mm_ctx->bd_emupage_lock);
+
+	/* Ensure we have an allocation bitmap */
+	if (!mm_ctx->bd_emupage_allocmap) {
+		mm_ctx->bd_emupage_allocmap =
+			kcalloc(BITS_TO_LONGS(emupage_frame_count),
+					      sizeof(unsigned long),
+				GFP_ATOMIC);
+
+		if (!mm_ctx->bd_emupage_allocmap) {
+			idx = BD_EMUFRAME_NONE;
+			goto out_unlock;
+		}
+	}
+
+	/* Attempt to allocate a single bit/frame */
+	idx = bitmap_find_free_region(mm_ctx->bd_emupage_allocmap,
+				      emupage_frame_count, 0);
+	if (idx < 0) {
+		/*
+		 * Failed to allocate a frame. We'll wait until one becomes
+		 * available. We unlock the page so that other threads actually
+		 * get the opportunity to free their frames, which means
+		 * technically the result of bitmap_full may be incorrect.
+		 * However the worst case is that we repeat all this and end up
+		 * back here again.
+		 */
+		spin_unlock(&mm_ctx->bd_emupage_lock);
+		if (!wait_event_killable(mm_ctx->bd_emupage_queue,
+			!bitmap_full(mm_ctx->bd_emupage_allocmap,
+				     emupage_frame_count)))
+			goto retry;
+
+		/* Received a fatal signal - just give in */
+		return BD_EMUFRAME_NONE;
+	}
+
+	/* Success! */
+	pr_debug("allocate emuframe %d to %d\n", idx, current->pid);
+out_unlock:
+	spin_unlock(&mm_ctx->bd_emupage_lock);
+	return idx;
+}
+
+static void free_emuframe(int idx, struct mm_struct *mm)
+{
+	mm_context_t *mm_ctx = &mm->context;
+
+	spin_lock(&mm_ctx->bd_emupage_lock);
+
+	pr_debug("free emuframe %d from %d\n", idx, current->pid);
+	bitmap_clear(mm_ctx->bd_emupage_allocmap, idx, 1);
+
+	/* If some thread is waiting for a frame, now's its chance */
+	wake_up(&mm_ctx->bd_emupage_queue);
+
+	spin_unlock(&mm_ctx->bd_emupage_lock);
+}
+
+static bool within_emuframe(struct pt_regs *regs)
+{
+	unsigned long base = (unsigned long)dsemul_page();
+
+	if (regs->cp0_epc < base)
+		return false;
+	if (regs->cp0_epc >= (base + PAGE_SIZE))
+		return false;
+
+	return true;
+}
+
+bool dsemul_thread_cleanup(struct task_struct *tsk)
+{
+	int fr_idx;
+
+	/* Clear any allocated frame, retrieving its index */
+	fr_idx = atomic_xchg(&tsk->thread.bd_emu_frame, BD_EMUFRAME_NONE);
+
+	/* If no frame was allocated, we're done */
+	if (fr_idx == BD_EMUFRAME_NONE)
+		return false;
+
+	task_lock(tsk);
+
+	/* Free the frame that this thread had allocated */
+	if (tsk->mm)
+		free_emuframe(fr_idx, tsk->mm);
+
+	task_unlock(tsk);
+	return true;
+}
+
+bool dsemul_thread_rollback(struct pt_regs *regs)
+{
+	struct emuframe __user *fr;
+	int fr_idx;
+
+	/* Do nothing if we're not executing from a frame */
+	if (!within_emuframe(regs))
+		return false;
+
+	/* Find the frame being executed */
+	fr_idx = atomic_read(&current->thread.bd_emu_frame);
+	if (fr_idx == BD_EMUFRAME_NONE)
+		return false;
+	fr = &dsemul_page()[fr_idx];
+
+	/*
+	 * If the PC is at the emul instruction, roll back to the branch. If
+	 * PC is at the badinst (break) instruction, we've already emulated the
+	 * instruction so progress to the continue PC. If it's anything else
+	 * then something is amiss & the user has branched into some other area
+	 * of the emupage - we'll free the allocated frame anyway.
+	 */
+	if (msk_isa16_mode(regs->cp0_epc) == (unsigned long)&fr->emul)
+		regs->cp0_epc = current->thread.bd_emu_branch_pc;
+	else if (msk_isa16_mode(regs->cp0_epc) == (unsigned long)&fr->badinst)
+		regs->cp0_epc = current->thread.bd_emu_cont_pc;
+
+	atomic_set(&current->thread.bd_emu_frame, BD_EMUFRAME_NONE);
+	free_emuframe(fr_idx, current->mm);
+	return true;
+}
+
+void dsemul_mm_cleanup(struct mm_struct *mm)
+{
+	mm_context_t *mm_ctx = &mm->context;
+
+	kfree(mm_ctx->bd_emupage_allocmap);
+}
+
+int mips_dsemul(struct pt_regs *regs, mips_instruction ir,
+		unsigned long branch_pc, unsigned long cont_pc)
 {
 	int isa16 = get_isa16_mode(regs->cp0_epc);
 	mips_instruction break_math;
 	struct emuframe __user *fr;
-	int err;
+	int err, fr_idx;
 
 	/* NOP is easy */
 	if (ir == 0)
@@ -68,30 +239,20 @@
 		}
 	}
 
-	pr_debug("dsemul %lx %lx\n", regs->cp0_epc, cpc);
+	pr_debug("dsemul 0x%08lx cont at 0x%08lx\n", regs->cp0_epc, cont_pc);
 
-	/*
-	 * The strategy is to push the instruction onto the user stack
-	 * and put a trap after it which we can catch and jump to
-	 * the required address any alternative apart from full
-	 * instruction emulation!!.
-	 *
-	 * Algorithmics used a system call instruction, and
-	 * borrowed that vector.  MIPS/Linux version is a bit
-	 * more heavyweight in the interests of portability and
-	 * multiprocessor support.  For Linux we use a BREAK 514
-	 * instruction causing a breakpoint exception.
-	 */
+	/* Allocate a frame if we don't already have one */
+	fr_idx = atomic_read(&current->thread.bd_emu_frame);
+	if (fr_idx == BD_EMUFRAME_NONE)
+		fr_idx = alloc_emuframe();
+	if (fr_idx == BD_EMUFRAME_NONE)
+		return SIGBUS;
+	fr = &dsemul_page()[fr_idx];
+
+	/* Retrieve the appropriately encoded break instruction */
 	break_math = BREAK_MATH(isa16);
 
-	/* Ensure that the two instructions are in the same cache line */
-	fr = (struct emuframe __user *)
-		((regs->regs[29] - sizeof(struct emuframe)) & ~0x7);
-
-	/* Verify that the stack pointer is not completely insane */
-	if (unlikely(!access_ok(VERIFY_WRITE, fr, sizeof(struct emuframe))))
-		return SIGBUS;
-
+	/* Write the instructions to the frame */
 	if (isa16) {
 		err = __put_user(ir >> 16,
 				 (u16 __user *)(&fr->emul));
@@ -106,84 +267,36 @@
 		err |= __put_user(break_math, &fr->badinst);
 	}
 
-	err |= __put_user((mips_instruction)BD_COOKIE, &fr->cookie);
-	err |= __put_user(cpc, &fr->epc);
-
 	if (unlikely(err)) {
 		MIPS_FPU_EMU_INC_STATS(errors);
+		free_emuframe(fr_idx, current->mm);
 		return SIGBUS;
 	}
 
+	/* Record the PC of the branch, PC to continue from & frame index */
+	current->thread.bd_emu_branch_pc = branch_pc;
+	current->thread.bd_emu_cont_pc = cont_pc;
+	atomic_set(&current->thread.bd_emu_frame, fr_idx);
+
+	/* Change user register context to execute the frame */
 	regs->cp0_epc = (unsigned long)&fr->emul | isa16;
 
+	/* Ensure the icache observes our newly written frame */
 	flush_cache_sigtramp((unsigned long)&fr->emul);
 
 	return 0;
 }
 
-int do_dsemulret(struct pt_regs *xcp)
+bool do_dsemulret(struct pt_regs *xcp)
 {
-	int isa16 = get_isa16_mode(xcp->cp0_epc);
-	struct emuframe __user *fr;
-	unsigned long epc;
-	u32 insn, cookie;
-	int err = 0;
-	u16 instr[2];
-
-	fr = (struct emuframe __user *)
-		(msk_isa16_mode(xcp->cp0_epc) - sizeof(mips_instruction));
-
-	/*
-	 * If we can't even access the area, something is very wrong, but we'll
-	 * leave that to the default handling
-	 */
-	if (!access_ok(VERIFY_READ, fr, sizeof(struct emuframe)))
-		return 0;
-
-	/*
-	 * Do some sanity checking on the stackframe:
-	 *
-	 *  - Is the instruction pointed to by the EPC an BREAK_MATH?
-	 *  - Is the following memory word the BD_COOKIE?
-	 */
-	if (isa16) {
-		err = __get_user(instr[0],
-				 (u16 __user *)(&fr->badinst));
-		err |= __get_user(instr[1],
-				  (u16 __user *)((long)(&fr->badinst) + 2));
-		insn = (instr[0] << 16) | instr[1];
-	} else {
-		err = __get_user(insn, &fr->badinst);
-	}
-	err |= __get_user(cookie, &fr->cookie);
-
-	if (unlikely(err ||
-		     insn != BREAK_MATH(isa16) || cookie != BD_COOKIE)) {
+	/* Cleanup the allocated frame, returning if there wasn't one */
+	if (!dsemul_thread_cleanup(current)) {
 		MIPS_FPU_EMU_INC_STATS(errors);
-		return 0;
-	}
-
-	/*
-	 * At this point, we are satisfied that it's a BD emulation trap.  Yes,
-	 * a user might have deliberately put two malformed and useless
-	 * instructions in a row in his program, in which case he's in for a
-	 * nasty surprise - the next instruction will be treated as a
-	 * continuation address!  Alas, this seems to be the only way that we
-	 * can handle signals, recursion, and longjmps() in the context of
-	 * emulating the branch delay instruction.
-	 */
-
-	pr_debug("dsemulret\n");
-
-	if (__get_user(epc, &fr->epc)) {		/* Saved EPC */
-		/* This is not a good situation to be in */
-		force_sig(SIGBUS, current);
-
-		return 0;
+		return false;
 	}
 
 	/* Set EPC to return to post-branch instruction */
-	xcp->cp0_epc = epc;
-	MIPS_FPU_EMU_INC_STATS(ds_emul);
-	return 1;
+	xcp->cp0_epc = current->thread.bd_emu_cont_pc;
+	pr_debug("dsemulret to 0x%08lx\n", xcp->cp0_epc);
+	return true;
 }
diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
index ef7f925..3227a0a 100644
--- a/arch/mips/mm/c-r4k.c
+++ b/arch/mips/mm/c-r4k.c
@@ -40,6 +40,51 @@
 #include <asm/mips-cm.h>
 
 /*
+ * Bits describing what cache ops an SMP callback function may perform.
+ *
+ * R4K_HIT   -	Virtual user or kernel address based cache operations. The
+ *		active_mm must be checked before using user addresses, falling
+ *		back to kmap.
+ * R4K_INDEX -	Index based cache operations.
+ */
+
+#define R4K_HIT		BIT(0)
+#define R4K_INDEX	BIT(1)
+
+/**
+ * r4k_op_needs_ipi() - Decide if a cache op needs to be done on every core.
+ * @type:	Type of cache operations (R4K_HIT or R4K_INDEX).
+ *
+ * Decides whether a cache op needs to be performed on every core in the system.
+ * This may change depending on the @type of cache operation, as well as the set
+ * of online CPUs, so preemption should be disabled by the caller to prevent CPU
+ * hotplug from changing the result.
+ *
+ * Returns:	1 if the cache operation @type should be done on every core in
+ *		the system.
+ *		0 if the cache operation @type is globalized and only needs to
+ *		be performed on a simple CPU.
+ */
+static inline bool r4k_op_needs_ipi(unsigned int type)
+{
+	/* The MIPS Coherence Manager (CM) globalizes address-based cache ops */
+	if (type == R4K_HIT && mips_cm_present())
+		return false;
+
+	/*
+	 * Hardware doesn't globalize the required cache ops, so SMP calls may
+	 * be needed, but only if there are foreign CPUs (non-siblings with
+	 * separate caches).
+	 */
+	/* cpu_foreign_map[] undeclared when !CONFIG_SMP */
+#ifdef CONFIG_SMP
+	return !cpumask_empty(&cpu_foreign_map[0]);
+#else
+	return false;
+#endif
+}
+
+/*
  * Special Variant of smp_call_function for use by cache functions:
  *
  *  o No return value
@@ -48,30 +93,17 @@
  *    primary cache.
  *  o doesn't disable interrupts on the local CPU
  */
-static inline void r4k_on_each_cpu(void (*func) (void *info), void *info)
+static inline void r4k_on_each_cpu(unsigned int type,
+				   void (*func)(void *info), void *info)
 {
 	preempt_disable();
-
-	/*
-	 * The Coherent Manager propagates address-based cache ops to other
-	 * cores but not index-based ops. However, r4k_on_each_cpu is used
-	 * in both cases so there is no easy way to tell what kind of op is
-	 * executed to the other cores. The best we can probably do is
-	 * to restrict that call when a CM is not present because both
-	 * CM-based SMP protocols (CMP & CPS) restrict index-based cache ops.
-	 */
-	if (!mips_cm_present())
-		smp_call_function_many(&cpu_foreign_map, func, info, 1);
+	if (r4k_op_needs_ipi(type))
+		smp_call_function_many(&cpu_foreign_map[smp_processor_id()],
+				       func, info, 1);
 	func(info);
 	preempt_enable();
 }
 
-#if defined(CONFIG_MIPS_CMP) || defined(CONFIG_MIPS_CPS)
-#define cpu_has_safe_index_cacheops 0
-#else
-#define cpu_has_safe_index_cacheops 1
-#endif
-
 /*
  * Must die.
  */
@@ -462,22 +494,44 @@
 
 static void r4k___flush_cache_all(void)
 {
-	r4k_on_each_cpu(local_r4k___flush_cache_all, NULL);
+	r4k_on_each_cpu(R4K_INDEX, local_r4k___flush_cache_all, NULL);
 }
 
-static inline int has_valid_asid(const struct mm_struct *mm)
+/**
+ * has_valid_asid() - Determine if an mm already has an ASID.
+ * @mm:		Memory map.
+ * @type:	R4K_HIT or R4K_INDEX, type of cache op.
+ *
+ * Determines whether @mm already has an ASID on any of the CPUs which cache ops
+ * of type @type within an r4k_on_each_cpu() call will affect. If
+ * r4k_on_each_cpu() does an SMP call to a single VPE in each core, then the
+ * scope of the operation is confined to sibling CPUs, otherwise all online CPUs
+ * will need to be checked.
+ *
+ * Must be called in non-preemptive context.
+ *
+ * Returns:	1 if the CPUs affected by @type cache ops have an ASID for @mm.
+ *		0 otherwise.
+ */
+static inline int has_valid_asid(const struct mm_struct *mm, unsigned int type)
 {
-#ifdef CONFIG_MIPS_MT_SMP
-	int i;
+	unsigned int i;
+	const cpumask_t *mask = cpu_present_mask;
 
-	for_each_online_cpu(i)
+	/* cpu_sibling_map[] undeclared when !CONFIG_SMP */
+#ifdef CONFIG_SMP
+	/*
+	 * If r4k_on_each_cpu does SMP calls, it does them to a single VPE in
+	 * each foreign core, so we only need to worry about siblings.
+	 * Otherwise we need to worry about all present CPUs.
+	 */
+	if (r4k_op_needs_ipi(type))
+		mask = &cpu_sibling_map[smp_processor_id()];
+#endif
+	for_each_cpu(i, mask)
 		if (cpu_context(i, mm))
 			return 1;
-
 	return 0;
-#else
-	return cpu_context(smp_processor_id(), mm);
-#endif
 }
 
 static void r4k__flush_cache_vmap(void)
@@ -490,12 +544,16 @@
 	r4k_blast_dcache();
 }
 
+/*
+ * Note: flush_tlb_range() assumes flush_cache_range() sufficiently flushes
+ * whole caches when vma is executable.
+ */
 static inline void local_r4k_flush_cache_range(void * args)
 {
 	struct vm_area_struct *vma = args;
 	int exec = vma->vm_flags & VM_EXEC;
 
-	if (!(has_valid_asid(vma->vm_mm)))
+	if (!has_valid_asid(vma->vm_mm, R4K_INDEX))
 		return;
 
 	/*
@@ -516,14 +574,14 @@
 	int exec = vma->vm_flags & VM_EXEC;
 
 	if (cpu_has_dc_aliases || exec)
-		r4k_on_each_cpu(local_r4k_flush_cache_range, vma);
+		r4k_on_each_cpu(R4K_INDEX, local_r4k_flush_cache_range, vma);
 }
 
 static inline void local_r4k_flush_cache_mm(void * args)
 {
 	struct mm_struct *mm = args;
 
-	if (!has_valid_asid(mm))
+	if (!has_valid_asid(mm, R4K_INDEX))
 		return;
 
 	/*
@@ -548,7 +606,7 @@
 	if (!cpu_has_dc_aliases)
 		return;
 
-	r4k_on_each_cpu(local_r4k_flush_cache_mm, mm);
+	r4k_on_each_cpu(R4K_INDEX, local_r4k_flush_cache_mm, mm);
 }
 
 struct flush_cache_page_args {
@@ -573,10 +631,10 @@
 	void *vaddr;
 
 	/*
-	 * If ownes no valid ASID yet, cannot possibly have gotten
+	 * If owns no valid ASID yet, cannot possibly have gotten
 	 * this page into the cache.
 	 */
-	if (!has_valid_asid(mm))
+	if (!has_valid_asid(mm, R4K_HIT))
 		return;
 
 	addr &= PAGE_MASK;
@@ -643,7 +701,7 @@
 	args.addr = addr;
 	args.pfn = pfn;
 
-	r4k_on_each_cpu(local_r4k_flush_cache_page, &args);
+	r4k_on_each_cpu(R4K_HIT, local_r4k_flush_cache_page, &args);
 }
 
 static inline void local_r4k_flush_data_cache_page(void * addr)
@@ -656,18 +714,23 @@
 	if (in_atomic())
 		local_r4k_flush_data_cache_page((void *)addr);
 	else
-		r4k_on_each_cpu(local_r4k_flush_data_cache_page, (void *) addr);
+		r4k_on_each_cpu(R4K_HIT, local_r4k_flush_data_cache_page,
+				(void *) addr);
 }
 
 struct flush_icache_range_args {
 	unsigned long start;
 	unsigned long end;
+	unsigned int type;
 };
 
-static inline void local_r4k_flush_icache_range(unsigned long start, unsigned long end)
+static inline void __local_r4k_flush_icache_range(unsigned long start,
+						  unsigned long end,
+						  unsigned int type)
 {
 	if (!cpu_has_ic_fills_f_dc) {
-		if (end - start >= dcache_size) {
+		if (type == R4K_INDEX ||
+		    (type & R4K_INDEX && end - start >= dcache_size)) {
 			r4k_blast_dcache();
 		} else {
 			R4600_HIT_CACHEOP_WAR_IMPL;
@@ -675,7 +738,8 @@
 		}
 	}
 
-	if (end - start > icache_size)
+	if (type == R4K_INDEX ||
+	    (type & R4K_INDEX && end - start > icache_size))
 		r4k_blast_icache();
 	else {
 		switch (boot_cpu_type()) {
@@ -701,23 +765,52 @@
 #endif
 }
 
+static inline void local_r4k_flush_icache_range(unsigned long start,
+						unsigned long end)
+{
+	__local_r4k_flush_icache_range(start, end, R4K_HIT | R4K_INDEX);
+}
+
 static inline void local_r4k_flush_icache_range_ipi(void *args)
 {
 	struct flush_icache_range_args *fir_args = args;
 	unsigned long start = fir_args->start;
 	unsigned long end = fir_args->end;
+	unsigned int type = fir_args->type;
 
-	local_r4k_flush_icache_range(start, end);
+	__local_r4k_flush_icache_range(start, end, type);
 }
 
 static void r4k_flush_icache_range(unsigned long start, unsigned long end)
 {
 	struct flush_icache_range_args args;
+	unsigned long size, cache_size;
 
 	args.start = start;
 	args.end = end;
+	args.type = R4K_HIT | R4K_INDEX;
 
-	r4k_on_each_cpu(local_r4k_flush_icache_range_ipi, &args);
+	/*
+	 * Indexed cache ops require an SMP call.
+	 * Consider if that can or should be avoided.
+	 */
+	preempt_disable();
+	if (r4k_op_needs_ipi(R4K_INDEX) && !r4k_op_needs_ipi(R4K_HIT)) {
+		/*
+		 * If address-based cache ops don't require an SMP call, then
+		 * use them exclusively for small flushes.
+		 */
+		size = start - end;
+		cache_size = icache_size;
+		if (!cpu_has_ic_fills_f_dc) {
+			size *= 2;
+			cache_size += dcache_size;
+		}
+		if (size <= cache_size)
+			args.type &= ~R4K_INDEX;
+	}
+	r4k_on_each_cpu(args.type, local_r4k_flush_icache_range_ipi, &args);
+	preempt_enable();
 	instruction_hazard();
 }
 
@@ -744,7 +837,7 @@
 	 * subset property so we have to flush the primary caches
 	 * explicitly
 	 */
-	if (cpu_has_safe_index_cacheops && size >= dcache_size) {
+	if (size >= dcache_size) {
 		r4k_blast_dcache();
 	} else {
 		R4600_HIT_CACHEOP_WAR_IMPL;
@@ -781,7 +874,7 @@
 		return;
 	}
 
-	if (cpu_has_safe_index_cacheops && size >= dcache_size) {
+	if (size >= dcache_size) {
 		r4k_blast_dcache();
 	} else {
 		R4600_HIT_CACHEOP_WAR_IMPL;
@@ -794,25 +887,76 @@
 }
 #endif /* CONFIG_DMA_NONCOHERENT || CONFIG_DMA_MAYBE_COHERENT */
 
+struct flush_cache_sigtramp_args {
+	struct mm_struct *mm;
+	struct page *page;
+	unsigned long addr;
+};
+
 /*
  * While we're protected against bad userland addresses we don't care
  * very much about what happens in that case.  Usually a segmentation
  * fault will dump the process later on anyway ...
  */
-static void local_r4k_flush_cache_sigtramp(void * arg)
+static void local_r4k_flush_cache_sigtramp(void *args)
 {
+	struct flush_cache_sigtramp_args *fcs_args = args;
+	unsigned long addr = fcs_args->addr;
+	struct page *page = fcs_args->page;
+	struct mm_struct *mm = fcs_args->mm;
+	int map_coherent = 0;
+	void *vaddr;
+
 	unsigned long ic_lsize = cpu_icache_line_size();
 	unsigned long dc_lsize = cpu_dcache_line_size();
 	unsigned long sc_lsize = cpu_scache_line_size();
-	unsigned long addr = (unsigned long) arg;
+
+	/*
+	 * If owns no valid ASID yet, cannot possibly have gotten
+	 * this page into the cache.
+	 */
+	if (!has_valid_asid(mm, R4K_HIT))
+		return;
+
+	if (mm == current->active_mm) {
+		vaddr = NULL;
+	} else {
+		/*
+		 * Use kmap_coherent or kmap_atomic to do flushes for
+		 * another ASID than the current one.
+		 */
+		map_coherent = (cpu_has_dc_aliases &&
+				page_mapcount(page) &&
+				!Page_dcache_dirty(page));
+		if (map_coherent)
+			vaddr = kmap_coherent(page, addr);
+		else
+			vaddr = kmap_atomic(page);
+		addr = (unsigned long)vaddr + (addr & ~PAGE_MASK);
+	}
 
 	R4600_HIT_CACHEOP_WAR_IMPL;
-	if (dc_lsize)
-		protected_writeback_dcache_line(addr & ~(dc_lsize - 1));
-	if (!cpu_icache_snoops_remote_store && scache_size)
-		protected_writeback_scache_line(addr & ~(sc_lsize - 1));
+	if (!cpu_has_ic_fills_f_dc) {
+		if (dc_lsize)
+			vaddr ? flush_dcache_line(addr & ~(dc_lsize - 1))
+			      : protected_writeback_dcache_line(
+							addr & ~(dc_lsize - 1));
+		if (!cpu_icache_snoops_remote_store && scache_size)
+			vaddr ? flush_scache_line(addr & ~(sc_lsize - 1))
+			      : protected_writeback_scache_line(
+							addr & ~(sc_lsize - 1));
+	}
 	if (ic_lsize)
-		protected_flush_icache_line(addr & ~(ic_lsize - 1));
+		vaddr ? flush_icache_line(addr & ~(ic_lsize - 1))
+		      : protected_flush_icache_line(addr & ~(ic_lsize - 1));
+
+	if (vaddr) {
+		if (map_coherent)
+			kunmap_coherent();
+		else
+			kunmap_atomic(vaddr);
+	}
+
 	if (MIPS4K_ICACHE_REFILL_WAR) {
 		__asm__ __volatile__ (
 			".set push\n\t"
@@ -837,7 +981,23 @@
 
 static void r4k_flush_cache_sigtramp(unsigned long addr)
 {
-	r4k_on_each_cpu(local_r4k_flush_cache_sigtramp, (void *) addr);
+	struct flush_cache_sigtramp_args args;
+	int npages;
+
+	down_read(&current->mm->mmap_sem);
+
+	npages = get_user_pages_fast(addr, 1, 0, &args.page);
+	if (npages < 1)
+		goto out;
+
+	args.mm = current->mm;
+	args.addr = addr;
+
+	r4k_on_each_cpu(R4K_HIT, local_r4k_flush_cache_sigtramp, &args);
+
+	put_page(args.page);
+out:
+	up_read(&current->mm->mmap_sem);
 }
 
 static void r4k_flush_icache_all(void)
@@ -851,6 +1011,15 @@
 	int		size;
 };
 
+static inline void local_r4k_flush_kernel_vmap_range_index(void *args)
+{
+	/*
+	 * Aliases only affect the primary caches so don't bother with
+	 * S-caches or T-caches.
+	 */
+	r4k_blast_dcache();
+}
+
 static inline void local_r4k_flush_kernel_vmap_range(void *args)
 {
 	struct flush_kernel_vmap_range_args *vmra = args;
@@ -861,12 +1030,8 @@
 	 * Aliases only affect the primary caches so don't bother with
 	 * S-caches or T-caches.
 	 */
-	if (cpu_has_safe_index_cacheops && size >= dcache_size)
-		r4k_blast_dcache();
-	else {
-		R4600_HIT_CACHEOP_WAR_IMPL;
-		blast_dcache_range(vaddr, vaddr + size);
-	}
+	R4600_HIT_CACHEOP_WAR_IMPL;
+	blast_dcache_range(vaddr, vaddr + size);
 }
 
 static void r4k_flush_kernel_vmap_range(unsigned long vaddr, int size)
@@ -876,7 +1041,12 @@
 	args.vaddr = (unsigned long) vaddr;
 	args.size = size;
 
-	r4k_on_each_cpu(local_r4k_flush_kernel_vmap_range, &args);
+	if (size >= dcache_size)
+		r4k_on_each_cpu(R4K_INDEX,
+				local_r4k_flush_kernel_vmap_range_index, NULL);
+	else
+		r4k_on_each_cpu(R4K_HIT, local_r4k_flush_kernel_vmap_range,
+				&args);
 }
 
 static inline void rm7k_erratum31(void)
diff --git a/arch/mips/mm/sc-debugfs.c b/arch/mips/mm/sc-debugfs.c
index 5eefe32..01f1154 100644
--- a/arch/mips/mm/sc-debugfs.c
+++ b/arch/mips/mm/sc-debugfs.c
@@ -73,8 +73,8 @@
 
 	file = debugfs_create_file("prefetch", S_IRUGO | S_IWUSR, dir,
 				   NULL, &sc_prefetch_fops);
-	if (IS_ERR(file))
-		return PTR_ERR(file);
+	if (!file)
+		return -ENOMEM;
 
 	return 0;
 }
diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index 4004b65..7886cce 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -888,7 +888,7 @@
 		}
 	}
 	if (!did_vmalloc_branch) {
-		if (uasm_in_compat_space_p(swpd) && !uasm_rel_lo(swpd)) {
+		if (single_insn_swpd) {
 			uasm_il_b(p, r, label_vmalloc_done);
 			uasm_i_lui(p, ptr, uasm_rel_hi(swpd));
 		} else {
diff --git a/arch/mips/mm/uasm-mips.c b/arch/mips/mm/uasm-mips.c
index 9c2220a..45e3b87 100644
--- a/arch/mips/mm/uasm-mips.c
+++ b/arch/mips/mm/uasm-mips.c
@@ -65,7 +65,7 @@
 #ifndef CONFIG_CPU_MIPSR6
 	{ insn_cache,  M(cache_op, 0, 0, 0, 0, 0),  RS | RT | SIMM },
 #else
-	{ insn_cache,  M6(cache_op, 0, 0, 0, cache6_op),  RS | RT | SIMM9 },
+	{ insn_cache,  M6(spec3_op, 0, 0, 0, cache6_op),  RS | RT | SIMM9 },
 #endif
 	{ insn_daddiu, M(daddiu_op, 0, 0, 0, 0, 0), RS | RT | SIMM },
 	{ insn_daddu, M(spec_op, 0, 0, 0, 0, daddu_op), RS | RT | RD },
diff --git a/arch/mips/mm/uasm.c b/arch/mips/mm/uasm.c
index ad718de..0b37340 100644
--- a/arch/mips/mm/uasm.c
+++ b/arch/mips/mm/uasm.c
@@ -370,11 +370,7 @@
 int ISAFUNC(uasm_in_compat_space_p)(long addr)
 {
 	/* Is this address in 32bit compat space? */
-#ifdef CONFIG_64BIT
-	return (((addr) & 0xffffffff00000000L) == 0xffffffff00000000L);
-#else
-	return 1;
-#endif
+	return addr == (int)addr;
 }
 UASM_EXPORT_SYMBOL(ISAFUNC(uasm_in_compat_space_p));
 
diff --git a/arch/mips/pic32/pic32mzda/init.c b/arch/mips/pic32/pic32mzda/init.c
index 775ff90..a794037 100644
--- a/arch/mips/pic32/pic32mzda/init.c
+++ b/arch/mips/pic32/pic32mzda/init.c
@@ -33,8 +33,8 @@
 {
 	ulong ftaddr = 0;
 
-	if ((fw_arg0 == -2) && fw_arg1 && !fw_arg2 && !fw_arg3)
-		return (ulong)fw_arg1;
+	if (fw_passed_dtb && !fw_arg2 && !fw_arg3)
+		return (ulong)fw_passed_dtb;
 
 	if (__dtb_start < __dtb_end)
 		ftaddr = (ulong)__dtb_start;
diff --git a/arch/mips/pistachio/init.c b/arch/mips/pistachio/init.c
index ab79828..387b9df 100644
--- a/arch/mips/pistachio/init.c
+++ b/arch/mips/pistachio/init.c
@@ -60,29 +60,6 @@
 	return sys_type;
 }
 
-static void __init plat_setup_iocoherency(void)
-{
-	/*
-	 * Kernel has been configured with software coherency
-	 * but we might choose to turn it off and use hardware
-	 * coherency instead.
-	 */
-	if (mips_cm_numiocu() != 0) {
-		/* Nothing special needs to be done to enable coherency */
-		pr_info("CMP IOCU detected\n");
-		hw_coherentio = 1;
-		if (coherentio == 0)
-			pr_info("Hardware DMA cache coherency disabled\n");
-		else
-			pr_info("Hardware DMA cache coherency enabled\n");
-	} else {
-		if (coherentio == 1)
-			pr_info("Hardware DMA cache coherency unsupported, but enabled from command line!\n");
-		else
-			pr_info("Software DMA cache coherency enabled\n");
-	}
-}
-
 void __init *plat_get_fdt(void)
 {
 	if (fw_arg0 != -2)
@@ -93,8 +70,6 @@
 void __init plat_mem_setup(void)
 {
 	__dt_setup_arch(plat_get_fdt());
-
-	plat_setup_iocoherency();
 }
 
 #define DEFAULT_CPC_BASE_ADDR	0x1bde0000
diff --git a/arch/mips/ralink/mt7620.c b/arch/mips/ralink/mt7620.c
index d40edda..3c7c9bf 100644
--- a/arch/mips/ralink/mt7620.c
+++ b/arch/mips/ralink/mt7620.c
@@ -175,7 +175,7 @@
 };
 
 static struct rt2880_pmx_func spis_grp_mt7628[] = {
-	FUNC("pwm", 3, 14, 4),
+	FUNC("pwm_uart2", 3, 14, 4),
 	FUNC("util", 2, 14, 4),
 	FUNC("gpio", 1, 14, 4),
 	FUNC("spis", 0, 14, 4),
diff --git a/drivers/ssb/driver_gpio.c b/drivers/ssb/driver_gpio.c
index 180e027..796e220 100644
--- a/drivers/ssb/driver_gpio.c
+++ b/drivers/ssb/driver_gpio.c
@@ -23,7 +23,7 @@
  **************************************************/
 
 #if IS_ENABLED(CONFIG_SSB_EMBEDDED)
-static int ssb_gpio_to_irq(struct gpio_chip *chip, unsigned gpio)
+static int ssb_gpio_to_irq(struct gpio_chip *chip, unsigned int gpio)
 {
 	struct ssb_bus *bus = gpiochip_get_data(chip);
 
@@ -38,14 +38,14 @@
  * ChipCommon
  **************************************************/
 
-static int ssb_gpio_chipco_get_value(struct gpio_chip *chip, unsigned gpio)
+static int ssb_gpio_chipco_get_value(struct gpio_chip *chip, unsigned int gpio)
 {
 	struct ssb_bus *bus = gpiochip_get_data(chip);
 
 	return !!ssb_chipco_gpio_in(&bus->chipco, 1 << gpio);
 }
 
-static void ssb_gpio_chipco_set_value(struct gpio_chip *chip, unsigned gpio,
+static void ssb_gpio_chipco_set_value(struct gpio_chip *chip, unsigned int gpio,
 				      int value)
 {
 	struct ssb_bus *bus = gpiochip_get_data(chip);
@@ -54,7 +54,7 @@
 }
 
 static int ssb_gpio_chipco_direction_input(struct gpio_chip *chip,
-					   unsigned gpio)
+					   unsigned int gpio)
 {
 	struct ssb_bus *bus = gpiochip_get_data(chip);
 
@@ -63,7 +63,7 @@
 }
 
 static int ssb_gpio_chipco_direction_output(struct gpio_chip *chip,
-					    unsigned gpio, int value)
+					    unsigned int gpio, int value)
 {
 	struct ssb_bus *bus = gpiochip_get_data(chip);
 
@@ -72,7 +72,7 @@
 	return 0;
 }
 
-static int ssb_gpio_chipco_request(struct gpio_chip *chip, unsigned gpio)
+static int ssb_gpio_chipco_request(struct gpio_chip *chip, unsigned int gpio)
 {
 	struct ssb_bus *bus = gpiochip_get_data(chip);
 
@@ -85,7 +85,7 @@
 	return 0;
 }
 
-static void ssb_gpio_chipco_free(struct gpio_chip *chip, unsigned gpio)
+static void ssb_gpio_chipco_free(struct gpio_chip *chip, unsigned int gpio)
 {
 	struct ssb_bus *bus = gpiochip_get_data(chip);
 
@@ -256,14 +256,14 @@
 
 #ifdef CONFIG_SSB_DRIVER_EXTIF
 
-static int ssb_gpio_extif_get_value(struct gpio_chip *chip, unsigned gpio)
+static int ssb_gpio_extif_get_value(struct gpio_chip *chip, unsigned int gpio)
 {
 	struct ssb_bus *bus = gpiochip_get_data(chip);
 
 	return !!ssb_extif_gpio_in(&bus->extif, 1 << gpio);
 }
 
-static void ssb_gpio_extif_set_value(struct gpio_chip *chip, unsigned gpio,
+static void ssb_gpio_extif_set_value(struct gpio_chip *chip, unsigned int gpio,
 				     int value)
 {
 	struct ssb_bus *bus = gpiochip_get_data(chip);
@@ -272,7 +272,7 @@
 }
 
 static int ssb_gpio_extif_direction_input(struct gpio_chip *chip,
-					  unsigned gpio)
+					  unsigned int gpio)
 {
 	struct ssb_bus *bus = gpiochip_get_data(chip);
 
@@ -281,7 +281,7 @@
 }
 
 static int ssb_gpio_extif_direction_output(struct gpio_chip *chip,
-					   unsigned gpio, int value)
+					   unsigned int gpio, int value)
 {
 	struct ssb_bus *bus = gpiochip_get_data(chip);