Merge branch 'upstream' of git://git.linux-mips.org/pub/scm/ralf/upstream-linus

Pull MIPS updates from Ralf Baechle:
 "This is the main pull request for MIPS:

   - a number of fixes that didn't make the 3.19 release.

   - a number of cleanups.

   - preliminary support for Cavium's Octeon 3 SOCs which feature up to
     48 MIPS64 R3 cores with FPU and hardware virtualization.

   - support for MIPS R6 processors.

     Revision 6 of the MIPS architecture is a major revision of the MIPS
     architecture which does away with many of original sins of the
     architecture such as branch delay slots.  This and other changes in
     R6 require major changes throughout the entire MIPS core
     architecture code and make up for the lion share of this pull
     request.

   - finally some preparatory work for eXtendend Physical Address
     support, which allows support of up to 40 bit of physical address
     space on 32 bit processors"

     [ Ahh, MIPS can't leave the PAE brain damage alone.  It's like
       every CPU architect has to make that mistake, but pee in the snow
       by changing the TLA.  But whether it's called PAE, LPAE or XPA,
       it's horrid crud   - Linus ]

* 'upstream' of git://git.linux-mips.org/pub/scm/ralf/upstream-linus: (114 commits)
  MIPS: sead3: Corrected get_c0_perfcount_int
  MIPS: mm: Remove dead macro definitions
  MIPS: OCTEON: irq: add CIB and other fixes
  MIPS: OCTEON: Don't do acknowledge operations for level triggered irqs.
  MIPS: OCTEON: More OCTEONIII support
  MIPS: OCTEON: Remove setting of processor specific CVMCTL icache bits.
  MIPS: OCTEON: Core-15169 Workaround and general CVMSEG cleanup.
  MIPS: OCTEON: Update octeon-model.h code for new SoCs.
  MIPS: OCTEON: Implement DCache errata workaround for all CN6XXX
  MIPS: OCTEON: Add little-endian support to asm/octeon/octeon.h
  MIPS: OCTEON: Implement the core-16057 workaround
  MIPS: OCTEON: Delete unused COP2 saving code
  MIPS: OCTEON: Use correct instruction to read 64-bit COP0 register
  MIPS: OCTEON: Save and restore CP2 SHA3 state
  MIPS: OCTEON: Fix FP context save.
  MIPS: OCTEON: Save/Restore wider multiply registers in OCTEON III CPUs
  MIPS: boot: Provide more uImage options
  MIPS: Remove unneeded #ifdef __KERNEL__ from asm/processor.h
  MIPS: ip22-gio: Remove legacy suspend/resume support
  mips: pci: Add ifdef around pci_proc_domain
  ...
diff --git a/Documentation/devicetree/bindings/mips/cavium/cib.txt b/Documentation/devicetree/bindings/mips/cavium/cib.txt
new file mode 100644
index 0000000..f39a1aa
--- /dev/null
+++ b/Documentation/devicetree/bindings/mips/cavium/cib.txt
@@ -0,0 +1,43 @@
+* Cavium Interrupt Bus widget
+
+Properties:
+- compatible: "cavium,octeon-7130-cib"
+
+  Compatibility with cn70XX SoCs.
+
+- interrupt-controller:  This is an interrupt controller.
+
+- reg: Two elements consisting of the addresses of the RAW and EN
+  registers of the CIB block
+
+- cavium,max-bits: The index (zero based) of the highest numbered bit
+  in the CIB block.
+
+- interrupt-parent:  Always the CIU on the SoC.
+
+- interrupts: The CIU line to which the CIB block is connected.
+
+- #interrupt-cells: Must be <2>.  The first cell is the bit within the
+   CIB.  The second cell specifies the triggering semantics of the
+   line.
+
+Example:
+
+	interrupt-controller@107000000e000 {
+		compatible = "cavium,octeon-7130-cib";
+		reg = <0x10700 0x0000e000 0x0 0x8>, /* RAW */
+		      <0x10700 0x0000e100 0x0 0x8>; /* EN */
+		cavium,max-bits = <23>;
+
+		interrupt-controller;
+		interrupt-parent = <&ciu>;
+		interrupts = <1 24>;
+		/* Interrupts are specified by two parts:
+		 * 1) Bit number in the CIB* registers
+		 * 2) Triggering (1 - edge rising
+		 *		  2 - edge falling
+		 *		  4 - level active high
+		 *		  8 - level active low)
+		 */
+		#interrupt-cells = <2>;
+	};
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 843713c..c7a1690 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -54,6 +54,7 @@
 	select CPU_PM if CPU_IDLE
 	select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
 	select ARCH_BINFMT_ELF_STATE
+	select SYSCTL_EXCEPTION_TRACE
 
 menu "Machine selection"
 
@@ -376,8 +377,10 @@
 	select SYS_HAS_CPU_MIPS32_R1
 	select SYS_HAS_CPU_MIPS32_R2
 	select SYS_HAS_CPU_MIPS32_R3_5
+	select SYS_HAS_CPU_MIPS32_R6
 	select SYS_HAS_CPU_MIPS64_R1
 	select SYS_HAS_CPU_MIPS64_R2
+	select SYS_HAS_CPU_MIPS64_R6
 	select SYS_HAS_CPU_NEVADA
 	select SYS_HAS_CPU_RM7000
 	select SYS_SUPPORTS_32BIT_KERNEL
@@ -1033,6 +1036,9 @@
 config NO_IOPORT_MAP
 	def_bool n
 
+config GENERIC_CSUM
+	bool
+
 config GENERIC_ISA_DMA
 	bool
 	select ZONE_DMA if GENERIC_ISA_DMA_SUPPORT_BROKEN=n
@@ -1146,6 +1152,9 @@
 	bool
 	select SOC_PNX833X
 
+config MIPS_SPRAM
+	bool
+
 config SWAP_IO_SPACE
 	bool
 
@@ -1304,6 +1313,22 @@
 	  specific type of processor in your system, choose those that one
 	  otherwise CPU_MIPS32_R1 is a safe bet for any MIPS32 system.
 
+config CPU_MIPS32_R6
+	bool "MIPS32 Release 6 (EXPERIMENTAL)"
+	depends on SYS_HAS_CPU_MIPS32_R6
+	select CPU_HAS_PREFETCH
+	select CPU_SUPPORTS_32BIT_KERNEL
+	select CPU_SUPPORTS_HIGHMEM
+	select CPU_SUPPORTS_MSA
+	select GENERIC_CSUM
+	select HAVE_KVM
+	select MIPS_O32_FP64_SUPPORT
+	help
+	  Choose this option to build a kernel for release 6 or later of the
+	  MIPS32 architecture.  New MIPS processors, starting with the Warrior
+	  family, are based on a MIPS32r6 processor. If you own an older
+	  processor, you probably need to select MIPS32r1 or MIPS32r2 instead.
+
 config CPU_MIPS64_R1
 	bool "MIPS64 Release 1"
 	depends on SYS_HAS_CPU_MIPS64_R1
@@ -1339,6 +1364,21 @@
 	  specific type of processor in your system, choose those that one
 	  otherwise CPU_MIPS64_R1 is a safe bet for any MIPS64 system.
 
+config CPU_MIPS64_R6
+	bool "MIPS64 Release 6 (EXPERIMENTAL)"
+	depends on SYS_HAS_CPU_MIPS64_R6
+	select CPU_HAS_PREFETCH
+	select CPU_SUPPORTS_32BIT_KERNEL
+	select CPU_SUPPORTS_64BIT_KERNEL
+	select CPU_SUPPORTS_HIGHMEM
+	select CPU_SUPPORTS_MSA
+	select GENERIC_CSUM
+	help
+	  Choose this option to build a kernel for release 6 or later of the
+	  MIPS64 architecture.  New MIPS processors, starting with the Warrior
+	  family, are based on a MIPS64r6 processor. If you own an older
+	  processor, you probably need to select MIPS64r1 or MIPS64r2 instead.
+
 config CPU_R3000
 	bool "R3000"
 	depends on SYS_HAS_CPU_R3000
@@ -1539,7 +1579,7 @@
 config CPU_MIPS32_3_5_FEATURES
 	bool "MIPS32 Release 3.5 Features"
 	depends on SYS_HAS_CPU_MIPS32_R3_5
-	depends on CPU_MIPS32_R2
+	depends on CPU_MIPS32_R2 || CPU_MIPS32_R6
 	help
 	  Choose this option to build a kernel for release 2 or later of the
 	  MIPS32 architecture including features from the 3.5 release such as
@@ -1659,12 +1699,18 @@
 config SYS_HAS_CPU_MIPS32_R3_5
 	bool
 
+config SYS_HAS_CPU_MIPS32_R6
+	bool
+
 config SYS_HAS_CPU_MIPS64_R1
 	bool
 
 config SYS_HAS_CPU_MIPS64_R2
 	bool
 
+config SYS_HAS_CPU_MIPS64_R6
+	bool
+
 config SYS_HAS_CPU_R3000
 	bool
 
@@ -1764,11 +1810,11 @@
 #
 config CPU_MIPS32
 	bool
-	default y if CPU_MIPS32_R1 || CPU_MIPS32_R2
+	default y if CPU_MIPS32_R1 || CPU_MIPS32_R2 || CPU_MIPS32_R6
 
 config CPU_MIPS64
 	bool
-	default y if CPU_MIPS64_R1 || CPU_MIPS64_R2
+	default y if CPU_MIPS64_R1 || CPU_MIPS64_R2 || CPU_MIPS64_R6
 
 #
 # These two indicate the revision of the architecture, either Release 1 or Release 2
@@ -1780,6 +1826,12 @@
 config CPU_MIPSR2
 	bool
 	default y if CPU_MIPS32_R2 || CPU_MIPS64_R2 || CPU_CAVIUM_OCTEON
+	select MIPS_SPRAM
+
+config CPU_MIPSR6
+	bool
+	default y if CPU_MIPS32_R6 || CPU_MIPS64_R6
+	select MIPS_SPRAM
 
 config EVA
 	bool
@@ -2013,6 +2065,19 @@
 	default y
 	depends on MIPS_MT_SMP
 
+config MIPSR2_TO_R6_EMULATOR
+	bool "MIPS R2-to-R6 emulator"
+	depends on CPU_MIPSR6 && !SMP
+	default y
+	help
+	  Choose this option if you want to run non-R6 MIPS userland code.
+	  Even if you say 'Y' here, the emulator will still be disabled by
+	  default. You can enable it using the 'mipsr2emul' kernel option.
+	  The only reason this is a build-time option is to save ~14K from the
+	  final kernel image.
+comment "MIPS R2-to-R6 emulator is only available for UP kernels"
+	depends on SMP && CPU_MIPSR6
+
 config MIPS_VPE_LOADER
 	bool "VPE loader support."
 	depends on SYS_SUPPORTS_MULTITHREADING && MODULES
@@ -2148,7 +2213,7 @@
 	  here.
 
 config CPU_MICROMIPS
-	depends on 32BIT && SYS_SUPPORTS_MICROMIPS
+	depends on 32BIT && SYS_SUPPORTS_MICROMIPS && !CPU_MIPSR6
 	bool "microMIPS"
 	help
 	  When this option is enabled the kernel will be built using the
diff --git a/arch/mips/Kconfig.debug b/arch/mips/Kconfig.debug
index 88a9f43..3a2b775 100644
--- a/arch/mips/Kconfig.debug
+++ b/arch/mips/Kconfig.debug
@@ -122,17 +122,4 @@
 	help
 	  Add several files to the debugfs to test spinlock speed.
 
-config FP32XX_HYBRID_FPRS
-	bool "Run FP32 & FPXX code with hybrid FPRs"
-	depends on MIPS_O32_FP64_SUPPORT
-	help
-	  The hybrid FPR scheme is normally used only when a program needs to
-	  execute a mix of FP32 & FP64A code, since the trapping & emulation
-	  that it entails is expensive. When enabled, this option will lead
-	  to the kernel running programs which use the FP32 & FPXX FP ABIs
-	  using the hybrid FPR scheme, which can be useful for debugging
-	  purposes.
-
-	  If unsure, say N.
-
 endmenu
diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index 2563a08..8f57fc7 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -122,26 +122,8 @@
 cflags-$(CONFIG_CPU_BIG_ENDIAN)		+= $(shell $(CC) -dumpmachine |grep -q 'mips.*el-.*' && echo -EB $(undef-all) $(predef-be))
 cflags-$(CONFIG_CPU_LITTLE_ENDIAN)	+= $(shell $(CC) -dumpmachine |grep -q 'mips.*el-.*' || echo -EL $(undef-all) $(predef-le))
 
-# For smartmips configurations, there are hundreds of warnings due to ISA overrides
-# in assembly and header files. smartmips is only supported for MIPS32r1 onwards
-# and there is no support for 64-bit. Various '.set mips2' or '.set mips3' or
-# similar directives in the kernel will spam the build logs with the following warnings:
-# Warning: the `smartmips' extension requires MIPS32 revision 1 or greater
-# or
-# Warning: the 64-bit MIPS architecture does not support the `smartmips' extension
-# Pass -Wa,--no-warn to disable all assembler warnings until the kernel code has
-# been fixed properly.
-cflags-$(CONFIG_CPU_HAS_SMARTMIPS)	+= $(call cc-option,-msmartmips) -Wa,--no-warn
-cflags-$(CONFIG_CPU_MICROMIPS) += $(call cc-option,-mmicromips)
-
 cflags-$(CONFIG_SB1XXX_CORELIS)	+= $(call cc-option,-mno-sched-prolog) \
 				   -fno-omit-frame-pointer
-
-ifeq ($(CONFIG_CPU_HAS_MSA),y)
-toolchain-msa	:= $(call cc-option-yn,-mhard-float -mfp64 -Wa$(comma)-mmsa)
-cflags-$(toolchain-msa)		+= -DTOOLCHAIN_SUPPORTS_MSA
-endif
-
 #
 # CPU-dependent compiler/assembler options for optimization.
 #
@@ -156,10 +138,12 @@
 			-Wa,-mips32 -Wa,--trap
 cflags-$(CONFIG_CPU_MIPS32_R2)	+= $(call cc-option,-march=mips32r2,-mips32r2 -U_MIPS_ISA -D_MIPS_ISA=_MIPS_ISA_MIPS32) \
 			-Wa,-mips32r2 -Wa,--trap
+cflags-$(CONFIG_CPU_MIPS32_R6)	+= -march=mips32r6 -Wa,--trap
 cflags-$(CONFIG_CPU_MIPS64_R1)	+= $(call cc-option,-march=mips64,-mips64 -U_MIPS_ISA -D_MIPS_ISA=_MIPS_ISA_MIPS64) \
 			-Wa,-mips64 -Wa,--trap
 cflags-$(CONFIG_CPU_MIPS64_R2)	+= $(call cc-option,-march=mips64r2,-mips64r2 -U_MIPS_ISA -D_MIPS_ISA=_MIPS_ISA_MIPS64) \
 			-Wa,-mips64r2 -Wa,--trap
+cflags-$(CONFIG_CPU_MIPS64_R6)	+= -march=mips64r6 -Wa,--trap
 cflags-$(CONFIG_CPU_R5000)	+= -march=r5000 -Wa,--trap
 cflags-$(CONFIG_CPU_R5432)	+= $(call cc-option,-march=r5400,-march=r5000) \
 			-Wa,--trap
@@ -182,6 +166,16 @@
 endif
 cflags-$(CONFIG_CAVIUM_CN63XXP1) += -Wa,-mfix-cn63xxp1
 cflags-$(CONFIG_CPU_BMIPS)	+= -march=mips32 -Wa,-mips32 -Wa,--trap
+#
+# binutils from v2.25 on and gcc starting from v4.9.0 treat -march=loongson3a
+# as MIPS64 R1; older versions as just R1.  This leaves the possibility open
+# that GCC might generate R2 code for -march=loongson3a which then is rejected
+# by GAS.  The cc-option can't probe for this behaviour so -march=loongson3a
+# can't easily be used safely within the kbuild framework.
+#
+cflags-$(CONFIG_CPU_LOONGSON3)  +=					\
+	$(call cc-option,-march=mips64r2,-mips64r2 -U_MIPS_ISA -D_MIPS_ISA=_MIPS_ISA_MIPS64) \
+	-Wa,-mips64r2 -Wa,--trap
 
 cflags-$(CONFIG_CPU_R4000_WORKAROUNDS)	+= $(call cc-option,-mfix-r4000,)
 cflags-$(CONFIG_CPU_R4400_WORKAROUNDS)	+= $(call cc-option,-mfix-r4400,)
@@ -194,6 +188,23 @@
 endif
 endif
 
+# For smartmips configurations, there are hundreds of warnings due to ISA overrides
+# in assembly and header files. smartmips is only supported for MIPS32r1 onwards
+# and there is no support for 64-bit. Various '.set mips2' or '.set mips3' or
+# similar directives in the kernel will spam the build logs with the following warnings:
+# Warning: the `smartmips' extension requires MIPS32 revision 1 or greater
+# or
+# Warning: the 64-bit MIPS architecture does not support the `smartmips' extension
+# Pass -Wa,--no-warn to disable all assembler warnings until the kernel code has
+# been fixed properly.
+mips-cflags				:= "$(cflags-y)"
+cflags-$(CONFIG_CPU_HAS_SMARTMIPS)	+= $(call cc-option,$(mips-cflags),-msmartmips) -Wa,--no-warn
+cflags-$(CONFIG_CPU_MICROMIPS)		+= $(call cc-option,$(mips-cflags),-mmicromips)
+ifeq ($(CONFIG_CPU_HAS_MSA),y)
+toolchain-msa				:= $(call cc-option-yn,-$(mips-cflags),mhard-float -mfp64 -Wa$(comma)-mmsa)
+cflags-$(toolchain-msa)			+= -DTOOLCHAIN_SUPPORTS_MSA
+endif
+
 #
 # Firmware support
 #
@@ -287,7 +298,11 @@
 boot-y			+= vmlinux.srec
 ifeq ($(shell expr $(load-y) \< 0xffffffff80000000 2> /dev/null), 0)
 boot-y			+= uImage
+boot-y			+= uImage.bin
+boot-y			+= uImage.bz2
 boot-y			+= uImage.gz
+boot-y			+= uImage.lzma
+boot-y			+= uImage.lzo
 endif
 
 # compressed boot image targets (arch/mips/boot/compressed/)
@@ -386,7 +401,11 @@
 	echo '  vmlinuz.bin          - Raw binary zboot image'
 	echo '  vmlinuz.srec         - SREC zboot image'
 	echo '  uImage               - U-Boot image'
+	echo '  uImage.bin           - U-Boot image (uncompressed)'
+	echo '  uImage.bz2           - U-Boot image (bz2)'
 	echo '  uImage.gz            - U-Boot image (gzip)'
+	echo '  uImage.lzma          - U-Boot image (lzma)'
+	echo '  uImage.lzo           - U-Boot image (lzo)'
 	echo '  dtbs                 - Device-tree blobs for enabled boards'
 	echo
 	echo '  These will be default as appropriate for a configured platform.'
diff --git a/arch/mips/alchemy/common/clock.c b/arch/mips/alchemy/common/clock.c
index 604b7d0..6a98d2c 100644
--- a/arch/mips/alchemy/common/clock.c
+++ b/arch/mips/alchemy/common/clock.c
@@ -127,12 +127,20 @@
 		t = 396000000;
 	else {
 		t = alchemy_rdsys(AU1000_SYS_CPUPLL) & 0x7f;
+		if (alchemy_get_cputype() < ALCHEMY_CPU_AU1300)
+			t &= 0x3f;
 		t *= parent_rate;
 	}
 
 	return t;
 }
 
+void __init alchemy_set_lpj(void)
+{
+	preset_lpj = alchemy_clk_cpu_recalc(NULL, ALCHEMY_ROOTCLK_RATE);
+	preset_lpj /= 2 * HZ;
+}
+
 static struct clk_ops alchemy_clkops_cpu = {
 	.recalc_rate	= alchemy_clk_cpu_recalc,
 };
@@ -315,17 +323,26 @@
 
 /* lrclk: external synchronous static bus clock ***********************/
 
-static struct clk __init *alchemy_clk_setup_lrclk(const char *pn)
+static struct clk __init *alchemy_clk_setup_lrclk(const char *pn, int t)
 {
-	/* MEM_STCFG0[15:13] = divisor.
+	/* Au1000, Au1500: MEM_STCFG0[11]: If bit is set, lrclk=pclk/5,
+	 * otherwise lrclk=pclk/4.
+	 * All other variants: MEM_STCFG0[15:13] = divisor.
 	 * L/RCLK = periph_clk / (divisor + 1)
 	 * On Au1000, Au1500, Au1100 it's called LCLK,
 	 * on later models it's called RCLK, but it's the same thing.
 	 */
 	struct clk *c;
-	unsigned long v = alchemy_rdsmem(AU1000_MEM_STCFG0) >> 13;
+	unsigned long v = alchemy_rdsmem(AU1000_MEM_STCFG0);
 
-	v = (v & 7) + 1;
+	switch (t) {
+	case ALCHEMY_CPU_AU1000:
+	case ALCHEMY_CPU_AU1500:
+		v = 4 + ((v >> 11) & 1);
+		break;
+	default:	/* all other models */
+		v = ((v >> 13) & 7) + 1;
+	}
 	c = clk_register_fixed_factor(NULL, ALCHEMY_LR_CLK,
 				      pn, 0, 1, v);
 	if (!IS_ERR(c))
@@ -1066,7 +1083,7 @@
 	ERRCK(c)
 
 	/* L/RCLK: external static bus clock for synchronous mode */
-	c = alchemy_clk_setup_lrclk(ALCHEMY_PERIPH_CLK);
+	c = alchemy_clk_setup_lrclk(ALCHEMY_PERIPH_CLK, ctype);
 	ERRCK(c)
 
 	/* Frequency dividers 0-5 */
diff --git a/arch/mips/alchemy/common/setup.c b/arch/mips/alchemy/common/setup.c
index 4e72daf..2902138 100644
--- a/arch/mips/alchemy/common/setup.c
+++ b/arch/mips/alchemy/common/setup.c
@@ -34,10 +34,12 @@
 #include <au1000.h>
 
 extern void __init board_setup(void);
-extern void set_cpuspec(void);
+extern void __init alchemy_set_lpj(void);
 
 void __init plat_mem_setup(void)
 {
+	alchemy_set_lpj();
+
 	if (au1xxx_cpu_needs_config_od())
 		/* Various early Au1xx0 errata corrected by this */
 		set_c0_config(1 << 19); /* Set Config[OD] */
diff --git a/arch/mips/bcm3384/irq.c b/arch/mips/bcm3384/irq.c
index 0fb5134..fd94fe8 100644
--- a/arch/mips/bcm3384/irq.c
+++ b/arch/mips/bcm3384/irq.c
@@ -180,7 +180,7 @@
 
 static struct of_device_id of_irq_ids[] __initdata = {
 	{ .compatible = "mti,cpu-interrupt-controller",
-	  .data = mips_cpu_intc_init },
+	  .data = mips_cpu_irq_of_init },
 	{ .compatible = "brcm,bcm3384-intc",
 	  .data = intc_of_init },
 	{},
diff --git a/arch/mips/boot/Makefile b/arch/mips/boot/Makefile
index 1466c00..acb1988 100644
--- a/arch/mips/boot/Makefile
+++ b/arch/mips/boot/Makefile
@@ -23,6 +23,12 @@
 
 hostprogs-y := elf2ecoff
 
+suffix-y			:= bin
+suffix-$(CONFIG_KERNEL_BZIP2)	:= bz2
+suffix-$(CONFIG_KERNEL_GZIP)	:= gz
+suffix-$(CONFIG_KERNEL_LZMA)	:= lzma
+suffix-$(CONFIG_KERNEL_LZO)	:= lzo
+
 targets := vmlinux.ecoff
 quiet_cmd_ecoff = ECOFF	  $@
       cmd_ecoff = $(obj)/elf2ecoff $(VMLINUX) $@ $(e2eflag)
@@ -44,14 +50,53 @@
 UIMAGE_LOADADDR  = $(VMLINUX_LOAD_ADDRESS)
 UIMAGE_ENTRYADDR = $(VMLINUX_ENTRY_ADDRESS)
 
+#
+# Compressed vmlinux images
+#
+
+extra-y += vmlinux.bin.bz2
+extra-y += vmlinux.bin.gz
+extra-y += vmlinux.bin.lzma
+extra-y += vmlinux.bin.lzo
+
+$(obj)/vmlinux.bin.bz2: $(obj)/vmlinux.bin FORCE
+	$(call if_changed,bzip2)
+
 $(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
 	$(call if_changed,gzip)
 
+$(obj)/vmlinux.bin.lzma: $(obj)/vmlinux.bin FORCE
+	$(call if_changed,lzma)
+
+$(obj)/vmlinux.bin.lzo: $(obj)/vmlinux.bin FORCE
+	$(call if_changed,lzo)
+
+#
+# Compressed u-boot images
+#
+
+targets += uImage
+targets += uImage.bin
+targets += uImage.bz2
 targets += uImage.gz
+targets += uImage.lzma
+targets += uImage.lzo
+
+$(obj)/uImage.bin: $(obj)/vmlinux.bin FORCE
+	$(call if_changed,uimage,none)
+
+$(obj)/uImage.bz2: $(obj)/vmlinux.bin.bz2 FORCE
+	$(call if_changed,uimage,bzip2)
+
 $(obj)/uImage.gz: $(obj)/vmlinux.bin.gz FORCE
 	$(call if_changed,uimage,gzip)
 
-targets += uImage
-$(obj)/uImage: $(obj)/uImage.gz FORCE
+$(obj)/uImage.lzma: $(obj)/vmlinux.bin.lzma FORCE
+	$(call if_changed,uimage,lzma)
+
+$(obj)/uImage.lzo: $(obj)/vmlinux.bin.lzo FORCE
+	$(call if_changed,uimage,lzo)
+
+$(obj)/uImage: $(obj)/uImage.$(suffix-y)
 	@ln -sf $(notdir $<) $@
 	@echo '  Image $@ is ready'
diff --git a/arch/mips/boot/elf2ecoff.c b/arch/mips/boot/elf2ecoff.c
index 2a4c52e..266c813 100644
--- a/arch/mips/boot/elf2ecoff.c
+++ b/arch/mips/boot/elf2ecoff.c
@@ -268,7 +268,6 @@
 	Elf32_Ehdr ex;
 	Elf32_Phdr *ph;
 	Elf32_Shdr *sh;
-	char *shstrtab;
 	int i, pad;
 	struct sect text, data, bss;
 	struct filehdr efh;
@@ -336,9 +335,6 @@
 				     "sh");
 	if (must_convert_endian)
 		convert_elf_shdrs(sh, ex.e_shnum);
-	/* Read in the section string table. */
-	shstrtab = saveRead(infile, sh[ex.e_shstrndx].sh_offset,
-			    sh[ex.e_shstrndx].sh_size, "shstrtab");
 
 	/* Figure out if we can cram the program header into an ECOFF
 	   header...  Basically, we can't handle anything but loadable
diff --git a/arch/mips/cavium-octeon/csrc-octeon.c b/arch/mips/cavium-octeon/csrc-octeon.c
index b752c4e..1882e64 100644
--- a/arch/mips/cavium-octeon/csrc-octeon.c
+++ b/arch/mips/cavium-octeon/csrc-octeon.c
@@ -18,7 +18,7 @@
 #include <asm/octeon/octeon.h>
 #include <asm/octeon/cvmx-ipd-defs.h>
 #include <asm/octeon/cvmx-mio-defs.h>
-
+#include <asm/octeon/cvmx-rst-defs.h>
 
 static u64 f;
 static u64 rdiv;
@@ -39,11 +39,20 @@
 
 	if (current_cpu_type() == CPU_CAVIUM_OCTEON2) {
 		union cvmx_mio_rst_boot rst_boot;
+
 		rst_boot.u64 = cvmx_read_csr(CVMX_MIO_RST_BOOT);
 		rdiv = rst_boot.s.c_mul;	/* CPU clock */
 		sdiv = rst_boot.s.pnr_mul;	/* I/O clock */
 		f = (0x8000000000000000ull / sdiv) * 2;
+	} else if (current_cpu_type() == CPU_CAVIUM_OCTEON3) {
+		union cvmx_rst_boot rst_boot;
+
+		rst_boot.u64 = cvmx_read_csr(CVMX_RST_BOOT);
+		rdiv = rst_boot.s.c_mul;	/* CPU clock */
+		sdiv = rst_boot.s.pnr_mul;	/* I/O clock */
+		f = (0x8000000000000000ull / sdiv) * 2;
 	}
+
 }
 
 /*
diff --git a/arch/mips/cavium-octeon/dma-octeon.c b/arch/mips/cavium-octeon/dma-octeon.c
index 3778655..7d89878 100644
--- a/arch/mips/cavium-octeon/dma-octeon.c
+++ b/arch/mips/cavium-octeon/dma-octeon.c
@@ -276,7 +276,7 @@
 			continue;
 
 		/* These addresses map low for PCI. */
-		if (e->addr > 0x410000000ull && !OCTEON_IS_MODEL(OCTEON_CN6XXX))
+		if (e->addr > 0x410000000ull && !OCTEON_IS_OCTEON2())
 			continue;
 
 		addr_size += e->size;
@@ -308,7 +308,7 @@
 #endif
 #ifdef CONFIG_USB_OCTEON_OHCI
 	/* OCTEON II ohci is only 32-bit. */
-	if (OCTEON_IS_MODEL(OCTEON_CN6XXX) && max_addr >= 0x100000000ul)
+	if (OCTEON_IS_OCTEON2() && max_addr >= 0x100000000ul)
 		swiotlbsize = 64 * (1<<20);
 #endif
 	swiotlb_nslabs = swiotlbsize >> IO_TLB_SHIFT;
diff --git a/arch/mips/cavium-octeon/executive/cvmx-helper-board.c b/arch/mips/cavium-octeon/executive/cvmx-helper-board.c
index 5dfef84..9eb0fee 100644
--- a/arch/mips/cavium-octeon/executive/cvmx-helper-board.c
+++ b/arch/mips/cavium-octeon/executive/cvmx-helper-board.c
@@ -767,7 +767,7 @@
 		break;
 	}
 	/* Most boards except NIC10e use a 12MHz crystal */
-	if (OCTEON_IS_MODEL(OCTEON_FAM_2))
+	if (OCTEON_IS_OCTEON2())
 		return USB_CLOCK_TYPE_CRYSTAL_12;
 	return USB_CLOCK_TYPE_REF_48;
 }
diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c
index 2bc4aa9..10f7625 100644
--- a/arch/mips/cavium-octeon/octeon-irq.c
+++ b/arch/mips/cavium-octeon/octeon-irq.c
@@ -3,12 +3,14 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (C) 2004-2012 Cavium, Inc.
+ * Copyright (C) 2004-2014 Cavium, Inc.
  */
 
+#include <linux/of_address.h>
 #include <linux/interrupt.h>
 #include <linux/irqdomain.h>
 #include <linux/bitops.h>
+#include <linux/of_irq.h>
 #include <linux/percpu.h>
 #include <linux/slab.h>
 #include <linux/irq.h>
@@ -22,16 +24,25 @@
 static DEFINE_PER_CPU(unsigned long, octeon_irq_ciu1_en_mirror);
 static DEFINE_PER_CPU(raw_spinlock_t, octeon_irq_ciu_spinlock);
 
+struct octeon_irq_ciu_domain_data {
+	int num_sum;  /* number of sum registers (2 or 3). */
+};
+
 static __read_mostly u8 octeon_irq_ciu_to_irq[8][64];
 
-union octeon_ciu_chip_data {
-	void *p;
-	unsigned long l;
-	struct {
-		unsigned long line:6;
-		unsigned long bit:6;
-		unsigned long gpio_line:6;
-	} s;
+struct octeon_ciu_chip_data {
+	union {
+		struct {		/* only used for ciu3 */
+			u64 ciu3_addr;
+			unsigned int intsn;
+		};
+		struct {		/* only used for ciu/ciu2 */
+			u8 line;
+			u8 bit;
+			u8 gpio_line;
+		};
+	};
+	int current_cpu;	/* Next CPU expected to take this irq */
 };
 
 struct octeon_core_chip_data {
@@ -45,27 +56,40 @@
 
 static struct octeon_core_chip_data octeon_irq_core_chip_data[MIPS_CORE_IRQ_LINES];
 
-static void octeon_irq_set_ciu_mapping(int irq, int line, int bit, int gpio_line,
-				       struct irq_chip *chip,
-				       irq_flow_handler_t handler)
+static int octeon_irq_set_ciu_mapping(int irq, int line, int bit, int gpio_line,
+				      struct irq_chip *chip,
+				      irq_flow_handler_t handler)
 {
-	union octeon_ciu_chip_data cd;
+	struct octeon_ciu_chip_data *cd;
+
+	cd = kzalloc(sizeof(*cd), GFP_KERNEL);
+	if (!cd)
+		return -ENOMEM;
 
 	irq_set_chip_and_handler(irq, chip, handler);
 
-	cd.l = 0;
-	cd.s.line = line;
-	cd.s.bit = bit;
-	cd.s.gpio_line = gpio_line;
+	cd->line = line;
+	cd->bit = bit;
+	cd->gpio_line = gpio_line;
 
-	irq_set_chip_data(irq, cd.p);
+	irq_set_chip_data(irq, cd);
 	octeon_irq_ciu_to_irq[line][bit] = irq;
+	return 0;
 }
 
-static void octeon_irq_force_ciu_mapping(struct irq_domain *domain,
-					 int irq, int line, int bit)
+static void octeon_irq_free_cd(struct irq_domain *d, unsigned int irq)
 {
-	irq_domain_associate(domain, irq, line << 6 | bit);
+	struct irq_data *data = irq_get_irq_data(irq);
+	struct octeon_ciu_chip_data *cd = irq_data_get_irq_chip_data(data);
+
+	irq_set_chip_data(irq, NULL);
+	kfree(cd);
+}
+
+static int octeon_irq_force_ciu_mapping(struct irq_domain *domain,
+					int irq, int line, int bit)
+{
+	return irq_domain_associate(domain, irq, line << 6 | bit);
 }
 
 static int octeon_coreid_for_cpu(int cpu)
@@ -202,9 +226,10 @@
 #ifdef CONFIG_SMP
 	int cpu;
 	int weight = cpumask_weight(data->affinity);
+	struct octeon_ciu_chip_data *cd = irq_data_get_irq_chip_data(data);
 
 	if (weight > 1) {
-		cpu = smp_processor_id();
+		cpu = cd->current_cpu;
 		for (;;) {
 			cpu = cpumask_next(cpu, data->affinity);
 			if (cpu >= nr_cpu_ids) {
@@ -219,6 +244,7 @@
 	} else {
 		cpu = smp_processor_id();
 	}
+	cd->current_cpu = cpu;
 	return cpu;
 #else
 	return smp_processor_id();
@@ -231,15 +257,15 @@
 	int coreid = octeon_coreid_for_cpu(cpu);
 	unsigned long *pen;
 	unsigned long flags;
-	union octeon_ciu_chip_data cd;
+	struct octeon_ciu_chip_data *cd;
 	raw_spinlock_t *lock = &per_cpu(octeon_irq_ciu_spinlock, cpu);
 
-	cd.p = irq_data_get_irq_chip_data(data);
+	cd = irq_data_get_irq_chip_data(data);
 
 	raw_spin_lock_irqsave(lock, flags);
-	if (cd.s.line == 0) {
+	if (cd->line == 0) {
 		pen = &per_cpu(octeon_irq_ciu0_en_mirror, cpu);
-		__set_bit(cd.s.bit, pen);
+		__set_bit(cd->bit, pen);
 		/*
 		 * Must be visible to octeon_irq_ip{2,3}_ciu() before
 		 * enabling the irq.
@@ -248,7 +274,7 @@
 		cvmx_write_csr(CVMX_CIU_INTX_EN0(coreid * 2), *pen);
 	} else {
 		pen = &per_cpu(octeon_irq_ciu1_en_mirror, cpu);
-		__set_bit(cd.s.bit, pen);
+		__set_bit(cd->bit, pen);
 		/*
 		 * Must be visible to octeon_irq_ip{2,3}_ciu() before
 		 * enabling the irq.
@@ -263,15 +289,15 @@
 {
 	unsigned long *pen;
 	unsigned long flags;
-	union octeon_ciu_chip_data cd;
+	struct octeon_ciu_chip_data *cd;
 	raw_spinlock_t *lock = this_cpu_ptr(&octeon_irq_ciu_spinlock);
 
-	cd.p = irq_data_get_irq_chip_data(data);
+	cd = irq_data_get_irq_chip_data(data);
 
 	raw_spin_lock_irqsave(lock, flags);
-	if (cd.s.line == 0) {
+	if (cd->line == 0) {
 		pen = this_cpu_ptr(&octeon_irq_ciu0_en_mirror);
-		__set_bit(cd.s.bit, pen);
+		__set_bit(cd->bit, pen);
 		/*
 		 * Must be visible to octeon_irq_ip{2,3}_ciu() before
 		 * enabling the irq.
@@ -280,7 +306,7 @@
 		cvmx_write_csr(CVMX_CIU_INTX_EN0(cvmx_get_core_num() * 2), *pen);
 	} else {
 		pen = this_cpu_ptr(&octeon_irq_ciu1_en_mirror);
-		__set_bit(cd.s.bit, pen);
+		__set_bit(cd->bit, pen);
 		/*
 		 * Must be visible to octeon_irq_ip{2,3}_ciu() before
 		 * enabling the irq.
@@ -295,15 +321,15 @@
 {
 	unsigned long *pen;
 	unsigned long flags;
-	union octeon_ciu_chip_data cd;
+	struct octeon_ciu_chip_data *cd;
 	raw_spinlock_t *lock = this_cpu_ptr(&octeon_irq_ciu_spinlock);
 
-	cd.p = irq_data_get_irq_chip_data(data);
+	cd = irq_data_get_irq_chip_data(data);
 
 	raw_spin_lock_irqsave(lock, flags);
-	if (cd.s.line == 0) {
+	if (cd->line == 0) {
 		pen = this_cpu_ptr(&octeon_irq_ciu0_en_mirror);
-		__clear_bit(cd.s.bit, pen);
+		__clear_bit(cd->bit, pen);
 		/*
 		 * Must be visible to octeon_irq_ip{2,3}_ciu() before
 		 * enabling the irq.
@@ -312,7 +338,7 @@
 		cvmx_write_csr(CVMX_CIU_INTX_EN0(cvmx_get_core_num() * 2), *pen);
 	} else {
 		pen = this_cpu_ptr(&octeon_irq_ciu1_en_mirror);
-		__clear_bit(cd.s.bit, pen);
+		__clear_bit(cd->bit, pen);
 		/*
 		 * Must be visible to octeon_irq_ip{2,3}_ciu() before
 		 * enabling the irq.
@@ -328,27 +354,27 @@
 	unsigned long flags;
 	unsigned long *pen;
 	int cpu;
-	union octeon_ciu_chip_data cd;
+	struct octeon_ciu_chip_data *cd;
 	raw_spinlock_t *lock;
 
-	cd.p = irq_data_get_irq_chip_data(data);
+	cd = irq_data_get_irq_chip_data(data);
 
 	for_each_online_cpu(cpu) {
 		int coreid = octeon_coreid_for_cpu(cpu);
 		lock = &per_cpu(octeon_irq_ciu_spinlock, cpu);
-		if (cd.s.line == 0)
+		if (cd->line == 0)
 			pen = &per_cpu(octeon_irq_ciu0_en_mirror, cpu);
 		else
 			pen = &per_cpu(octeon_irq_ciu1_en_mirror, cpu);
 
 		raw_spin_lock_irqsave(lock, flags);
-		__clear_bit(cd.s.bit, pen);
+		__clear_bit(cd->bit, pen);
 		/*
 		 * Must be visible to octeon_irq_ip{2,3}_ciu() before
 		 * enabling the irq.
 		 */
 		wmb();
-		if (cd.s.line == 0)
+		if (cd->line == 0)
 			cvmx_write_csr(CVMX_CIU_INTX_EN0(coreid * 2), *pen);
 		else
 			cvmx_write_csr(CVMX_CIU_INTX_EN1(coreid * 2 + 1), *pen);
@@ -361,27 +387,27 @@
 	unsigned long flags;
 	unsigned long *pen;
 	int cpu;
-	union octeon_ciu_chip_data cd;
+	struct octeon_ciu_chip_data *cd;
 	raw_spinlock_t *lock;
 
-	cd.p = irq_data_get_irq_chip_data(data);
+	cd = irq_data_get_irq_chip_data(data);
 
 	for_each_online_cpu(cpu) {
 		int coreid = octeon_coreid_for_cpu(cpu);
 		lock = &per_cpu(octeon_irq_ciu_spinlock, cpu);
-		if (cd.s.line == 0)
+		if (cd->line == 0)
 			pen = &per_cpu(octeon_irq_ciu0_en_mirror, cpu);
 		else
 			pen = &per_cpu(octeon_irq_ciu1_en_mirror, cpu);
 
 		raw_spin_lock_irqsave(lock, flags);
-		__set_bit(cd.s.bit, pen);
+		__set_bit(cd->bit, pen);
 		/*
 		 * Must be visible to octeon_irq_ip{2,3}_ciu() before
 		 * enabling the irq.
 		 */
 		wmb();
-		if (cd.s.line == 0)
+		if (cd->line == 0)
 			cvmx_write_csr(CVMX_CIU_INTX_EN0(coreid * 2), *pen);
 		else
 			cvmx_write_csr(CVMX_CIU_INTX_EN1(coreid * 2 + 1), *pen);
@@ -397,45 +423,106 @@
 {
 	u64 mask;
 	int cpu = next_cpu_for_irq(data);
-	union octeon_ciu_chip_data cd;
+	struct octeon_ciu_chip_data *cd;
 
-	cd.p = irq_data_get_irq_chip_data(data);
-	mask = 1ull << (cd.s.bit);
+	cd = irq_data_get_irq_chip_data(data);
+	mask = 1ull << (cd->bit);
 
 	/*
 	 * Called under the desc lock, so these should never get out
 	 * of sync.
 	 */
-	if (cd.s.line == 0) {
+	if (cd->line == 0) {
 		int index = octeon_coreid_for_cpu(cpu) * 2;
-		set_bit(cd.s.bit, &per_cpu(octeon_irq_ciu0_en_mirror, cpu));
+		set_bit(cd->bit, &per_cpu(octeon_irq_ciu0_en_mirror, cpu));
 		cvmx_write_csr(CVMX_CIU_INTX_EN0_W1S(index), mask);
 	} else {
 		int index = octeon_coreid_for_cpu(cpu) * 2 + 1;
-		set_bit(cd.s.bit, &per_cpu(octeon_irq_ciu1_en_mirror, cpu));
+		set_bit(cd->bit, &per_cpu(octeon_irq_ciu1_en_mirror, cpu));
 		cvmx_write_csr(CVMX_CIU_INTX_EN1_W1S(index), mask);
 	}
 }
 
 /*
+ * Enable the irq in the sum2 registers.
+ */
+static void octeon_irq_ciu_enable_sum2(struct irq_data *data)
+{
+	u64 mask;
+	int cpu = next_cpu_for_irq(data);
+	int index = octeon_coreid_for_cpu(cpu);
+	struct octeon_ciu_chip_data *cd;
+
+	cd = irq_data_get_irq_chip_data(data);
+	mask = 1ull << (cd->bit);
+
+	cvmx_write_csr(CVMX_CIU_EN2_PPX_IP4_W1S(index), mask);
+}
+
+/*
+ * Disable the irq in the sum2 registers.
+ */
+static void octeon_irq_ciu_disable_local_sum2(struct irq_data *data)
+{
+	u64 mask;
+	int cpu = next_cpu_for_irq(data);
+	int index = octeon_coreid_for_cpu(cpu);
+	struct octeon_ciu_chip_data *cd;
+
+	cd = irq_data_get_irq_chip_data(data);
+	mask = 1ull << (cd->bit);
+
+	cvmx_write_csr(CVMX_CIU_EN2_PPX_IP4_W1C(index), mask);
+}
+
+static void octeon_irq_ciu_ack_sum2(struct irq_data *data)
+{
+	u64 mask;
+	int cpu = next_cpu_for_irq(data);
+	int index = octeon_coreid_for_cpu(cpu);
+	struct octeon_ciu_chip_data *cd;
+
+	cd = irq_data_get_irq_chip_data(data);
+	mask = 1ull << (cd->bit);
+
+	cvmx_write_csr(CVMX_CIU_SUM2_PPX_IP4(index), mask);
+}
+
+static void octeon_irq_ciu_disable_all_sum2(struct irq_data *data)
+{
+	int cpu;
+	struct octeon_ciu_chip_data *cd;
+	u64 mask;
+
+	cd = irq_data_get_irq_chip_data(data);
+	mask = 1ull << (cd->bit);
+
+	for_each_online_cpu(cpu) {
+		int coreid = octeon_coreid_for_cpu(cpu);
+
+		cvmx_write_csr(CVMX_CIU_EN2_PPX_IP4_W1C(coreid), mask);
+	}
+}
+
+/*
  * Enable the irq on the current CPU for chips that
  * have the EN*_W1{S,C} registers.
  */
 static void octeon_irq_ciu_enable_local_v2(struct irq_data *data)
 {
 	u64 mask;
-	union octeon_ciu_chip_data cd;
+	struct octeon_ciu_chip_data *cd;
 
-	cd.p = irq_data_get_irq_chip_data(data);
-	mask = 1ull << (cd.s.bit);
+	cd = irq_data_get_irq_chip_data(data);
+	mask = 1ull << (cd->bit);
 
-	if (cd.s.line == 0) {
+	if (cd->line == 0) {
 		int index = cvmx_get_core_num() * 2;
-		set_bit(cd.s.bit, this_cpu_ptr(&octeon_irq_ciu0_en_mirror));
+		set_bit(cd->bit, this_cpu_ptr(&octeon_irq_ciu0_en_mirror));
 		cvmx_write_csr(CVMX_CIU_INTX_EN0_W1S(index), mask);
 	} else {
 		int index = cvmx_get_core_num() * 2 + 1;
-		set_bit(cd.s.bit, this_cpu_ptr(&octeon_irq_ciu1_en_mirror));
+		set_bit(cd->bit, this_cpu_ptr(&octeon_irq_ciu1_en_mirror));
 		cvmx_write_csr(CVMX_CIU_INTX_EN1_W1S(index), mask);
 	}
 }
@@ -443,18 +530,18 @@
 static void octeon_irq_ciu_disable_local_v2(struct irq_data *data)
 {
 	u64 mask;
-	union octeon_ciu_chip_data cd;
+	struct octeon_ciu_chip_data *cd;
 
-	cd.p = irq_data_get_irq_chip_data(data);
-	mask = 1ull << (cd.s.bit);
+	cd = irq_data_get_irq_chip_data(data);
+	mask = 1ull << (cd->bit);
 
-	if (cd.s.line == 0) {
+	if (cd->line == 0) {
 		int index = cvmx_get_core_num() * 2;
-		clear_bit(cd.s.bit, this_cpu_ptr(&octeon_irq_ciu0_en_mirror));
+		clear_bit(cd->bit, this_cpu_ptr(&octeon_irq_ciu0_en_mirror));
 		cvmx_write_csr(CVMX_CIU_INTX_EN0_W1C(index), mask);
 	} else {
 		int index = cvmx_get_core_num() * 2 + 1;
-		clear_bit(cd.s.bit, this_cpu_ptr(&octeon_irq_ciu1_en_mirror));
+		clear_bit(cd->bit, this_cpu_ptr(&octeon_irq_ciu1_en_mirror));
 		cvmx_write_csr(CVMX_CIU_INTX_EN1_W1C(index), mask);
 	}
 }
@@ -465,12 +552,12 @@
 static void octeon_irq_ciu_ack(struct irq_data *data)
 {
 	u64 mask;
-	union octeon_ciu_chip_data cd;
+	struct octeon_ciu_chip_data *cd;
 
-	cd.p = irq_data_get_irq_chip_data(data);
-	mask = 1ull << (cd.s.bit);
+	cd = irq_data_get_irq_chip_data(data);
+	mask = 1ull << (cd->bit);
 
-	if (cd.s.line == 0) {
+	if (cd->line == 0) {
 		int index = cvmx_get_core_num() * 2;
 		cvmx_write_csr(CVMX_CIU_INTX_SUM0(index), mask);
 	} else {
@@ -486,21 +573,23 @@
 {
 	int cpu;
 	u64 mask;
-	union octeon_ciu_chip_data cd;
+	struct octeon_ciu_chip_data *cd;
 
-	cd.p = irq_data_get_irq_chip_data(data);
-	mask = 1ull << (cd.s.bit);
+	cd = irq_data_get_irq_chip_data(data);
+	mask = 1ull << (cd->bit);
 
-	if (cd.s.line == 0) {
+	if (cd->line == 0) {
 		for_each_online_cpu(cpu) {
 			int index = octeon_coreid_for_cpu(cpu) * 2;
-			clear_bit(cd.s.bit, &per_cpu(octeon_irq_ciu0_en_mirror, cpu));
+			clear_bit(cd->bit,
+				&per_cpu(octeon_irq_ciu0_en_mirror, cpu));
 			cvmx_write_csr(CVMX_CIU_INTX_EN0_W1C(index), mask);
 		}
 	} else {
 		for_each_online_cpu(cpu) {
 			int index = octeon_coreid_for_cpu(cpu) * 2 + 1;
-			clear_bit(cd.s.bit, &per_cpu(octeon_irq_ciu1_en_mirror, cpu));
+			clear_bit(cd->bit,
+				&per_cpu(octeon_irq_ciu1_en_mirror, cpu));
 			cvmx_write_csr(CVMX_CIU_INTX_EN1_W1C(index), mask);
 		}
 	}
@@ -514,21 +603,23 @@
 {
 	int cpu;
 	u64 mask;
-	union octeon_ciu_chip_data cd;
+	struct octeon_ciu_chip_data *cd;
 
-	cd.p = irq_data_get_irq_chip_data(data);
-	mask = 1ull << (cd.s.bit);
+	cd = irq_data_get_irq_chip_data(data);
+	mask = 1ull << (cd->bit);
 
-	if (cd.s.line == 0) {
+	if (cd->line == 0) {
 		for_each_online_cpu(cpu) {
 			int index = octeon_coreid_for_cpu(cpu) * 2;
-			set_bit(cd.s.bit, &per_cpu(octeon_irq_ciu0_en_mirror, cpu));
+			set_bit(cd->bit,
+				&per_cpu(octeon_irq_ciu0_en_mirror, cpu));
 			cvmx_write_csr(CVMX_CIU_INTX_EN0_W1S(index), mask);
 		}
 	} else {
 		for_each_online_cpu(cpu) {
 			int index = octeon_coreid_for_cpu(cpu) * 2 + 1;
-			set_bit(cd.s.bit, &per_cpu(octeon_irq_ciu1_en_mirror, cpu));
+			set_bit(cd->bit,
+				&per_cpu(octeon_irq_ciu1_en_mirror, cpu));
 			cvmx_write_csr(CVMX_CIU_INTX_EN1_W1S(index), mask);
 		}
 	}
@@ -537,10 +628,10 @@
 static void octeon_irq_gpio_setup(struct irq_data *data)
 {
 	union cvmx_gpio_bit_cfgx cfg;
-	union octeon_ciu_chip_data cd;
+	struct octeon_ciu_chip_data *cd;
 	u32 t = irqd_get_trigger_type(data);
 
-	cd.p = irq_data_get_irq_chip_data(data);
+	cd = irq_data_get_irq_chip_data(data);
 
 	cfg.u64 = 0;
 	cfg.s.int_en = 1;
@@ -551,7 +642,7 @@
 	cfg.s.fil_cnt = 7;
 	cfg.s.fil_sel = 3;
 
-	cvmx_write_csr(CVMX_GPIO_BIT_CFGX(cd.s.gpio_line), cfg.u64);
+	cvmx_write_csr(CVMX_GPIO_BIT_CFGX(cd->gpio_line), cfg.u64);
 }
 
 static void octeon_irq_ciu_enable_gpio_v2(struct irq_data *data)
@@ -576,36 +667,36 @@
 
 static void octeon_irq_ciu_disable_gpio_v2(struct irq_data *data)
 {
-	union octeon_ciu_chip_data cd;
+	struct octeon_ciu_chip_data *cd;
 
-	cd.p = irq_data_get_irq_chip_data(data);
-	cvmx_write_csr(CVMX_GPIO_BIT_CFGX(cd.s.gpio_line), 0);
+	cd = irq_data_get_irq_chip_data(data);
+	cvmx_write_csr(CVMX_GPIO_BIT_CFGX(cd->gpio_line), 0);
 
 	octeon_irq_ciu_disable_all_v2(data);
 }
 
 static void octeon_irq_ciu_disable_gpio(struct irq_data *data)
 {
-	union octeon_ciu_chip_data cd;
+	struct octeon_ciu_chip_data *cd;
 
-	cd.p = irq_data_get_irq_chip_data(data);
-	cvmx_write_csr(CVMX_GPIO_BIT_CFGX(cd.s.gpio_line), 0);
+	cd = irq_data_get_irq_chip_data(data);
+	cvmx_write_csr(CVMX_GPIO_BIT_CFGX(cd->gpio_line), 0);
 
 	octeon_irq_ciu_disable_all(data);
 }
 
 static void octeon_irq_ciu_gpio_ack(struct irq_data *data)
 {
-	union octeon_ciu_chip_data cd;
+	struct octeon_ciu_chip_data *cd;
 	u64 mask;
 
-	cd.p = irq_data_get_irq_chip_data(data);
-	mask = 1ull << (cd.s.gpio_line);
+	cd = irq_data_get_irq_chip_data(data);
+	mask = 1ull << (cd->gpio_line);
 
 	cvmx_write_csr(CVMX_GPIO_INT_CLR, mask);
 }
 
-static void octeon_irq_handle_gpio(unsigned int irq, struct irq_desc *desc)
+static void octeon_irq_handle_trigger(unsigned int irq, struct irq_desc *desc)
 {
 	if (irq_get_trigger_type(irq) & IRQ_TYPE_EDGE_BOTH)
 		handle_edge_irq(irq, desc);
@@ -644,11 +735,11 @@
 	int cpu;
 	bool enable_one = !irqd_irq_disabled(data) && !irqd_irq_masked(data);
 	unsigned long flags;
-	union octeon_ciu_chip_data cd;
+	struct octeon_ciu_chip_data *cd;
 	unsigned long *pen;
 	raw_spinlock_t *lock;
 
-	cd.p = irq_data_get_irq_chip_data(data);
+	cd = irq_data_get_irq_chip_data(data);
 
 	/*
 	 * For non-v2 CIU, we will allow only single CPU affinity.
@@ -668,16 +759,16 @@
 		lock = &per_cpu(octeon_irq_ciu_spinlock, cpu);
 		raw_spin_lock_irqsave(lock, flags);
 
-		if (cd.s.line == 0)
+		if (cd->line == 0)
 			pen = &per_cpu(octeon_irq_ciu0_en_mirror, cpu);
 		else
 			pen = &per_cpu(octeon_irq_ciu1_en_mirror, cpu);
 
 		if (cpumask_test_cpu(cpu, dest) && enable_one) {
 			enable_one = 0;
-			__set_bit(cd.s.bit, pen);
+			__set_bit(cd->bit, pen);
 		} else {
-			__clear_bit(cd.s.bit, pen);
+			__clear_bit(cd->bit, pen);
 		}
 		/*
 		 * Must be visible to octeon_irq_ip{2,3}_ciu() before
@@ -685,7 +776,7 @@
 		 */
 		wmb();
 
-		if (cd.s.line == 0)
+		if (cd->line == 0)
 			cvmx_write_csr(CVMX_CIU_INTX_EN0(coreid * 2), *pen);
 		else
 			cvmx_write_csr(CVMX_CIU_INTX_EN1(coreid * 2 + 1), *pen);
@@ -706,24 +797,24 @@
 	int cpu;
 	bool enable_one = !irqd_irq_disabled(data) && !irqd_irq_masked(data);
 	u64 mask;
-	union octeon_ciu_chip_data cd;
+	struct octeon_ciu_chip_data *cd;
 
 	if (!enable_one)
 		return 0;
 
-	cd.p = irq_data_get_irq_chip_data(data);
-	mask = 1ull << cd.s.bit;
+	cd = irq_data_get_irq_chip_data(data);
+	mask = 1ull << cd->bit;
 
-	if (cd.s.line == 0) {
+	if (cd->line == 0) {
 		for_each_online_cpu(cpu) {
 			unsigned long *pen = &per_cpu(octeon_irq_ciu0_en_mirror, cpu);
 			int index = octeon_coreid_for_cpu(cpu) * 2;
 			if (cpumask_test_cpu(cpu, dest) && enable_one) {
 				enable_one = false;
-				set_bit(cd.s.bit, pen);
+				set_bit(cd->bit, pen);
 				cvmx_write_csr(CVMX_CIU_INTX_EN0_W1S(index), mask);
 			} else {
-				clear_bit(cd.s.bit, pen);
+				clear_bit(cd->bit, pen);
 				cvmx_write_csr(CVMX_CIU_INTX_EN0_W1C(index), mask);
 			}
 		}
@@ -733,16 +824,44 @@
 			int index = octeon_coreid_for_cpu(cpu) * 2 + 1;
 			if (cpumask_test_cpu(cpu, dest) && enable_one) {
 				enable_one = false;
-				set_bit(cd.s.bit, pen);
+				set_bit(cd->bit, pen);
 				cvmx_write_csr(CVMX_CIU_INTX_EN1_W1S(index), mask);
 			} else {
-				clear_bit(cd.s.bit, pen);
+				clear_bit(cd->bit, pen);
 				cvmx_write_csr(CVMX_CIU_INTX_EN1_W1C(index), mask);
 			}
 		}
 	}
 	return 0;
 }
+
+static int octeon_irq_ciu_set_affinity_sum2(struct irq_data *data,
+					    const struct cpumask *dest,
+					    bool force)
+{
+	int cpu;
+	bool enable_one = !irqd_irq_disabled(data) && !irqd_irq_masked(data);
+	u64 mask;
+	struct octeon_ciu_chip_data *cd;
+
+	if (!enable_one)
+		return 0;
+
+	cd = irq_data_get_irq_chip_data(data);
+	mask = 1ull << cd->bit;
+
+	for_each_online_cpu(cpu) {
+		int index = octeon_coreid_for_cpu(cpu);
+
+		if (cpumask_test_cpu(cpu, dest) && enable_one) {
+			enable_one = false;
+			cvmx_write_csr(CVMX_CIU_EN2_PPX_IP4_W1S(index), mask);
+		} else {
+			cvmx_write_csr(CVMX_CIU_EN2_PPX_IP4_W1C(index), mask);
+		}
+	}
+	return 0;
+}
 #endif
 
 /*
@@ -752,6 +871,18 @@
 	.name = "CIU",
 	.irq_enable = octeon_irq_ciu_enable_v2,
 	.irq_disable = octeon_irq_ciu_disable_all_v2,
+	.irq_mask = octeon_irq_ciu_disable_local_v2,
+	.irq_unmask = octeon_irq_ciu_enable_v2,
+#ifdef CONFIG_SMP
+	.irq_set_affinity = octeon_irq_ciu_set_affinity_v2,
+	.irq_cpu_offline = octeon_irq_cpu_offline_ciu,
+#endif
+};
+
+static struct irq_chip octeon_irq_chip_ciu_v2_edge = {
+	.name = "CIU",
+	.irq_enable = octeon_irq_ciu_enable_v2,
+	.irq_disable = octeon_irq_ciu_disable_all_v2,
 	.irq_ack = octeon_irq_ciu_ack,
 	.irq_mask = octeon_irq_ciu_disable_local_v2,
 	.irq_unmask = octeon_irq_ciu_enable_v2,
@@ -761,10 +892,50 @@
 #endif
 };
 
+/*
+ * Newer octeon chips have support for lockless CIU operation.
+ */
+static struct irq_chip octeon_irq_chip_ciu_sum2 = {
+	.name = "CIU",
+	.irq_enable = octeon_irq_ciu_enable_sum2,
+	.irq_disable = octeon_irq_ciu_disable_all_sum2,
+	.irq_mask = octeon_irq_ciu_disable_local_sum2,
+	.irq_unmask = octeon_irq_ciu_enable_sum2,
+#ifdef CONFIG_SMP
+	.irq_set_affinity = octeon_irq_ciu_set_affinity_sum2,
+	.irq_cpu_offline = octeon_irq_cpu_offline_ciu,
+#endif
+};
+
+static struct irq_chip octeon_irq_chip_ciu_sum2_edge = {
+	.name = "CIU",
+	.irq_enable = octeon_irq_ciu_enable_sum2,
+	.irq_disable = octeon_irq_ciu_disable_all_sum2,
+	.irq_ack = octeon_irq_ciu_ack_sum2,
+	.irq_mask = octeon_irq_ciu_disable_local_sum2,
+	.irq_unmask = octeon_irq_ciu_enable_sum2,
+#ifdef CONFIG_SMP
+	.irq_set_affinity = octeon_irq_ciu_set_affinity_sum2,
+	.irq_cpu_offline = octeon_irq_cpu_offline_ciu,
+#endif
+};
+
 static struct irq_chip octeon_irq_chip_ciu = {
 	.name = "CIU",
 	.irq_enable = octeon_irq_ciu_enable,
 	.irq_disable = octeon_irq_ciu_disable_all,
+	.irq_mask = octeon_irq_ciu_disable_local,
+	.irq_unmask = octeon_irq_ciu_enable,
+#ifdef CONFIG_SMP
+	.irq_set_affinity = octeon_irq_ciu_set_affinity,
+	.irq_cpu_offline = octeon_irq_cpu_offline_ciu,
+#endif
+};
+
+static struct irq_chip octeon_irq_chip_ciu_edge = {
+	.name = "CIU",
+	.irq_enable = octeon_irq_ciu_enable,
+	.irq_disable = octeon_irq_ciu_disable_all,
 	.irq_ack = octeon_irq_ciu_ack,
 	.irq_mask = octeon_irq_ciu_disable_local,
 	.irq_unmask = octeon_irq_ciu_enable,
@@ -970,11 +1141,12 @@
 			       unsigned int *out_type)
 {
 	unsigned int ciu, bit;
+	struct octeon_irq_ciu_domain_data *dd = d->host_data;
 
 	ciu = intspec[0];
 	bit = intspec[1];
 
-	if (ciu > 1 || bit > 63)
+	if (ciu >= dd->num_sum || bit > 63)
 		return -EINVAL;
 
 	*out_hwirq = (ciu << 6) | bit;
@@ -984,6 +1156,7 @@
 }
 
 static struct irq_chip *octeon_irq_ciu_chip;
+static struct irq_chip *octeon_irq_ciu_chip_edge;
 static struct irq_chip *octeon_irq_gpio_chip;
 
 static bool octeon_irq_virq_in_range(unsigned int virq)
@@ -999,8 +1172,10 @@
 static int octeon_irq_ciu_map(struct irq_domain *d,
 			      unsigned int virq, irq_hw_number_t hw)
 {
+	int rv;
 	unsigned int line = hw >> 6;
 	unsigned int bit = hw & 63;
+	struct octeon_irq_ciu_domain_data *dd = d->host_data;
 
 	if (!octeon_irq_virq_in_range(virq))
 		return -EINVAL;
@@ -1009,54 +1184,61 @@
 	if (line == 0 && bit >= 16 && bit <32)
 		return 0;
 
-	if (line > 1 || octeon_irq_ciu_to_irq[line][bit] != 0)
+	if (line >= dd->num_sum || octeon_irq_ciu_to_irq[line][bit] != 0)
 		return -EINVAL;
 
-	if (octeon_irq_ciu_is_edge(line, bit))
-		octeon_irq_set_ciu_mapping(virq, line, bit, 0,
-					   octeon_irq_ciu_chip,
-					   handle_edge_irq);
-	else
-		octeon_irq_set_ciu_mapping(virq, line, bit, 0,
-					   octeon_irq_ciu_chip,
-					   handle_level_irq);
-
-	return 0;
+	if (line == 2) {
+		if (octeon_irq_ciu_is_edge(line, bit))
+			rv = octeon_irq_set_ciu_mapping(virq, line, bit, 0,
+				&octeon_irq_chip_ciu_sum2_edge,
+				handle_edge_irq);
+		else
+			rv = octeon_irq_set_ciu_mapping(virq, line, bit, 0,
+				&octeon_irq_chip_ciu_sum2,
+				handle_level_irq);
+	} else {
+		if (octeon_irq_ciu_is_edge(line, bit))
+			rv = octeon_irq_set_ciu_mapping(virq, line, bit, 0,
+				octeon_irq_ciu_chip_edge,
+				handle_edge_irq);
+		else
+			rv = octeon_irq_set_ciu_mapping(virq, line, bit, 0,
+				octeon_irq_ciu_chip,
+				handle_level_irq);
+	}
+	return rv;
 }
 
-static int octeon_irq_gpio_map_common(struct irq_domain *d,
-				      unsigned int virq, irq_hw_number_t hw,
-				      int line_limit, struct irq_chip *chip)
+static int octeon_irq_gpio_map(struct irq_domain *d,
+			       unsigned int virq, irq_hw_number_t hw)
 {
 	struct octeon_irq_gpio_domain_data *gpiod = d->host_data;
 	unsigned int line, bit;
+	int r;
 
 	if (!octeon_irq_virq_in_range(virq))
 		return -EINVAL;
 
 	line = (hw + gpiod->base_hwirq) >> 6;
 	bit = (hw + gpiod->base_hwirq) & 63;
-	if (line > line_limit || octeon_irq_ciu_to_irq[line][bit] != 0)
+	if (line > ARRAY_SIZE(octeon_irq_ciu_to_irq) ||
+		octeon_irq_ciu_to_irq[line][bit] != 0)
 		return -EINVAL;
 
-	octeon_irq_set_ciu_mapping(virq, line, bit, hw,
-				   chip, octeon_irq_handle_gpio);
-	return 0;
-}
-
-static int octeon_irq_gpio_map(struct irq_domain *d,
-			       unsigned int virq, irq_hw_number_t hw)
-{
-	return octeon_irq_gpio_map_common(d, virq, hw, 1, octeon_irq_gpio_chip);
+	r = octeon_irq_set_ciu_mapping(virq, line, bit, hw,
+		octeon_irq_gpio_chip, octeon_irq_handle_trigger);
+	return r;
 }
 
 static struct irq_domain_ops octeon_irq_domain_ciu_ops = {
 	.map = octeon_irq_ciu_map,
+	.unmap = octeon_irq_free_cd,
 	.xlate = octeon_irq_ciu_xlat,
 };
 
 static struct irq_domain_ops octeon_irq_domain_gpio_ops = {
 	.map = octeon_irq_gpio_map,
+	.unmap = octeon_irq_free_cd,
 	.xlate = octeon_irq_gpio_xlat,
 };
 
@@ -1095,6 +1277,26 @@
 	}
 }
 
+static void octeon_irq_ip4_ciu(void)
+{
+	int coreid = cvmx_get_core_num();
+	u64 ciu_sum = cvmx_read_csr(CVMX_CIU_SUM2_PPX_IP4(coreid));
+	u64 ciu_en = cvmx_read_csr(CVMX_CIU_EN2_PPX_IP4(coreid));
+
+	ciu_sum &= ciu_en;
+	if (likely(ciu_sum)) {
+		int bit = fls64(ciu_sum) - 1;
+		int irq = octeon_irq_ciu_to_irq[2][bit];
+
+		if (likely(irq))
+			do_IRQ(irq);
+		else
+			spurious_interrupt();
+	} else {
+		spurious_interrupt();
+	}
+}
+
 static bool octeon_irq_use_ip4;
 
 static void octeon_irq_local_enable_ip4(void *arg)
@@ -1176,7 +1378,10 @@
 
 	/* Enable the CIU lines */
 	set_c0_status(STATUSF_IP3 | STATUSF_IP2);
-	clear_c0_status(STATUSF_IP4);
+	if (octeon_irq_use_ip4)
+		set_c0_status(STATUSF_IP4);
+	else
+		clear_c0_status(STATUSF_IP4);
 }
 
 static void octeon_irq_setup_secondary_ciu2(void)
@@ -1192,95 +1397,194 @@
 		clear_c0_status(STATUSF_IP4);
 }
 
-static void __init octeon_irq_init_ciu(void)
+static int __init octeon_irq_init_ciu(
+	struct device_node *ciu_node, struct device_node *parent)
 {
-	unsigned int i;
+	unsigned int i, r;
 	struct irq_chip *chip;
+	struct irq_chip *chip_edge;
 	struct irq_chip *chip_mbox;
 	struct irq_chip *chip_wd;
-	struct device_node *gpio_node;
-	struct device_node *ciu_node;
 	struct irq_domain *ciu_domain = NULL;
+	struct octeon_irq_ciu_domain_data *dd;
+
+	dd = kzalloc(sizeof(*dd), GFP_KERNEL);
+	if (!dd)
+		return -ENOMEM;
 
 	octeon_irq_init_ciu_percpu();
 	octeon_irq_setup_secondary = octeon_irq_setup_secondary_ciu;
 
 	octeon_irq_ip2 = octeon_irq_ip2_ciu;
 	octeon_irq_ip3 = octeon_irq_ip3_ciu;
+	if ((OCTEON_IS_OCTEON2() || OCTEON_IS_OCTEON3())
+		&& !OCTEON_IS_MODEL(OCTEON_CN63XX)) {
+		octeon_irq_ip4 =  octeon_irq_ip4_ciu;
+		dd->num_sum = 3;
+		octeon_irq_use_ip4 = true;
+	} else {
+		octeon_irq_ip4 = octeon_irq_ip4_mask;
+		dd->num_sum = 2;
+		octeon_irq_use_ip4 = false;
+	}
 	if (OCTEON_IS_MODEL(OCTEON_CN58XX_PASS2_X) ||
 	    OCTEON_IS_MODEL(OCTEON_CN56XX_PASS2_X) ||
 	    OCTEON_IS_MODEL(OCTEON_CN52XX_PASS2_X) ||
-	    OCTEON_IS_MODEL(OCTEON_CN6XXX)) {
+	    OCTEON_IS_OCTEON2() || OCTEON_IS_OCTEON3()) {
 		chip = &octeon_irq_chip_ciu_v2;
+		chip_edge = &octeon_irq_chip_ciu_v2_edge;
 		chip_mbox = &octeon_irq_chip_ciu_mbox_v2;
 		chip_wd = &octeon_irq_chip_ciu_wd_v2;
 		octeon_irq_gpio_chip = &octeon_irq_chip_ciu_gpio_v2;
 	} else {
 		chip = &octeon_irq_chip_ciu;
+		chip_edge = &octeon_irq_chip_ciu_edge;
 		chip_mbox = &octeon_irq_chip_ciu_mbox;
 		chip_wd = &octeon_irq_chip_ciu_wd;
 		octeon_irq_gpio_chip = &octeon_irq_chip_ciu_gpio;
 	}
 	octeon_irq_ciu_chip = chip;
-	octeon_irq_ip4 = octeon_irq_ip4_mask;
+	octeon_irq_ciu_chip_edge = chip_edge;
 
 	/* Mips internal */
 	octeon_irq_init_core();
 
-	gpio_node = of_find_compatible_node(NULL, NULL, "cavium,octeon-3860-gpio");
-	if (gpio_node) {
-		struct octeon_irq_gpio_domain_data *gpiod;
-
-		gpiod = kzalloc(sizeof(*gpiod), GFP_KERNEL);
-		if (gpiod) {
-			/* gpio domain host_data is the base hwirq number. */
-			gpiod->base_hwirq = 16;
-			irq_domain_add_linear(gpio_node, 16, &octeon_irq_domain_gpio_ops, gpiod);
-			of_node_put(gpio_node);
-		} else
-			pr_warn("Cannot allocate memory for GPIO irq_domain.\n");
-	} else
-		pr_warn("Cannot find device node for cavium,octeon-3860-gpio.\n");
-
-	ciu_node = of_find_compatible_node(NULL, NULL, "cavium,octeon-3860-ciu");
-	if (ciu_node) {
-		ciu_domain = irq_domain_add_tree(ciu_node, &octeon_irq_domain_ciu_ops, NULL);
-		irq_set_default_host(ciu_domain);
-		of_node_put(ciu_node);
-	} else
-		panic("Cannot find device node for cavium,octeon-3860-ciu.");
+	ciu_domain = irq_domain_add_tree(
+		ciu_node, &octeon_irq_domain_ciu_ops, dd);
+	irq_set_default_host(ciu_domain);
 
 	/* CIU_0 */
-	for (i = 0; i < 16; i++)
-		octeon_irq_force_ciu_mapping(ciu_domain, i + OCTEON_IRQ_WORKQ0, 0, i + 0);
+	for (i = 0; i < 16; i++) {
+		r = octeon_irq_force_ciu_mapping(
+			ciu_domain, i + OCTEON_IRQ_WORKQ0, 0, i + 0);
+		if (r)
+			goto err;
+	}
 
-	octeon_irq_set_ciu_mapping(OCTEON_IRQ_MBOX0, 0, 32, 0, chip_mbox, handle_percpu_irq);
-	octeon_irq_set_ciu_mapping(OCTEON_IRQ_MBOX1, 0, 33, 0, chip_mbox, handle_percpu_irq);
+	r = octeon_irq_set_ciu_mapping(
+		OCTEON_IRQ_MBOX0, 0, 32, 0, chip_mbox, handle_percpu_irq);
+	if (r)
+		goto err;
+	r = octeon_irq_set_ciu_mapping(
+		OCTEON_IRQ_MBOX1, 0, 33, 0, chip_mbox, handle_percpu_irq);
+	if (r)
+		goto err;
 
-	for (i = 0; i < 4; i++)
-		octeon_irq_force_ciu_mapping(ciu_domain, i + OCTEON_IRQ_PCI_INT0, 0, i + 36);
-	for (i = 0; i < 4; i++)
-		octeon_irq_force_ciu_mapping(ciu_domain, i + OCTEON_IRQ_PCI_MSI0, 0, i + 40);
+	for (i = 0; i < 4; i++) {
+		r = octeon_irq_force_ciu_mapping(
+			ciu_domain, i + OCTEON_IRQ_PCI_INT0, 0, i + 36);
+		if (r)
+			goto err;
+	}
+	for (i = 0; i < 4; i++) {
+		r = octeon_irq_force_ciu_mapping(
+			ciu_domain, i + OCTEON_IRQ_PCI_MSI0, 0, i + 40);
+		if (r)
+			goto err;
+	}
 
-	octeon_irq_force_ciu_mapping(ciu_domain, OCTEON_IRQ_TWSI, 0, 45);
-	octeon_irq_force_ciu_mapping(ciu_domain, OCTEON_IRQ_RML, 0, 46);
-	for (i = 0; i < 4; i++)
-		octeon_irq_force_ciu_mapping(ciu_domain, i + OCTEON_IRQ_TIMER0, 0, i + 52);
+	r = octeon_irq_force_ciu_mapping(ciu_domain, OCTEON_IRQ_TWSI, 0, 45);
+	if (r)
+		goto err;
 
-	octeon_irq_force_ciu_mapping(ciu_domain, OCTEON_IRQ_USB0, 0, 56);
-	octeon_irq_force_ciu_mapping(ciu_domain, OCTEON_IRQ_TWSI2, 0, 59);
+	r = octeon_irq_force_ciu_mapping(ciu_domain, OCTEON_IRQ_RML, 0, 46);
+	if (r)
+		goto err;
+
+	for (i = 0; i < 4; i++) {
+		r = octeon_irq_force_ciu_mapping(
+			ciu_domain, i + OCTEON_IRQ_TIMER0, 0, i + 52);
+		if (r)
+			goto err;
+	}
+
+	r = octeon_irq_force_ciu_mapping(ciu_domain, OCTEON_IRQ_USB0, 0, 56);
+	if (r)
+		goto err;
+
+	r = octeon_irq_force_ciu_mapping(ciu_domain, OCTEON_IRQ_TWSI2, 0, 59);
+	if (r)
+		goto err;
 
 	/* CIU_1 */
-	for (i = 0; i < 16; i++)
-		octeon_irq_set_ciu_mapping(i + OCTEON_IRQ_WDOG0, 1, i + 0, 0, chip_wd, handle_level_irq);
+	for (i = 0; i < 16; i++) {
+		r = octeon_irq_set_ciu_mapping(
+			i + OCTEON_IRQ_WDOG0, 1, i + 0, 0, chip_wd,
+			handle_level_irq);
+		if (r)
+			goto err;
+	}
 
-	octeon_irq_force_ciu_mapping(ciu_domain, OCTEON_IRQ_USB1, 1, 17);
+	r = octeon_irq_force_ciu_mapping(ciu_domain, OCTEON_IRQ_USB1, 1, 17);
+	if (r)
+		goto err;
 
 	/* Enable the CIU lines */
 	set_c0_status(STATUSF_IP3 | STATUSF_IP2);
-	clear_c0_status(STATUSF_IP4);
+	if (octeon_irq_use_ip4)
+		set_c0_status(STATUSF_IP4);
+	else
+		clear_c0_status(STATUSF_IP4);
+
+	return 0;
+err:
+	return r;
 }
 
+static int __init octeon_irq_init_gpio(
+	struct device_node *gpio_node, struct device_node *parent)
+{
+	struct octeon_irq_gpio_domain_data *gpiod;
+	u32 interrupt_cells;
+	unsigned int base_hwirq;
+	int r;
+
+	r = of_property_read_u32(parent, "#interrupt-cells", &interrupt_cells);
+	if (r)
+		return r;
+
+	if (interrupt_cells == 1) {
+		u32 v;
+
+		r = of_property_read_u32_index(gpio_node, "interrupts", 0, &v);
+		if (r) {
+			pr_warn("No \"interrupts\" property.\n");
+			return r;
+		}
+		base_hwirq = v;
+	} else if (interrupt_cells == 2) {
+		u32 v0, v1;
+
+		r = of_property_read_u32_index(gpio_node, "interrupts", 0, &v0);
+		if (r) {
+			pr_warn("No \"interrupts\" property.\n");
+			return r;
+		}
+		r = of_property_read_u32_index(gpio_node, "interrupts", 1, &v1);
+		if (r) {
+			pr_warn("No \"interrupts\" property.\n");
+			return r;
+		}
+		base_hwirq = (v0 << 6) | v1;
+	} else {
+		pr_warn("Bad \"#interrupt-cells\" property: %u\n",
+			interrupt_cells);
+		return -EINVAL;
+	}
+
+	gpiod = kzalloc(sizeof(*gpiod), GFP_KERNEL);
+	if (gpiod) {
+		/* gpio domain host_data is the base hwirq number. */
+		gpiod->base_hwirq = base_hwirq;
+		irq_domain_add_linear(
+			gpio_node, 16, &octeon_irq_domain_gpio_ops, gpiod);
+	} else {
+		pr_warn("Cannot allocate memory for GPIO irq_domain.\n");
+		return -ENOMEM;
+	}
+
+	return 0;
+}
 /*
  * Watchdog interrupts are special.  They are associated with a single
  * core, so we hardwire the affinity to that core.
@@ -1290,12 +1594,13 @@
 	u64 mask;
 	u64 en_addr;
 	int coreid = data->irq - OCTEON_IRQ_WDOG0;
-	union octeon_ciu_chip_data cd;
+	struct octeon_ciu_chip_data *cd;
 
-	cd.p = irq_data_get_irq_chip_data(data);
-	mask = 1ull << (cd.s.bit);
+	cd = irq_data_get_irq_chip_data(data);
+	mask = 1ull << (cd->bit);
 
-	en_addr = CVMX_CIU2_EN_PPX_IP2_WRKQ_W1S(coreid) + (0x1000ull * cd.s.line);
+	en_addr = CVMX_CIU2_EN_PPX_IP2_WRKQ_W1S(coreid) +
+		(0x1000ull * cd->line);
 	cvmx_write_csr(en_addr, mask);
 
 }
@@ -1306,12 +1611,13 @@
 	u64 en_addr;
 	int cpu = next_cpu_for_irq(data);
 	int coreid = octeon_coreid_for_cpu(cpu);
-	union octeon_ciu_chip_data cd;
+	struct octeon_ciu_chip_data *cd;
 
-	cd.p = irq_data_get_irq_chip_data(data);
-	mask = 1ull << (cd.s.bit);
+	cd = irq_data_get_irq_chip_data(data);
+	mask = 1ull << (cd->bit);
 
-	en_addr = CVMX_CIU2_EN_PPX_IP2_WRKQ_W1S(coreid) + (0x1000ull * cd.s.line);
+	en_addr = CVMX_CIU2_EN_PPX_IP2_WRKQ_W1S(coreid) +
+		(0x1000ull * cd->line);
 	cvmx_write_csr(en_addr, mask);
 }
 
@@ -1320,12 +1626,13 @@
 	u64 mask;
 	u64 en_addr;
 	int coreid = cvmx_get_core_num();
-	union octeon_ciu_chip_data cd;
+	struct octeon_ciu_chip_data *cd;
 
-	cd.p = irq_data_get_irq_chip_data(data);
-	mask = 1ull << (cd.s.bit);
+	cd = irq_data_get_irq_chip_data(data);
+	mask = 1ull << (cd->bit);
 
-	en_addr = CVMX_CIU2_EN_PPX_IP2_WRKQ_W1S(coreid) + (0x1000ull * cd.s.line);
+	en_addr = CVMX_CIU2_EN_PPX_IP2_WRKQ_W1S(coreid) +
+		(0x1000ull * cd->line);
 	cvmx_write_csr(en_addr, mask);
 
 }
@@ -1335,12 +1642,13 @@
 	u64 mask;
 	u64 en_addr;
 	int coreid = cvmx_get_core_num();
-	union octeon_ciu_chip_data cd;
+	struct octeon_ciu_chip_data *cd;
 
-	cd.p = irq_data_get_irq_chip_data(data);
-	mask = 1ull << (cd.s.bit);
+	cd = irq_data_get_irq_chip_data(data);
+	mask = 1ull << (cd->bit);
 
-	en_addr = CVMX_CIU2_EN_PPX_IP2_WRKQ_W1C(coreid) + (0x1000ull * cd.s.line);
+	en_addr = CVMX_CIU2_EN_PPX_IP2_WRKQ_W1C(coreid) +
+		(0x1000ull * cd->line);
 	cvmx_write_csr(en_addr, mask);
 
 }
@@ -1350,12 +1658,12 @@
 	u64 mask;
 	u64 en_addr;
 	int coreid = cvmx_get_core_num();
-	union octeon_ciu_chip_data cd;
+	struct octeon_ciu_chip_data *cd;
 
-	cd.p = irq_data_get_irq_chip_data(data);
-	mask = 1ull << (cd.s.bit);
+	cd = irq_data_get_irq_chip_data(data);
+	mask = 1ull << (cd->bit);
 
-	en_addr = CVMX_CIU2_RAW_PPX_IP2_WRKQ(coreid) + (0x1000ull * cd.s.line);
+	en_addr = CVMX_CIU2_RAW_PPX_IP2_WRKQ(coreid) + (0x1000ull * cd->line);
 	cvmx_write_csr(en_addr, mask);
 
 }
@@ -1364,13 +1672,14 @@
 {
 	int cpu;
 	u64 mask;
-	union octeon_ciu_chip_data cd;
+	struct octeon_ciu_chip_data *cd;
 
-	cd.p = irq_data_get_irq_chip_data(data);
-	mask = 1ull << (cd.s.bit);
+	cd = irq_data_get_irq_chip_data(data);
+	mask = 1ull << (cd->bit);
 
 	for_each_online_cpu(cpu) {
-		u64 en_addr = CVMX_CIU2_EN_PPX_IP2_WRKQ_W1C(octeon_coreid_for_cpu(cpu)) + (0x1000ull * cd.s.line);
+		u64 en_addr = CVMX_CIU2_EN_PPX_IP2_WRKQ_W1C(
+			octeon_coreid_for_cpu(cpu)) + (0x1000ull * cd->line);
 		cvmx_write_csr(en_addr, mask);
 	}
 }
@@ -1383,7 +1692,8 @@
 	mask = 1ull << (data->irq - OCTEON_IRQ_MBOX0);
 
 	for_each_online_cpu(cpu) {
-		u64 en_addr = CVMX_CIU2_EN_PPX_IP3_MBOX_W1S(octeon_coreid_for_cpu(cpu));
+		u64 en_addr = CVMX_CIU2_EN_PPX_IP3_MBOX_W1S(
+			octeon_coreid_for_cpu(cpu));
 		cvmx_write_csr(en_addr, mask);
 	}
 }
@@ -1396,7 +1706,8 @@
 	mask = 1ull << (data->irq - OCTEON_IRQ_MBOX0);
 
 	for_each_online_cpu(cpu) {
-		u64 en_addr = CVMX_CIU2_EN_PPX_IP3_MBOX_W1C(octeon_coreid_for_cpu(cpu));
+		u64 en_addr = CVMX_CIU2_EN_PPX_IP3_MBOX_W1C(
+			octeon_coreid_for_cpu(cpu));
 		cvmx_write_csr(en_addr, mask);
 	}
 }
@@ -1430,21 +1741,25 @@
 	int cpu;
 	bool enable_one = !irqd_irq_disabled(data) && !irqd_irq_masked(data);
 	u64 mask;
-	union octeon_ciu_chip_data cd;
+	struct octeon_ciu_chip_data *cd;
 
 	if (!enable_one)
 		return 0;
 
-	cd.p = irq_data_get_irq_chip_data(data);
-	mask = 1ull << cd.s.bit;
+	cd = irq_data_get_irq_chip_data(data);
+	mask = 1ull << cd->bit;
 
 	for_each_online_cpu(cpu) {
 		u64 en_addr;
 		if (cpumask_test_cpu(cpu, dest) && enable_one) {
 			enable_one = false;
-			en_addr = CVMX_CIU2_EN_PPX_IP2_WRKQ_W1S(octeon_coreid_for_cpu(cpu)) + (0x1000ull * cd.s.line);
+			en_addr = CVMX_CIU2_EN_PPX_IP2_WRKQ_W1S(
+				octeon_coreid_for_cpu(cpu)) +
+				(0x1000ull * cd->line);
 		} else {
-			en_addr = CVMX_CIU2_EN_PPX_IP2_WRKQ_W1C(octeon_coreid_for_cpu(cpu)) + (0x1000ull * cd.s.line);
+			en_addr = CVMX_CIU2_EN_PPX_IP2_WRKQ_W1C(
+				octeon_coreid_for_cpu(cpu)) +
+				(0x1000ull * cd->line);
 		}
 		cvmx_write_csr(en_addr, mask);
 	}
@@ -1461,10 +1776,11 @@
 
 static void octeon_irq_ciu2_disable_gpio(struct irq_data *data)
 {
-	union octeon_ciu_chip_data cd;
-	cd.p = irq_data_get_irq_chip_data(data);
+	struct octeon_ciu_chip_data *cd;
 
-	cvmx_write_csr(CVMX_GPIO_BIT_CFGX(cd.s.gpio_line), 0);
+	cd = irq_data_get_irq_chip_data(data);
+
+	cvmx_write_csr(CVMX_GPIO_BIT_CFGX(cd->gpio_line), 0);
 
 	octeon_irq_ciu2_disable_all(data);
 }
@@ -1473,6 +1789,18 @@
 	.name = "CIU2-E",
 	.irq_enable = octeon_irq_ciu2_enable,
 	.irq_disable = octeon_irq_ciu2_disable_all,
+	.irq_mask = octeon_irq_ciu2_disable_local,
+	.irq_unmask = octeon_irq_ciu2_enable,
+#ifdef CONFIG_SMP
+	.irq_set_affinity = octeon_irq_ciu2_set_affinity,
+	.irq_cpu_offline = octeon_irq_cpu_offline_ciu,
+#endif
+};
+
+static struct irq_chip octeon_irq_chip_ciu2_edge = {
+	.name = "CIU2-E",
+	.irq_enable = octeon_irq_ciu2_enable,
+	.irq_disable = octeon_irq_ciu2_disable_all,
 	.irq_ack = octeon_irq_ciu2_ack,
 	.irq_mask = octeon_irq_ciu2_disable_local,
 	.irq_unmask = octeon_irq_ciu2_enable,
@@ -1582,7 +1910,7 @@
 
 	if (octeon_irq_ciu2_is_edge(line, bit))
 		octeon_irq_set_ciu_mapping(virq, line, bit, 0,
-					   &octeon_irq_chip_ciu2,
+					   &octeon_irq_chip_ciu2_edge,
 					   handle_edge_irq);
 	else
 		octeon_irq_set_ciu_mapping(virq, line, bit, 0,
@@ -1591,22 +1919,13 @@
 
 	return 0;
 }
-static int octeon_irq_ciu2_gpio_map(struct irq_domain *d,
-				    unsigned int virq, irq_hw_number_t hw)
-{
-	return octeon_irq_gpio_map_common(d, virq, hw, 7, &octeon_irq_chip_ciu2_gpio);
-}
 
 static struct irq_domain_ops octeon_irq_domain_ciu2_ops = {
 	.map = octeon_irq_ciu2_map,
+	.unmap = octeon_irq_free_cd,
 	.xlate = octeon_irq_ciu2_xlat,
 };
 
-static struct irq_domain_ops octeon_irq_domain_ciu2_gpio_ops = {
-	.map = octeon_irq_ciu2_gpio_map,
-	.xlate = octeon_irq_gpio_xlat,
-};
-
 static void octeon_irq_ciu2(void)
 {
 	int line;
@@ -1674,16 +1993,16 @@
 	return;
 }
 
-static void __init octeon_irq_init_ciu2(void)
+static int __init octeon_irq_init_ciu2(
+	struct device_node *ciu_node, struct device_node *parent)
 {
-	unsigned int i;
-	struct device_node *gpio_node;
-	struct device_node *ciu_node;
+	unsigned int i, r;
 	struct irq_domain *ciu_domain = NULL;
 
 	octeon_irq_init_ciu2_percpu();
 	octeon_irq_setup_secondary = octeon_irq_setup_secondary_ciu2;
 
+	octeon_irq_gpio_chip = &octeon_irq_chip_ciu2_gpio;
 	octeon_irq_ip2 = octeon_irq_ciu2;
 	octeon_irq_ip3 = octeon_irq_ciu2_mbox;
 	octeon_irq_ip4 = octeon_irq_ip4_mask;
@@ -1691,47 +2010,49 @@
 	/* Mips internal */
 	octeon_irq_init_core();
 
-	gpio_node = of_find_compatible_node(NULL, NULL, "cavium,octeon-3860-gpio");
-	if (gpio_node) {
-		struct octeon_irq_gpio_domain_data *gpiod;
-
-		gpiod = kzalloc(sizeof(*gpiod), GFP_KERNEL);
-		if (gpiod) {
-			/* gpio domain host_data is the base hwirq number. */
-			gpiod->base_hwirq = 7 << 6;
-			irq_domain_add_linear(gpio_node, 16, &octeon_irq_domain_ciu2_gpio_ops, gpiod);
-			of_node_put(gpio_node);
-		} else
-			pr_warn("Cannot allocate memory for GPIO irq_domain.\n");
-	} else
-		pr_warn("Cannot find device node for cavium,octeon-3860-gpio.\n");
-
-	ciu_node = of_find_compatible_node(NULL, NULL, "cavium,octeon-6880-ciu2");
-	if (ciu_node) {
-		ciu_domain = irq_domain_add_tree(ciu_node, &octeon_irq_domain_ciu2_ops, NULL);
-		irq_set_default_host(ciu_domain);
-		of_node_put(ciu_node);
-	} else
-		panic("Cannot find device node for cavium,octeon-6880-ciu2.");
+	ciu_domain = irq_domain_add_tree(
+		ciu_node, &octeon_irq_domain_ciu2_ops, NULL);
+	irq_set_default_host(ciu_domain);
 
 	/* CUI2 */
-	for (i = 0; i < 64; i++)
-		octeon_irq_force_ciu_mapping(ciu_domain, i + OCTEON_IRQ_WORKQ0, 0, i);
+	for (i = 0; i < 64; i++) {
+		r = octeon_irq_force_ciu_mapping(
+			ciu_domain, i + OCTEON_IRQ_WORKQ0, 0, i);
+		if (r)
+			goto err;
+	}
 
-	for (i = 0; i < 32; i++)
-		octeon_irq_set_ciu_mapping(i + OCTEON_IRQ_WDOG0, 1, i, 0,
-					   &octeon_irq_chip_ciu2_wd, handle_level_irq);
+	for (i = 0; i < 32; i++) {
+		r = octeon_irq_set_ciu_mapping(i + OCTEON_IRQ_WDOG0, 1, i, 0,
+			&octeon_irq_chip_ciu2_wd, handle_level_irq);
+		if (r)
+			goto err;
+	}
 
-	for (i = 0; i < 4; i++)
-		octeon_irq_force_ciu_mapping(ciu_domain, i + OCTEON_IRQ_TIMER0, 3, i + 8);
+	for (i = 0; i < 4; i++) {
+		r = octeon_irq_force_ciu_mapping(
+			ciu_domain, i + OCTEON_IRQ_TIMER0, 3, i + 8);
+		if (r)
+			goto err;
+	}
 
-	octeon_irq_force_ciu_mapping(ciu_domain, OCTEON_IRQ_USB0, 3, 44);
+	r = octeon_irq_force_ciu_mapping(ciu_domain, OCTEON_IRQ_USB0, 3, 44);
+	if (r)
+		goto err;
 
-	for (i = 0; i < 4; i++)
-		octeon_irq_force_ciu_mapping(ciu_domain, i + OCTEON_IRQ_PCI_INT0, 4, i);
+	for (i = 0; i < 4; i++) {
+		r = octeon_irq_force_ciu_mapping(
+			ciu_domain, i + OCTEON_IRQ_PCI_INT0, 4, i);
+		if (r)
+			goto err;
+	}
 
-	for (i = 0; i < 4; i++)
-		octeon_irq_force_ciu_mapping(ciu_domain, i + OCTEON_IRQ_PCI_MSI0, 4, i + 8);
+	for (i = 0; i < 4; i++) {
+		r = octeon_irq_force_ciu_mapping(
+			ciu_domain, i + OCTEON_IRQ_PCI_MSI0, 4, i + 8);
+		if (r)
+			goto err;
+	}
 
 	irq_set_chip_and_handler(OCTEON_IRQ_MBOX0, &octeon_irq_chip_ciu2_mbox, handle_percpu_irq);
 	irq_set_chip_and_handler(OCTEON_IRQ_MBOX1, &octeon_irq_chip_ciu2_mbox, handle_percpu_irq);
@@ -1741,8 +2062,242 @@
 	/* Enable the CIU lines */
 	set_c0_status(STATUSF_IP3 | STATUSF_IP2);
 	clear_c0_status(STATUSF_IP4);
+	return 0;
+err:
+	return r;
 }
 
+struct octeon_irq_cib_host_data {
+	raw_spinlock_t lock;
+	u64 raw_reg;
+	u64 en_reg;
+	int max_bits;
+};
+
+struct octeon_irq_cib_chip_data {
+	struct octeon_irq_cib_host_data *host_data;
+	int bit;
+};
+
+static void octeon_irq_cib_enable(struct irq_data *data)
+{
+	unsigned long flags;
+	u64 en;
+	struct octeon_irq_cib_chip_data *cd = irq_data_get_irq_chip_data(data);
+	struct octeon_irq_cib_host_data *host_data = cd->host_data;
+
+	raw_spin_lock_irqsave(&host_data->lock, flags);
+	en = cvmx_read_csr(host_data->en_reg);
+	en |= 1ull << cd->bit;
+	cvmx_write_csr(host_data->en_reg, en);
+	raw_spin_unlock_irqrestore(&host_data->lock, flags);
+}
+
+static void octeon_irq_cib_disable(struct irq_data *data)
+{
+	unsigned long flags;
+	u64 en;
+	struct octeon_irq_cib_chip_data *cd = irq_data_get_irq_chip_data(data);
+	struct octeon_irq_cib_host_data *host_data = cd->host_data;
+
+	raw_spin_lock_irqsave(&host_data->lock, flags);
+	en = cvmx_read_csr(host_data->en_reg);
+	en &= ~(1ull << cd->bit);
+	cvmx_write_csr(host_data->en_reg, en);
+	raw_spin_unlock_irqrestore(&host_data->lock, flags);
+}
+
+static int octeon_irq_cib_set_type(struct irq_data *data, unsigned int t)
+{
+	irqd_set_trigger_type(data, t);
+	return IRQ_SET_MASK_OK;
+}
+
+static struct irq_chip octeon_irq_chip_cib = {
+	.name = "CIB",
+	.irq_enable = octeon_irq_cib_enable,
+	.irq_disable = octeon_irq_cib_disable,
+	.irq_mask = octeon_irq_cib_disable,
+	.irq_unmask = octeon_irq_cib_enable,
+	.irq_set_type = octeon_irq_cib_set_type,
+};
+
+static int octeon_irq_cib_xlat(struct irq_domain *d,
+				   struct device_node *node,
+				   const u32 *intspec,
+				   unsigned int intsize,
+				   unsigned long *out_hwirq,
+				   unsigned int *out_type)
+{
+	unsigned int type = 0;
+
+	if (intsize == 2)
+		type = intspec[1];
+
+	switch (type) {
+	case 0: /* unofficial value, but we might as well let it work. */
+	case 4: /* official value for level triggering. */
+		*out_type = IRQ_TYPE_LEVEL_HIGH;
+		break;
+	case 1: /* official value for edge triggering. */
+		*out_type = IRQ_TYPE_EDGE_RISING;
+		break;
+	default: /* Nothing else is acceptable. */
+		return -EINVAL;
+	}
+
+	*out_hwirq = intspec[0];
+
+	return 0;
+}
+
+static int octeon_irq_cib_map(struct irq_domain *d,
+			      unsigned int virq, irq_hw_number_t hw)
+{
+	struct octeon_irq_cib_host_data *host_data = d->host_data;
+	struct octeon_irq_cib_chip_data *cd;
+
+	if (hw >= host_data->max_bits) {
+		pr_err("ERROR: %s mapping %u is to big!\n",
+		       d->of_node->name, (unsigned)hw);
+		return -EINVAL;
+	}
+
+	cd = kzalloc(sizeof(*cd), GFP_KERNEL);
+	cd->host_data = host_data;
+	cd->bit = hw;
+
+	irq_set_chip_and_handler(virq, &octeon_irq_chip_cib,
+				 handle_simple_irq);
+	irq_set_chip_data(virq, cd);
+	return 0;
+}
+
+static struct irq_domain_ops octeon_irq_domain_cib_ops = {
+	.map = octeon_irq_cib_map,
+	.unmap = octeon_irq_free_cd,
+	.xlate = octeon_irq_cib_xlat,
+};
+
+/* Chain to real handler. */
+static irqreturn_t octeon_irq_cib_handler(int my_irq, void *data)
+{
+	u64 en;
+	u64 raw;
+	u64 bits;
+	int i;
+	int irq;
+	struct irq_domain *cib_domain = data;
+	struct octeon_irq_cib_host_data *host_data = cib_domain->host_data;
+
+	en = cvmx_read_csr(host_data->en_reg);
+	raw = cvmx_read_csr(host_data->raw_reg);
+
+	bits = en & raw;
+
+	for (i = 0; i < host_data->max_bits; i++) {
+		if ((bits & 1ull << i) == 0)
+			continue;
+		irq = irq_find_mapping(cib_domain, i);
+		if (!irq) {
+			unsigned long flags;
+
+			pr_err("ERROR: CIB bit %d@%llx IRQ unhandled, disabling\n",
+				i, host_data->raw_reg);
+			raw_spin_lock_irqsave(&host_data->lock, flags);
+			en = cvmx_read_csr(host_data->en_reg);
+			en &= ~(1ull << i);
+			cvmx_write_csr(host_data->en_reg, en);
+			cvmx_write_csr(host_data->raw_reg, 1ull << i);
+			raw_spin_unlock_irqrestore(&host_data->lock, flags);
+		} else {
+			struct irq_desc *desc = irq_to_desc(irq);
+			struct irq_data *irq_data = irq_desc_get_irq_data(desc);
+			/* If edge, acknowledge the bit we will be sending. */
+			if (irqd_get_trigger_type(irq_data) &
+				IRQ_TYPE_EDGE_BOTH)
+				cvmx_write_csr(host_data->raw_reg, 1ull << i);
+			generic_handle_irq_desc(irq, desc);
+		}
+	}
+
+	return IRQ_HANDLED;
+}
+
+static int __init octeon_irq_init_cib(struct device_node *ciu_node,
+				      struct device_node *parent)
+{
+	const __be32 *addr;
+	u32 val;
+	struct octeon_irq_cib_host_data *host_data;
+	int parent_irq;
+	int r;
+	struct irq_domain *cib_domain;
+
+	parent_irq = irq_of_parse_and_map(ciu_node, 0);
+	if (!parent_irq) {
+		pr_err("ERROR: Couldn't acquire parent_irq for %s\n.",
+			ciu_node->name);
+		return -EINVAL;
+	}
+
+	host_data = kzalloc(sizeof(*host_data), GFP_KERNEL);
+	raw_spin_lock_init(&host_data->lock);
+
+	addr = of_get_address(ciu_node, 0, NULL, NULL);
+	if (!addr) {
+		pr_err("ERROR: Couldn't acquire reg(0) %s\n.", ciu_node->name);
+		return -EINVAL;
+	}
+	host_data->raw_reg = (u64)phys_to_virt(
+		of_translate_address(ciu_node, addr));
+
+	addr = of_get_address(ciu_node, 1, NULL, NULL);
+	if (!addr) {
+		pr_err("ERROR: Couldn't acquire reg(1) %s\n.", ciu_node->name);
+		return -EINVAL;
+	}
+	host_data->en_reg = (u64)phys_to_virt(
+		of_translate_address(ciu_node, addr));
+
+	r = of_property_read_u32(ciu_node, "cavium,max-bits", &val);
+	if (r) {
+		pr_err("ERROR: Couldn't read cavium,max-bits from %s\n.",
+			ciu_node->name);
+		return r;
+	}
+	host_data->max_bits = val;
+
+	cib_domain = irq_domain_add_linear(ciu_node, host_data->max_bits,
+					   &octeon_irq_domain_cib_ops,
+					   host_data);
+	if (!cib_domain) {
+		pr_err("ERROR: Couldn't irq_domain_add_linear()\n.");
+		return -ENOMEM;
+	}
+
+	cvmx_write_csr(host_data->en_reg, 0); /* disable all IRQs */
+	cvmx_write_csr(host_data->raw_reg, ~0); /* ack any outstanding */
+
+	r = request_irq(parent_irq, octeon_irq_cib_handler,
+			IRQF_NO_THREAD, "cib", cib_domain);
+	if (r) {
+		pr_err("request_irq cib failed %d\n", r);
+		return r;
+	}
+	pr_info("CIB interrupt controller probed: %llx %d\n",
+		host_data->raw_reg, host_data->max_bits);
+	return 0;
+}
+
+static struct of_device_id ciu_types[] __initdata = {
+	{.compatible = "cavium,octeon-3860-ciu", .data = octeon_irq_init_ciu},
+	{.compatible = "cavium,octeon-3860-gpio", .data = octeon_irq_init_gpio},
+	{.compatible = "cavium,octeon-6880-ciu2", .data = octeon_irq_init_ciu2},
+	{.compatible = "cavium,octeon-7130-cib", .data = octeon_irq_init_cib},
+	{}
+};
+
 void __init arch_init_irq(void)
 {
 #ifdef CONFIG_SMP
@@ -1750,10 +2305,7 @@
 	cpumask_clear(irq_default_affinity);
 	cpumask_set_cpu(smp_processor_id(), irq_default_affinity);
 #endif
-	if (OCTEON_IS_MODEL(OCTEON_CN68XX))
-		octeon_irq_init_ciu2();
-	else
-		octeon_irq_init_ciu();
+	of_irq_init(ciu_types);
 }
 
 asmlinkage void plat_irq_dispatch(void)
@@ -1767,13 +2319,13 @@
 		cop0_cause &= cop0_status;
 		cop0_cause &= ST0_IM;
 
-		if (unlikely(cop0_cause & STATUSF_IP2))
+		if (cop0_cause & STATUSF_IP2)
 			octeon_irq_ip2();
-		else if (unlikely(cop0_cause & STATUSF_IP3))
+		else if (cop0_cause & STATUSF_IP3)
 			octeon_irq_ip3();
-		else if (unlikely(cop0_cause & STATUSF_IP4))
+		else if (cop0_cause & STATUSF_IP4)
 			octeon_irq_ip4();
-		else if (likely(cop0_cause))
+		else if (cop0_cause)
 			do_IRQ(fls(cop0_cause) - 9 + MIPS_CPU_IRQ_BASE);
 		else
 			break;
diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c
index 94f888d..a42110e 100644
--- a/arch/mips/cavium-octeon/setup.c
+++ b/arch/mips/cavium-octeon/setup.c
@@ -41,6 +41,7 @@
 #include <asm/octeon/octeon.h>
 #include <asm/octeon/pci-octeon.h>
 #include <asm/octeon/cvmx-mio-defs.h>
+#include <asm/octeon/cvmx-rst-defs.h>
 
 extern struct plat_smp_ops octeon_smp_ops;
 
@@ -579,12 +580,10 @@
 	/* R/W If set, CVMSEG is available for loads/stores in user
 	 * mode. */
 	cvmmemctl.s.cvmsegenau = 0;
-	/* R/W Size of local memory in cache blocks, 54 (6912 bytes)
-	 * is max legal value. */
-	cvmmemctl.s.lmemsz = CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE;
 
 	write_c0_cvmmemctl(cvmmemctl.u64);
 
+	/* Setup of CVMSEG is done in kernel-entry-init.h */
 	if (smp_processor_id() == 0)
 		pr_notice("CVMSEG size: %d cache lines (%d bytes)\n",
 			  CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE,
@@ -615,6 +614,7 @@
 	const char *arg;
 	char *p;
 	int i;
+	u64 t;
 	int argc;
 #ifdef CONFIG_CAVIUM_RESERVE32
 	int64_t addr = -1;
@@ -654,15 +654,56 @@
 	sysinfo->dfa_ref_clock_hz = octeon_bootinfo->dfa_ref_clock_hz;
 	sysinfo->bootloader_config_flags = octeon_bootinfo->config_flags;
 
-	if (OCTEON_IS_MODEL(OCTEON_CN6XXX)) {
+	if (OCTEON_IS_OCTEON2()) {
 		/* I/O clock runs at a different rate than the CPU. */
 		union cvmx_mio_rst_boot rst_boot;
 		rst_boot.u64 = cvmx_read_csr(CVMX_MIO_RST_BOOT);
 		octeon_io_clock_rate = 50000000 * rst_boot.s.pnr_mul;
+	} else if (OCTEON_IS_OCTEON3()) {
+		/* I/O clock runs at a different rate than the CPU. */
+		union cvmx_rst_boot rst_boot;
+		rst_boot.u64 = cvmx_read_csr(CVMX_RST_BOOT);
+		octeon_io_clock_rate = 50000000 * rst_boot.s.pnr_mul;
 	} else {
 		octeon_io_clock_rate = sysinfo->cpu_clock_hz;
 	}
 
+	t = read_c0_cvmctl();
+	if ((t & (1ull << 27)) == 0) {
+		/*
+		 * Setup the multiplier save/restore code if
+		 * CvmCtl[NOMUL] clear.
+		 */
+		void *save;
+		void *save_end;
+		void *restore;
+		void *restore_end;
+		int save_len;
+		int restore_len;
+		int save_max = (char *)octeon_mult_save_end -
+			(char *)octeon_mult_save;
+		int restore_max = (char *)octeon_mult_restore_end -
+			(char *)octeon_mult_restore;
+		if (current_cpu_data.cputype == CPU_CAVIUM_OCTEON3) {
+			save = octeon_mult_save3;
+			save_end = octeon_mult_save3_end;
+			restore = octeon_mult_restore3;
+			restore_end = octeon_mult_restore3_end;
+		} else {
+			save = octeon_mult_save2;
+			save_end = octeon_mult_save2_end;
+			restore = octeon_mult_restore2;
+			restore_end = octeon_mult_restore2_end;
+		}
+		save_len = (char *)save_end - (char *)save;
+		restore_len = (char *)restore_end - (char *)restore;
+		if (!WARN_ON(save_len > save_max ||
+				restore_len > restore_max)) {
+			memcpy(octeon_mult_save, save, save_len);
+			memcpy(octeon_mult_restore, restore, restore_len);
+		}
+	}
+
 	/*
 	 * Only enable the LED controller if we're running on a CN38XX, CN58XX,
 	 * or CN56XX. The CN30XX and CN31XX don't have an LED controller.
@@ -1004,7 +1045,7 @@
 
 void prom_free_prom_memory(void)
 {
-	if (OCTEON_IS_MODEL(OCTEON_CN63XX_PASS1_X)) {
+	if (CAVIUM_OCTEON_DCACHE_PREFETCH_WAR) {
 		/* Check for presence of Core-14449 fix.  */
 		u32 insn;
 		u32 *foo;
@@ -1026,8 +1067,9 @@
 			panic("No PREF instruction at Core-14449 probe point.");
 
 		if (((insn >> 16) & 0x1f) != 28)
-			panic("Core-14449 WAR not in place (%04x).\n"
-			      "Please build kernel with proper options (CONFIG_CAVIUM_CN63XXP1).", insn);
+			panic("OCTEON II DCache prefetch workaround not in place (%04x).\n"
+			      "Please build kernel with proper options (CONFIG_CAVIUM_CN63XXP1).",
+			      insn);
 	}
 }
 
diff --git a/arch/mips/configs/malta_qemu_32r6_defconfig b/arch/mips/configs/malta_qemu_32r6_defconfig
new file mode 100644
index 0000000..4bce1f8
--- /dev/null
+++ b/arch/mips/configs/malta_qemu_32r6_defconfig
@@ -0,0 +1,193 @@
+CONFIG_MIPS_MALTA=y
+CONFIG_CPU_LITTLE_ENDIAN=y
+CONFIG_CPU_MIPS32_R6=y
+CONFIG_PAGE_SIZE_16KB=y
+CONFIG_HZ_100=y
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_AUDIT=y
+CONFIG_NO_HZ=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=15
+CONFIG_SYSCTL_SYSCALL=y
+CONFIG_EMBEDDED=y
+CONFIG_SLAB=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+CONFIG_MODULE_SRCVERSION_ALL=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PCI=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_XFRM_USER=m
+CONFIG_NET_KEY=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_VERBOSE=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_NET_IPIP=m
+CONFIG_IP_MROUTE=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+CONFIG_SYN_COOKIES=y
+CONFIG_INET_AH=m
+CONFIG_INET_ESP=m
+CONFIG_INET_IPCOMP=m
+# CONFIG_INET_LRO is not set
+CONFIG_INET6_AH=m
+CONFIG_INET6_ESP=m
+CONFIG_INET6_IPCOMP=m
+CONFIG_IPV6_TUNNEL=m
+CONFIG_BRIDGE=m
+CONFIG_VLAN_8021Q=m
+CONFIG_ATALK=m
+CONFIG_DEV_APPLETALK=m
+CONFIG_IPDDP=m
+CONFIG_IPDDP_ENCAP=y
+CONFIG_NET_SCHED=y
+CONFIG_NET_SCH_CBQ=m
+CONFIG_NET_SCH_HTB=m
+CONFIG_NET_SCH_HFSC=m
+CONFIG_NET_SCH_PRIO=m
+CONFIG_NET_SCH_RED=m
+CONFIG_NET_SCH_SFQ=m
+CONFIG_NET_SCH_TEQL=m
+CONFIG_NET_SCH_TBF=m
+CONFIG_NET_SCH_GRED=m
+CONFIG_NET_SCH_DSMARK=m
+CONFIG_NET_SCH_NETEM=m
+CONFIG_NET_SCH_INGRESS=m
+CONFIG_NET_CLS_BASIC=m
+CONFIG_NET_CLS_TCINDEX=m
+CONFIG_NET_CLS_ROUTE4=m
+CONFIG_NET_CLS_FW=m
+CONFIG_NET_CLS_U32=m
+CONFIG_NET_CLS_RSVP=m
+CONFIG_NET_CLS_RSVP6=m
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_ACT_POLICE=y
+CONFIG_NET_CLS_IND=y
+# CONFIG_WIRELESS is not set
+CONFIG_DEVTMPFS=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_CRYPTOLOOP=m
+CONFIG_IDE=y
+# CONFIG_IDE_PROC_FS is not set
+# CONFIG_IDEPCI_PCIBUS_ORDER is not set
+CONFIG_BLK_DEV_GENERIC=y
+CONFIG_BLK_DEV_PIIX=y
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_SG=y
+# CONFIG_SCSI_LOWLEVEL is not set
+CONFIG_NETDEVICES=y
+# CONFIG_NET_VENDOR_3COM is not set
+# CONFIG_NET_VENDOR_ADAPTEC is not set
+# CONFIG_NET_VENDOR_ALTEON is not set
+CONFIG_PCNET32=y
+# CONFIG_NET_VENDOR_ATHEROS is not set
+# CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_VENDOR_BROCADE is not set
+# CONFIG_NET_VENDOR_CHELSIO is not set
+# CONFIG_NET_VENDOR_CISCO is not set
+# CONFIG_NET_VENDOR_DEC is not set
+# CONFIG_NET_VENDOR_DLINK is not set
+# CONFIG_NET_VENDOR_EMULEX is not set
+# CONFIG_NET_VENDOR_EXAR is not set
+# CONFIG_NET_VENDOR_HP is not set
+# CONFIG_NET_VENDOR_INTEL is not set
+# CONFIG_NET_VENDOR_MARVELL is not set
+# CONFIG_NET_VENDOR_MELLANOX is not set
+# CONFIG_NET_VENDOR_MICREL is not set
+# CONFIG_NET_VENDOR_MYRI is not set
+# CONFIG_NET_VENDOR_NATSEMI is not set
+# CONFIG_NET_VENDOR_NVIDIA is not set
+# CONFIG_NET_VENDOR_OKI is not set
+# CONFIG_NET_PACKET_ENGINE is not set
+# CONFIG_NET_VENDOR_QLOGIC is not set
+# CONFIG_NET_VENDOR_REALTEK is not set
+# CONFIG_NET_VENDOR_RDC is not set
+# CONFIG_NET_VENDOR_SEEQ is not set
+# CONFIG_NET_VENDOR_SILAN is not set
+# CONFIG_NET_VENDOR_SIS is not set
+# CONFIG_NET_VENDOR_SMSC is not set
+# CONFIG_NET_VENDOR_STMICRO is not set
+# CONFIG_NET_VENDOR_SUN is not set
+# CONFIG_NET_VENDOR_TEHUTI is not set
+# CONFIG_NET_VENDOR_TI is not set
+# CONFIG_NET_VENDOR_TOSHIBA is not set
+# CONFIG_NET_VENDOR_VIA is not set
+# CONFIG_NET_VENDOR_WIZNET is not set
+# CONFIG_WLAN is not set
+# CONFIG_VT is not set
+CONFIG_LEGACY_PTY_COUNT=4
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_HW_RANDOM=y
+# CONFIG_HWMON is not set
+CONFIG_FB=y
+CONFIG_FIRMWARE_EDID=y
+CONFIG_FB_MATROX=y
+CONFIG_FB_MATROX_G=y
+CONFIG_USB=y
+CONFIG_USB_EHCI_HCD=y
+# CONFIG_USB_EHCI_TT_NEWSCHED is not set
+CONFIG_USB_UHCI_HCD=y
+CONFIG_USB_STORAGE=y
+CONFIG_NEW_LEDS=y
+CONFIG_LEDS_CLASS=y
+CONFIG_LEDS_TRIGGERS=y
+CONFIG_LEDS_TRIGGER_TIMER=y
+CONFIG_LEDS_TRIGGER_IDE_DISK=y
+CONFIG_LEDS_TRIGGER_HEARTBEAT=y
+CONFIG_LEDS_TRIGGER_BACKLIGHT=y
+CONFIG_LEDS_TRIGGER_DEFAULT_ON=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_CMOS=y
+CONFIG_EXT2_FS=y
+CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
+CONFIG_XFS_FS=y
+CONFIG_XFS_QUOTA=y
+CONFIG_XFS_POSIX_ACL=y
+CONFIG_QUOTA=y
+CONFIG_QFMT_V2=y
+CONFIG_MSDOS_FS=m
+CONFIG_VFAT_FS=m
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_NFS_FS=y
+CONFIG_ROOT_NFS=y
+CONFIG_CIFS=m
+CONFIG_CIFS_WEAK_PW_HASH=y
+CONFIG_CIFS_XATTR=y
+CONFIG_CIFS_POSIX=y
+CONFIG_NLS_CODEPAGE_437=m
+CONFIG_NLS_ISO8859_1=m
+# CONFIG_FTRACE is not set
+CONFIG_CRYPTO_NULL=m
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_HMAC=y
+CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_TGR192=m
+CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_ANUBIS=m
+CONFIG_CRYPTO_BLOWFISH=m
+CONFIG_CRYPTO_CAST5=m
+CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_KHAZAD=m
+CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_TEA=m
+CONFIG_CRYPTO_TWOFISH=m
+# CONFIG_CRYPTO_ANSI_CPRNG is not set
+# CONFIG_CRYPTO_HW is not set
diff --git a/arch/mips/fw/arc/misc.c b/arch/mips/fw/arc/misc.c
index f9f5307..19f7101 100644
--- a/arch/mips/fw/arc/misc.c
+++ b/arch/mips/fw/arc/misc.c
@@ -9,6 +9,7 @@
  * Copyright (C) 1999 Ralf Baechle (ralf@gnu.org)
  * Copyright (C) 1999 Silicon Graphics, Inc.
  */
+#include <linux/compiler.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/irqflags.h>
@@ -19,50 +20,55 @@
 #include <asm/sgialib.h>
 #include <asm/bootinfo.h>
 
-VOID
+VOID __noreturn
 ArcHalt(VOID)
 {
 	bc_disable();
 	local_irq_disable();
 	ARC_CALL0(halt);
-never:	goto never;
+
+	unreachable();
 }
 
-VOID
+VOID __noreturn
 ArcPowerDown(VOID)
 {
 	bc_disable();
 	local_irq_disable();
 	ARC_CALL0(pdown);
-never:	goto never;
+
+	unreachable();
 }
 
 /* XXX is this a soft reset basically? XXX */
-VOID
+VOID __noreturn
 ArcRestart(VOID)
 {
 	bc_disable();
 	local_irq_disable();
 	ARC_CALL0(restart);
-never:	goto never;
+
+	unreachable();
 }
 
-VOID
+VOID __noreturn
 ArcReboot(VOID)
 {
 	bc_disable();
 	local_irq_disable();
 	ARC_CALL0(reboot);
-never:	goto never;
+
+	unreachable();
 }
 
-VOID
+VOID __noreturn
 ArcEnterInteractiveMode(VOID)
 {
 	bc_disable();
 	local_irq_disable();
 	ARC_CALL0(imode);
-never:	goto never;
+
+	unreachable();
 }
 
 LONG
diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild
index 200efea..526539c 100644
--- a/arch/mips/include/asm/Kbuild
+++ b/arch/mips/include/asm/Kbuild
@@ -1,4 +1,5 @@
 # MIPS headers
+generic-(CONFIG_GENERIC_CSUM) += checksum.h
 generic-y += cputime.h
 generic-y += current.h
 generic-y += dma-contiguous.h
diff --git a/arch/mips/include/asm/asmmacro.h b/arch/mips/include/asm/asmmacro.h
index 6caf876..0cae459 100644
--- a/arch/mips/include/asm/asmmacro.h
+++ b/arch/mips/include/asm/asmmacro.h
@@ -19,7 +19,7 @@
 #include <asm/asmmacro-64.h>
 #endif
 
-#ifdef CONFIG_CPU_MIPSR2
+#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
 	.macro	local_irq_enable reg=t0
 	ei
 	irq_enable_hazard
@@ -104,7 +104,8 @@
 	.endm
 
 	.macro	fpu_save_double thread status tmp
-#if defined(CONFIG_64BIT) || defined(CONFIG_CPU_MIPS32_R2)
+#if defined(CONFIG_64BIT) || defined(CONFIG_CPU_MIPS32_R2) || \
+		defined(CONFIG_CPU_MIPS32_R6)
 	sll	\tmp, \status, 5
 	bgez	\tmp, 10f
 	fpu_save_16odd \thread
@@ -160,7 +161,8 @@
 	.endm
 
 	.macro	fpu_restore_double thread status tmp
-#if defined(CONFIG_64BIT) || defined(CONFIG_CPU_MIPS32_R2)
+#if defined(CONFIG_64BIT) || defined(CONFIG_CPU_MIPS32_R2) || \
+		defined(CONFIG_CPU_MIPS32_R6)
 	sll	\tmp, \status, 5
 	bgez	\tmp, 10f				# 16 register mode?
 
@@ -170,16 +172,16 @@
 	fpu_restore_16even \thread \tmp
 	.endm
 
-#ifdef CONFIG_CPU_MIPSR2
+#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
 	.macro	_EXT	rd, rs, p, s
 	ext	\rd, \rs, \p, \s
 	.endm
-#else /* !CONFIG_CPU_MIPSR2 */
+#else /* !CONFIG_CPU_MIPSR2 || !CONFIG_CPU_MIPSR6 */
 	.macro	_EXT	rd, rs, p, s
 	srl	\rd, \rs, \p
 	andi	\rd, \rd, (1 << \s) - 1
 	.endm
-#endif /* !CONFIG_CPU_MIPSR2 */
+#endif /* !CONFIG_CPU_MIPSR2 || !CONFIG_CPU_MIPSR6 */
 
 /*
  * Temporary until all gas have MT ASE support
@@ -304,7 +306,7 @@
 	.set	push
 	.set	noat
 	SET_HARDFLOAT
-	add	$1, \base, \off
+	addu	$1, \base, \off
 	.word	LDD_MSA_INSN | (\wd << 6)
 	.set	pop
 	.endm
@@ -313,7 +315,7 @@
 	.set	push
 	.set	noat
 	SET_HARDFLOAT
-	add	$1, \base, \off
+	addu	$1, \base, \off
 	.word	STD_MSA_INSN | (\wd << 6)
 	.set	pop
 	.endm
diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h
index 857da84..26d4363 100644
--- a/arch/mips/include/asm/atomic.h
+++ b/arch/mips/include/asm/atomic.h
@@ -54,19 +54,19 @@
 		"	sc	%0, %1					\n"   \
 		"	beqzl	%0, 1b					\n"   \
 		"	.set	mips0					\n"   \
-		: "=&r" (temp), "+" GCC_OFF12_ASM() (v->counter)	      \
+		: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter)	      \
 		: "Ir" (i));						      \
 	} else if (kernel_uses_llsc) {					      \
 		int temp;						      \
 									      \
 		do {							      \
 			__asm__ __volatile__(				      \
-			"	.set	arch=r4000			\n"   \
+			"	.set	"MIPS_ISA_LEVEL"		\n"   \
 			"	ll	%0, %1		# atomic_" #op "\n"   \
 			"	" #asm_op " %0, %2			\n"   \
 			"	sc	%0, %1				\n"   \
 			"	.set	mips0				\n"   \
-			: "=&r" (temp), "+" GCC_OFF12_ASM() (v->counter)      \
+			: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter)      \
 			: "Ir" (i));					      \
 		} while (unlikely(!temp));				      \
 	} else {							      \
@@ -97,20 +97,20 @@
 		"	" #asm_op " %0, %1, %3				\n"   \
 		"	.set	mips0					\n"   \
 		: "=&r" (result), "=&r" (temp),				      \
-		  "+" GCC_OFF12_ASM() (v->counter)			      \
+		  "+" GCC_OFF_SMALL_ASM() (v->counter)			      \
 		: "Ir" (i));						      \
 	} else if (kernel_uses_llsc) {					      \
 		int temp;						      \
 									      \
 		do {							      \
 			__asm__ __volatile__(				      \
-			"	.set	arch=r4000			\n"   \
+			"	.set	"MIPS_ISA_LEVEL"		\n"   \
 			"	ll	%1, %2	# atomic_" #op "_return	\n"   \
 			"	" #asm_op " %0, %1, %3			\n"   \
 			"	sc	%0, %2				\n"   \
 			"	.set	mips0				\n"   \
 			: "=&r" (result), "=&r" (temp),			      \
-			  "+" GCC_OFF12_ASM() (v->counter)		      \
+			  "+" GCC_OFF_SMALL_ASM() (v->counter)		      \
 			: "Ir" (i));					      \
 		} while (unlikely(!result));				      \
 									      \
@@ -171,14 +171,14 @@
 		"1:							\n"
 		"	.set	mips0					\n"
 		: "=&r" (result), "=&r" (temp),
-		  "+" GCC_OFF12_ASM() (v->counter)
-		: "Ir" (i), GCC_OFF12_ASM() (v->counter)
+		  "+" GCC_OFF_SMALL_ASM() (v->counter)
+		: "Ir" (i), GCC_OFF_SMALL_ASM() (v->counter)
 		: "memory");
 	} else if (kernel_uses_llsc) {
 		int temp;
 
 		__asm__ __volatile__(
-		"	.set	arch=r4000				\n"
+		"	.set	"MIPS_ISA_LEVEL"			\n"
 		"1:	ll	%1, %2		# atomic_sub_if_positive\n"
 		"	subu	%0, %1, %3				\n"
 		"	bltz	%0, 1f					\n"
@@ -190,7 +190,7 @@
 		"1:							\n"
 		"	.set	mips0					\n"
 		: "=&r" (result), "=&r" (temp),
-		  "+" GCC_OFF12_ASM() (v->counter)
+		  "+" GCC_OFF_SMALL_ASM() (v->counter)
 		: "Ir" (i));
 	} else {
 		unsigned long flags;
@@ -333,19 +333,19 @@
 		"	scd	%0, %1					\n"   \
 		"	beqzl	%0, 1b					\n"   \
 		"	.set	mips0					\n"   \
-		: "=&r" (temp), "+" GCC_OFF12_ASM() (v->counter)	      \
+		: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter)	      \
 		: "Ir" (i));						      \
 	} else if (kernel_uses_llsc) {					      \
 		long temp;						      \
 									      \
 		do {							      \
 			__asm__ __volatile__(				      \
-			"	.set	arch=r4000			\n"   \
+			"	.set	"MIPS_ISA_LEVEL"		\n"   \
 			"	lld	%0, %1		# atomic64_" #op "\n" \
 			"	" #asm_op " %0, %2			\n"   \
 			"	scd	%0, %1				\n"   \
 			"	.set	mips0				\n"   \
-			: "=&r" (temp), "+" GCC_OFF12_ASM() (v->counter)      \
+			: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter)      \
 			: "Ir" (i));					      \
 		} while (unlikely(!temp));				      \
 	} else {							      \
@@ -376,21 +376,21 @@
 		"	" #asm_op " %0, %1, %3				\n"   \
 		"	.set	mips0					\n"   \
 		: "=&r" (result), "=&r" (temp),				      \
-		  "+" GCC_OFF12_ASM() (v->counter)			      \
+		  "+" GCC_OFF_SMALL_ASM() (v->counter)			      \
 		: "Ir" (i));						      \
 	} else if (kernel_uses_llsc) {					      \
 		long temp;						      \
 									      \
 		do {							      \
 			__asm__ __volatile__(				      \
-			"	.set	arch=r4000			\n"   \
+			"	.set	"MIPS_ISA_LEVEL"		\n"   \
 			"	lld	%1, %2	# atomic64_" #op "_return\n"  \
 			"	" #asm_op " %0, %1, %3			\n"   \
 			"	scd	%0, %2				\n"   \
 			"	.set	mips0				\n"   \
 			: "=&r" (result), "=&r" (temp),			      \
-			  "=" GCC_OFF12_ASM() (v->counter)		      \
-			: "Ir" (i), GCC_OFF12_ASM() (v->counter)	      \
+			  "=" GCC_OFF_SMALL_ASM() (v->counter)		      \
+			: "Ir" (i), GCC_OFF_SMALL_ASM() (v->counter)	      \
 			: "memory");					      \
 		} while (unlikely(!result));				      \
 									      \
@@ -452,14 +452,14 @@
 		"1:							\n"
 		"	.set	mips0					\n"
 		: "=&r" (result), "=&r" (temp),
-		  "=" GCC_OFF12_ASM() (v->counter)
-		: "Ir" (i), GCC_OFF12_ASM() (v->counter)
+		  "=" GCC_OFF_SMALL_ASM() (v->counter)
+		: "Ir" (i), GCC_OFF_SMALL_ASM() (v->counter)
 		: "memory");
 	} else if (kernel_uses_llsc) {
 		long temp;
 
 		__asm__ __volatile__(
-		"	.set	arch=r4000				\n"
+		"	.set	"MIPS_ISA_LEVEL"			\n"
 		"1:	lld	%1, %2		# atomic64_sub_if_positive\n"
 		"	dsubu	%0, %1, %3				\n"
 		"	bltz	%0, 1f					\n"
@@ -471,7 +471,7 @@
 		"1:							\n"
 		"	.set	mips0					\n"
 		: "=&r" (result), "=&r" (temp),
-		  "+" GCC_OFF12_ASM() (v->counter)
+		  "+" GCC_OFF_SMALL_ASM() (v->counter)
 		: "Ir" (i));
 	} else {
 		unsigned long flags;
diff --git a/arch/mips/include/asm/bitops.h b/arch/mips/include/asm/bitops.h
index 6663bcc..9f935f6 100644
--- a/arch/mips/include/asm/bitops.h
+++ b/arch/mips/include/asm/bitops.h
@@ -79,28 +79,28 @@
 		"	" __SC	"%0, %1					\n"
 		"	beqzl	%0, 1b					\n"
 		"	.set	mips0					\n"
-		: "=&r" (temp), "=" GCC_OFF12_ASM() (*m)
-		: "ir" (1UL << bit), GCC_OFF12_ASM() (*m));
-#ifdef CONFIG_CPU_MIPSR2
+		: "=&r" (temp), "=" GCC_OFF_SMALL_ASM() (*m)
+		: "ir" (1UL << bit), GCC_OFF_SMALL_ASM() (*m));
+#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
 	} else if (kernel_uses_llsc && __builtin_constant_p(bit)) {
 		do {
 			__asm__ __volatile__(
 			"	" __LL "%0, %1		# set_bit	\n"
 			"	" __INS "%0, %3, %2, 1			\n"
 			"	" __SC "%0, %1				\n"
-			: "=&r" (temp), "+" GCC_OFF12_ASM() (*m)
+			: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
 			: "ir" (bit), "r" (~0));
 		} while (unlikely(!temp));
-#endif /* CONFIG_CPU_MIPSR2 */
+#endif /* CONFIG_CPU_MIPSR2 || CONFIG_CPU_MIPSR6 */
 	} else if (kernel_uses_llsc) {
 		do {
 			__asm__ __volatile__(
-			"	.set	arch=r4000			\n"
+			"	.set	"MIPS_ISA_ARCH_LEVEL"		\n"
 			"	" __LL "%0, %1		# set_bit	\n"
 			"	or	%0, %2				\n"
 			"	" __SC	"%0, %1				\n"
 			"	.set	mips0				\n"
-			: "=&r" (temp), "+" GCC_OFF12_ASM() (*m)
+			: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
 			: "ir" (1UL << bit));
 		} while (unlikely(!temp));
 	} else
@@ -131,28 +131,28 @@
 		"	" __SC "%0, %1					\n"
 		"	beqzl	%0, 1b					\n"
 		"	.set	mips0					\n"
-		: "=&r" (temp), "+" GCC_OFF12_ASM() (*m)
+		: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
 		: "ir" (~(1UL << bit)));
-#ifdef CONFIG_CPU_MIPSR2
+#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
 	} else if (kernel_uses_llsc && __builtin_constant_p(bit)) {
 		do {
 			__asm__ __volatile__(
 			"	" __LL "%0, %1		# clear_bit	\n"
 			"	" __INS "%0, $0, %2, 1			\n"
 			"	" __SC "%0, %1				\n"
-			: "=&r" (temp), "+" GCC_OFF12_ASM() (*m)
+			: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
 			: "ir" (bit));
 		} while (unlikely(!temp));
-#endif /* CONFIG_CPU_MIPSR2 */
+#endif /* CONFIG_CPU_MIPSR2 || CONFIG_CPU_MIPSR6 */
 	} else if (kernel_uses_llsc) {
 		do {
 			__asm__ __volatile__(
-			"	.set	arch=r4000			\n"
+			"	.set	"MIPS_ISA_ARCH_LEVEL"		\n"
 			"	" __LL "%0, %1		# clear_bit	\n"
 			"	and	%0, %2				\n"
 			"	" __SC "%0, %1				\n"
 			"	.set	mips0				\n"
-			: "=&r" (temp), "+" GCC_OFF12_ASM() (*m)
+			: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
 			: "ir" (~(1UL << bit)));
 		} while (unlikely(!temp));
 	} else
@@ -197,7 +197,7 @@
 		"	" __SC	"%0, %1				\n"
 		"	beqzl	%0, 1b				\n"
 		"	.set	mips0				\n"
-		: "=&r" (temp), "+" GCC_OFF12_ASM() (*m)
+		: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
 		: "ir" (1UL << bit));
 	} else if (kernel_uses_llsc) {
 		unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
@@ -205,12 +205,12 @@
 
 		do {
 			__asm__ __volatile__(
-			"	.set	arch=r4000			\n"
+			"	.set	"MIPS_ISA_ARCH_LEVEL"		\n"
 			"	" __LL "%0, %1		# change_bit	\n"
 			"	xor	%0, %2				\n"
 			"	" __SC	"%0, %1				\n"
 			"	.set	mips0				\n"
-			: "=&r" (temp), "+" GCC_OFF12_ASM() (*m)
+			: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
 			: "ir" (1UL << bit));
 		} while (unlikely(!temp));
 	} else
@@ -245,7 +245,7 @@
 		"	beqzl	%2, 1b					\n"
 		"	and	%2, %0, %3				\n"
 		"	.set	mips0					\n"
-		: "=&r" (temp), "+" GCC_OFF12_ASM() (*m), "=&r" (res)
+		: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res)
 		: "r" (1UL << bit)
 		: "memory");
 	} else if (kernel_uses_llsc) {
@@ -254,12 +254,12 @@
 
 		do {
 			__asm__ __volatile__(
-			"	.set	arch=r4000			\n"
+			"	.set	"MIPS_ISA_ARCH_LEVEL"		\n"
 			"	" __LL "%0, %1	# test_and_set_bit	\n"
 			"	or	%2, %0, %3			\n"
 			"	" __SC	"%2, %1				\n"
 			"	.set	mips0				\n"
-			: "=&r" (temp), "+" GCC_OFF12_ASM() (*m), "=&r" (res)
+			: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res)
 			: "r" (1UL << bit)
 			: "memory");
 		} while (unlikely(!res));
@@ -308,12 +308,12 @@
 
 		do {
 			__asm__ __volatile__(
-			"	.set	arch=r4000			\n"
+			"	.set	"MIPS_ISA_ARCH_LEVEL"		\n"
 			"	" __LL "%0, %1	# test_and_set_bit	\n"
 			"	or	%2, %0, %3			\n"
 			"	" __SC	"%2, %1				\n"
 			"	.set	mips0				\n"
-			: "=&r" (temp), "+" GCC_OFF12_ASM() (*m), "=&r" (res)
+			: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res)
 			: "r" (1UL << bit)
 			: "memory");
 		} while (unlikely(!res));
@@ -355,10 +355,10 @@
 		"	beqzl	%2, 1b					\n"
 		"	and	%2, %0, %3				\n"
 		"	.set	mips0					\n"
-		: "=&r" (temp), "+" GCC_OFF12_ASM() (*m), "=&r" (res)
+		: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res)
 		: "r" (1UL << bit)
 		: "memory");
-#ifdef CONFIG_CPU_MIPSR2
+#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
 	} else if (kernel_uses_llsc && __builtin_constant_p(nr)) {
 		unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
 		unsigned long temp;
@@ -369,7 +369,7 @@
 			"	" __EXT "%2, %0, %3, 1			\n"
 			"	" __INS "%0, $0, %3, 1			\n"
 			"	" __SC	"%0, %1				\n"
-			: "=&r" (temp), "+" GCC_OFF12_ASM() (*m), "=&r" (res)
+			: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res)
 			: "ir" (bit)
 			: "memory");
 		} while (unlikely(!temp));
@@ -380,13 +380,13 @@
 
 		do {
 			__asm__ __volatile__(
-			"	.set	arch=r4000			\n"
+			"	.set	"MIPS_ISA_ARCH_LEVEL"		\n"
 			"	" __LL	"%0, %1 # test_and_clear_bit	\n"
 			"	or	%2, %0, %3			\n"
 			"	xor	%2, %3				\n"
 			"	" __SC	"%2, %1				\n"
 			"	.set	mips0				\n"
-			: "=&r" (temp), "+" GCC_OFF12_ASM() (*m), "=&r" (res)
+			: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res)
 			: "r" (1UL << bit)
 			: "memory");
 		} while (unlikely(!res));
@@ -428,7 +428,7 @@
 		"	beqzl	%2, 1b					\n"
 		"	and	%2, %0, %3				\n"
 		"	.set	mips0					\n"
-		: "=&r" (temp), "+" GCC_OFF12_ASM() (*m), "=&r" (res)
+		: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res)
 		: "r" (1UL << bit)
 		: "memory");
 	} else if (kernel_uses_llsc) {
@@ -437,12 +437,12 @@
 
 		do {
 			__asm__ __volatile__(
-			"	.set	arch=r4000			\n"
+			"	.set	"MIPS_ISA_ARCH_LEVEL"		\n"
 			"	" __LL	"%0, %1 # test_and_change_bit	\n"
 			"	xor	%2, %0, %3			\n"
 			"	" __SC	"\t%2, %1			\n"
 			"	.set	mips0				\n"
-			: "=&r" (temp), "+" GCC_OFF12_ASM() (*m), "=&r" (res)
+			: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m), "=&r" (res)
 			: "r" (1UL << bit)
 			: "memory");
 		} while (unlikely(!res));
@@ -485,7 +485,7 @@
 	    __builtin_constant_p(cpu_has_clo_clz) && cpu_has_clo_clz) {
 		__asm__(
 		"	.set	push					\n"
-		"	.set	mips32					\n"
+		"	.set	"MIPS_ISA_LEVEL"			\n"
 		"	clz	%0, %1					\n"
 		"	.set	pop					\n"
 		: "=r" (num)
@@ -498,7 +498,7 @@
 	    __builtin_constant_p(cpu_has_mips64) && cpu_has_mips64) {
 		__asm__(
 		"	.set	push					\n"
-		"	.set	mips64					\n"
+		"	.set	"MIPS_ISA_LEVEL"			\n"
 		"	dclz	%0, %1					\n"
 		"	.set	pop					\n"
 		: "=r" (num)
@@ -562,7 +562,7 @@
 	if (__builtin_constant_p(cpu_has_clo_clz) && cpu_has_clo_clz) {
 		__asm__(
 		"	.set	push					\n"
-		"	.set	mips32					\n"
+		"	.set	"MIPS_ISA_LEVEL"			\n"
 		"	clz	%0, %1					\n"
 		"	.set	pop					\n"
 		: "=r" (x)
diff --git a/arch/mips/include/asm/checksum.h b/arch/mips/include/asm/checksum.h
index 3418c51..5c585c5 100644
--- a/arch/mips/include/asm/checksum.h
+++ b/arch/mips/include/asm/checksum.h
@@ -12,6 +12,10 @@
 #ifndef _ASM_CHECKSUM_H
 #define _ASM_CHECKSUM_H
 
+#ifdef CONFIG_GENERIC_CSUM
+#include <asm-generic/checksum.h>
+#else
+
 #include <linux/in6.h>
 
 #include <asm/uaccess.h>
@@ -99,27 +103,23 @@
  */
 __wsum csum_partial_copy_nocheck(const void *src, void *dst,
 				       int len, __wsum sum);
+#define csum_partial_copy_nocheck csum_partial_copy_nocheck
 
 /*
  *	Fold a partial checksum without adding pseudo headers
  */
-static inline __sum16 csum_fold(__wsum sum)
+static inline __sum16 csum_fold(__wsum csum)
 {
-	__asm__(
-	"	.set	push		# csum_fold\n"
-	"	.set	noat		\n"
-	"	sll	$1, %0, 16	\n"
-	"	addu	%0, $1		\n"
-	"	sltu	$1, %0, $1	\n"
-	"	srl	%0, %0, 16	\n"
-	"	addu	%0, $1		\n"
-	"	xori	%0, 0xffff	\n"
-	"	.set	pop"
-	: "=r" (sum)
-	: "0" (sum));
+	u32 sum = (__force u32)csum;;
 
-	return (__force __sum16)sum;
+	sum += (sum << 16);
+	csum = (sum < csum);
+	sum >>= 16;
+	sum += csum;
+
+	return (__force __sum16)~sum;
 }
+#define csum_fold csum_fold
 
 /*
  *	This is a version of ip_compute_csum() optimized for IP headers,
@@ -158,6 +158,7 @@
 
 	return csum_fold(csum);
 }
+#define ip_fast_csum ip_fast_csum
 
 static inline __wsum csum_tcpudp_nofold(__be32 saddr,
 	__be32 daddr, unsigned short len, unsigned short proto,
@@ -200,18 +201,7 @@
 
 	return sum;
 }
-
-/*
- * computes the checksum of the TCP/UDP pseudo-header
- * returns a 16-bit checksum, already complemented
- */
-static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr,
-						   unsigned short len,
-						   unsigned short proto,
-						   __wsum sum)
-{
-	return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum));
-}
+#define csum_tcpudp_nofold csum_tcpudp_nofold
 
 /*
  * this routine is used for miscellaneous IP-like checksums, mainly
@@ -287,4 +277,7 @@
 	return csum_fold(sum);
 }
 
+#include <asm-generic/checksum.h>
+#endif /* CONFIG_GENERIC_CSUM */
+
 #endif /* _ASM_CHECKSUM_H */
diff --git a/arch/mips/include/asm/cmpxchg.h b/arch/mips/include/asm/cmpxchg.h
index 28b1edf..d0a2a68 100644
--- a/arch/mips/include/asm/cmpxchg.h
+++ b/arch/mips/include/asm/cmpxchg.h
@@ -31,24 +31,24 @@
 		"	sc	%2, %1					\n"
 		"	beqzl	%2, 1b					\n"
 		"	.set	mips0					\n"
-		: "=&r" (retval), "=" GCC_OFF12_ASM() (*m), "=&r" (dummy)
-		: GCC_OFF12_ASM() (*m), "Jr" (val)
+		: "=&r" (retval), "=" GCC_OFF_SMALL_ASM() (*m), "=&r" (dummy)
+		: GCC_OFF_SMALL_ASM() (*m), "Jr" (val)
 		: "memory");
 	} else if (kernel_uses_llsc) {
 		unsigned long dummy;
 
 		do {
 			__asm__ __volatile__(
-			"	.set	arch=r4000			\n"
+			"	.set	"MIPS_ISA_ARCH_LEVEL"		\n"
 			"	ll	%0, %3		# xchg_u32	\n"
 			"	.set	mips0				\n"
 			"	move	%2, %z4				\n"
-			"	.set	arch=r4000			\n"
+			"	.set	"MIPS_ISA_ARCH_LEVEL"		\n"
 			"	sc	%2, %1				\n"
 			"	.set	mips0				\n"
-			: "=&r" (retval), "=" GCC_OFF12_ASM() (*m),
+			: "=&r" (retval), "=" GCC_OFF_SMALL_ASM() (*m),
 			  "=&r" (dummy)
-			: GCC_OFF12_ASM() (*m), "Jr" (val)
+			: GCC_OFF_SMALL_ASM() (*m), "Jr" (val)
 			: "memory");
 		} while (unlikely(!dummy));
 	} else {
@@ -82,22 +82,22 @@
 		"	scd	%2, %1					\n"
 		"	beqzl	%2, 1b					\n"
 		"	.set	mips0					\n"
-		: "=&r" (retval), "=" GCC_OFF12_ASM() (*m), "=&r" (dummy)
-		: GCC_OFF12_ASM() (*m), "Jr" (val)
+		: "=&r" (retval), "=" GCC_OFF_SMALL_ASM() (*m), "=&r" (dummy)
+		: GCC_OFF_SMALL_ASM() (*m), "Jr" (val)
 		: "memory");
 	} else if (kernel_uses_llsc) {
 		unsigned long dummy;
 
 		do {
 			__asm__ __volatile__(
-			"	.set	arch=r4000			\n"
+			"	.set	"MIPS_ISA_ARCH_LEVEL"		\n"
 			"	lld	%0, %3		# xchg_u64	\n"
 			"	move	%2, %z4				\n"
 			"	scd	%2, %1				\n"
 			"	.set	mips0				\n"
-			: "=&r" (retval), "=" GCC_OFF12_ASM() (*m),
+			: "=&r" (retval), "=" GCC_OFF_SMALL_ASM() (*m),
 			  "=&r" (dummy)
-			: GCC_OFF12_ASM() (*m), "Jr" (val)
+			: GCC_OFF_SMALL_ASM() (*m), "Jr" (val)
 			: "memory");
 		} while (unlikely(!dummy));
 	} else {
@@ -158,25 +158,25 @@
 		"	beqzl	$1, 1b				\n"	\
 		"2:						\n"	\
 		"	.set	pop				\n"	\
-		: "=&r" (__ret), "=" GCC_OFF12_ASM() (*m)		\
-		: GCC_OFF12_ASM() (*m), "Jr" (old), "Jr" (new)		\
+		: "=&r" (__ret), "=" GCC_OFF_SMALL_ASM() (*m)		\
+		: GCC_OFF_SMALL_ASM() (*m), "Jr" (old), "Jr" (new)		\
 		: "memory");						\
 	} else if (kernel_uses_llsc) {					\
 		__asm__ __volatile__(					\
 		"	.set	push				\n"	\
 		"	.set	noat				\n"	\
-		"	.set	arch=r4000			\n"	\
+		"	.set	"MIPS_ISA_ARCH_LEVEL"		\n"	\
 		"1:	" ld "	%0, %2		# __cmpxchg_asm \n"	\
 		"	bne	%0, %z3, 2f			\n"	\
 		"	.set	mips0				\n"	\
 		"	move	$1, %z4				\n"	\
-		"	.set	arch=r4000			\n"	\
+		"	.set	"MIPS_ISA_ARCH_LEVEL"		\n"	\
 		"	" st "	$1, %1				\n"	\
 		"	beqz	$1, 1b				\n"	\
 		"	.set	pop				\n"	\
 		"2:						\n"	\
-		: "=&r" (__ret), "=" GCC_OFF12_ASM() (*m)		\
-		: GCC_OFF12_ASM() (*m), "Jr" (old), "Jr" (new)		\
+		: "=&r" (__ret), "=" GCC_OFF_SMALL_ASM() (*m)		\
+		: GCC_OFF_SMALL_ASM() (*m), "Jr" (old), "Jr" (new)		\
 		: "memory");						\
 	} else {							\
 		unsigned long __flags;					\
diff --git a/arch/mips/include/asm/compiler.h b/arch/mips/include/asm/compiler.h
index c73815e..e081a26 100644
--- a/arch/mips/include/asm/compiler.h
+++ b/arch/mips/include/asm/compiler.h
@@ -16,12 +16,30 @@
 #define GCC_REG_ACCUM "accum"
 #endif
 
+#ifdef CONFIG_CPU_MIPSR6
+/* All MIPS R6 toolchains support the ZC constrain */
+#define GCC_OFF_SMALL_ASM() "ZC"
+#else
 #ifndef CONFIG_CPU_MICROMIPS
-#define GCC_OFF12_ASM() "R"
+#define GCC_OFF_SMALL_ASM() "R"
 #elif __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 9)
-#define GCC_OFF12_ASM() "ZC"
+#define GCC_OFF_SMALL_ASM() "ZC"
 #else
 #error "microMIPS compilation unsupported with GCC older than 4.9"
-#endif
+#endif /* CONFIG_CPU_MICROMIPS */
+#endif /* CONFIG_CPU_MIPSR6 */
+
+#ifdef CONFIG_CPU_MIPSR6
+#define MIPS_ISA_LEVEL "mips64r6"
+#define MIPS_ISA_ARCH_LEVEL MIPS_ISA_LEVEL
+#define MIPS_ISA_LEVEL_RAW mips64r6
+#define MIPS_ISA_ARCH_LEVEL_RAW MIPS_ISA_LEVEL_RAW
+#else
+/* MIPS64 is a superset of MIPS32 */
+#define MIPS_ISA_LEVEL "mips64r2"
+#define MIPS_ISA_ARCH_LEVEL "arch=r4000"
+#define MIPS_ISA_LEVEL_RAW mips64r2
+#define MIPS_ISA_ARCH_LEVEL_RAW MIPS_ISA_LEVEL_RAW
+#endif /* CONFIG_CPU_MIPSR6 */
 
 #endif /* _ASM_COMPILER_H */
diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h
index 2897cfa..0d8208d 100644
--- a/arch/mips/include/asm/cpu-features.h
+++ b/arch/mips/include/asm/cpu-features.h
@@ -38,6 +38,9 @@
 #ifndef cpu_has_maar
 #define cpu_has_maar		(cpu_data[0].options & MIPS_CPU_MAAR)
 #endif
+#ifndef cpu_has_rw_llb
+#define cpu_has_rw_llb		(cpu_data[0].options & MIPS_CPU_RW_LLB)
+#endif
 
 /*
  * For the moment we don't consider R6000 and R8000 so we can assume that
@@ -171,6 +174,9 @@
 #endif
 #endif
 
+#ifndef cpu_has_mips_1
+# define cpu_has_mips_1		(!cpu_has_mips_r6)
+#endif
 #ifndef cpu_has_mips_2
 # define cpu_has_mips_2		(cpu_data[0].isa_level & MIPS_CPU_ISA_II)
 #endif
@@ -189,12 +195,18 @@
 #ifndef cpu_has_mips32r2
 # define cpu_has_mips32r2	(cpu_data[0].isa_level & MIPS_CPU_ISA_M32R2)
 #endif
+#ifndef cpu_has_mips32r6
+# define cpu_has_mips32r6	(cpu_data[0].isa_level & MIPS_CPU_ISA_M32R6)
+#endif
 #ifndef cpu_has_mips64r1
 # define cpu_has_mips64r1	(cpu_data[0].isa_level & MIPS_CPU_ISA_M64R1)
 #endif
 #ifndef cpu_has_mips64r2
 # define cpu_has_mips64r2	(cpu_data[0].isa_level & MIPS_CPU_ISA_M64R2)
 #endif
+#ifndef cpu_has_mips64r6
+# define cpu_has_mips64r6	(cpu_data[0].isa_level & MIPS_CPU_ISA_M64R6)
+#endif
 
 /*
  * Shortcuts ...
@@ -208,17 +220,23 @@
 #define cpu_has_mips_4_5_r	(cpu_has_mips_4 | cpu_has_mips_5_r)
 #define cpu_has_mips_5_r	(cpu_has_mips_5 | cpu_has_mips_r)
 
-#define cpu_has_mips_4_5_r2	(cpu_has_mips_4_5 | cpu_has_mips_r2)
+#define cpu_has_mips_4_5_r2_r6	(cpu_has_mips_4_5 | cpu_has_mips_r2 | \
+				 cpu_has_mips_r6)
 
-#define cpu_has_mips32	(cpu_has_mips32r1 | cpu_has_mips32r2)
-#define cpu_has_mips64	(cpu_has_mips64r1 | cpu_has_mips64r2)
+#define cpu_has_mips32	(cpu_has_mips32r1 | cpu_has_mips32r2 | cpu_has_mips32r6)
+#define cpu_has_mips64	(cpu_has_mips64r1 | cpu_has_mips64r2 | cpu_has_mips64r6)
 #define cpu_has_mips_r1 (cpu_has_mips32r1 | cpu_has_mips64r1)
 #define cpu_has_mips_r2 (cpu_has_mips32r2 | cpu_has_mips64r2)
+#define cpu_has_mips_r6	(cpu_has_mips32r6 | cpu_has_mips64r6)
 #define cpu_has_mips_r	(cpu_has_mips32r1 | cpu_has_mips32r2 | \
-			 cpu_has_mips64r1 | cpu_has_mips64r2)
+			 cpu_has_mips32r6 | cpu_has_mips64r1 | \
+			 cpu_has_mips64r2 | cpu_has_mips64r6)
+
+/* MIPSR2 and MIPSR6 have a lot of similarities */
+#define cpu_has_mips_r2_r6	(cpu_has_mips_r2 | cpu_has_mips_r6)
 
 #ifndef cpu_has_mips_r2_exec_hazard
-#define cpu_has_mips_r2_exec_hazard cpu_has_mips_r2
+#define cpu_has_mips_r2_exec_hazard (cpu_has_mips_r2 | cpu_has_mips_r6)
 #endif
 
 /*
diff --git a/arch/mips/include/asm/cpu-info.h b/arch/mips/include/asm/cpu-info.h
index a6c9ccb..c3f4f2d 100644
--- a/arch/mips/include/asm/cpu-info.h
+++ b/arch/mips/include/asm/cpu-info.h
@@ -84,6 +84,11 @@
 	 * (shifted by _CACHE_SHIFT)
 	 */
 	unsigned int		writecombine;
+	/*
+	 * Simple counter to prevent enabling HTW in nested
+	 * htw_start/htw_stop calls
+	 */
+	unsigned int		htw_seq;
 } __attribute__((aligned(SMP_CACHE_BYTES)));
 
 extern struct cpuinfo_mips cpu_data[];
diff --git a/arch/mips/include/asm/cpu-type.h b/arch/mips/include/asm/cpu-type.h
index b4e2bd8..8245875 100644
--- a/arch/mips/include/asm/cpu-type.h
+++ b/arch/mips/include/asm/cpu-type.h
@@ -54,6 +54,13 @@
 	case CPU_M5150:
 #endif
 
+#if defined(CONFIG_SYS_HAS_CPU_MIPS32_R2) || \
+    defined(CONFIG_SYS_HAS_CPU_MIPS32_R6) || \
+    defined(CONFIG_SYS_HAS_CPU_MIPS64_R2) || \
+    defined(CONFIG_SYS_HAS_CPU_MIPS64_R6)
+	case CPU_QEMU_GENERIC:
+#endif
+
 #ifdef CONFIG_SYS_HAS_CPU_MIPS64_R1
 	case CPU_5KC:
 	case CPU_5KE:
diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h
index 33866fc..1568723 100644
--- a/arch/mips/include/asm/cpu.h
+++ b/arch/mips/include/asm/cpu.h
@@ -93,6 +93,7 @@
  * These are the PRID's for when 23:16 == PRID_COMP_MIPS
  */
 
+#define PRID_IMP_QEMU_GENERIC	0x0000
 #define PRID_IMP_4KC		0x8000
 #define PRID_IMP_5KC		0x8100
 #define PRID_IMP_20KC		0x8200
@@ -312,6 +313,8 @@
 	CPU_LOONGSON3, CPU_CAVIUM_OCTEON, CPU_CAVIUM_OCTEON_PLUS,
 	CPU_CAVIUM_OCTEON2, CPU_CAVIUM_OCTEON3, CPU_XLR, CPU_XLP,
 
+	CPU_QEMU_GENERIC,
+
 	CPU_LAST
 };
 
@@ -329,11 +332,14 @@
 #define MIPS_CPU_ISA_M32R2	0x00000020
 #define MIPS_CPU_ISA_M64R1	0x00000040
 #define MIPS_CPU_ISA_M64R2	0x00000080
+#define MIPS_CPU_ISA_M32R6	0x00000100
+#define MIPS_CPU_ISA_M64R6	0x00000200
 
 #define MIPS_CPU_ISA_32BIT (MIPS_CPU_ISA_II | MIPS_CPU_ISA_M32R1 | \
-	MIPS_CPU_ISA_M32R2)
+	MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M32R6)
 #define MIPS_CPU_ISA_64BIT (MIPS_CPU_ISA_III | MIPS_CPU_ISA_IV | \
-	MIPS_CPU_ISA_V | MIPS_CPU_ISA_M64R1 | MIPS_CPU_ISA_M64R2)
+	MIPS_CPU_ISA_V | MIPS_CPU_ISA_M64R1 | MIPS_CPU_ISA_M64R2 | \
+	MIPS_CPU_ISA_M64R6)
 
 /*
  * CPU Option encodings
@@ -370,6 +376,7 @@
 #define MIPS_CPU_RIXIEX		0x200000000ull /* CPU has unique exception codes for {Read, Execute}-Inhibit exceptions */
 #define MIPS_CPU_MAAR		0x400000000ull /* MAAR(I) registers are present */
 #define MIPS_CPU_FRE		0x800000000ull /* FRE & UFE bits implemented */
+#define MIPS_CPU_RW_LLB		0x1000000000ull /* LLADDR/LLB writes are allowed */
 
 /*
  * CPU ASE encodings
diff --git a/arch/mips/include/asm/edac.h b/arch/mips/include/asm/edac.h
index ae6fedc..94105d3 100644
--- a/arch/mips/include/asm/edac.h
+++ b/arch/mips/include/asm/edac.h
@@ -26,8 +26,8 @@
 		"	sc	%0, %1					\n"
 		"	beqz	%0, 1b					\n"
 		"	.set	mips0					\n"
-		: "=&r" (temp), "=" GCC_OFF12_ASM() (*virt_addr)
-		: GCC_OFF12_ASM() (*virt_addr));
+		: "=&r" (temp), "=" GCC_OFF_SMALL_ASM() (*virt_addr)
+		: GCC_OFF_SMALL_ASM() (*virt_addr));
 
 		virt_addr++;
 	}
diff --git a/arch/mips/include/asm/elf.h b/arch/mips/include/asm/elf.h
index eb4d95d..535f196 100644
--- a/arch/mips/include/asm/elf.h
+++ b/arch/mips/include/asm/elf.h
@@ -417,13 +417,15 @@
 struct arch_elf_state {
 	int fp_abi;
 	int interp_fp_abi;
-	int overall_abi;
+	int overall_fp_mode;
 };
 
+#define MIPS_ABI_FP_UNKNOWN	(-1)	/* Unknown FP ABI (kernel internal) */
+
 #define INIT_ARCH_ELF_STATE {			\
-	.fp_abi = -1,				\
-	.interp_fp_abi = -1,			\
-	.overall_abi = -1,			\
+	.fp_abi = MIPS_ABI_FP_UNKNOWN,		\
+	.interp_fp_abi = MIPS_ABI_FP_UNKNOWN,	\
+	.overall_fp_mode = -1,			\
 }
 
 extern int arch_elf_pt_proc(void *ehdr, void *phdr, struct file *elf,
diff --git a/arch/mips/include/asm/fpu.h b/arch/mips/include/asm/fpu.h
index affebb7..dd083e9 100644
--- a/arch/mips/include/asm/fpu.h
+++ b/arch/mips/include/asm/fpu.h
@@ -68,7 +68,8 @@
 		goto fr_common;
 
 	case FPU_64BIT:
-#if !(defined(CONFIG_CPU_MIPS32_R2) || defined(CONFIG_64BIT))
+#if !(defined(CONFIG_CPU_MIPS32_R2) || defined(CONFIG_CPU_MIPS32_R6) \
+      || defined(CONFIG_64BIT))
 		/* we only have a 32-bit FPU */
 		return SIGFPE;
 #endif
diff --git a/arch/mips/include/asm/futex.h b/arch/mips/include/asm/futex.h
index ef9987a..1de190b 100644
--- a/arch/mips/include/asm/futex.h
+++ b/arch/mips/include/asm/futex.h
@@ -45,19 +45,19 @@
 		"	"__UA_ADDR "\t2b, 4b			\n"	\
 		"	.previous				\n"	\
 		: "=r" (ret), "=&r" (oldval),				\
-		  "=" GCC_OFF12_ASM() (*uaddr)				\
-		: "0" (0), GCC_OFF12_ASM() (*uaddr), "Jr" (oparg),	\
+		  "=" GCC_OFF_SMALL_ASM() (*uaddr)				\
+		: "0" (0), GCC_OFF_SMALL_ASM() (*uaddr), "Jr" (oparg),	\
 		  "i" (-EFAULT)						\
 		: "memory");						\
 	} else if (cpu_has_llsc) {					\
 		__asm__ __volatile__(					\
 		"	.set	push				\n"	\
 		"	.set	noat				\n"	\
-		"	.set	arch=r4000			\n"	\
+		"	.set	"MIPS_ISA_ARCH_LEVEL"		\n"	\
 		"1:	"user_ll("%1", "%4")" # __futex_atomic_op\n"	\
 		"	.set	mips0				\n"	\
 		"	" insn	"				\n"	\
-		"	.set	arch=r4000			\n"	\
+		"	.set	"MIPS_ISA_ARCH_LEVEL"		\n"	\
 		"2:	"user_sc("$1", "%2")"			\n"	\
 		"	beqz	$1, 1b				\n"	\
 		__WEAK_LLSC_MB						\
@@ -74,8 +74,8 @@
 		"	"__UA_ADDR "\t2b, 4b			\n"	\
 		"	.previous				\n"	\
 		: "=r" (ret), "=&r" (oldval),				\
-		  "=" GCC_OFF12_ASM() (*uaddr)				\
-		: "0" (0), GCC_OFF12_ASM() (*uaddr), "Jr" (oparg),	\
+		  "=" GCC_OFF_SMALL_ASM() (*uaddr)				\
+		: "0" (0), GCC_OFF_SMALL_ASM() (*uaddr), "Jr" (oparg),	\
 		  "i" (-EFAULT)						\
 		: "memory");						\
 	} else								\
@@ -174,8 +174,8 @@
 		"	"__UA_ADDR "\t1b, 4b				\n"
 		"	"__UA_ADDR "\t2b, 4b				\n"
 		"	.previous					\n"
-		: "+r" (ret), "=&r" (val), "=" GCC_OFF12_ASM() (*uaddr)
-		: GCC_OFF12_ASM() (*uaddr), "Jr" (oldval), "Jr" (newval),
+		: "+r" (ret), "=&r" (val), "=" GCC_OFF_SMALL_ASM() (*uaddr)
+		: GCC_OFF_SMALL_ASM() (*uaddr), "Jr" (oldval), "Jr" (newval),
 		  "i" (-EFAULT)
 		: "memory");
 	} else if (cpu_has_llsc) {
@@ -183,12 +183,12 @@
 		"# futex_atomic_cmpxchg_inatomic			\n"
 		"	.set	push					\n"
 		"	.set	noat					\n"
-		"	.set	arch=r4000				\n"
+		"	.set	"MIPS_ISA_ARCH_LEVEL"			\n"
 		"1:	"user_ll("%1", "%3")"				\n"
 		"	bne	%1, %z4, 3f				\n"
 		"	.set	mips0					\n"
 		"	move	$1, %z5					\n"
-		"	.set	arch=r4000				\n"
+		"	.set	"MIPS_ISA_ARCH_LEVEL"			\n"
 		"2:	"user_sc("$1", "%2")"				\n"
 		"	beqz	$1, 1b					\n"
 		__WEAK_LLSC_MB
@@ -203,8 +203,8 @@
 		"	"__UA_ADDR "\t1b, 4b				\n"
 		"	"__UA_ADDR "\t2b, 4b				\n"
 		"	.previous					\n"
-		: "+r" (ret), "=&r" (val), "=" GCC_OFF12_ASM() (*uaddr)
-		: GCC_OFF12_ASM() (*uaddr), "Jr" (oldval), "Jr" (newval),
+		: "+r" (ret), "=&r" (val), "=" GCC_OFF_SMALL_ASM() (*uaddr)
+		: GCC_OFF_SMALL_ASM() (*uaddr), "Jr" (oldval), "Jr" (newval),
 		  "i" (-EFAULT)
 		: "memory");
 	} else
diff --git a/arch/mips/include/asm/gio_device.h b/arch/mips/include/asm/gio_device.h
index 4be1a57..71a986e 100644
--- a/arch/mips/include/asm/gio_device.h
+++ b/arch/mips/include/asm/gio_device.h
@@ -25,8 +25,6 @@
 
 	int  (*probe)(struct gio_device *, const struct gio_device_id *);
 	void (*remove)(struct gio_device *);
-	int  (*suspend)(struct gio_device *, pm_message_t);
-	int  (*resume)(struct gio_device *);
 	void (*shutdown)(struct gio_device *);
 
 	struct device_driver driver;
diff --git a/arch/mips/include/asm/hazards.h b/arch/mips/include/asm/hazards.h
index e3ee92d..4087b47 100644
--- a/arch/mips/include/asm/hazards.h
+++ b/arch/mips/include/asm/hazards.h
@@ -11,6 +11,7 @@
 #define _ASM_HAZARDS_H
 
 #include <linux/stringify.h>
+#include <asm/compiler.h>
 
 #define ___ssnop							\
 	sll	$0, $0, 1
@@ -21,7 +22,7 @@
 /*
  * TLB hazards
  */
-#if defined(CONFIG_CPU_MIPSR2) && !defined(CONFIG_CPU_CAVIUM_OCTEON)
+#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6) && !defined(CONFIG_CPU_CAVIUM_OCTEON)
 
 /*
  * MIPSR2 defines ehb for hazard avoidance
@@ -58,7 +59,7 @@
 	unsigned long tmp;						\
 									\
 	__asm__ __volatile__(						\
-	"	.set	mips64r2				\n"	\
+	"	.set "MIPS_ISA_LEVEL"				\n"	\
 	"	dla	%0, 1f					\n"	\
 	"	jr.hb	%0					\n"	\
 	"	.set	mips0					\n"	\
@@ -132,7 +133,7 @@
 
 #define instruction_hazard()						\
 do {									\
-	if (cpu_has_mips_r2)						\
+	if (cpu_has_mips_r2_r6)						\
 		__instruction_hazard();					\
 } while (0)
 
@@ -240,7 +241,7 @@
 
 #define __disable_fpu_hazard
 
-#elif defined(CONFIG_CPU_MIPSR2)
+#elif defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
 
 #define __enable_fpu_hazard						\
 	___ehb
diff --git a/arch/mips/include/asm/irqflags.h b/arch/mips/include/asm/irqflags.h
index 0fa5fdc..d60cc68 100644
--- a/arch/mips/include/asm/irqflags.h
+++ b/arch/mips/include/asm/irqflags.h
@@ -15,9 +15,10 @@
 
 #include <linux/compiler.h>
 #include <linux/stringify.h>
+#include <asm/compiler.h>
 #include <asm/hazards.h>
 
-#ifdef CONFIG_CPU_MIPSR2
+#if defined(CONFIG_CPU_MIPSR2) || defined (CONFIG_CPU_MIPSR6)
 
 static inline void arch_local_irq_disable(void)
 {
@@ -118,7 +119,7 @@
 unsigned long arch_local_irq_save(void);
 void arch_local_irq_restore(unsigned long flags);
 void __arch_local_irq_restore(unsigned long flags);
-#endif /* CONFIG_CPU_MIPSR2 */
+#endif /* CONFIG_CPU_MIPSR2 || CONFIG_CPU_MIPSR6 */
 
 static inline void arch_local_irq_enable(void)
 {
@@ -126,7 +127,7 @@
 	"	.set	push						\n"
 	"	.set	reorder						\n"
 	"	.set	noat						\n"
-#if   defined(CONFIG_CPU_MIPSR2)
+#if   defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
 	"	ei							\n"
 #else
 	"	mfc0	$1,$12						\n"
diff --git a/arch/mips/include/asm/local.h b/arch/mips/include/asm/local.h
index 46dfc3c..8feaed6 100644
--- a/arch/mips/include/asm/local.h
+++ b/arch/mips/include/asm/local.h
@@ -5,6 +5,7 @@
 #include <linux/bitops.h>
 #include <linux/atomic.h>
 #include <asm/cmpxchg.h>
+#include <asm/compiler.h>
 #include <asm/war.h>
 
 typedef struct
@@ -47,7 +48,7 @@
 		unsigned long temp;
 
 		__asm__ __volatile__(
-		"	.set	arch=r4000				\n"
+		"	.set	"MIPS_ISA_ARCH_LEVEL"			\n"
 		"1:"	__LL	"%1, %2		# local_add_return	\n"
 		"	addu	%0, %1, %3				\n"
 			__SC	"%0, %2					\n"
@@ -92,7 +93,7 @@
 		unsigned long temp;
 
 		__asm__ __volatile__(
-		"	.set	arch=r4000				\n"
+		"	.set	"MIPS_ISA_ARCH_LEVEL"			\n"
 		"1:"	__LL	"%1, %2		# local_sub_return	\n"
 		"	subu	%0, %1, %3				\n"
 			__SC	"%0, %2					\n"
diff --git a/arch/mips/include/asm/mach-cavium-octeon/kernel-entry-init.h b/arch/mips/include/asm/mach-cavium-octeon/kernel-entry-init.h
index 1668ee5..cf92fe7 100644
--- a/arch/mips/include/asm/mach-cavium-octeon/kernel-entry-init.h
+++ b/arch/mips/include/asm/mach-cavium-octeon/kernel-entry-init.h
@@ -8,11 +8,10 @@
 #ifndef __ASM_MACH_CAVIUM_OCTEON_KERNEL_ENTRY_H
 #define __ASM_MACH_CAVIUM_OCTEON_KERNEL_ENTRY_H
 
-
-#define CP0_CYCLE_COUNTER $9, 6
 #define CP0_CVMCTL_REG $9, 7
 #define CP0_CVMMEMCTL_REG $11,7
 #define CP0_PRID_REG $15, 0
+#define CP0_DCACHE_ERR_REG $27, 1
 #define CP0_PRID_OCTEON_PASS1 0x000d0000
 #define CP0_PRID_OCTEON_CN30XX 0x000d0200
 
@@ -38,36 +37,55 @@
 	# Needed for octeon specific memcpy
 	or  v0, v0, 0x5001
 	xor v0, v0, 0x1001
-	# Read the processor ID register
-	mfc0 v1, CP0_PRID_REG
-	# Disable instruction prefetching (Octeon Pass1 errata)
-	or  v0, v0, 0x2000
-	# Skip reenable of prefetching for Octeon Pass1
-	beq v1, CP0_PRID_OCTEON_PASS1, skip
-	nop
-	# Reenable instruction prefetching, not on Pass1
-	xor v0, v0, 0x2000
-	# Strip off pass number off of processor id
-	srl v1, 8
-	sll v1, 8
-	# CN30XX needs some extra stuff turned off for better performance
-	bne v1, CP0_PRID_OCTEON_CN30XX, skip
-	nop
-	# CN30XX Use random Icache replacement
-	or  v0, v0, 0x400
-	# CN30XX Disable instruction prefetching
-	or  v0, v0, 0x2000
-skip:
 	# First clear off CvmCtl[IPPCI] bit and move the performance
 	# counters interrupt to IRQ 6
-	li	v1, ~(7 << 7)
+	dli	v1, ~(7 << 7)
 	and	v0, v0, v1
 	ori	v0, v0, (6 << 7)
+
+	mfc0	v1, CP0_PRID_REG
+	and	t1, v1, 0xfff8
+	xor	t1, t1, 0x9000		# 63-P1
+	beqz	t1, 4f
+	and	t1, v1, 0xfff8
+	xor	t1, t1, 0x9008		# 63-P2
+	beqz	t1, 4f
+	and	t1, v1, 0xfff8
+	xor	t1, t1, 0x9100		# 68-P1
+	beqz	t1, 4f
+	and	t1, v1, 0xff00
+	xor	t1, t1, 0x9200		# 66-PX
+	bnez	t1, 5f			# Skip WAR for others.
+	and	t1, v1, 0x00ff
+	slti	t1, t1, 2		# 66-P1.2 and later good.
+	beqz	t1, 5f
+
+4:	# core-16057 work around
+	or	v0, v0, 0x2000		# Set IPREF bit.
+
+5:	# No core-16057 work around
 	# Write the cavium control register
 	dmtc0	v0, CP0_CVMCTL_REG
 	sync
 	# Flush dcache after config change
 	cache	9, 0($0)
+	# Zero all of CVMSEG to make sure parity is correct
+	dli	v0, CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE
+	dsll	v0, 7
+	beqz	v0, 2f
+1:	dsubu	v0, 8
+	sd	$0, -32768(v0)
+	bnez	v0, 1b
+2:
+	mfc0	v0, CP0_PRID_REG
+	bbit0	v0, 15, 1f
+	# OCTEON II or better have bit 15 set.  Clear the error bits.
+	and	t1, v0, 0xff00
+	dli	v0, 0x9500
+	bge	t1, v0, 1f  # OCTEON III has no DCACHE_ERR_REG COP0
+	dli	v0, 0x27
+	dmtc0	v0, CP0_DCACHE_ERR_REG
+1:
 	# Get my core id
 	rdhwr	v0, $0
 	# Jump the master to kernel_entry
diff --git a/arch/mips/include/asm/mach-cavium-octeon/war.h b/arch/mips/include/asm/mach-cavium-octeon/war.h
index eb72b35..35c80be 100644
--- a/arch/mips/include/asm/mach-cavium-octeon/war.h
+++ b/arch/mips/include/asm/mach-cavium-octeon/war.h
@@ -22,4 +22,7 @@
 #define R10000_LLSC_WAR			0
 #define MIPS34K_MISSED_ITLB_WAR		0
 
+#define CAVIUM_OCTEON_DCACHE_PREFETCH_WAR	\
+	OCTEON_IS_MODEL(OCTEON_CN6XXX)
+
 #endif /* __ASM_MIPS_MACH_CAVIUM_OCTEON_WAR_H */
diff --git a/arch/mips/include/asm/mach-pmcs-msp71xx/msp_regops.h b/arch/mips/include/asm/mach-pmcs-msp71xx/msp_regops.h
index 2e54b4b..90dbe43 100644
--- a/arch/mips/include/asm/mach-pmcs-msp71xx/msp_regops.h
+++ b/arch/mips/include/asm/mach-pmcs-msp71xx/msp_regops.h
@@ -85,8 +85,8 @@
 	"	"__beqz"%0, 1b				\n"
 	"	nop					\n"
 	"	.set	pop				\n"
-	: "=&r" (temp), "=" GCC_OFF12_ASM() (*addr)
-	: "ir" (~mask), "ir" (value), GCC_OFF12_ASM() (*addr));
+	: "=&r" (temp), "=" GCC_OFF_SMALL_ASM() (*addr)
+	: "ir" (~mask), "ir" (value), GCC_OFF_SMALL_ASM() (*addr));
 }
 
 /*
@@ -106,8 +106,8 @@
 	"	"__beqz"%0, 1b				\n"
 	"	nop					\n"
 	"	.set	pop				\n"
-	: "=&r" (temp), "=" GCC_OFF12_ASM() (*addr)
-	: "ir" (mask), GCC_OFF12_ASM() (*addr));
+	: "=&r" (temp), "=" GCC_OFF_SMALL_ASM() (*addr)
+	: "ir" (mask), GCC_OFF_SMALL_ASM() (*addr));
 }
 
 /*
@@ -127,8 +127,8 @@
 	"	"__beqz"%0, 1b				\n"
 	"	nop					\n"
 	"	.set	pop				\n"
-	: "=&r" (temp), "=" GCC_OFF12_ASM() (*addr)
-	: "ir" (~mask), GCC_OFF12_ASM() (*addr));
+	: "=&r" (temp), "=" GCC_OFF_SMALL_ASM() (*addr)
+	: "ir" (~mask), GCC_OFF_SMALL_ASM() (*addr));
 }
 
 /*
@@ -148,8 +148,8 @@
 	"	"__beqz"%0, 1b				\n"
 	"	nop					\n"
 	"	.set	pop				\n"
-	: "=&r" (temp), "=" GCC_OFF12_ASM() (*addr)
-	: "ir" (mask), GCC_OFF12_ASM() (*addr));
+	: "=&r" (temp), "=" GCC_OFF_SMALL_ASM() (*addr)
+	: "ir" (mask), GCC_OFF_SMALL_ASM() (*addr));
 }
 
 /*
@@ -220,8 +220,8 @@
 	"	.set	arch=r4000			\n"	\
 	"1:	ll	%0, %1	#custom_read_reg32	\n"	\
 	"	.set	pop				\n"	\
-	: "=r" (tmp), "=" GCC_OFF12_ASM() (*address)		\
-	: GCC_OFF12_ASM() (*address))
+	: "=r" (tmp), "=" GCC_OFF_SMALL_ASM() (*address)		\
+	: GCC_OFF_SMALL_ASM() (*address))
 
 #define custom_write_reg32(address, tmp)			\
 	__asm__ __volatile__(					\
@@ -231,7 +231,7 @@
 	"	"__beqz"%0, 1b				\n"	\
 	"	nop					\n"	\
 	"	.set	pop				\n"	\
-	: "=&r" (tmp), "=" GCC_OFF12_ASM() (*address)		\
-	: "0" (tmp), GCC_OFF12_ASM() (*address))
+	: "=&r" (tmp), "=" GCC_OFF_SMALL_ASM() (*address)		\
+	: "0" (tmp), GCC_OFF_SMALL_ASM() (*address))
 
 #endif	/* __ASM_REGOPS_H__ */
diff --git a/arch/mips/include/asm/mips-r2-to-r6-emul.h b/arch/mips/include/asm/mips-r2-to-r6-emul.h
new file mode 100644
index 0000000..60570f2
--- /dev/null
+++ b/arch/mips/include/asm/mips-r2-to-r6-emul.h
@@ -0,0 +1,96 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2014 Imagination Technologies Ltd.
+ * Author: Markos Chandras <markos.chandras@imgtec.com>
+ */
+
+#ifndef __ASM_MIPS_R2_TO_R6_EMUL_H
+#define __ASM_MIPS_R2_TO_R6_EMUL_H
+
+struct mips_r2_emulator_stats {
+	u64 movs;
+	u64 hilo;
+	u64 muls;
+	u64 divs;
+	u64 dsps;
+	u64 bops;
+	u64 traps;
+	u64 fpus;
+	u64 loads;
+	u64 stores;
+	u64 llsc;
+	u64 dsemul;
+};
+
+struct mips_r2br_emulator_stats {
+	u64 jrs;
+	u64 bltzl;
+	u64 bgezl;
+	u64 bltzll;
+	u64 bgezll;
+	u64 bltzall;
+	u64 bgezall;
+	u64 bltzal;
+	u64 bgezal;
+	u64 beql;
+	u64 bnel;
+	u64 blezl;
+	u64 bgtzl;
+};
+
+#ifdef CONFIG_DEBUG_FS
+
+#define MIPS_R2_STATS(M)						\
+do {									\
+	u32 nir;							\
+	int err;							\
+									\
+	preempt_disable();						\
+	__this_cpu_inc(mipsr2emustats.M);				\
+	err = __get_user(nir, (u32 __user *)regs->cp0_epc);		\
+	if (!err) {							\
+		if (nir == BREAK_MATH)					\
+			__this_cpu_inc(mipsr2bdemustats.M);		\
+	}								\
+	preempt_enable();						\
+} while (0)
+
+#define MIPS_R2BR_STATS(M)					\
+do {								\
+	preempt_disable();					\
+	__this_cpu_inc(mipsr2bremustats.M);			\
+	preempt_enable();					\
+} while (0)
+
+#else
+
+#define MIPS_R2_STATS(M)          do { } while (0)
+#define MIPS_R2BR_STATS(M)        do { } while (0)
+
+#endif /* CONFIG_DEBUG_FS */
+
+struct r2_decoder_table {
+	u32     mask;
+	u32     code;
+	int     (*func)(struct pt_regs *regs, u32 inst);
+};
+
+
+extern void do_trap_or_bp(struct pt_regs *regs, unsigned int code,
+			  const char *str);
+
+#ifndef CONFIG_MIPSR2_TO_R6_EMULATOR
+static int mipsr2_emulation;
+static __maybe_unused int mipsr2_decoder(struct pt_regs *regs, u32 inst) { return 0; };
+#else
+/* MIPS R2 Emulator ON/OFF */
+extern int mipsr2_emulation;
+extern int mipsr2_decoder(struct pt_regs *regs, u32 inst);
+#endif /* CONFIG_MIPSR2_TO_R6_EMULATOR */
+
+#define NO_R6EMU	(cpu_has_mips_r6 && !mipsr2_emulation)
+
+#endif /* __ASM_MIPS_R2_TO_R6_EMUL_H */
diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h
index 5b720d8..fef0044 100644
--- a/arch/mips/include/asm/mipsregs.h
+++ b/arch/mips/include/asm/mipsregs.h
@@ -653,6 +653,7 @@
 #define MIPS_CONF5_NF		(_ULCAST_(1) << 0)
 #define MIPS_CONF5_UFR		(_ULCAST_(1) << 2)
 #define MIPS_CONF5_MRP		(_ULCAST_(1) << 3)
+#define MIPS_CONF5_LLB		(_ULCAST_(1) << 4)
 #define MIPS_CONF5_MVH		(_ULCAST_(1) << 5)
 #define MIPS_CONF5_FRE		(_ULCAST_(1) << 8)
 #define MIPS_CONF5_UFE		(_ULCAST_(1) << 9)
@@ -1127,6 +1128,8 @@
 #define write_c0_config6(val)	__write_32bit_c0_register($16, 6, val)
 #define write_c0_config7(val)	__write_32bit_c0_register($16, 7, val)
 
+#define read_c0_lladdr()	__read_ulong_c0_register($17, 0)
+#define write_c0_lladdr(val)	__write_ulong_c0_register($17, 0, val)
 #define read_c0_maar()		__read_ulong_c0_register($17, 1)
 #define write_c0_maar(val)	__write_ulong_c0_register($17, 1, val)
 #define read_c0_maari()		__read_32bit_c0_register($17, 2)
@@ -1909,6 +1912,7 @@
 __BUILD_SET_C0(intcontrol)
 __BUILD_SET_C0(intctl)
 __BUILD_SET_C0(srsmap)
+__BUILD_SET_C0(pagegrain)
 __BUILD_SET_C0(brcm_config_0)
 __BUILD_SET_C0(brcm_bus_pll)
 __BUILD_SET_C0(brcm_reset)
diff --git a/arch/mips/include/asm/mmu.h b/arch/mips/include/asm/mmu.h
index c436138..1afa1f9 100644
--- a/arch/mips/include/asm/mmu.h
+++ b/arch/mips/include/asm/mmu.h
@@ -1,9 +1,12 @@
 #ifndef __ASM_MMU_H
 #define __ASM_MMU_H
 
+#include <linux/atomic.h>
+
 typedef struct {
 	unsigned long asid[NR_CPUS];
 	void *vdso;
+	atomic_t fp_mode_switching;
 } mm_context_t;
 
 #endif /* __ASM_MMU_H */
diff --git a/arch/mips/include/asm/mmu_context.h b/arch/mips/include/asm/mmu_context.h
index 2f82568..45914b5 100644
--- a/arch/mips/include/asm/mmu_context.h
+++ b/arch/mips/include/asm/mmu_context.h
@@ -25,7 +25,6 @@
 	if (cpu_has_htw) {						\
 		write_c0_pwbase(pgd);					\
 		back_to_back_c0_hazard();				\
-		htw_reset();						\
 	}								\
 } while (0)
 
@@ -132,6 +131,8 @@
 	for_each_possible_cpu(i)
 		cpu_context(i, mm) = 0;
 
+	atomic_set(&mm->context.fp_mode_switching, 0);
+
 	return 0;
 }
 
@@ -142,6 +143,7 @@
 	unsigned long flags;
 	local_irq_save(flags);
 
+	htw_stop();
 	/* Check if our ASID is of an older version and thus invalid */
 	if ((cpu_context(cpu, next) ^ asid_cache(cpu)) & ASID_VERSION_MASK)
 		get_new_mmu_context(next, cpu);
@@ -154,6 +156,7 @@
 	 */
 	cpumask_clear_cpu(cpu, mm_cpumask(prev));
 	cpumask_set_cpu(cpu, mm_cpumask(next));
+	htw_start();
 
 	local_irq_restore(flags);
 }
@@ -180,6 +183,7 @@
 
 	local_irq_save(flags);
 
+	htw_stop();
 	/* Unconditionally get a new ASID.  */
 	get_new_mmu_context(next, cpu);
 
@@ -189,6 +193,7 @@
 	/* mark mmu ownership change */
 	cpumask_clear_cpu(cpu, mm_cpumask(prev));
 	cpumask_set_cpu(cpu, mm_cpumask(next));
+	htw_start();
 
 	local_irq_restore(flags);
 }
@@ -203,6 +208,7 @@
 	unsigned long flags;
 
 	local_irq_save(flags);
+	htw_stop();
 
 	if (cpumask_test_cpu(cpu, mm_cpumask(mm)))  {
 		get_new_mmu_context(mm, cpu);
@@ -211,6 +217,7 @@
 		/* will get a new context next time */
 		cpu_context(cpu, mm) = 0;
 	}
+	htw_start();
 	local_irq_restore(flags);
 }
 
diff --git a/arch/mips/include/asm/module.h b/arch/mips/include/asm/module.h
index 800fe57..0aaf9a0 100644
--- a/arch/mips/include/asm/module.h
+++ b/arch/mips/include/asm/module.h
@@ -88,10 +88,14 @@
 #define MODULE_PROC_FAMILY "MIPS32_R1 "
 #elif defined CONFIG_CPU_MIPS32_R2
 #define MODULE_PROC_FAMILY "MIPS32_R2 "
+#elif defined CONFIG_CPU_MIPS32_R6
+#define MODULE_PROC_FAMILY "MIPS32_R6 "
 #elif defined CONFIG_CPU_MIPS64_R1
 #define MODULE_PROC_FAMILY "MIPS64_R1 "
 #elif defined CONFIG_CPU_MIPS64_R2
 #define MODULE_PROC_FAMILY "MIPS64_R2 "
+#elif defined CONFIG_CPU_MIPS64_R6
+#define MODULE_PROC_FAMILY "MIPS64_R6 "
 #elif defined CONFIG_CPU_R3000
 #define MODULE_PROC_FAMILY "R3000 "
 #elif defined CONFIG_CPU_TX39XX
diff --git a/arch/mips/include/asm/octeon/cvmx-cmd-queue.h b/arch/mips/include/asm/octeon/cvmx-cmd-queue.h
index 75739c8..8d05d90 100644
--- a/arch/mips/include/asm/octeon/cvmx-cmd-queue.h
+++ b/arch/mips/include/asm/octeon/cvmx-cmd-queue.h
@@ -275,7 +275,7 @@
 		" lbu	%[ticket], %[now_serving]\n"
 		"4:\n"
 		".set pop\n" :
-		[ticket_ptr] "=" GCC_OFF12_ASM()(__cvmx_cmd_queue_state_ptr->ticket[__cvmx_cmd_queue_get_index(queue_id)]),
+		[ticket_ptr] "=" GCC_OFF_SMALL_ASM()(__cvmx_cmd_queue_state_ptr->ticket[__cvmx_cmd_queue_get_index(queue_id)]),
 		[now_serving] "=m"(qptr->now_serving), [ticket] "=r"(tmp),
 		[my_ticket] "=r"(my_ticket)
 	    );
diff --git a/arch/mips/include/asm/octeon/cvmx-rst-defs.h b/arch/mips/include/asm/octeon/cvmx-rst-defs.h
new file mode 100644
index 0000000..0c9c3e7
--- /dev/null
+++ b/arch/mips/include/asm/octeon/cvmx-rst-defs.h
@@ -0,0 +1,306 @@
+/***********************license start***************
+ * Author: Cavium Inc.
+ *
+ * Contact: support@cavium.com
+ * This file is part of the OCTEON SDK
+ *
+ * Copyright (c) 2003-2014 Cavium Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this file; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ * or visit http://www.gnu.org/licenses/.
+ *
+ * This file may also be available under a different license from Cavium.
+ * Contact Cavium Inc. for more information
+ ***********************license end**************************************/
+
+#ifndef __CVMX_RST_DEFS_H__
+#define __CVMX_RST_DEFS_H__
+
+#define CVMX_RST_BOOT (CVMX_ADD_IO_SEG(0x0001180006001600ull))
+#define CVMX_RST_CFG (CVMX_ADD_IO_SEG(0x0001180006001610ull))
+#define CVMX_RST_CKILL (CVMX_ADD_IO_SEG(0x0001180006001638ull))
+#define CVMX_RST_CTLX(offset) (CVMX_ADD_IO_SEG(0x0001180006001640ull) + ((offset) & 3) * 8)
+#define CVMX_RST_DELAY (CVMX_ADD_IO_SEG(0x0001180006001608ull))
+#define CVMX_RST_ECO (CVMX_ADD_IO_SEG(0x00011800060017B8ull))
+#define CVMX_RST_INT (CVMX_ADD_IO_SEG(0x0001180006001628ull))
+#define CVMX_RST_OCX (CVMX_ADD_IO_SEG(0x0001180006001618ull))
+#define CVMX_RST_POWER_DBG (CVMX_ADD_IO_SEG(0x0001180006001708ull))
+#define CVMX_RST_PP_POWER (CVMX_ADD_IO_SEG(0x0001180006001700ull))
+#define CVMX_RST_SOFT_PRSTX(offset) (CVMX_ADD_IO_SEG(0x00011800060016C0ull) + ((offset) & 3) * 8)
+#define CVMX_RST_SOFT_RST (CVMX_ADD_IO_SEG(0x0001180006001680ull))
+
+union cvmx_rst_boot {
+	uint64_t u64;
+	struct cvmx_rst_boot_s {
+#ifdef __BIG_ENDIAN_BITFIELD
+		uint64_t chipkill:1;
+		uint64_t jtcsrdis:1;
+		uint64_t ejtagdis:1;
+		uint64_t romen:1;
+		uint64_t ckill_ppdis:1;
+		uint64_t jt_tstmode:1;
+		uint64_t vrm_err:1;
+		uint64_t reserved_37_56:20;
+		uint64_t c_mul:7;
+		uint64_t pnr_mul:6;
+		uint64_t reserved_21_23:3;
+		uint64_t lboot_oci:3;
+		uint64_t lboot_ext:6;
+		uint64_t lboot:10;
+		uint64_t rboot:1;
+		uint64_t rboot_pin:1;
+#else
+		uint64_t rboot_pin:1;
+		uint64_t rboot:1;
+		uint64_t lboot:10;
+		uint64_t lboot_ext:6;
+		uint64_t lboot_oci:3;
+		uint64_t reserved_21_23:3;
+		uint64_t pnr_mul:6;
+		uint64_t c_mul:7;
+		uint64_t reserved_37_56:20;
+		uint64_t vrm_err:1;
+		uint64_t jt_tstmode:1;
+		uint64_t ckill_ppdis:1;
+		uint64_t romen:1;
+		uint64_t ejtagdis:1;
+		uint64_t jtcsrdis:1;
+		uint64_t chipkill:1;
+#endif
+	} s;
+	struct cvmx_rst_boot_s cn70xx;
+	struct cvmx_rst_boot_s cn70xxp1;
+	struct cvmx_rst_boot_s cn78xx;
+};
+
+union cvmx_rst_cfg {
+	uint64_t u64;
+	struct cvmx_rst_cfg_s {
+#ifdef __BIG_ENDIAN_BITFIELD
+		uint64_t bist_delay:58;
+		uint64_t reserved_3_5:3;
+		uint64_t cntl_clr_bist:1;
+		uint64_t warm_clr_bist:1;
+		uint64_t soft_clr_bist:1;
+#else
+		uint64_t soft_clr_bist:1;
+		uint64_t warm_clr_bist:1;
+		uint64_t cntl_clr_bist:1;
+		uint64_t reserved_3_5:3;
+		uint64_t bist_delay:58;
+#endif
+	} s;
+	struct cvmx_rst_cfg_s cn70xx;
+	struct cvmx_rst_cfg_s cn70xxp1;
+	struct cvmx_rst_cfg_s cn78xx;
+};
+
+union cvmx_rst_ckill {
+	uint64_t u64;
+	struct cvmx_rst_ckill_s {
+#ifdef __BIG_ENDIAN_BITFIELD
+		uint64_t reserved_47_63:17;
+		uint64_t timer:47;
+#else
+		uint64_t timer:47;
+		uint64_t reserved_47_63:17;
+#endif
+	} s;
+	struct cvmx_rst_ckill_s cn70xx;
+	struct cvmx_rst_ckill_s cn70xxp1;
+	struct cvmx_rst_ckill_s cn78xx;
+};
+
+union cvmx_rst_ctlx {
+	uint64_t u64;
+	struct cvmx_rst_ctlx_s {
+#ifdef __BIG_ENDIAN_BITFIELD
+		uint64_t reserved_10_63:54;
+		uint64_t prst_link:1;
+		uint64_t rst_done:1;
+		uint64_t rst_link:1;
+		uint64_t host_mode:1;
+		uint64_t reserved_4_5:2;
+		uint64_t rst_drv:1;
+		uint64_t rst_rcv:1;
+		uint64_t rst_chip:1;
+		uint64_t rst_val:1;
+#else
+		uint64_t rst_val:1;
+		uint64_t rst_chip:1;
+		uint64_t rst_rcv:1;
+		uint64_t rst_drv:1;
+		uint64_t reserved_4_5:2;
+		uint64_t host_mode:1;
+		uint64_t rst_link:1;
+		uint64_t rst_done:1;
+		uint64_t prst_link:1;
+		uint64_t reserved_10_63:54;
+#endif
+	} s;
+	struct cvmx_rst_ctlx_s cn70xx;
+	struct cvmx_rst_ctlx_s cn70xxp1;
+	struct cvmx_rst_ctlx_s cn78xx;
+};
+
+union cvmx_rst_delay {
+	uint64_t u64;
+	struct cvmx_rst_delay_s {
+#ifdef __BIG_ENDIAN_BITFIELD
+		uint64_t reserved_32_63:32;
+		uint64_t warm_rst_dly:16;
+		uint64_t soft_rst_dly:16;
+#else
+		uint64_t soft_rst_dly:16;
+		uint64_t warm_rst_dly:16;
+		uint64_t reserved_32_63:32;
+#endif
+	} s;
+	struct cvmx_rst_delay_s cn70xx;
+	struct cvmx_rst_delay_s cn70xxp1;
+	struct cvmx_rst_delay_s cn78xx;
+};
+
+union cvmx_rst_eco {
+	uint64_t u64;
+	struct cvmx_rst_eco_s {
+#ifdef __BIG_ENDIAN_BITFIELD
+		uint64_t reserved_32_63:32;
+		uint64_t eco_rw:32;
+#else
+		uint64_t eco_rw:32;
+		uint64_t reserved_32_63:32;
+#endif
+	} s;
+	struct cvmx_rst_eco_s cn78xx;
+};
+
+union cvmx_rst_int {
+	uint64_t u64;
+	struct cvmx_rst_int_s {
+#ifdef __BIG_ENDIAN_BITFIELD
+		uint64_t reserved_12_63:52;
+		uint64_t perst:4;
+		uint64_t reserved_4_7:4;
+		uint64_t rst_link:4;
+#else
+		uint64_t rst_link:4;
+		uint64_t reserved_4_7:4;
+		uint64_t perst:4;
+		uint64_t reserved_12_63:52;
+#endif
+	} s;
+	struct cvmx_rst_int_cn70xx {
+#ifdef __BIG_ENDIAN_BITFIELD
+		uint64_t reserved_11_63:53;
+		uint64_t perst:3;
+		uint64_t reserved_3_7:5;
+		uint64_t rst_link:3;
+#else
+		uint64_t rst_link:3;
+		uint64_t reserved_3_7:5;
+		uint64_t perst:3;
+		uint64_t reserved_11_63:53;
+#endif
+	} cn70xx;
+	struct cvmx_rst_int_cn70xx cn70xxp1;
+	struct cvmx_rst_int_s cn78xx;
+};
+
+union cvmx_rst_ocx {
+	uint64_t u64;
+	struct cvmx_rst_ocx_s {
+#ifdef __BIG_ENDIAN_BITFIELD
+		uint64_t reserved_3_63:61;
+		uint64_t rst_link:3;
+#else
+		uint64_t rst_link:3;
+		uint64_t reserved_3_63:61;
+#endif
+	} s;
+	struct cvmx_rst_ocx_s cn78xx;
+};
+
+union cvmx_rst_power_dbg {
+	uint64_t u64;
+	struct cvmx_rst_power_dbg_s {
+#ifdef __BIG_ENDIAN_BITFIELD
+		uint64_t reserved_3_63:61;
+		uint64_t str:3;
+#else
+		uint64_t str:3;
+		uint64_t reserved_3_63:61;
+#endif
+	} s;
+	struct cvmx_rst_power_dbg_s cn78xx;
+};
+
+union cvmx_rst_pp_power {
+	uint64_t u64;
+	struct cvmx_rst_pp_power_s {
+#ifdef __BIG_ENDIAN_BITFIELD
+		uint64_t reserved_48_63:16;
+		uint64_t gate:48;
+#else
+		uint64_t gate:48;
+		uint64_t reserved_48_63:16;
+#endif
+	} s;
+	struct cvmx_rst_pp_power_cn70xx {
+#ifdef __BIG_ENDIAN_BITFIELD
+		uint64_t reserved_4_63:60;
+		uint64_t gate:4;
+#else
+		uint64_t gate:4;
+		uint64_t reserved_4_63:60;
+#endif
+	} cn70xx;
+	struct cvmx_rst_pp_power_cn70xx cn70xxp1;
+	struct cvmx_rst_pp_power_s cn78xx;
+};
+
+union cvmx_rst_soft_prstx {
+	uint64_t u64;
+	struct cvmx_rst_soft_prstx_s {
+#ifdef __BIG_ENDIAN_BITFIELD
+		uint64_t reserved_1_63:63;
+		uint64_t soft_prst:1;
+#else
+		uint64_t soft_prst:1;
+		uint64_t reserved_1_63:63;
+#endif
+	} s;
+	struct cvmx_rst_soft_prstx_s cn70xx;
+	struct cvmx_rst_soft_prstx_s cn70xxp1;
+	struct cvmx_rst_soft_prstx_s cn78xx;
+};
+
+union cvmx_rst_soft_rst {
+	uint64_t u64;
+	struct cvmx_rst_soft_rst_s {
+#ifdef __BIG_ENDIAN_BITFIELD
+		uint64_t reserved_1_63:63;
+		uint64_t soft_rst:1;
+#else
+		uint64_t soft_rst:1;
+		uint64_t reserved_1_63:63;
+#endif
+	} s;
+	struct cvmx_rst_soft_rst_s cn70xx;
+	struct cvmx_rst_soft_rst_s cn70xxp1;
+	struct cvmx_rst_soft_rst_s cn78xx;
+};
+
+#endif
diff --git a/arch/mips/include/asm/octeon/octeon-model.h b/arch/mips/include/asm/octeon/octeon-model.h
index e8a1c2f..92b377e 100644
--- a/arch/mips/include/asm/octeon/octeon-model.h
+++ b/arch/mips/include/asm/octeon/octeon-model.h
@@ -45,6 +45,7 @@
  */
 
 #define OCTEON_FAMILY_MASK	0x00ffff00
+#define OCTEON_PRID_MASK	0x00ffffff
 
 /* Flag bits in top byte */
 /* Ignores revision in model checks */
@@ -63,11 +64,52 @@
 #define OM_MATCH_6XXX_FAMILY_MODELS	0x40000000
 /* Match all cnf7XXX Octeon models. */
 #define OM_MATCH_F7XXX_FAMILY_MODELS	0x80000000
+/* Match all cn7XXX Octeon models. */
+#define OM_MATCH_7XXX_FAMILY_MODELS     0x10000000
+#define OM_MATCH_FAMILY_MODELS		(OM_MATCH_5XXX_FAMILY_MODELS |	\
+					 OM_MATCH_6XXX_FAMILY_MODELS |	\
+					 OM_MATCH_F7XXX_FAMILY_MODELS | \
+					 OM_MATCH_7XXX_FAMILY_MODELS)
+/*
+ * CN7XXX models with new revision encoding
+ */
+
+#define OCTEON_CN73XX_PASS1_0	0x000d9700
+#define OCTEON_CN73XX		(OCTEON_CN73XX_PASS1_0 | OM_IGNORE_REVISION)
+#define OCTEON_CN73XX_PASS1_X	(OCTEON_CN73XX_PASS1_0 | \
+				 OM_IGNORE_MINOR_REVISION)
+
+#define OCTEON_CN70XX_PASS1_0	0x000d9600
+#define OCTEON_CN70XX_PASS1_1	0x000d9601
+#define OCTEON_CN70XX_PASS1_2	0x000d9602
+
+#define OCTEON_CN70XX_PASS2_0	0x000d9608
+
+#define OCTEON_CN70XX		(OCTEON_CN70XX_PASS1_0 | OM_IGNORE_REVISION)
+#define OCTEON_CN70XX_PASS1_X	(OCTEON_CN70XX_PASS1_0 | \
+				 OM_IGNORE_MINOR_REVISION)
+#define OCTEON_CN70XX_PASS2_X	(OCTEON_CN70XX_PASS2_0 | \
+				 OM_IGNORE_MINOR_REVISION)
+
+#define OCTEON_CN71XX		OCTEON_CN70XX
+
+#define OCTEON_CN78XX_PASS1_0	0x000d9500
+#define OCTEON_CN78XX_PASS1_1	0x000d9501
+#define OCTEON_CN78XX_PASS2_0	0x000d9508
+
+#define OCTEON_CN78XX		(OCTEON_CN78XX_PASS1_0 | OM_IGNORE_REVISION)
+#define OCTEON_CN78XX_PASS1_X	(OCTEON_CN78XX_PASS1_0 | \
+				 OM_IGNORE_MINOR_REVISION)
+#define OCTEON_CN78XX_PASS2_X	(OCTEON_CN78XX_PASS2_0 | \
+				 OM_IGNORE_MINOR_REVISION)
+
+#define OCTEON_CN76XX		(0x000d9540 | OM_CHECK_SUBMODEL)
 
 /*
  * CNF7XXX models with new revision encoding
  */
 #define OCTEON_CNF71XX_PASS1_0	0x000d9400
+#define OCTEON_CNF71XX_PASS1_1  0x000d9401
 
 #define OCTEON_CNF71XX		(OCTEON_CNF71XX_PASS1_0 | OM_IGNORE_REVISION)
 #define OCTEON_CNF71XX_PASS1_X	(OCTEON_CNF71XX_PASS1_0 | OM_IGNORE_MINOR_REVISION)
@@ -79,6 +121,8 @@
 #define OCTEON_CN68XX_PASS1_1	0x000d9101
 #define OCTEON_CN68XX_PASS1_2	0x000d9102
 #define OCTEON_CN68XX_PASS2_0	0x000d9108
+#define OCTEON_CN68XX_PASS2_1   0x000d9109
+#define OCTEON_CN68XX_PASS2_2   0x000d910a
 
 #define OCTEON_CN68XX		(OCTEON_CN68XX_PASS2_0 | OM_IGNORE_REVISION)
 #define OCTEON_CN68XX_PASS1_X	(OCTEON_CN68XX_PASS1_0 | OM_IGNORE_MINOR_REVISION)
@@ -104,11 +148,18 @@
 #define OCTEON_CN63XX_PASS1_X	(OCTEON_CN63XX_PASS1_0 | OM_IGNORE_MINOR_REVISION)
 #define OCTEON_CN63XX_PASS2_X	(OCTEON_CN63XX_PASS2_0 | OM_IGNORE_MINOR_REVISION)
 
+/* CN62XX is same as CN63XX with 1 MB cache */
+#define OCTEON_CN62XX           OCTEON_CN63XX
+
 #define OCTEON_CN61XX_PASS1_0	0x000d9300
+#define OCTEON_CN61XX_PASS1_1   0x000d9301
 
 #define OCTEON_CN61XX		(OCTEON_CN61XX_PASS1_0 | OM_IGNORE_REVISION)
 #define OCTEON_CN61XX_PASS1_X	(OCTEON_CN61XX_PASS1_0 | OM_IGNORE_MINOR_REVISION)
 
+/* CN60XX is same as CN61XX with 512 KB cache */
+#define OCTEON_CN60XX           OCTEON_CN61XX
+
 /*
  * CN5XXX models with new revision encoding
  */
@@ -120,7 +171,7 @@
 #define OCTEON_CN58XX_PASS2_2	0x000d030a
 #define OCTEON_CN58XX_PASS2_3	0x000d030b
 
-#define OCTEON_CN58XX		(OCTEON_CN58XX_PASS1_0 | OM_IGNORE_REVISION)
+#define OCTEON_CN58XX		(OCTEON_CN58XX_PASS2_0 | OM_IGNORE_REVISION)
 #define OCTEON_CN58XX_PASS1_X	(OCTEON_CN58XX_PASS1_0 | OM_IGNORE_MINOR_REVISION)
 #define OCTEON_CN58XX_PASS2_X	(OCTEON_CN58XX_PASS2_0 | OM_IGNORE_MINOR_REVISION)
 #define OCTEON_CN58XX_PASS1	OCTEON_CN58XX_PASS1_X
@@ -217,12 +268,10 @@
 #define OCTEON_CN3XXX		(OCTEON_CN58XX_PASS1_0 | OM_MATCH_PREVIOUS_MODELS | OM_IGNORE_REVISION)
 #define OCTEON_CN5XXX		(OCTEON_CN58XX_PASS1_0 | OM_MATCH_5XXX_FAMILY_MODELS)
 #define OCTEON_CN6XXX		(OCTEON_CN63XX_PASS1_0 | OM_MATCH_6XXX_FAMILY_MODELS)
-
-/* These are used to cover entire families of OCTEON processors */
-#define OCTEON_FAM_1		(OCTEON_CN3XXX)
-#define OCTEON_FAM_PLUS		(OCTEON_CN5XXX)
-#define OCTEON_FAM_1_PLUS	(OCTEON_FAM_PLUS | OM_MATCH_PREVIOUS_MODELS)
-#define OCTEON_FAM_2		(OCTEON_CN6XXX)
+#define OCTEON_CNF7XXX		(OCTEON_CNF71XX_PASS1_0 | \
+				 OM_MATCH_F7XXX_FAMILY_MODELS)
+#define OCTEON_CN7XXX		(OCTEON_CN78XX_PASS1_0 | \
+				 OM_MATCH_7XXX_FAMILY_MODELS)
 
 /* The revision byte (low byte) has two different encodings.
  * CN3XXX:
@@ -232,7 +281,7 @@
  *     <4>:   alternate package
  *     <3:0>: revision
  *
- * CN5XXX:
+ * CN5XXX and older models:
  *
  *     bits
  *     <7>:   reserved (0)
@@ -251,17 +300,21 @@
 /* CN5XXX and later use different layout of bits in the revision ID field */
 #define OCTEON_58XX_FAMILY_MASK	     OCTEON_38XX_FAMILY_MASK
 #define OCTEON_58XX_FAMILY_REV_MASK  0x00ffff3f
-#define OCTEON_58XX_MODEL_MASK	     0x00ffffc0
+#define OCTEON_58XX_MODEL_MASK	     0x00ffff40
 #define OCTEON_58XX_MODEL_REV_MASK   (OCTEON_58XX_FAMILY_REV_MASK | OCTEON_58XX_MODEL_MASK)
-#define OCTEON_58XX_MODEL_MINOR_REV_MASK (OCTEON_58XX_MODEL_REV_MASK & 0x00fffff8)
+#define OCTEON_58XX_MODEL_MINOR_REV_MASK (OCTEON_58XX_MODEL_REV_MASK & 0x00ffff38)
 #define OCTEON_5XXX_MODEL_MASK	     0x00ff0fc0
 
-/* forward declarations */
 static inline uint32_t cvmx_get_proc_id(void) __attribute__ ((pure));
 static inline uint64_t cvmx_read_csr(uint64_t csr_addr);
 
 #define __OCTEON_MATCH_MASK__(x, y, z) (((x) & (z)) == ((y) & (z)))
 
+/*
+ * __OCTEON_IS_MODEL_COMPILE__(arg_model, chip_model)
+ * returns true if chip_model is identical or belong to the OCTEON
+ * model group specified in arg_model.
+ */
 /* NOTE: This for internal use only! */
 #define __OCTEON_IS_MODEL_COMPILE__(arg_model, chip_model)		\
 ((((arg_model & OCTEON_38XX_FAMILY_MASK) < OCTEON_CN58XX_PASS1_0)  && ( \
@@ -286,11 +339,18 @@
 		((((arg_model) & (OM_FLAG_MASK)) == OM_IGNORE_REVISION) \
 			&& __OCTEON_MATCH_MASK__((chip_model), (arg_model), OCTEON_58XX_FAMILY_MASK)) || \
 		((((arg_model) & (OM_FLAG_MASK)) == OM_CHECK_SUBMODEL)	\
-			&& __OCTEON_MATCH_MASK__((chip_model), (arg_model), OCTEON_58XX_MODEL_REV_MASK)) || \
+			&& __OCTEON_MATCH_MASK__((chip_model), (arg_model), OCTEON_58XX_MODEL_MASK)) || \
 		((((arg_model) & (OM_MATCH_5XXX_FAMILY_MODELS)) == OM_MATCH_5XXX_FAMILY_MODELS) \
-			&& ((chip_model) >= OCTEON_CN58XX_PASS1_0) && ((chip_model) < OCTEON_CN63XX_PASS1_0)) || \
+			&& ((chip_model & OCTEON_PRID_MASK) >= OCTEON_CN58XX_PASS1_0) \
+			&& ((chip_model & OCTEON_PRID_MASK) < OCTEON_CN63XX_PASS1_0)) || \
 		((((arg_model) & (OM_MATCH_6XXX_FAMILY_MODELS)) == OM_MATCH_6XXX_FAMILY_MODELS) \
-			&& ((chip_model) >= OCTEON_CN63XX_PASS1_0)) ||	\
+			&& ((chip_model & OCTEON_PRID_MASK) >= OCTEON_CN63XX_PASS1_0) \
+			&& ((chip_model & OCTEON_PRID_MASK) < OCTEON_CNF71XX_PASS1_0)) || \
+		((((arg_model) & (OM_MATCH_F7XXX_FAMILY_MODELS)) == OM_MATCH_F7XXX_FAMILY_MODELS) \
+			&& ((chip_model & OCTEON_PRID_MASK) >= OCTEON_CNF71XX_PASS1_0) \
+			&& ((chip_model & OCTEON_PRID_MASK) < OCTEON_CN78XX_PASS1_0)) || \
+		((((arg_model) & (OM_MATCH_7XXX_FAMILY_MODELS)) == OM_MATCH_7XXX_FAMILY_MODELS) \
+			&& ((chip_model & OCTEON_PRID_MASK) >= OCTEON_CN78XX_PASS1_0)) || \
 		((((arg_model) & (OM_MATCH_PREVIOUS_MODELS)) == OM_MATCH_PREVIOUS_MODELS) \
 			&& (((chip_model) & OCTEON_58XX_MODEL_MASK) < ((arg_model) & OCTEON_58XX_MODEL_MASK))) \
 		)))
@@ -300,14 +360,6 @@
 {
 	uint32_t cpuid = cvmx_get_proc_id();
 
-	/*
-	 * Check for special case of mismarked 3005 samples. We only
-	 * need to check if the sub model isn't being ignored
-	 */
-	if ((model & OM_CHECK_SUBMODEL) == OM_CHECK_SUBMODEL) {
-		if (cpuid == OCTEON_CN3010_PASS1 && (cvmx_read_csr(0x80011800800007B8ull) & (1ull << 34)))
-			cpuid |= 0x10;
-	}
 	return __OCTEON_IS_MODEL_COMPILE__(model, cpuid);
 }
 
@@ -326,10 +378,21 @@
 #define OCTEON_IS_COMMON_BINARY() 1
 #undef OCTEON_MODEL
 
+#define OCTEON_IS_OCTEON1()	OCTEON_IS_MODEL(OCTEON_CN3XXX)
+#define OCTEON_IS_OCTEONPLUS()	OCTEON_IS_MODEL(OCTEON_CN5XXX)
+#define OCTEON_IS_OCTEON2()						\
+	(OCTEON_IS_MODEL(OCTEON_CN6XXX) || OCTEON_IS_MODEL(OCTEON_CNF71XX))
+
+#define OCTEON_IS_OCTEON3()	OCTEON_IS_MODEL(OCTEON_CN7XXX)
+
+#define OCTEON_IS_OCTEON1PLUS()	(OCTEON_IS_OCTEON1() || OCTEON_IS_OCTEONPLUS())
+
 const char *__init octeon_model_get_string(uint32_t chip_id);
 
 /*
  * Return the octeon family, i.e., ProcessorID of the PrID register.
+ *
+ * @return the octeon family on success, ((unint32_t)-1) on error.
  */
 static inline uint32_t cvmx_get_octeon_family(void)
 {
diff --git a/arch/mips/include/asm/octeon/octeon.h b/arch/mips/include/asm/octeon/octeon.h
index 6dfefd2..0415965 100644
--- a/arch/mips/include/asm/octeon/octeon.h
+++ b/arch/mips/include/asm/octeon/octeon.h
@@ -9,6 +9,7 @@
 #define __ASM_OCTEON_OCTEON_H
 
 #include <asm/octeon/cvmx.h>
+#include <asm/bitfield.h>
 
 extern uint64_t octeon_bootmem_alloc_range_phys(uint64_t size,
 						uint64_t alignment,
@@ -53,6 +54,7 @@
 #define OCTOEN_SERIAL_LEN	20
 
 struct octeon_boot_descriptor {
+#ifdef __BIG_ENDIAN_BITFIELD
 	/* Start of block referenced by assembly code - do not change! */
 	uint32_t desc_version;
 	uint32_t desc_size;
@@ -104,77 +106,149 @@
 	uint8_t mac_addr_base[6];
 	uint8_t mac_addr_count;
 	uint64_t cvmx_desc_vaddr;
+#else
+	uint32_t desc_size;
+	uint32_t desc_version;
+	uint64_t stack_top;
+	uint64_t heap_base;
+	uint64_t heap_end;
+	/* Only used by bootloader */
+	uint64_t entry_point;
+	uint64_t desc_vaddr;
+	/* End of This block referenced by assembly code - do not change! */
+	uint32_t stack_size;
+	uint32_t exception_base_addr;
+	uint32_t argc;
+	uint32_t heap_size;
+	/*
+	 * Argc count for application.
+	 * Warning low bit scrambled in little-endian.
+	 */
+	uint32_t argv[OCTEON_ARGV_MAX_ARGS];
+
+#define  BOOT_FLAG_INIT_CORE		(1 << 0)
+#define  OCTEON_BL_FLAG_DEBUG		(1 << 1)
+#define  OCTEON_BL_FLAG_NO_MAGIC	(1 << 2)
+	/* If set, use uart1 for console */
+#define  OCTEON_BL_FLAG_CONSOLE_UART1	(1 << 3)
+	/* If set, use PCI console */
+#define  OCTEON_BL_FLAG_CONSOLE_PCI	(1 << 4)
+	/* Call exit on break on serial port */
+#define  OCTEON_BL_FLAG_BREAK		(1 << 5)
+
+	uint32_t core_mask;
+	uint32_t flags;
+	/* physical address of free memory descriptor block. */
+	uint32_t phy_mem_desc_addr;
+	/* DRAM size in megabyes. */
+	uint32_t dram_size;
+	/* CPU clock speed, in hz. */
+	uint32_t eclock_hz;
+	/* used to pass flags from app to debugger. */
+	uint32_t debugger_flags_base_addr;
+	/* SPI4 clock in hz. */
+	uint32_t spi_clock_hz;
+	/* DRAM clock speed, in hz. */
+	uint32_t dclock_hz;
+	uint8_t chip_rev_minor;
+	uint8_t chip_rev_major;
+	uint16_t chip_type;
+	uint8_t board_rev_minor;
+	uint8_t board_rev_major;
+	uint16_t board_type;
+
+	uint64_t unused1[4]; /* Not even filled in by bootloader. */
+
+	uint64_t cvmx_desc_vaddr;
+#endif
 };
 
 union octeon_cvmemctl {
 	uint64_t u64;
 	struct {
 		/* RO 1 = BIST fail, 0 = BIST pass */
-		uint64_t tlbbist:1;
+		__BITFIELD_FIELD(uint64_t tlbbist:1,
 		/* RO 1 = BIST fail, 0 = BIST pass */
-		uint64_t l1cbist:1;
+		__BITFIELD_FIELD(uint64_t l1cbist:1,
 		/* RO 1 = BIST fail, 0 = BIST pass */
-		uint64_t l1dbist:1;
+		__BITFIELD_FIELD(uint64_t l1dbist:1,
 		/* RO 1 = BIST fail, 0 = BIST pass */
-		uint64_t dcmbist:1;
+		__BITFIELD_FIELD(uint64_t dcmbist:1,
 		/* RO 1 = BIST fail, 0 = BIST pass */
-		uint64_t ptgbist:1;
+		__BITFIELD_FIELD(uint64_t ptgbist:1,
 		/* RO 1 = BIST fail, 0 = BIST pass */
-		uint64_t wbfbist:1;
+		__BITFIELD_FIELD(uint64_t wbfbist:1,
 		/* Reserved */
-		uint64_t reserved:22;
+		__BITFIELD_FIELD(uint64_t reserved:17,
+		/* OCTEON II - TLB replacement policy: 0 = bitmask LRU; 1 = NLU.
+		 * This field selects between the TLB replacement policies:
+		 * bitmask LRU or NLU. Bitmask LRU maintains a mask of
+		 * recently used TLB entries and avoids them as new entries
+		 * are allocated. NLU simply guarantees that the next
+		 * allocation is not the last used TLB entry. */
+		__BITFIELD_FIELD(uint64_t tlbnlu:1,
+		/* OCTEON II - Selects the bit in the counter used for
+		 * releasing a PAUSE. This counter trips every 2(8+PAUSETIME)
+		 * cycles. If not already released, the cnMIPS II core will
+		 * always release a given PAUSE instruction within
+		 * 2(8+PAUSETIME). If the counter trip happens to line up,
+		 * the cnMIPS II core may release the PAUSE instantly. */
+		__BITFIELD_FIELD(uint64_t pausetime:3,
+		/* OCTEON II - This field is an extension of
+		 * CvmMemCtl[DIDTTO] */
+		__BITFIELD_FIELD(uint64_t didtto2:1,
 		/* R/W If set, marked write-buffer entries time out
 		 * the same as as other entries; if clear, marked
 		 * write-buffer entries use the maximum timeout. */
-		uint64_t dismarkwblongto:1;
+		__BITFIELD_FIELD(uint64_t dismarkwblongto:1,
 		/* R/W If set, a merged store does not clear the
 		 * write-buffer entry timeout state. */
-		uint64_t dismrgclrwbto:1;
+		__BITFIELD_FIELD(uint64_t dismrgclrwbto:1,
 		/* R/W Two bits that are the MSBs of the resultant
 		 * CVMSEG LM word location for an IOBDMA. The other 8
 		 * bits come from the SCRADDR field of the IOBDMA. */
-		uint64_t iobdmascrmsb:2;
+		__BITFIELD_FIELD(uint64_t iobdmascrmsb:2,
 		/* R/W If set, SYNCWS and SYNCS only order marked
 		 * stores; if clear, SYNCWS and SYNCS only order
 		 * unmarked stores. SYNCWSMARKED has no effect when
 		 * DISSYNCWS is set. */
-		uint64_t syncwsmarked:1;
+		__BITFIELD_FIELD(uint64_t syncwsmarked:1,
 		/* R/W If set, SYNCWS acts as SYNCW and SYNCS acts as
 		 * SYNC. */
-		uint64_t dissyncws:1;
+		__BITFIELD_FIELD(uint64_t dissyncws:1,
 		/* R/W If set, no stall happens on write buffer
 		 * full. */
-		uint64_t diswbfst:1;
+		__BITFIELD_FIELD(uint64_t diswbfst:1,
 		/* R/W If set (and SX set), supervisor-level
 		 * loads/stores can use XKPHYS addresses with
 		 * VA<48>==0 */
-		uint64_t xkmemenas:1;
+		__BITFIELD_FIELD(uint64_t xkmemenas:1,
 		/* R/W If set (and UX set), user-level loads/stores
 		 * can use XKPHYS addresses with VA<48>==0 */
-		uint64_t xkmemenau:1;
+		__BITFIELD_FIELD(uint64_t xkmemenau:1,
 		/* R/W If set (and SX set), supervisor-level
 		 * loads/stores can use XKPHYS addresses with
 		 * VA<48>==1 */
-		uint64_t xkioenas:1;
+		__BITFIELD_FIELD(uint64_t xkioenas:1,
 		/* R/W If set (and UX set), user-level loads/stores
 		 * can use XKPHYS addresses with VA<48>==1 */
-		uint64_t xkioenau:1;
+		__BITFIELD_FIELD(uint64_t xkioenau:1,
 		/* R/W If set, all stores act as SYNCW (NOMERGE must
 		 * be set when this is set) RW, reset to 0. */
-		uint64_t allsyncw:1;
+		__BITFIELD_FIELD(uint64_t allsyncw:1,
 		/* R/W If set, no stores merge, and all stores reach
 		 * the coherent bus in order. */
-		uint64_t nomerge:1;
+		__BITFIELD_FIELD(uint64_t nomerge:1,
 		/* R/W Selects the bit in the counter used for DID
 		 * time-outs 0 = 231, 1 = 230, 2 = 229, 3 =
 		 * 214. Actual time-out is between 1x and 2x this
 		 * interval. For example, with DIDTTO=3, expiration
 		 * interval is between 16K and 32K. */
-		uint64_t didtto:2;
+		__BITFIELD_FIELD(uint64_t didtto:2,
 		/* R/W If set, the (mem) CSR clock never turns off. */
-		uint64_t csrckalwys:1;
+		__BITFIELD_FIELD(uint64_t csrckalwys:1,
 		/* R/W If set, mclk never turns off. */
-		uint64_t mclkalwys:1;
+		__BITFIELD_FIELD(uint64_t mclkalwys:1,
 		/* R/W Selects the bit in the counter used for write
 		 * buffer flush time-outs (WBFLT+11) is the bit
 		 * position in an internal counter used to determine
@@ -182,25 +256,26 @@
 		 * 2x this interval. For example, with WBFLT = 0, a
 		 * write buffer expires between 2K and 4K cycles after
 		 * the write buffer entry is allocated. */
-		uint64_t wbfltime:3;
+		__BITFIELD_FIELD(uint64_t wbfltime:3,
 		/* R/W If set, do not put Istream in the L2 cache. */
-		uint64_t istrnol2:1;
+		__BITFIELD_FIELD(uint64_t istrnol2:1,
 		/* R/W The write buffer threshold. */
-		uint64_t wbthresh:4;
+		__BITFIELD_FIELD(uint64_t wbthresh:4,
 		/* Reserved */
-		uint64_t reserved2:2;
+		__BITFIELD_FIELD(uint64_t reserved2:2,
 		/* R/W If set, CVMSEG is available for loads/stores in
 		 * kernel/debug mode. */
-		uint64_t cvmsegenak:1;
+		__BITFIELD_FIELD(uint64_t cvmsegenak:1,
 		/* R/W If set, CVMSEG is available for loads/stores in
 		 * supervisor mode. */
-		uint64_t cvmsegenas:1;
+		__BITFIELD_FIELD(uint64_t cvmsegenas:1,
 		/* R/W If set, CVMSEG is available for loads/stores in
 		 * user mode. */
-		uint64_t cvmsegenau:1;
+		__BITFIELD_FIELD(uint64_t cvmsegenau:1,
 		/* R/W Size of local memory in cache blocks, 54 (6912
 		 * bytes) is max legal value. */
-		uint64_t lmemsz:6;
+		__BITFIELD_FIELD(uint64_t lmemsz:6,
+		;)))))))))))))))))))))))))))))))))
 	} s;
 };
 
@@ -224,6 +299,19 @@
 	cvmx_read64_uint32(address ^ 4);
 }
 
+/* Octeon multiplier save/restore routines from octeon_switch.S */
+void octeon_mult_save(void);
+void octeon_mult_restore(void);
+void octeon_mult_save_end(void);
+void octeon_mult_restore_end(void);
+void octeon_mult_save3(void);
+void octeon_mult_save3_end(void);
+void octeon_mult_save2(void);
+void octeon_mult_save2_end(void);
+void octeon_mult_restore3(void);
+void octeon_mult_restore3_end(void);
+void octeon_mult_restore2(void);
+void octeon_mult_restore2_end(void);
 
 /**
  * Read a 32bit value from the Octeon NPI register space
diff --git a/arch/mips/include/asm/pci.h b/arch/mips/include/asm/pci.h
index 6952962..193b4c6 100644
--- a/arch/mips/include/asm/pci.h
+++ b/arch/mips/include/asm/pci.h
@@ -121,6 +121,7 @@
 }
 #endif
 
+#ifdef CONFIG_PCI_DOMAINS
 #define pci_domain_nr(bus) ((struct pci_controller *)(bus)->sysdata)->index
 
 static inline int pci_proc_domain(struct pci_bus *bus)
@@ -128,6 +129,7 @@
 	struct pci_controller *hose = bus->sysdata;
 	return hose->need_domain_info;
 }
+#endif /* CONFIG_PCI_DOMAINS */
 
 #endif /* __KERNEL__ */
 
diff --git a/arch/mips/include/asm/pgtable-bits.h b/arch/mips/include/asm/pgtable-bits.h
index fc807aa..91747c2 100644
--- a/arch/mips/include/asm/pgtable-bits.h
+++ b/arch/mips/include/asm/pgtable-bits.h
@@ -35,7 +35,7 @@
 #if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32)
 
 /*
- * The following bits are directly used by the TLB hardware
+ * The following bits are implemented by the TLB hardware
  */
 #define _PAGE_GLOBAL_SHIFT	0
 #define _PAGE_GLOBAL		(1 << _PAGE_GLOBAL_SHIFT)
@@ -60,43 +60,40 @@
 #define _PAGE_MODIFIED_SHIFT	(_PAGE_ACCESSED_SHIFT + 1)
 #define _PAGE_MODIFIED		(1 << _PAGE_MODIFIED_SHIFT)
 
-#define _PAGE_SILENT_READ	_PAGE_VALID
-#define _PAGE_SILENT_WRITE	_PAGE_DIRTY
-
 #define _PFN_SHIFT		(PAGE_SHIFT - 12 + _CACHE_SHIFT + 3)
 
 #elif defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX)
 
 /*
- * The following are implemented by software
+ * The following bits are implemented in software
  */
-#define _PAGE_PRESENT_SHIFT	0
-#define _PAGE_PRESENT		(1 <<  _PAGE_PRESENT_SHIFT)
-#define _PAGE_READ_SHIFT	1
-#define _PAGE_READ		(1 <<  _PAGE_READ_SHIFT)
-#define _PAGE_WRITE_SHIFT	2
-#define _PAGE_WRITE		(1 <<  _PAGE_WRITE_SHIFT)
-#define _PAGE_ACCESSED_SHIFT	3
-#define _PAGE_ACCESSED		(1 <<  _PAGE_ACCESSED_SHIFT)
-#define _PAGE_MODIFIED_SHIFT	4
-#define _PAGE_MODIFIED		(1 <<  _PAGE_MODIFIED_SHIFT)
+#define _PAGE_PRESENT_SHIFT	(0)
+#define _PAGE_PRESENT		(1 << _PAGE_PRESENT_SHIFT)
+#define _PAGE_READ_SHIFT	(_PAGE_PRESENT_SHIFT + 1)
+#define _PAGE_READ		(1 << _PAGE_READ_SHIFT)
+#define _PAGE_WRITE_SHIFT	(_PAGE_READ_SHIFT + 1)
+#define _PAGE_WRITE		(1 << _PAGE_WRITE_SHIFT)
+#define _PAGE_ACCESSED_SHIFT	(_PAGE_WRITE_SHIFT + 1)
+#define _PAGE_ACCESSED		(1 << _PAGE_ACCESSED_SHIFT)
+#define _PAGE_MODIFIED_SHIFT	(_PAGE_ACCESSED_SHIFT + 1)
+#define _PAGE_MODIFIED		(1 << _PAGE_MODIFIED_SHIFT)
 
 /*
- * And these are the hardware TLB bits
+ * The following bits are implemented by the TLB hardware
  */
-#define _PAGE_GLOBAL_SHIFT	8
-#define _PAGE_GLOBAL		(1 <<  _PAGE_GLOBAL_SHIFT)
-#define _PAGE_VALID_SHIFT	9
-#define _PAGE_VALID		(1 <<  _PAGE_VALID_SHIFT)
-#define _PAGE_SILENT_READ	(1 <<  _PAGE_VALID_SHIFT)	/* synonym  */
-#define _PAGE_DIRTY_SHIFT	10
+#define _PAGE_GLOBAL_SHIFT	(_PAGE_MODIFIED_SHIFT + 4)
+#define _PAGE_GLOBAL		(1 << _PAGE_GLOBAL_SHIFT)
+#define _PAGE_VALID_SHIFT	(_PAGE_GLOBAL_SHIFT + 1)
+#define _PAGE_VALID		(1 << _PAGE_VALID_SHIFT)
+#define _PAGE_DIRTY_SHIFT	(_PAGE_VALID_SHIFT + 1)
 #define _PAGE_DIRTY		(1 << _PAGE_DIRTY_SHIFT)
-#define _PAGE_SILENT_WRITE	(1 << _PAGE_DIRTY_SHIFT)
-#define _CACHE_UNCACHED_SHIFT	11
+#define _CACHE_UNCACHED_SHIFT	(_PAGE_DIRTY_SHIFT + 1)
 #define _CACHE_UNCACHED		(1 << _CACHE_UNCACHED_SHIFT)
-#define _CACHE_MASK		(1 << _CACHE_UNCACHED_SHIFT)
+#define _CACHE_MASK		_CACHE_UNCACHED
 
-#else /* 'Normal' r4K case */
+#define _PFN_SHIFT		PAGE_SHIFT
+
+#else
 /*
  * When using the RI/XI bit support, we have 13 bits of flags below
  * the physical address. The RI/XI bits are placed such that a SRL 5
@@ -107,10 +104,8 @@
 
 /*
  * The following bits are implemented in software
- *
- * _PAGE_READ / _PAGE_READ_SHIFT should be unused if cpu_has_rixi.
  */
-#define _PAGE_PRESENT_SHIFT	(0)
+#define _PAGE_PRESENT_SHIFT	0
 #define _PAGE_PRESENT		(1 << _PAGE_PRESENT_SHIFT)
 #define _PAGE_READ_SHIFT	(cpu_has_rixi ? _PAGE_PRESENT_SHIFT : _PAGE_PRESENT_SHIFT + 1)
 #define _PAGE_READ ({BUG_ON(cpu_has_rixi); 1 << _PAGE_READ_SHIFT; })
@@ -125,16 +120,11 @@
 /* huge tlb page */
 #define _PAGE_HUGE_SHIFT	(_PAGE_MODIFIED_SHIFT + 1)
 #define _PAGE_HUGE		(1 << _PAGE_HUGE_SHIFT)
-#else
-#define _PAGE_HUGE_SHIFT	(_PAGE_MODIFIED_SHIFT)
-#define _PAGE_HUGE		({BUG(); 1; })	/* Dummy value */
-#endif
-
-#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
-/* huge tlb page */
 #define _PAGE_SPLITTING_SHIFT	(_PAGE_HUGE_SHIFT + 1)
 #define _PAGE_SPLITTING		(1 << _PAGE_SPLITTING_SHIFT)
 #else
+#define _PAGE_HUGE_SHIFT	(_PAGE_MODIFIED_SHIFT)
+#define _PAGE_HUGE		({BUG(); 1; })	/* Dummy value */
 #define _PAGE_SPLITTING_SHIFT	(_PAGE_HUGE_SHIFT)
 #define _PAGE_SPLITTING		({BUG(); 1; })	/* Dummy value */
 #endif
@@ -149,17 +139,10 @@
 
 #define _PAGE_GLOBAL_SHIFT	(_PAGE_NO_READ_SHIFT + 1)
 #define _PAGE_GLOBAL		(1 << _PAGE_GLOBAL_SHIFT)
-
 #define _PAGE_VALID_SHIFT	(_PAGE_GLOBAL_SHIFT + 1)
 #define _PAGE_VALID		(1 << _PAGE_VALID_SHIFT)
-/* synonym		   */
-#define _PAGE_SILENT_READ	(_PAGE_VALID)
-
-/* The MIPS dirty bit	   */
 #define _PAGE_DIRTY_SHIFT	(_PAGE_VALID_SHIFT + 1)
 #define _PAGE_DIRTY		(1 << _PAGE_DIRTY_SHIFT)
-#define _PAGE_SILENT_WRITE	(_PAGE_DIRTY)
-
 #define _CACHE_SHIFT		(_PAGE_DIRTY_SHIFT + 1)
 #define _CACHE_MASK		(7 << _CACHE_SHIFT)
 
@@ -167,9 +150,9 @@
 
 #endif /* defined(CONFIG_PHYS_ADDR_T_64BIT && defined(CONFIG_CPU_MIPS32) */
 
-#ifndef _PFN_SHIFT
-#define _PFN_SHIFT		    PAGE_SHIFT
-#endif
+#define _PAGE_SILENT_READ	_PAGE_VALID
+#define _PAGE_SILENT_WRITE	_PAGE_DIRTY
+
 #define _PFN_MASK		(~((1 << (_PFN_SHIFT)) - 1))
 
 #ifndef _PAGE_NO_READ
@@ -179,9 +162,6 @@
 #ifndef _PAGE_NO_EXEC
 #define _PAGE_NO_EXEC ({BUG(); 0; })
 #endif
-#ifndef _PAGE_GLOBAL_SHIFT
-#define _PAGE_GLOBAL_SHIFT ilog2(_PAGE_GLOBAL)
-#endif
 
 
 #ifndef __ASSEMBLY__
@@ -266,8 +246,9 @@
 #endif
 
 #define __READABLE	(_PAGE_SILENT_READ | _PAGE_ACCESSED | (cpu_has_rixi ? 0 : _PAGE_READ))
-#define __WRITEABLE	(_PAGE_WRITE | _PAGE_SILENT_WRITE | _PAGE_MODIFIED)
+#define __WRITEABLE	(_PAGE_SILENT_WRITE | _PAGE_WRITE | _PAGE_MODIFIED)
 
-#define _PAGE_CHG_MASK	(_PFN_MASK | _PAGE_ACCESSED | _PAGE_MODIFIED | _CACHE_MASK)
+#define _PAGE_CHG_MASK	(_PAGE_ACCESSED | _PAGE_MODIFIED |	\
+			 _PFN_MASK | _CACHE_MASK)
 
 #endif /* _ASM_PGTABLE_BITS_H */
diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index 583ff42..bef782c 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -99,29 +99,35 @@
 
 #define htw_stop()							\
 do {									\
-	if (cpu_has_htw)						\
-		write_c0_pwctl(read_c0_pwctl() &			\
-			       ~(1 << MIPS_PWCTL_PWEN_SHIFT));		\
+	unsigned long flags;						\
+									\
+	if (cpu_has_htw) {						\
+		local_irq_save(flags);					\
+		if(!raw_current_cpu_data.htw_seq++) {			\
+			write_c0_pwctl(read_c0_pwctl() &		\
+				       ~(1 << MIPS_PWCTL_PWEN_SHIFT));	\
+			back_to_back_c0_hazard();			\
+		}							\
+		local_irq_restore(flags);				\
+	}								\
 } while(0)
 
 #define htw_start()							\
 do {									\
-	if (cpu_has_htw)						\
-		write_c0_pwctl(read_c0_pwctl() |			\
-			       (1 << MIPS_PWCTL_PWEN_SHIFT));		\
-} while(0)
-
-
-#define htw_reset()							\
-do {									\
+	unsigned long flags;						\
+									\
 	if (cpu_has_htw) {						\
-		htw_stop();						\
-		back_to_back_c0_hazard();				\
-		htw_start();						\
-		back_to_back_c0_hazard();				\
+		local_irq_save(flags);					\
+		if (!--raw_current_cpu_data.htw_seq) {			\
+			write_c0_pwctl(read_c0_pwctl() |		\
+				       (1 << MIPS_PWCTL_PWEN_SHIFT));	\
+			back_to_back_c0_hazard();			\
+		}							\
+		local_irq_restore(flags);				\
 	}								\
 } while(0)
 
+
 extern void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
 	pte_t pteval);
 
@@ -153,12 +159,13 @@
 {
 	pte_t null = __pte(0);
 
+	htw_stop();
 	/* Preserve global status for the pair */
 	if (ptep_buddy(ptep)->pte_low & _PAGE_GLOBAL)
 		null.pte_low = null.pte_high = _PAGE_GLOBAL;
 
 	set_pte_at(mm, addr, ptep, null);
-	htw_reset();
+	htw_start();
 }
 #else
 
@@ -188,6 +195,7 @@
 
 static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
+	htw_stop();
 #if !defined(CONFIG_CPU_R3000) && !defined(CONFIG_CPU_TX39XX)
 	/* Preserve global status for the pair */
 	if (pte_val(*ptep_buddy(ptep)) & _PAGE_GLOBAL)
@@ -195,7 +203,7 @@
 	else
 #endif
 		set_pte_at(mm, addr, ptep, __pte(0));
-	htw_reset();
+	htw_start();
 }
 #endif
 
@@ -334,7 +342,7 @@
 	return pte;
 }
 
-#ifdef _PAGE_HUGE
+#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
 static inline int pte_huge(pte_t pte)	{ return pte_val(pte) & _PAGE_HUGE; }
 
 static inline pte_t pte_mkhuge(pte_t pte)
@@ -342,7 +350,7 @@
 	pte_val(pte) |= _PAGE_HUGE;
 	return pte;
 }
-#endif /* _PAGE_HUGE */
+#endif /* CONFIG_MIPS_HUGE_TLB_SUPPORT */
 #endif
 static inline int pte_special(pte_t pte)	{ return 0; }
 static inline pte_t pte_mkspecial(pte_t pte)	{ return pte; }
diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h
index f1df4cb..b5dcbee 100644
--- a/arch/mips/include/asm/processor.h
+++ b/arch/mips/include/asm/processor.h
@@ -54,9 +54,7 @@
 #define TASK_SIZE	0x7fff8000UL
 #endif
 
-#ifdef __KERNEL__
 #define STACK_TOP_MAX	TASK_SIZE
-#endif
 
 #define TASK_IS_32BIT_ADDR 1
 
@@ -73,11 +71,7 @@
 #define TASK_SIZE32	0x7fff8000UL
 #define TASK_SIZE64	0x10000000000UL
 #define TASK_SIZE (test_thread_flag(TIF_32BIT_ADDR) ? TASK_SIZE32 : TASK_SIZE64)
-
-#ifdef __KERNEL__
 #define STACK_TOP_MAX	TASK_SIZE64
-#endif
-
 
 #define TASK_SIZE_OF(tsk)						\
 	(test_tsk_thread_flag(tsk, TIF_32BIT_ADDR) ? TASK_SIZE32 : TASK_SIZE64)
@@ -211,6 +205,8 @@
 	unsigned long	cop2_gfm_poly;
 	/* DMFC2 rt, 0x025A; DMFC2 rt, 0x025B - Pass2 */
 	unsigned long	cop2_gfm_result[2];
+	/* DMFC2 rt, 0x24F, DMFC2 rt, 0x50, OCTEON III */
+	unsigned long	cop2_sha3[2];
 };
 #define COP2_INIT						\
 	.cp2			= {0,},
@@ -399,4 +395,15 @@
 
 #endif
 
+/*
+ * Functions & macros implementing the PR_GET_FP_MODE & PR_SET_FP_MODE options
+ * to the prctl syscall.
+ */
+extern int mips_get_process_fp_mode(struct task_struct *task);
+extern int mips_set_process_fp_mode(struct task_struct *task,
+				    unsigned int value);
+
+#define GET_FP_MODE(task)		mips_get_process_fp_mode(task)
+#define SET_FP_MODE(task,value)		mips_set_process_fp_mode(task, value)
+
 #endif /* _ASM_PROCESSOR_H */
diff --git a/arch/mips/include/asm/prom.h b/arch/mips/include/asm/prom.h
index eaa2627..8ebc2aa 100644
--- a/arch/mips/include/asm/prom.h
+++ b/arch/mips/include/asm/prom.h
@@ -24,13 +24,6 @@
 extern void __dt_setup_arch(void *bph);
 extern int __dt_register_buses(const char *bus0, const char *bus1);
 
-#define dt_setup_arch(sym)						\
-({									\
-	extern char __dtb_##sym##_begin[];				\
-									\
-	__dt_setup_arch(__dtb_##sym##_begin);				\
-})
-
 #else /* CONFIG_OF */
 static inline void device_tree_init(void) { }
 #endif /* CONFIG_OF */
diff --git a/arch/mips/include/asm/ptrace.h b/arch/mips/include/asm/ptrace.h
index fc783f8..ffc3203 100644
--- a/arch/mips/include/asm/ptrace.h
+++ b/arch/mips/include/asm/ptrace.h
@@ -40,8 +40,8 @@
 	unsigned long cp0_cause;
 	unsigned long cp0_epc;
 #ifdef CONFIG_CPU_CAVIUM_OCTEON
-	unsigned long long mpl[3];	  /* MTM{0,1,2} */
-	unsigned long long mtp[3];	  /* MTP{0,1,2} */
+	unsigned long long mpl[6];        /* MTM{0-5} */
+	unsigned long long mtp[6];        /* MTP{0-5} */
 #endif
 } __aligned(8);
 
diff --git a/arch/mips/include/asm/r4kcache.h b/arch/mips/include/asm/r4kcache.h
index e293a8d..1b22d2d 100644
--- a/arch/mips/include/asm/r4kcache.h
+++ b/arch/mips/include/asm/r4kcache.h
@@ -14,6 +14,7 @@
 
 #include <asm/asm.h>
 #include <asm/cacheops.h>
+#include <asm/compiler.h>
 #include <asm/cpu-features.h>
 #include <asm/cpu-type.h>
 #include <asm/mipsmtregs.h>
@@ -39,7 +40,7 @@
 	__asm__ __volatile__(						\
 	"	.set	push					\n"	\
 	"	.set	noreorder				\n"	\
-	"	.set	arch=r4000				\n"	\
+	"	.set "MIPS_ISA_ARCH_LEVEL"			\n"	\
 	"	cache	%0, %1					\n"	\
 	"	.set	pop					\n"	\
 	:								\
@@ -147,7 +148,7 @@
 	__asm__ __volatile__(					\
 	"	.set	push			\n"		\
 	"	.set	noreorder		\n"		\
-	"	.set	arch=r4000		\n"		\
+	"	.set "MIPS_ISA_ARCH_LEVEL"	\n"		\
 	"1:	cache	%0, (%1)		\n"		\
 	"2:	.set	pop			\n"		\
 	"	.section __ex_table,\"a\"	\n"		\
@@ -218,6 +219,7 @@
 	cache_op(Page_Invalidate_T, addr);
 }
 
+#ifndef CONFIG_CPU_MIPSR6
 #define cache16_unroll32(base,op)					\
 	__asm__ __volatile__(						\
 	"	.set push					\n"	\
@@ -322,6 +324,150 @@
 		: "r" (base),						\
 		  "i" (op));
 
+#else
+/*
+ * MIPS R6 changed the cache opcode and moved to a 8-bit offset field.
+ * This means we now need to increment the base register before we flush
+ * more cache lines
+ */
+#define cache16_unroll32(base,op)				\
+	__asm__ __volatile__(					\
+	"	.set push\n"					\
+	"	.set noreorder\n"				\
+	"	.set mips64r6\n"				\
+	"	.set noat\n"					\
+	"	cache %1, 0x000(%0); cache %1, 0x010(%0)\n"	\
+	"	cache %1, 0x020(%0); cache %1, 0x030(%0)\n"	\
+	"	cache %1, 0x040(%0); cache %1, 0x050(%0)\n"	\
+	"	cache %1, 0x060(%0); cache %1, 0x070(%0)\n"	\
+	"	cache %1, 0x080(%0); cache %1, 0x090(%0)\n"	\
+	"	cache %1, 0x0a0(%0); cache %1, 0x0b0(%0)\n"	\
+	"	cache %1, 0x0c0(%0); cache %1, 0x0d0(%0)\n"	\
+	"	cache %1, 0x0e0(%0); cache %1, 0x0f0(%0)\n"	\
+	"	addiu $1, $0, 0x100			\n"	\
+	"	cache %1, 0x000($1); cache %1, 0x010($1)\n"	\
+	"	cache %1, 0x020($1); cache %1, 0x030($1)\n"	\
+	"	cache %1, 0x040($1); cache %1, 0x050($1)\n"	\
+	"	cache %1, 0x060($1); cache %1, 0x070($1)\n"	\
+	"	cache %1, 0x080($1); cache %1, 0x090($1)\n"	\
+	"	cache %1, 0x0a0($1); cache %1, 0x0b0($1)\n"	\
+	"	cache %1, 0x0c0($1); cache %1, 0x0d0($1)\n"	\
+	"	cache %1, 0x0e0($1); cache %1, 0x0f0($1)\n"	\
+	"	.set pop\n"					\
+		:						\
+		: "r" (base),					\
+		  "i" (op));
+
+#define cache32_unroll32(base,op)				\
+	__asm__ __volatile__(					\
+	"	.set push\n"					\
+	"	.set noreorder\n"				\
+	"	.set mips64r6\n"				\
+	"	.set noat\n"					\
+	"	cache %1, 0x000(%0); cache %1, 0x020(%0)\n"	\
+	"	cache %1, 0x040(%0); cache %1, 0x060(%0)\n"	\
+	"	cache %1, 0x080(%0); cache %1, 0x0a0(%0)\n"	\
+	"	cache %1, 0x0c0(%0); cache %1, 0x0e0(%0)\n"	\
+	"	addiu $1, %0, 0x100\n"				\
+	"	cache %1, 0x000($1); cache %1, 0x020($1)\n"	\
+	"	cache %1, 0x040($1); cache %1, 0x060($1)\n"	\
+	"	cache %1, 0x080($1); cache %1, 0x0a0($1)\n"	\
+	"	cache %1, 0x0c0($1); cache %1, 0x0e0($1)\n"	\
+	"	addiu $1, $1, 0x100\n"				\
+	"	cache %1, 0x000($1); cache %1, 0x020($1)\n"	\
+	"	cache %1, 0x040($1); cache %1, 0x060($1)\n"	\
+	"	cache %1, 0x080($1); cache %1, 0x0a0($1)\n"	\
+	"	cache %1, 0x0c0($1); cache %1, 0x0e0($1)\n"	\
+	"	addiu $1, $1, 0x100\n"				\
+	"	cache %1, 0x000($1); cache %1, 0x020($1)\n"	\
+	"	cache %1, 0x040($1); cache %1, 0x060($1)\n"	\
+	"	cache %1, 0x080($1); cache %1, 0x0a0($1)\n"	\
+	"	cache %1, 0x0c0($1); cache %1, 0x0e0($1)\n"	\
+	"	.set pop\n"					\
+		:						\
+		: "r" (base),					\
+		  "i" (op));
+
+#define cache64_unroll32(base,op)				\
+	__asm__ __volatile__(					\
+	"	.set push\n"					\
+	"	.set noreorder\n"				\
+	"	.set mips64r6\n"				\
+	"	.set noat\n"					\
+	"	cache %1, 0x000(%0); cache %1, 0x040(%0)\n"	\
+	"	cache %1, 0x080(%0); cache %1, 0x0c0(%0)\n"	\
+	"	addiu $1, %0, 0x100\n"				\
+	"	cache %1, 0x000($1); cache %1, 0x040($1)\n"	\
+	"	cache %1, 0x080($1); cache %1, 0x0c0($1)\n"	\
+	"	addiu $1, %0, 0x100\n"				\
+	"	cache %1, 0x000($1); cache %1, 0x040($1)\n"	\
+	"	cache %1, 0x080($1); cache %1, 0x0c0($1)\n"	\
+	"	addiu $1, %0, 0x100\n"				\
+	"	cache %1, 0x000($1); cache %1, 0x040($1)\n"	\
+	"	cache %1, 0x080($1); cache %1, 0x0c0($1)\n"	\
+	"	addiu $1, %0, 0x100\n"				\
+	"	cache %1, 0x000($1); cache %1, 0x040($1)\n"	\
+	"	cache %1, 0x080($1); cache %1, 0x0c0($1)\n"	\
+	"	addiu $1, %0, 0x100\n"				\
+	"	cache %1, 0x000($1); cache %1, 0x040($1)\n"	\
+	"	cache %1, 0x080($1); cache %1, 0x0c0($1)\n"	\
+	"	addiu $1, %0, 0x100\n"				\
+	"	cache %1, 0x000($1); cache %1, 0x040($1)\n"	\
+	"	cache %1, 0x080($1); cache %1, 0x0c0($1)\n"	\
+	"	addiu $1, %0, 0x100\n"				\
+	"	cache %1, 0x000($1); cache %1, 0x040($1)\n"	\
+	"	cache %1, 0x080($1); cache %1, 0x0c0($1)\n"	\
+	"	.set pop\n"					\
+		:						\
+		: "r" (base),					\
+		  "i" (op));
+
+#define cache128_unroll32(base,op)				\
+	__asm__ __volatile__(					\
+	"	.set push\n"					\
+	"	.set noreorder\n"				\
+	"	.set mips64r6\n"				\
+	"	.set noat\n"					\
+	"	cache %1, 0x000(%0); cache %1, 0x080(%0)\n"	\
+	"	addiu $1, %0, 0x100\n"				\
+	"	cache %1, 0x000(%0); cache %1, 0x080(%0)\n"	\
+	"	addiu $1, %0, 0x100\n"				\
+	"	cache %1, 0x000(%0); cache %1, 0x080(%0)\n"	\
+	"	addiu $1, %0, 0x100\n"				\
+	"	cache %1, 0x000(%0); cache %1, 0x080(%0)\n"	\
+	"	addiu $1, %0, 0x100\n"				\
+	"	cache %1, 0x000(%0); cache %1, 0x080(%0)\n"	\
+	"	addiu $1, %0, 0x100\n"				\
+	"	cache %1, 0x000(%0); cache %1, 0x080(%0)\n"	\
+	"	addiu $1, %0, 0x100\n"				\
+	"	cache %1, 0x000(%0); cache %1, 0x080(%0)\n"	\
+	"	addiu $1, %0, 0x100\n"				\
+	"	cache %1, 0x000(%0); cache %1, 0x080(%0)\n"	\
+	"	addiu $1, %0, 0x100\n"				\
+	"	cache %1, 0x000(%0); cache %1, 0x080(%0)\n"	\
+	"	addiu $1, %0, 0x100\n"				\
+	"	cache %1, 0x000(%0); cache %1, 0x080(%0)\n"	\
+	"	addiu $1, %0, 0x100\n"				\
+	"	cache %1, 0x000(%0); cache %1, 0x080(%0)\n"	\
+	"	addiu $1, %0, 0x100\n"				\
+	"	cache %1, 0x000(%0); cache %1, 0x080(%0)\n"	\
+	"	addiu $1, %0, 0x100\n"				\
+	"	cache %1, 0x000(%0); cache %1, 0x080(%0)\n"	\
+	"	addiu $1, %0, 0x100\n"				\
+	"	cache %1, 0x000(%0); cache %1, 0x080(%0)\n"	\
+	"	addiu $1, %0, 0x100\n"				\
+	"	cache %1, 0x000(%0); cache %1, 0x080(%0)\n"	\
+	"	addiu $1, %0, 0x100\n"				\
+	"	cache %1, 0x000(%0); cache %1, 0x080(%0)\n"	\
+	"	addiu $1, %0, 0x100\n"				\
+	"	cache %1, 0x000(%0); cache %1, 0x080(%0)\n"	\
+	"	addiu $1, %0, 0x100\n"				\
+	"	.set pop\n"					\
+		:						\
+		: "r" (base),					\
+		  "i" (op));
+#endif /* CONFIG_CPU_MIPSR6 */
+
 /*
  * Perform the cache operation specified by op using a user mode virtual
  * address while in kernel mode.
diff --git a/arch/mips/include/asm/sgialib.h b/arch/mips/include/asm/sgialib.h
index 753275a..195db50 100644
--- a/arch/mips/include/asm/sgialib.h
+++ b/arch/mips/include/asm/sgialib.h
@@ -11,6 +11,7 @@
 #ifndef _ASM_SGIALIB_H
 #define _ASM_SGIALIB_H
 
+#include <linux/compiler.h>
 #include <asm/sgiarcs.h>
 
 extern struct linux_romvec *romvec;
@@ -70,8 +71,11 @@
 extern LONG ArcWrite(ULONG fd, PVOID buf, ULONG num, PULONG cnt);
 
 /* Misc. routines. */
-extern VOID ArcReboot(VOID) __attribute__((noreturn));
-extern VOID ArcEnterInteractiveMode(VOID) __attribute__((noreturn));
+extern VOID ArcHalt(VOID) __noreturn;
+extern VOID ArcPowerDown(VOID) __noreturn;
+extern VOID ArcRestart(VOID) __noreturn;
+extern VOID ArcReboot(VOID) __noreturn;
+extern VOID ArcEnterInteractiveMode(VOID) __noreturn;
 extern VOID ArcFlushAllCaches(VOID);
 extern DISPLAY_STATUS *ArcGetDisplayStatus(ULONG FileID);
 
diff --git a/arch/mips/include/asm/siginfo.h b/arch/mips/include/asm/siginfo.h
deleted file mode 100644
index dd9a762..0000000
--- a/arch/mips/include/asm/siginfo.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 1998, 1999, 2001, 2003 Ralf Baechle
- * Copyright (C) 2000, 2001 Silicon Graphics, Inc.
- */
-#ifndef _ASM_SIGINFO_H
-#define _ASM_SIGINFO_H
-
-#include <uapi/asm/siginfo.h>
-
-
-/*
- * Duplicated here because of <asm-generic/siginfo.h> braindamage ...
- */
-#include <linux/string.h>
-
-static inline void copy_siginfo(struct siginfo *to, struct siginfo *from)
-{
-	if (from->si_code < 0)
-		memcpy(to, from, sizeof(*to));
-	else
-		/* _sigchld is currently the largest know union member */
-		memcpy(to, from, 3*sizeof(int) + sizeof(from->_sifields._sigchld));
-}
-
-#endif /* _ASM_SIGINFO_H */
diff --git a/arch/mips/include/asm/spinlock.h b/arch/mips/include/asm/spinlock.h
index c6d06d3..b454869 100644
--- a/arch/mips/include/asm/spinlock.h
+++ b/arch/mips/include/asm/spinlock.h
@@ -89,7 +89,7 @@
 		"	 subu	%[ticket], %[ticket], 1			\n"
 		"	.previous					\n"
 		"	.set pop					\n"
-		: [ticket_ptr] "+" GCC_OFF12_ASM() (lock->lock),
+		: [ticket_ptr] "+" GCC_OFF_SMALL_ASM() (lock->lock),
 		  [serving_now_ptr] "+m" (lock->h.serving_now),
 		  [ticket] "=&r" (tmp),
 		  [my_ticket] "=&r" (my_ticket)
@@ -122,7 +122,7 @@
 		"	 subu	%[ticket], %[ticket], 1			\n"
 		"	.previous					\n"
 		"	.set pop					\n"
-		: [ticket_ptr] "+" GCC_OFF12_ASM() (lock->lock),
+		: [ticket_ptr] "+" GCC_OFF_SMALL_ASM() (lock->lock),
 		  [serving_now_ptr] "+m" (lock->h.serving_now),
 		  [ticket] "=&r" (tmp),
 		  [my_ticket] "=&r" (my_ticket)
@@ -164,7 +164,7 @@
 		"	 li	%[ticket], 0				\n"
 		"	.previous					\n"
 		"	.set pop					\n"
-		: [ticket_ptr] "+" GCC_OFF12_ASM() (lock->lock),
+		: [ticket_ptr] "+" GCC_OFF_SMALL_ASM() (lock->lock),
 		  [ticket] "=&r" (tmp),
 		  [my_ticket] "=&r" (tmp2),
 		  [now_serving] "=&r" (tmp3)
@@ -188,7 +188,7 @@
 		"	 li	%[ticket], 0				\n"
 		"	.previous					\n"
 		"	.set pop					\n"
-		: [ticket_ptr] "+" GCC_OFF12_ASM() (lock->lock),
+		: [ticket_ptr] "+" GCC_OFF_SMALL_ASM() (lock->lock),
 		  [ticket] "=&r" (tmp),
 		  [my_ticket] "=&r" (tmp2),
 		  [now_serving] "=&r" (tmp3)
@@ -235,8 +235,8 @@
 		"	beqzl	%1, 1b					\n"
 		"	 nop						\n"
 		"	.set	reorder					\n"
-		: "=" GCC_OFF12_ASM() (rw->lock), "=&r" (tmp)
-		: GCC_OFF12_ASM() (rw->lock)
+		: "=" GCC_OFF_SMALL_ASM() (rw->lock), "=&r" (tmp)
+		: GCC_OFF_SMALL_ASM() (rw->lock)
 		: "memory");
 	} else {
 		do {
@@ -245,8 +245,8 @@
 			"	bltz	%1, 1b				\n"
 			"	 addu	%1, 1				\n"
 			"2:	sc	%1, %0				\n"
-			: "=" GCC_OFF12_ASM() (rw->lock), "=&r" (tmp)
-			: GCC_OFF12_ASM() (rw->lock)
+			: "=" GCC_OFF_SMALL_ASM() (rw->lock), "=&r" (tmp)
+			: GCC_OFF_SMALL_ASM() (rw->lock)
 			: "memory");
 		} while (unlikely(!tmp));
 	}
@@ -254,9 +254,6 @@
 	smp_llsc_mb();
 }
 
-/* Note the use of sub, not subu which will make the kernel die with an
-   overflow exception if we ever try to unlock an rwlock that is already
-   unlocked or is being held by a writer.  */
 static inline void arch_read_unlock(arch_rwlock_t *rw)
 {
 	unsigned int tmp;
@@ -266,20 +263,20 @@
 	if (R10000_LLSC_WAR) {
 		__asm__ __volatile__(
 		"1:	ll	%1, %2		# arch_read_unlock	\n"
-		"	sub	%1, 1					\n"
+		"	addiu	%1, 1					\n"
 		"	sc	%1, %0					\n"
 		"	beqzl	%1, 1b					\n"
-		: "=" GCC_OFF12_ASM() (rw->lock), "=&r" (tmp)
-		: GCC_OFF12_ASM() (rw->lock)
+		: "=" GCC_OFF_SMALL_ASM() (rw->lock), "=&r" (tmp)
+		: GCC_OFF_SMALL_ASM() (rw->lock)
 		: "memory");
 	} else {
 		do {
 			__asm__ __volatile__(
 			"1:	ll	%1, %2	# arch_read_unlock	\n"
-			"	sub	%1, 1				\n"
+			"	addiu	%1, -1				\n"
 			"	sc	%1, %0				\n"
-			: "=" GCC_OFF12_ASM() (rw->lock), "=&r" (tmp)
-			: GCC_OFF12_ASM() (rw->lock)
+			: "=" GCC_OFF_SMALL_ASM() (rw->lock), "=&r" (tmp)
+			: GCC_OFF_SMALL_ASM() (rw->lock)
 			: "memory");
 		} while (unlikely(!tmp));
 	}
@@ -299,8 +296,8 @@
 		"	beqzl	%1, 1b					\n"
 		"	 nop						\n"
 		"	.set	reorder					\n"
-		: "=" GCC_OFF12_ASM() (rw->lock), "=&r" (tmp)
-		: GCC_OFF12_ASM() (rw->lock)
+		: "=" GCC_OFF_SMALL_ASM() (rw->lock), "=&r" (tmp)
+		: GCC_OFF_SMALL_ASM() (rw->lock)
 		: "memory");
 	} else {
 		do {
@@ -309,8 +306,8 @@
 			"	bnez	%1, 1b				\n"
 			"	 lui	%1, 0x8000			\n"
 			"2:	sc	%1, %0				\n"
-			: "=" GCC_OFF12_ASM() (rw->lock), "=&r" (tmp)
-			: GCC_OFF12_ASM() (rw->lock)
+			: "=" GCC_OFF_SMALL_ASM() (rw->lock), "=&r" (tmp)
+			: GCC_OFF_SMALL_ASM() (rw->lock)
 			: "memory");
 		} while (unlikely(!tmp));
 	}
@@ -349,8 +346,8 @@
 		__WEAK_LLSC_MB
 		"	li	%2, 1					\n"
 		"2:							\n"
-		: "=" GCC_OFF12_ASM() (rw->lock), "=&r" (tmp), "=&r" (ret)
-		: GCC_OFF12_ASM() (rw->lock)
+		: "=" GCC_OFF_SMALL_ASM() (rw->lock), "=&r" (tmp), "=&r" (ret)
+		: GCC_OFF_SMALL_ASM() (rw->lock)
 		: "memory");
 	} else {
 		__asm__ __volatile__(
@@ -366,8 +363,8 @@
 		__WEAK_LLSC_MB
 		"	li	%2, 1					\n"
 		"2:							\n"
-		: "=" GCC_OFF12_ASM() (rw->lock), "=&r" (tmp), "=&r" (ret)
-		: GCC_OFF12_ASM() (rw->lock)
+		: "=" GCC_OFF_SMALL_ASM() (rw->lock), "=&r" (tmp), "=&r" (ret)
+		: GCC_OFF_SMALL_ASM() (rw->lock)
 		: "memory");
 	}
 
@@ -393,8 +390,8 @@
 		"	li	%2, 1					\n"
 		"	.set	reorder					\n"
 		"2:							\n"
-		: "=" GCC_OFF12_ASM() (rw->lock), "=&r" (tmp), "=&r" (ret)
-		: GCC_OFF12_ASM() (rw->lock)
+		: "=" GCC_OFF_SMALL_ASM() (rw->lock), "=&r" (tmp), "=&r" (ret)
+		: GCC_OFF_SMALL_ASM() (rw->lock)
 		: "memory");
 	} else {
 		do {
@@ -406,9 +403,9 @@
 			"	sc	%1, %0				\n"
 			"	li	%2, 1				\n"
 			"2:						\n"
-			: "=" GCC_OFF12_ASM() (rw->lock), "=&r" (tmp),
+			: "=" GCC_OFF_SMALL_ASM() (rw->lock), "=&r" (tmp),
 			  "=&r" (ret)
-			: GCC_OFF12_ASM() (rw->lock)
+			: GCC_OFF_SMALL_ASM() (rw->lock)
 			: "memory");
 		} while (unlikely(!tmp));
 
diff --git a/arch/mips/include/asm/spram.h b/arch/mips/include/asm/spram.h
index 0b89006..0f90d88 100644
--- a/arch/mips/include/asm/spram.h
+++ b/arch/mips/include/asm/spram.h
@@ -1,10 +1,10 @@
 #ifndef _MIPS_SPRAM_H
 #define _MIPS_SPRAM_H
 
-#ifdef CONFIG_CPU_MIPSR2
+#if defined(CONFIG_MIPS_SPRAM)
 extern __init void spram_config(void);
 #else
 static inline void spram_config(void) { };
-#endif /* CONFIG_CPU_MIPSR2 */
+#endif /* CONFIG_MIPS_SPRAM */
 
 #endif /* _MIPS_SPRAM_H */
diff --git a/arch/mips/include/asm/stackframe.h b/arch/mips/include/asm/stackframe.h
index b188c797..28d6d93 100644
--- a/arch/mips/include/asm/stackframe.h
+++ b/arch/mips/include/asm/stackframe.h
@@ -40,7 +40,7 @@
 		LONG_S	v1, PT_HI(sp)
 		mflhxu	v1
 		LONG_S	v1, PT_ACX(sp)
-#else
+#elif !defined(CONFIG_CPU_MIPSR6)
 		mfhi	v1
 #endif
 #ifdef CONFIG_32BIT
@@ -50,7 +50,7 @@
 		LONG_S	$10, PT_R10(sp)
 		LONG_S	$11, PT_R11(sp)
 		LONG_S	$12, PT_R12(sp)
-#ifndef CONFIG_CPU_HAS_SMARTMIPS
+#if !defined(CONFIG_CPU_HAS_SMARTMIPS) && !defined(CONFIG_CPU_MIPSR6)
 		LONG_S	v1, PT_HI(sp)
 		mflo	v1
 #endif
@@ -58,7 +58,7 @@
 		LONG_S	$14, PT_R14(sp)
 		LONG_S	$15, PT_R15(sp)
 		LONG_S	$24, PT_R24(sp)
-#ifndef CONFIG_CPU_HAS_SMARTMIPS
+#if !defined(CONFIG_CPU_HAS_SMARTMIPS) && !defined(CONFIG_CPU_MIPSR6)
 		LONG_S	v1, PT_LO(sp)
 #endif
 #ifdef CONFIG_CPU_CAVIUM_OCTEON
@@ -226,7 +226,7 @@
 		mtlhx	$24
 		LONG_L	$24, PT_LO(sp)
 		mtlhx	$24
-#else
+#elif !defined(CONFIG_CPU_MIPSR6)
 		LONG_L	$24, PT_LO(sp)
 		mtlo	$24
 		LONG_L	$24, PT_HI(sp)
diff --git a/arch/mips/include/asm/switch_to.h b/arch/mips/include/asm/switch_to.h
index b928b6f..e92d6c4b 100644
--- a/arch/mips/include/asm/switch_to.h
+++ b/arch/mips/include/asm/switch_to.h
@@ -75,9 +75,12 @@
 #endif
 
 #define __clear_software_ll_bit()					\
-do {									\
-	if (!__builtin_constant_p(cpu_has_llsc) || !cpu_has_llsc)	\
-		ll_bit = 0;						\
+do {	if (cpu_has_rw_llb) {						\
+		write_c0_lladdr(0);					\
+	} else {							\
+		if (!__builtin_constant_p(cpu_has_llsc) || !cpu_has_llsc)\
+			ll_bit = 0;					\
+	}								\
 } while (0)
 
 #define switch_to(prev, next, last)					\
diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h
index 9e1295f..55ed660 100644
--- a/arch/mips/include/asm/thread_info.h
+++ b/arch/mips/include/asm/thread_info.h
@@ -28,7 +28,7 @@
 	unsigned long		tp_value;	/* thread pointer */
 	__u32			cpu;		/* current CPU */
 	int			preempt_count;	/* 0 => preemptable, <0 => BUG */
-
+	int			r2_emul_return; /* 1 => Returning from R2 emulator */
 	mm_segment_t		addr_limit;	/*
 						 * thread address space limit:
 						 * 0x7fffffff for user-thead
diff --git a/arch/mips/include/uapi/asm/inst.h b/arch/mips/include/uapi/asm/inst.h
index 89c2243..fc0cf5a 100644
--- a/arch/mips/include/uapi/asm/inst.h
+++ b/arch/mips/include/uapi/asm/inst.h
@@ -21,20 +21,20 @@
 enum major_op {
 	spec_op, bcond_op, j_op, jal_op,
 	beq_op, bne_op, blez_op, bgtz_op,
-	addi_op, addiu_op, slti_op, sltiu_op,
+	addi_op, cbcond0_op = addi_op, addiu_op, slti_op, sltiu_op,
 	andi_op, ori_op, xori_op, lui_op,
 	cop0_op, cop1_op, cop2_op, cop1x_op,
 	beql_op, bnel_op, blezl_op, bgtzl_op,
-	daddi_op, daddiu_op, ldl_op, ldr_op,
+	daddi_op, cbcond1_op = daddi_op, daddiu_op, ldl_op, ldr_op,
 	spec2_op, jalx_op, mdmx_op, spec3_op,
 	lb_op, lh_op, lwl_op, lw_op,
 	lbu_op, lhu_op, lwr_op, lwu_op,
 	sb_op, sh_op, swl_op, sw_op,
 	sdl_op, sdr_op, swr_op, cache_op,
-	ll_op, lwc1_op, lwc2_op, pref_op,
-	lld_op, ldc1_op, ldc2_op, ld_op,
-	sc_op, swc1_op, swc2_op, major_3b_op,
-	scd_op, sdc1_op, sdc2_op, sd_op
+	ll_op, lwc1_op, lwc2_op, bc6_op = lwc2_op, pref_op,
+	lld_op, ldc1_op, ldc2_op, beqzcjic_op = ldc2_op, ld_op,
+	sc_op, swc1_op, swc2_op, balc6_op = swc2_op, major_3b_op,
+	scd_op, sdc1_op, sdc2_op, bnezcjialc_op = sdc2_op, sd_op
 };
 
 /*
@@ -83,9 +83,12 @@
 	swe_op    = 0x1f, bshfl_op  = 0x20,
 	swle_op   = 0x21, swre_op   = 0x22,
 	prefe_op  = 0x23, dbshfl_op = 0x24,
-	lbue_op   = 0x28, lhue_op   = 0x29,
-	lbe_op    = 0x2c, lhe_op    = 0x2d,
-	lle_op    = 0x2e, lwe_op    = 0x2f,
+	cache6_op = 0x25, sc6_op    = 0x26,
+	scd6_op   = 0x27, lbue_op   = 0x28,
+	lhue_op   = 0x29, lbe_op    = 0x2c,
+	lhe_op    = 0x2d, lle_op    = 0x2e,
+	lwe_op    = 0x2f, pref6_op  = 0x35,
+	ll6_op    = 0x36, lld6_op   = 0x37,
 	rdhwr_op  = 0x3b
 };
 
@@ -112,7 +115,8 @@
 	mfhc_op       = 0x03, mtc_op	    = 0x04,
 	dmtc_op	      = 0x05, ctc_op	    = 0x06,
 	mthc0_op      = 0x06, mthc_op	    = 0x07,
-	bc_op	      = 0x08, cop_op	    = 0x10,
+	bc_op	      = 0x08, bc1eqz_op     = 0x09,
+	bc1nez_op     = 0x0d, cop_op	    = 0x10,
 	copm_op	      = 0x18
 };
 
diff --git a/arch/mips/include/uapi/asm/siginfo.h b/arch/mips/include/uapi/asm/siginfo.h
index d08f83f..2cb7fde 100644
--- a/arch/mips/include/uapi/asm/siginfo.h
+++ b/arch/mips/include/uapi/asm/siginfo.h
@@ -16,13 +16,6 @@
 #define HAVE_ARCH_SIGINFO_T
 
 /*
- * We duplicate the generic versions - <asm-generic/siginfo.h> is just borked
- * by design ...
- */
-#define HAVE_ARCH_COPY_SIGINFO
-struct siginfo;
-
-/*
  * Careful to keep union _sifields from shifting ...
  */
 #if _MIPS_SZLONG == 32
@@ -35,8 +28,9 @@
 
 #define __ARCH_SIGSYS
 
-#include <asm-generic/siginfo.h>
+#include <uapi/asm-generic/siginfo.h>
 
+/* We can't use generic siginfo_t, because our si_code and si_errno are swapped */
 typedef struct siginfo {
 	int si_signo;
 	int si_code;
@@ -124,5 +118,6 @@
 #define SI_TIMER __SI_CODE(__SI_TIMER, -3) /* sent by timer expiration */
 #define SI_MESGQ __SI_CODE(__SI_MESGQ, -4) /* sent by real time mesq state change */
 
+#include <asm-generic/siginfo.h>
 
 #endif /* _UAPI_ASM_SIGINFO_H */
diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile
index 92987d1..d3d2ff2 100644
--- a/arch/mips/kernel/Makefile
+++ b/arch/mips/kernel/Makefile
@@ -52,7 +52,7 @@
 obj-$(CONFIG_MIPS_CMP)		+= smp-cmp.o
 obj-$(CONFIG_MIPS_CPS)		+= smp-cps.o cps-vec.o
 obj-$(CONFIG_MIPS_GIC_IPI)	+= smp-gic.o
-obj-$(CONFIG_CPU_MIPSR2)	+= spram.o
+obj-$(CONFIG_MIPS_SPRAM)	+= spram.o
 
 obj-$(CONFIG_MIPS_VPE_LOADER)	+= vpe.o
 obj-$(CONFIG_MIPS_VPE_LOADER_CMP) += vpe-cmp.o
@@ -90,6 +90,7 @@
 obj-$(CONFIG_EARLY_PRINTK_8250)	+= early_printk_8250.o
 obj-$(CONFIG_SPINLOCK_TEST)	+= spinlock_test.o
 obj-$(CONFIG_MIPS_MACHINE)	+= mips_machine.o
+obj-$(CONFIG_MIPSR2_TO_R6_EMULATOR)	+= mips-r2-to-r6-emul.o
 
 CFLAGS_cpu-bugs64.o	= $(shell if $(CC) $(KBUILD_CFLAGS) -Wa,-mdaddi -c -o /dev/null -x c /dev/null >/dev/null 2>&1; then echo "-DHAVE_AS_SET_DADDI"; fi)
 
diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c
index 3b2dfdb..750d67a 100644
--- a/arch/mips/kernel/asm-offsets.c
+++ b/arch/mips/kernel/asm-offsets.c
@@ -97,6 +97,7 @@
 	OFFSET(TI_TP_VALUE, thread_info, tp_value);
 	OFFSET(TI_CPU, thread_info, cpu);
 	OFFSET(TI_PRE_COUNT, thread_info, preempt_count);
+	OFFSET(TI_R2_EMUL_RET, thread_info, r2_emul_return);
 	OFFSET(TI_ADDR_LIMIT, thread_info, addr_limit);
 	OFFSET(TI_REGS, thread_info, regs);
 	DEFINE(_THREAD_SIZE, THREAD_SIZE);
@@ -381,6 +382,7 @@
 	OFFSET(OCTEON_CP2_GFM_RESULT,	octeon_cop2_state, cop2_gfm_result);
 	OFFSET(OCTEON_CP2_HSH_DATW,	octeon_cop2_state, cop2_hsh_datw);
 	OFFSET(OCTEON_CP2_HSH_IVW,	octeon_cop2_state, cop2_hsh_ivw);
+	OFFSET(OCTEON_CP2_SHA3,		octeon_cop2_state, cop2_sha3);
 	OFFSET(THREAD_CP2,	task_struct, thread.cp2);
 	OFFSET(THREAD_CVMSEG,	task_struct, thread.cvmseg.cvmseg);
 	BLANK();
diff --git a/arch/mips/kernel/branch.c b/arch/mips/kernel/branch.c
index 4d7d99d..c2e0f45 100644
--- a/arch/mips/kernel/branch.c
+++ b/arch/mips/kernel/branch.c
@@ -16,6 +16,7 @@
 #include <asm/fpu.h>
 #include <asm/fpu_emulator.h>
 #include <asm/inst.h>
+#include <asm/mips-r2-to-r6-emul.h>
 #include <asm/ptrace.h>
 #include <asm/uaccess.h>
 
@@ -399,11 +400,21 @@
  * @returns:	-EFAULT on error and forces SIGBUS, and on success
  *		returns 0 or BRANCH_LIKELY_TAKEN as appropriate after
  *		evaluating the branch.
+ *
+ * MIPS R6 Compact branches and forbidden slots:
+ *	Compact branches do not throw exceptions because they do
+ *	not have delay slots. The forbidden slot instruction ($PC+4)
+ *	is only executed if the branch was not taken. Otherwise the
+ *	forbidden slot is skipped entirely. This means that the
+ *	only possible reason to be here because of a MIPS R6 compact
+ *	branch instruction is that the forbidden slot has thrown one.
+ *	In that case the branch was not taken, so the EPC can be safely
+ *	set to EPC + 8.
  */
 int __compute_return_epc_for_insn(struct pt_regs *regs,
 				   union mips_instruction insn)
 {
-	unsigned int bit, fcr31, dspcontrol;
+	unsigned int bit, fcr31, dspcontrol, reg;
 	long epc = regs->cp0_epc;
 	int ret = 0;
 
@@ -417,6 +428,8 @@
 			regs->regs[insn.r_format.rd] = epc + 8;
 			/* Fall through */
 		case jr_op:
+			if (NO_R6EMU && insn.r_format.func == jr_op)
+				goto sigill_r6;
 			regs->cp0_epc = regs->regs[insn.r_format.rs];
 			break;
 		}
@@ -429,8 +442,10 @@
 	 */
 	case bcond_op:
 		switch (insn.i_format.rt) {
-		case bltz_op:
 		case bltzl_op:
+			if (NO_R6EMU)
+				goto sigill_r6;
+		case bltz_op:
 			if ((long)regs->regs[insn.i_format.rs] < 0) {
 				epc = epc + 4 + (insn.i_format.simmediate << 2);
 				if (insn.i_format.rt == bltzl_op)
@@ -440,8 +455,10 @@
 			regs->cp0_epc = epc;
 			break;
 
-		case bgez_op:
 		case bgezl_op:
+			if (NO_R6EMU)
+				goto sigill_r6;
+		case bgez_op:
 			if ((long)regs->regs[insn.i_format.rs] >= 0) {
 				epc = epc + 4 + (insn.i_format.simmediate << 2);
 				if (insn.i_format.rt == bgezl_op)
@@ -453,7 +470,29 @@
 
 		case bltzal_op:
 		case bltzall_op:
+			if (NO_R6EMU && (insn.i_format.rs ||
+			    insn.i_format.rt == bltzall_op)) {
+				ret = -SIGILL;
+				break;
+			}
 			regs->regs[31] = epc + 8;
+			/*
+			 * OK we are here either because we hit a NAL
+			 * instruction or because we are emulating an
+			 * old bltzal{,l} one. Lets figure out what the
+			 * case really is.
+			 */
+			if (!insn.i_format.rs) {
+				/*
+				 * NAL or BLTZAL with rs == 0
+				 * Doesn't matter if we are R6 or not. The
+				 * result is the same
+				 */
+				regs->cp0_epc += 4 +
+					(insn.i_format.simmediate << 2);
+				break;
+			}
+			/* Now do the real thing for non-R6 BLTZAL{,L} */
 			if ((long)regs->regs[insn.i_format.rs] < 0) {
 				epc = epc + 4 + (insn.i_format.simmediate << 2);
 				if (insn.i_format.rt == bltzall_op)
@@ -465,7 +504,29 @@
 
 		case bgezal_op:
 		case bgezall_op:
+			if (NO_R6EMU && (insn.i_format.rs ||
+			    insn.i_format.rt == bgezall_op)) {
+				ret = -SIGILL;
+				break;
+			}
 			regs->regs[31] = epc + 8;
+			/*
+			 * OK we are here either because we hit a BAL
+			 * instruction or because we are emulating an
+			 * old bgezal{,l} one. Lets figure out what the
+			 * case really is.
+			 */
+			if (!insn.i_format.rs) {
+				/*
+				 * BAL or BGEZAL with rs == 0
+				 * Doesn't matter if we are R6 or not. The
+				 * result is the same
+				 */
+				regs->cp0_epc += 4 +
+					(insn.i_format.simmediate << 2);
+				break;
+			}
+			/* Now do the real thing for non-R6 BGEZAL{,L} */
 			if ((long)regs->regs[insn.i_format.rs] >= 0) {
 				epc = epc + 4 + (insn.i_format.simmediate << 2);
 				if (insn.i_format.rt == bgezall_op)
@@ -477,7 +538,7 @@
 
 		case bposge32_op:
 			if (!cpu_has_dsp)
-				goto sigill;
+				goto sigill_dsp;
 
 			dspcontrol = rddsp(0x01);
 
@@ -508,8 +569,10 @@
 	/*
 	 * These are conditional and in i_format.
 	 */
-	case beq_op:
 	case beql_op:
+		if (NO_R6EMU)
+			goto sigill_r6;
+	case beq_op:
 		if (regs->regs[insn.i_format.rs] ==
 		    regs->regs[insn.i_format.rt]) {
 			epc = epc + 4 + (insn.i_format.simmediate << 2);
@@ -520,8 +583,10 @@
 		regs->cp0_epc = epc;
 		break;
 
-	case bne_op:
 	case bnel_op:
+		if (NO_R6EMU)
+			goto sigill_r6;
+	case bne_op:
 		if (regs->regs[insn.i_format.rs] !=
 		    regs->regs[insn.i_format.rt]) {
 			epc = epc + 4 + (insn.i_format.simmediate << 2);
@@ -532,8 +597,31 @@
 		regs->cp0_epc = epc;
 		break;
 
-	case blez_op: /* not really i_format */
-	case blezl_op:
+	case blezl_op: /* not really i_format */
+		if (NO_R6EMU)
+			goto sigill_r6;
+	case blez_op:
+		/*
+		 * Compact branches for R6 for the
+		 * blez and blezl opcodes.
+		 * BLEZ  | rs = 0 | rt != 0  == BLEZALC
+		 * BLEZ  | rs = rt != 0      == BGEZALC
+		 * BLEZ  | rs != 0 | rt != 0 == BGEUC
+		 * BLEZL | rs = 0 | rt != 0  == BLEZC
+		 * BLEZL | rs = rt != 0      == BGEZC
+		 * BLEZL | rs != 0 | rt != 0 == BGEC
+		 *
+		 * For real BLEZ{,L}, rt is always 0.
+		 */
+
+		if (cpu_has_mips_r6 && insn.i_format.rt) {
+			if ((insn.i_format.opcode == blez_op) &&
+			    ((!insn.i_format.rs && insn.i_format.rt) ||
+			     (insn.i_format.rs == insn.i_format.rt)))
+				regs->regs[31] = epc + 4;
+			regs->cp0_epc += 8;
+			break;
+		}
 		/* rt field assumed to be zero */
 		if ((long)regs->regs[insn.i_format.rs] <= 0) {
 			epc = epc + 4 + (insn.i_format.simmediate << 2);
@@ -544,8 +632,32 @@
 		regs->cp0_epc = epc;
 		break;
 
-	case bgtz_op:
 	case bgtzl_op:
+		if (NO_R6EMU)
+			goto sigill_r6;
+	case bgtz_op:
+		/*
+		 * Compact branches for R6 for the
+		 * bgtz and bgtzl opcodes.
+		 * BGTZ  | rs = 0 | rt != 0  == BGTZALC
+		 * BGTZ  | rs = rt != 0      == BLTZALC
+		 * BGTZ  | rs != 0 | rt != 0 == BLTUC
+		 * BGTZL | rs = 0 | rt != 0  == BGTZC
+		 * BGTZL | rs = rt != 0      == BLTZC
+		 * BGTZL | rs != 0 | rt != 0 == BLTC
+		 *
+		 * *ZALC varint for BGTZ &&& rt != 0
+		 * For real GTZ{,L}, rt is always 0.
+		 */
+		if (cpu_has_mips_r6 && insn.i_format.rt) {
+			if ((insn.i_format.opcode == blez_op) &&
+			    ((!insn.i_format.rs && insn.i_format.rt) ||
+			    (insn.i_format.rs == insn.i_format.rt)))
+				regs->regs[31] = epc + 4;
+			regs->cp0_epc += 8;
+			break;
+		}
+
 		/* rt field assumed to be zero */
 		if ((long)regs->regs[insn.i_format.rs] > 0) {
 			epc = epc + 4 + (insn.i_format.simmediate << 2);
@@ -560,40 +672,83 @@
 	 * And now the FPA/cp1 branch instructions.
 	 */
 	case cop1_op:
-		preempt_disable();
-		if (is_fpu_owner())
-		        fcr31 = read_32bit_cp1_register(CP1_STATUS);
-		else
-			fcr31 = current->thread.fpu.fcr31;
-		preempt_enable();
-
-		bit = (insn.i_format.rt >> 2);
-		bit += (bit != 0);
-		bit += 23;
-		switch (insn.i_format.rt & 3) {
-		case 0: /* bc1f */
-		case 2: /* bc1fl */
-			if (~fcr31 & (1 << bit)) {
-				epc = epc + 4 + (insn.i_format.simmediate << 2);
-				if (insn.i_format.rt == 2)
-					ret = BRANCH_LIKELY_TAKEN;
-			} else
+		if (cpu_has_mips_r6 &&
+		    ((insn.i_format.rs == bc1eqz_op) ||
+		     (insn.i_format.rs == bc1nez_op))) {
+			if (!used_math()) { /* First time FPU user */
+				ret = init_fpu();
+				if (ret && NO_R6EMU) {
+					ret = -ret;
+					break;
+				}
+				ret = 0;
+				set_used_math();
+			}
+			lose_fpu(1);    /* Save FPU state for the emulator. */
+			reg = insn.i_format.rt;
+			bit = 0;
+			switch (insn.i_format.rs) {
+			case bc1eqz_op:
+				/* Test bit 0 */
+				if (get_fpr32(&current->thread.fpu.fpr[reg], 0)
+				    & 0x1)
+					bit = 1;
+				break;
+			case bc1nez_op:
+				/* Test bit 0 */
+				if (!(get_fpr32(&current->thread.fpu.fpr[reg], 0)
+				      & 0x1))
+					bit = 1;
+				break;
+			}
+			own_fpu(1);
+			if (bit)
+				epc = epc + 4 +
+					(insn.i_format.simmediate << 2);
+			else
 				epc += 8;
 			regs->cp0_epc = epc;
+
 			break;
+		} else {
 
-		case 1: /* bc1t */
-		case 3: /* bc1tl */
-			if (fcr31 & (1 << bit)) {
-				epc = epc + 4 + (insn.i_format.simmediate << 2);
-				if (insn.i_format.rt == 3)
-					ret = BRANCH_LIKELY_TAKEN;
-			} else
-				epc += 8;
-			regs->cp0_epc = epc;
+			preempt_disable();
+			if (is_fpu_owner())
+			        fcr31 = read_32bit_cp1_register(CP1_STATUS);
+			else
+				fcr31 = current->thread.fpu.fcr31;
+			preempt_enable();
+
+			bit = (insn.i_format.rt >> 2);
+			bit += (bit != 0);
+			bit += 23;
+			switch (insn.i_format.rt & 3) {
+			case 0: /* bc1f */
+			case 2: /* bc1fl */
+				if (~fcr31 & (1 << bit)) {
+					epc = epc + 4 +
+						(insn.i_format.simmediate << 2);
+					if (insn.i_format.rt == 2)
+						ret = BRANCH_LIKELY_TAKEN;
+				} else
+					epc += 8;
+				regs->cp0_epc = epc;
+				break;
+
+			case 1: /* bc1t */
+			case 3: /* bc1tl */
+				if (fcr31 & (1 << bit)) {
+					epc = epc + 4 +
+						(insn.i_format.simmediate << 2);
+					if (insn.i_format.rt == 3)
+						ret = BRANCH_LIKELY_TAKEN;
+				} else
+					epc += 8;
+				regs->cp0_epc = epc;
+				break;
+			}
 			break;
 		}
-		break;
 #ifdef CONFIG_CPU_CAVIUM_OCTEON
 	case lwc2_op: /* This is bbit0 on Octeon */
 		if ((regs->regs[insn.i_format.rs] & (1ull<<insn.i_format.rt))
@@ -626,15 +781,72 @@
 			epc += 8;
 		regs->cp0_epc = epc;
 		break;
+#else
+	case bc6_op:
+		/* Only valid for MIPS R6 */
+		if (!cpu_has_mips_r6) {
+			ret = -SIGILL;
+			break;
+		}
+		regs->cp0_epc += 8;
+		break;
+	case balc6_op:
+		if (!cpu_has_mips_r6) {
+			ret = -SIGILL;
+			break;
+		}
+		/* Compact branch: BALC */
+		regs->regs[31] = epc + 4;
+		epc += 4 + (insn.i_format.simmediate << 2);
+		regs->cp0_epc = epc;
+		break;
+	case beqzcjic_op:
+		if (!cpu_has_mips_r6) {
+			ret = -SIGILL;
+			break;
+		}
+		/* Compact branch: BEQZC || JIC */
+		regs->cp0_epc += 8;
+		break;
+	case bnezcjialc_op:
+		if (!cpu_has_mips_r6) {
+			ret = -SIGILL;
+			break;
+		}
+		/* Compact branch: BNEZC || JIALC */
+		if (insn.i_format.rs)
+			regs->regs[31] = epc + 4;
+		regs->cp0_epc += 8;
+		break;
 #endif
+	case cbcond0_op:
+	case cbcond1_op:
+		/* Only valid for MIPS R6 */
+		if (!cpu_has_mips_r6) {
+			ret = -SIGILL;
+			break;
+		}
+		/*
+		 * Compact branches:
+		 * bovc, beqc, beqzalc, bnvc, bnec, bnezlac
+		 */
+		if (insn.i_format.rt && !insn.i_format.rs)
+			regs->regs[31] = epc + 4;
+		regs->cp0_epc += 8;
+		break;
 	}
 
 	return ret;
 
-sigill:
+sigill_dsp:
 	printk("%s: DSP branch but not DSP ASE - sending SIGBUS.\n", current->comm);
 	force_sig(SIGBUS, current);
 	return -EFAULT;
+sigill_r6:
+	pr_info("%s: R2 branch but r2-to-r6 emulator is not preset - sending SIGILL.\n",
+		current->comm);
+	force_sig(SIGILL, current);
+	return -EFAULT;
 }
 EXPORT_SYMBOL_GPL(__compute_return_epc_for_insn);
 
diff --git a/arch/mips/kernel/cevt-r4k.c b/arch/mips/kernel/cevt-r4k.c
index 6acaad0..82bd2b2 100644
--- a/arch/mips/kernel/cevt-r4k.c
+++ b/arch/mips/kernel/cevt-r4k.c
@@ -11,7 +11,6 @@
 #include <linux/percpu.h>
 #include <linux/smp.h>
 #include <linux/irq.h>
-#include <linux/irqchip/mips-gic.h>
 
 #include <asm/time.h>
 #include <asm/cevt-r4k.h>
@@ -40,7 +39,7 @@
 
 irqreturn_t c0_compare_interrupt(int irq, void *dev_id)
 {
-	const int r2 = cpu_has_mips_r2;
+	const int r2 = cpu_has_mips_r2_r6;
 	struct clock_event_device *cd;
 	int cpu = smp_processor_id();
 
@@ -85,10 +84,7 @@
  */
 static int c0_compare_int_pending(void)
 {
-#ifdef CONFIG_MIPS_GIC
-	if (gic_present)
-		return gic_get_timer_pending();
-#endif
+	/* When cpu_has_mips_r2, this checks Cause.TI instead of Cause.IP7 */
 	return (read_c0_cause() >> cp0_compare_irq_shift) & (1ul << CAUSEB_IP);
 }
 
diff --git a/arch/mips/kernel/cps-vec.S b/arch/mips/kernel/cps-vec.S
index 0384b05..55b759a 100644
--- a/arch/mips/kernel/cps-vec.S
+++ b/arch/mips/kernel/cps-vec.S
@@ -99,11 +99,11 @@
 	xori	t2, t1, 0x7
 	beqz	t2, 1f
 	 li	t3, 32
-	addi	t1, t1, 1
+	addiu	t1, t1, 1
 	sllv	t1, t3, t1
 1:	/* At this point t1 == I-cache sets per way */
 	_EXT	t2, v0, MIPS_CONF1_IA_SHF, MIPS_CONF1_IA_SZ
-	addi	t2, t2, 1
+	addiu	t2, t2, 1
 	mul	t1, t1, t0
 	mul	t1, t1, t2
 
@@ -126,11 +126,11 @@
 	xori	t2, t1, 0x7
 	beqz	t2, 1f
 	 li	t3, 32
-	addi	t1, t1, 1
+	addiu	t1, t1, 1
 	sllv	t1, t3, t1
 1:	/* At this point t1 == D-cache sets per way */
 	_EXT	t2, v0, MIPS_CONF1_DA_SHF, MIPS_CONF1_DA_SZ
-	addi	t2, t2, 1
+	addiu	t2, t2, 1
 	mul	t1, t1, t0
 	mul	t1, t1, t2
 
@@ -250,7 +250,7 @@
 	mfc0	t0, CP0_MVPCONF0
 	srl	t0, t0, MVPCONF0_PVPE_SHIFT
 	andi	t0, t0, (MVPCONF0_PVPE >> MVPCONF0_PVPE_SHIFT)
-	addi	t7, t0, 1
+	addiu	t7, t0, 1
 
 	/* If there's only 1, we're done */
 	beqz	t0, 2f
@@ -280,7 +280,7 @@
 	mttc0	t0, CP0_TCHALT
 
 	/* Next VPE */
-	addi	t5, t5, 1
+	addiu	t5, t5, 1
 	slt	t0, t5, t7
 	bnez	t0, 1b
 	 nop
@@ -317,7 +317,7 @@
 	mfc0	t1, CP0_MVPCONF0
 	srl	t1, t1, MVPCONF0_PVPE_SHIFT
 	andi	t1, t1, MVPCONF0_PVPE >> MVPCONF0_PVPE_SHIFT
-	addi	t1, t1, 1
+	addiu	t1, t1, 1
 
 	/* Calculate a mask for the VPE ID from EBase.CPUNum */
 	clz	t1, t1
@@ -424,7 +424,7 @@
 
 	/* Next VPE */
 2:	srl	t6, t6, 1
-	addi	t5, t5, 1
+	addiu	t5, t5, 1
 	bnez	t6, 1b
 	 nop
 
diff --git a/arch/mips/kernel/cpu-bugs64.c b/arch/mips/kernel/cpu-bugs64.c
index 2d80b5f..09f4034 100644
--- a/arch/mips/kernel/cpu-bugs64.c
+++ b/arch/mips/kernel/cpu-bugs64.c
@@ -244,7 +244,7 @@
 	panic(bug64hit, !DADDI_WAR ? daddiwar : nowar);
 }
 
-int daddiu_bug	= -1;
+int daddiu_bug	= config_enabled(CONFIG_CPU_MIPSR6) ? 0 : -1;
 
 static inline void check_daddiu(void)
 {
@@ -314,11 +314,14 @@
 
 void __init check_bugs64_early(void)
 {
-	check_mult_sh();
-	check_daddiu();
+	if (!config_enabled(CONFIG_CPU_MIPSR6)) {
+		check_mult_sh();
+		check_daddiu();
+	}
 }
 
 void __init check_bugs64(void)
 {
-	check_daddi();
+	if (!config_enabled(CONFIG_CPU_MIPSR6))
+		check_daddi();
 }
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index 5342674..48dfb9d 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -237,6 +237,13 @@
 		c->isa_level |= MIPS_CPU_ISA_II | MIPS_CPU_ISA_III;
 		break;
 
+	/* R6 incompatible with everything else */
+	case MIPS_CPU_ISA_M64R6:
+		c->isa_level |= MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6;
+	case MIPS_CPU_ISA_M32R6:
+		c->isa_level |= MIPS_CPU_ISA_M32R6;
+		/* Break here so we don't add incompatible ISAs */
+		break;
 	case MIPS_CPU_ISA_M32R2:
 		c->isa_level |= MIPS_CPU_ISA_M32R2;
 	case MIPS_CPU_ISA_M32R1:
@@ -326,6 +333,9 @@
 		case 1:
 			set_isa(c, MIPS_CPU_ISA_M32R2);
 			break;
+		case 2:
+			set_isa(c, MIPS_CPU_ISA_M32R6);
+			break;
 		default:
 			goto unknown;
 		}
@@ -338,6 +348,9 @@
 		case 1:
 			set_isa(c, MIPS_CPU_ISA_M64R2);
 			break;
+		case 2:
+			set_isa(c, MIPS_CPU_ISA_M64R6);
+			break;
 		default:
 			goto unknown;
 		}
@@ -424,8 +437,10 @@
 	if (config3 & MIPS_CONF3_MSA)
 		c->ases |= MIPS_ASE_MSA;
 	/* Only tested on 32-bit cores */
-	if ((config3 & MIPS_CONF3_PW) && config_enabled(CONFIG_32BIT))
+	if ((config3 & MIPS_CONF3_PW) && config_enabled(CONFIG_32BIT)) {
+		c->htw_seq = 0;
 		c->options |= MIPS_CPU_HTW;
+	}
 
 	return config3 & MIPS_CONF_M;
 }
@@ -499,6 +514,8 @@
 		c->options |= MIPS_CPU_EVA;
 	if (config5 & MIPS_CONF5_MRP)
 		c->options |= MIPS_CPU_MAAR;
+	if (config5 & MIPS_CONF5_LLB)
+		c->options |= MIPS_CPU_RW_LLB;
 
 	return config5 & MIPS_CONF_M;
 }
@@ -533,7 +550,7 @@
 
 	if (cpu_has_rixi) {
 		/* Enable the RIXI exceptions */
-		write_c0_pagegrain(read_c0_pagegrain() | PG_IEC);
+		set_c0_pagegrain(PG_IEC);
 		back_to_back_c0_hazard();
 		/* Verify the IEC bit is set */
 		if (read_c0_pagegrain() & PG_IEC)
@@ -541,7 +558,7 @@
 	}
 
 #ifndef CONFIG_MIPS_CPS
-	if (cpu_has_mips_r2) {
+	if (cpu_has_mips_r2_r6) {
 		c->core = get_ebase_cpunum();
 		if (cpu_has_mipsmt)
 			c->core >>= fls(core_nvpes()) - 1;
@@ -896,6 +913,11 @@
 {
 	c->writecombine = _CACHE_UNCACHED_ACCELERATED;
 	switch (c->processor_id & PRID_IMP_MASK) {
+	case PRID_IMP_QEMU_GENERIC:
+		c->writecombine = _CACHE_UNCACHED;
+		c->cputype = CPU_QEMU_GENERIC;
+		__cpu_name[cpu] = "MIPS GENERIC QEMU";
+		break;
 	case PRID_IMP_4KC:
 		c->cputype = CPU_4KC;
 		c->writecombine = _CACHE_UNCACHED;
@@ -1345,8 +1367,7 @@
 	if (c->options & MIPS_CPU_FPU) {
 		c->fpu_id = cpu_get_fpu_id();
 
-		if (c->isa_level & (MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M32R2 |
-				    MIPS_CPU_ISA_M64R1 | MIPS_CPU_ISA_M64R2)) {
+		if (c->isa_level & cpu_has_mips_r) {
 			if (c->fpu_id & MIPS_FPIR_3D)
 				c->ases |= MIPS_ASE_MIPS3D;
 			if (c->fpu_id & MIPS_FPIR_FREP)
@@ -1354,7 +1375,7 @@
 		}
 	}
 
-	if (cpu_has_mips_r2) {
+	if (cpu_has_mips_r2_r6) {
 		c->srsets = ((read_c0_srsctl() >> 26) & 0x0f) + 1;
 		/* R2 has Performance Counter Interrupt indicator */
 		c->options |= MIPS_CPU_PCI;
diff --git a/arch/mips/kernel/elf.c b/arch/mips/kernel/elf.c
index a5b5b56..d2c09f6 100644
--- a/arch/mips/kernel/elf.c
+++ b/arch/mips/kernel/elf.c
@@ -11,29 +11,112 @@
 #include <linux/elf.h>
 #include <linux/sched.h>
 
+/* FPU modes */
 enum {
-	FP_ERROR = -1,
-	FP_DOUBLE_64A = -2,
+	FP_FRE,
+	FP_FR0,
+	FP_FR1,
 };
 
+/**
+ * struct mode_req - ABI FPU mode requirements
+ * @single:	The program being loaded needs an FPU but it will only issue
+ *		single precision instructions meaning that it can execute in
+ *		either FR0 or FR1.
+ * @soft:	The soft(-float) requirement means that the program being
+ *		loaded needs has no FPU dependency at all (i.e. it has no
+ *		FPU instructions).
+ * @fr1:	The program being loaded depends on FPU being in FR=1 mode.
+ * @frdefault:	The program being loaded depends on the default FPU mode.
+ *		That is FR0 for O32 and FR1 for N32/N64.
+ * @fre:	The program being loaded depends on FPU with FRE=1. This mode is
+ *		a bridge which uses FR=1 whilst still being able to maintain
+ *		full compatibility with pre-existing code using the O32 FP32
+ *		ABI.
+ *
+ * More information about the FP ABIs can be found here:
+ *
+ * https://dmz-portal.mips.com/wiki/MIPS_O32_ABI_-_FR0_and_FR1_Interlinking#10.4.1._Basic_mode_set-up
+ *
+ */
+
+struct mode_req {
+	bool single;
+	bool soft;
+	bool fr1;
+	bool frdefault;
+	bool fre;
+};
+
+static const struct mode_req fpu_reqs[] = {
+	[MIPS_ABI_FP_ANY]    = { true,  true,  true,  true,  true  },
+	[MIPS_ABI_FP_DOUBLE] = { false, false, false, true,  true  },
+	[MIPS_ABI_FP_SINGLE] = { true,  false, false, false, false },
+	[MIPS_ABI_FP_SOFT]   = { false, true,  false, false, false },
+	[MIPS_ABI_FP_OLD_64] = { false, false, false, false, false },
+	[MIPS_ABI_FP_XX]     = { false, false, true,  true,  true  },
+	[MIPS_ABI_FP_64]     = { false, false, true,  false, false },
+	[MIPS_ABI_FP_64A]    = { false, false, true,  false, true  }
+};
+
+/*
+ * Mode requirements when .MIPS.abiflags is not present in the ELF.
+ * Not present means that everything is acceptable except FR1.
+ */
+static struct mode_req none_req = { true, true, false, true, true };
+
 int arch_elf_pt_proc(void *_ehdr, void *_phdr, struct file *elf,
 		     bool is_interp, struct arch_elf_state *state)
 {
-	struct elf32_hdr *ehdr = _ehdr;
-	struct elf32_phdr *phdr = _phdr;
+	struct elf32_hdr *ehdr32 = _ehdr;
+	struct elf32_phdr *phdr32 = _phdr;
+	struct elf64_phdr *phdr64 = _phdr;
 	struct mips_elf_abiflags_v0 abiflags;
 	int ret;
 
-	if (config_enabled(CONFIG_64BIT) &&
-	    (ehdr->e_ident[EI_CLASS] != ELFCLASS32))
-		return 0;
-	if (phdr->p_type != PT_MIPS_ABIFLAGS)
-		return 0;
-	if (phdr->p_filesz < sizeof(abiflags))
-		return -EINVAL;
+	/* Lets see if this is an O32 ELF */
+	if (ehdr32->e_ident[EI_CLASS] == ELFCLASS32) {
+		/* FR = 1 for N32 */
+		if (ehdr32->e_flags & EF_MIPS_ABI2)
+			state->overall_fp_mode = FP_FR1;
+		else
+			/* Set a good default FPU mode for O32 */
+			state->overall_fp_mode = cpu_has_mips_r6 ?
+				FP_FRE : FP_FR0;
 
-	ret = kernel_read(elf, phdr->p_offset, (char *)&abiflags,
-			  sizeof(abiflags));
+		if (ehdr32->e_flags & EF_MIPS_FP64) {
+			/*
+			 * Set MIPS_ABI_FP_OLD_64 for EF_MIPS_FP64. We will override it
+			 * later if needed
+			 */
+			if (is_interp)
+				state->interp_fp_abi = MIPS_ABI_FP_OLD_64;
+			else
+				state->fp_abi = MIPS_ABI_FP_OLD_64;
+		}
+		if (phdr32->p_type != PT_MIPS_ABIFLAGS)
+			return 0;
+
+		if (phdr32->p_filesz < sizeof(abiflags))
+			return -EINVAL;
+
+		ret = kernel_read(elf, phdr32->p_offset,
+				  (char *)&abiflags,
+				  sizeof(abiflags));
+	} else {
+		/* FR=1 is really the only option for 64-bit */
+		state->overall_fp_mode = FP_FR1;
+
+		if (phdr64->p_type != PT_MIPS_ABIFLAGS)
+			return 0;
+		if (phdr64->p_filesz < sizeof(abiflags))
+			return -EINVAL;
+
+		ret = kernel_read(elf, phdr64->p_offset,
+				  (char *)&abiflags,
+				  sizeof(abiflags));
+	}
+
 	if (ret < 0)
 		return ret;
 	if (ret != sizeof(abiflags))
@@ -48,35 +131,30 @@
 	return 0;
 }
 
-static inline unsigned get_fp_abi(struct elf32_hdr *ehdr, int in_abi)
+static inline unsigned get_fp_abi(int in_abi)
 {
 	/* If the ABI requirement is provided, simply return that */
-	if (in_abi != -1)
+	if (in_abi != MIPS_ABI_FP_UNKNOWN)
 		return in_abi;
 
-	/* If the EF_MIPS_FP64 flag was set, return MIPS_ABI_FP_64 */
-	if (ehdr->e_flags & EF_MIPS_FP64)
-		return MIPS_ABI_FP_64;
-
-	/* Default to MIPS_ABI_FP_DOUBLE */
-	return MIPS_ABI_FP_DOUBLE;
+	/* Unknown ABI */
+	return MIPS_ABI_FP_UNKNOWN;
 }
 
 int arch_check_elf(void *_ehdr, bool has_interpreter,
 		   struct arch_elf_state *state)
 {
 	struct elf32_hdr *ehdr = _ehdr;
-	unsigned fp_abi, interp_fp_abi, abi0, abi1;
+	struct mode_req prog_req, interp_req;
+	int fp_abi, interp_fp_abi, abi0, abi1, max_abi;
 
-	/* Ignore non-O32 binaries */
-	if (config_enabled(CONFIG_64BIT) &&
-	    (ehdr->e_ident[EI_CLASS] != ELFCLASS32))
+	if (!config_enabled(CONFIG_MIPS_O32_FP64_SUPPORT))
 		return 0;
 
-	fp_abi = get_fp_abi(ehdr, state->fp_abi);
+	fp_abi = get_fp_abi(state->fp_abi);
 
 	if (has_interpreter) {
-		interp_fp_abi = get_fp_abi(ehdr, state->interp_fp_abi);
+		interp_fp_abi = get_fp_abi(state->interp_fp_abi);
 
 		abi0 = min(fp_abi, interp_fp_abi);
 		abi1 = max(fp_abi, interp_fp_abi);
@@ -84,108 +162,103 @@
 		abi0 = abi1 = fp_abi;
 	}
 
-	state->overall_abi = FP_ERROR;
+	/* ABI limits. O32 = FP_64A, N32/N64 = FP_SOFT */
+	max_abi = ((ehdr->e_ident[EI_CLASS] == ELFCLASS32) &&
+		   (!(ehdr->e_flags & EF_MIPS_ABI2))) ?
+		MIPS_ABI_FP_64A : MIPS_ABI_FP_SOFT;
 
-	if (abi0 == abi1) {
-		state->overall_abi = abi0;
-	} else if (abi0 == MIPS_ABI_FP_ANY) {
-		state->overall_abi = abi1;
-	} else if (abi0 == MIPS_ABI_FP_DOUBLE) {
-		switch (abi1) {
-		case MIPS_ABI_FP_XX:
-			state->overall_abi = MIPS_ABI_FP_DOUBLE;
-			break;
-
-		case MIPS_ABI_FP_64A:
-			state->overall_abi = FP_DOUBLE_64A;
-			break;
-		}
-	} else if (abi0 == MIPS_ABI_FP_SINGLE ||
-		   abi0 == MIPS_ABI_FP_SOFT) {
-		/* Cannot link with other ABIs */
-	} else if (abi0 == MIPS_ABI_FP_OLD_64) {
-		switch (abi1) {
-		case MIPS_ABI_FP_XX:
-		case MIPS_ABI_FP_64:
-		case MIPS_ABI_FP_64A:
-			state->overall_abi = MIPS_ABI_FP_64;
-			break;
-		}
-	} else if (abi0 == MIPS_ABI_FP_XX ||
-		   abi0 == MIPS_ABI_FP_64 ||
-		   abi0 == MIPS_ABI_FP_64A) {
-		state->overall_abi = MIPS_ABI_FP_64;
-	}
-
-	switch (state->overall_abi) {
-	case MIPS_ABI_FP_64:
-	case MIPS_ABI_FP_64A:
-	case FP_DOUBLE_64A:
-		if (!config_enabled(CONFIG_MIPS_O32_FP64_SUPPORT))
-			return -ELIBBAD;
-		break;
-
-	case FP_ERROR:
+	if ((abi0 > max_abi && abi0 != MIPS_ABI_FP_UNKNOWN) ||
+	    (abi1 > max_abi && abi1 != MIPS_ABI_FP_UNKNOWN))
 		return -ELIBBAD;
-	}
+
+	/* It's time to determine the FPU mode requirements */
+	prog_req = (abi0 == MIPS_ABI_FP_UNKNOWN) ? none_req : fpu_reqs[abi0];
+	interp_req = (abi1 == MIPS_ABI_FP_UNKNOWN) ? none_req : fpu_reqs[abi1];
+
+	/*
+	 * Check whether the program's and interp's ABIs have a matching FPU
+	 * mode requirement.
+	 */
+	prog_req.single = interp_req.single && prog_req.single;
+	prog_req.soft = interp_req.soft && prog_req.soft;
+	prog_req.fr1 = interp_req.fr1 && prog_req.fr1;
+	prog_req.frdefault = interp_req.frdefault && prog_req.frdefault;
+	prog_req.fre = interp_req.fre && prog_req.fre;
+
+	/*
+	 * Determine the desired FPU mode
+	 *
+	 * Decision making:
+	 *
+	 * - We want FR_FRE if FRE=1 and both FR=1 and FR=0 are false. This
+	 *   means that we have a combination of program and interpreter
+	 *   that inherently require the hybrid FP mode.
+	 * - If FR1 and FRDEFAULT is true, that means we hit the any-abi or
+	 *   fpxx case. This is because, in any-ABI (or no-ABI) we have no FPU
+	 *   instructions so we don't care about the mode. We will simply use
+	 *   the one preferred by the hardware. In fpxx case, that ABI can
+	 *   handle both FR=1 and FR=0, so, again, we simply choose the one
+	 *   preferred by the hardware. Next, if we only use single-precision
+	 *   FPU instructions, and the default ABI FPU mode is not good
+	 *   (ie single + any ABI combination), we set again the FPU mode to the
+	 *   one is preferred by the hardware. Next, if we know that the code
+	 *   will only use single-precision instructions, shown by single being
+	 *   true but frdefault being false, then we again set the FPU mode to
+	 *   the one that is preferred by the hardware.
+	 * - We want FP_FR1 if that's the only matching mode and the default one
+	 *   is not good.
+	 * - Return with -ELIBADD if we can't find a matching FPU mode.
+	 */
+	if (prog_req.fre && !prog_req.frdefault && !prog_req.fr1)
+		state->overall_fp_mode = FP_FRE;
+	else if ((prog_req.fr1 && prog_req.frdefault) ||
+		 (prog_req.single && !prog_req.frdefault))
+		/* Make sure 64-bit MIPS III/IV/64R1 will not pick FR1 */
+		state->overall_fp_mode = ((current_cpu_data.fpu_id & MIPS_FPIR_F64) &&
+					  cpu_has_mips_r2_r6) ?
+					  FP_FR1 : FP_FR0;
+	else if (prog_req.fr1)
+		state->overall_fp_mode = FP_FR1;
+	else  if (!prog_req.fre && !prog_req.frdefault &&
+		  !prog_req.fr1 && !prog_req.single && !prog_req.soft)
+		return -ELIBBAD;
 
 	return 0;
 }
 
+static inline void set_thread_fp_mode(int hybrid, int regs32)
+{
+	if (hybrid)
+		set_thread_flag(TIF_HYBRID_FPREGS);
+	else
+		clear_thread_flag(TIF_HYBRID_FPREGS);
+	if (regs32)
+		set_thread_flag(TIF_32BIT_FPREGS);
+	else
+		clear_thread_flag(TIF_32BIT_FPREGS);
+}
+
 void mips_set_personality_fp(struct arch_elf_state *state)
 {
-	if (config_enabled(CONFIG_FP32XX_HYBRID_FPRS)) {
-		/*
-		 * Use hybrid FPRs for all code which can correctly execute
-		 * with that mode.
-		 */
-		switch (state->overall_abi) {
-		case MIPS_ABI_FP_DOUBLE:
-		case MIPS_ABI_FP_SINGLE:
-		case MIPS_ABI_FP_SOFT:
-		case MIPS_ABI_FP_XX:
-		case MIPS_ABI_FP_ANY:
-			/* FR=1, FRE=1 */
-			clear_thread_flag(TIF_32BIT_FPREGS);
-			set_thread_flag(TIF_HYBRID_FPREGS);
-			return;
-		}
-	}
+	/*
+	 * This function is only ever called for O32 ELFs so we should
+	 * not be worried about N32/N64 binaries.
+	 */
 
-	switch (state->overall_abi) {
-	case MIPS_ABI_FP_DOUBLE:
-	case MIPS_ABI_FP_SINGLE:
-	case MIPS_ABI_FP_SOFT:
-		/* FR=0 */
-		set_thread_flag(TIF_32BIT_FPREGS);
-		clear_thread_flag(TIF_HYBRID_FPREGS);
+	if (!config_enabled(CONFIG_MIPS_O32_FP64_SUPPORT))
+		return;
+
+	switch (state->overall_fp_mode) {
+	case FP_FRE:
+		set_thread_fp_mode(1, 0);
 		break;
-
-	case FP_DOUBLE_64A:
-		/* FR=1, FRE=1 */
-		clear_thread_flag(TIF_32BIT_FPREGS);
-		set_thread_flag(TIF_HYBRID_FPREGS);
+	case FP_FR0:
+		set_thread_fp_mode(0, 1);
 		break;
-
-	case MIPS_ABI_FP_64:
-	case MIPS_ABI_FP_64A:
-		/* FR=1, FRE=0 */
-		clear_thread_flag(TIF_32BIT_FPREGS);
-		clear_thread_flag(TIF_HYBRID_FPREGS);
+	case FP_FR1:
+		set_thread_fp_mode(0, 0);
 		break;
-
-	case MIPS_ABI_FP_XX:
-	case MIPS_ABI_FP_ANY:
-		if (!config_enabled(CONFIG_MIPS_O32_FP64_SUPPORT))
-			set_thread_flag(TIF_32BIT_FPREGS);
-		else
-			clear_thread_flag(TIF_32BIT_FPREGS);
-
-		clear_thread_flag(TIF_HYBRID_FPREGS);
-		break;
-
 	default:
-	case FP_ERROR:
 		BUG();
 	}
 }
diff --git a/arch/mips/kernel/entry.S b/arch/mips/kernel/entry.S
index 4353d32..af41ba6 100644
--- a/arch/mips/kernel/entry.S
+++ b/arch/mips/kernel/entry.S
@@ -46,6 +46,11 @@
 	local_irq_disable		# make sure we dont miss an
 					# interrupt setting need_resched
 					# between sampling and return
+#ifdef CONFIG_MIPSR2_TO_R6_EMULATOR
+	lw	k0, TI_R2_EMUL_RET($28)
+	bnez	k0, restore_all_from_r2_emul
+#endif
+
 	LONG_L	a2, TI_FLAGS($28)	# current->work
 	andi	t0, a2, _TIF_WORK_MASK	# (ignoring syscall_trace)
 	bnez	t0, work_pending
@@ -114,6 +119,19 @@
 	RESTORE_SP_AND_RET
 	.set	at
 
+#ifdef CONFIG_MIPSR2_TO_R6_EMULATOR
+restore_all_from_r2_emul:			# restore full frame
+	.set	noat
+	sw	zero, TI_R2_EMUL_RET($28)	# reset it
+	RESTORE_TEMP
+	RESTORE_AT
+	RESTORE_STATIC
+	RESTORE_SOME
+	LONG_L	sp, PT_R29(sp)
+	eretnc
+	.set	at
+#endif
+
 work_pending:
 	andi	t0, a2, _TIF_NEED_RESCHED # a2 is preloaded with TI_FLAGS
 	beqz	t0, work_notifysig
@@ -158,7 +176,8 @@
 	jal	syscall_trace_leave
 	b	resume_userspace
 
-#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_MIPS_MT)
+#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6) || \
+    defined(CONFIG_MIPS_MT)
 
 /*
  * MIPS32R2 Instruction Hazard Barrier - must be called
@@ -171,4 +190,4 @@
 	nop
 	END(mips_ihb)
 
-#endif /* CONFIG_CPU_MIPSR2 or CONFIG_MIPS_MT */
+#endif /* CONFIG_CPU_MIPSR2 or CONFIG_CPU_MIPSR6 or CONFIG_MIPS_MT */
diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S
index a5e26dd..2ebaabe 100644
--- a/arch/mips/kernel/genex.S
+++ b/arch/mips/kernel/genex.S
@@ -125,7 +125,7 @@
 	nop
 	nop
 #endif
-	.set	arch=r4000
+	.set	MIPS_ISA_ARCH_LEVEL_RAW
 	wait
 	/* end of rollback region (the region size must be power of two) */
 1:
diff --git a/arch/mips/kernel/idle.c b/arch/mips/kernel/idle.c
index 0b9082b..368c88b 100644
--- a/arch/mips/kernel/idle.c
+++ b/arch/mips/kernel/idle.c
@@ -186,6 +186,7 @@
 	case CPU_PROAPTIV:
 	case CPU_P5600:
 	case CPU_M5150:
+	case CPU_QEMU_GENERIC:
 		cpu_wait = r4k_wait;
 		if (read_c0_config7() & MIPS_CONF7_WII)
 			cpu_wait = r4k_wait_irqoff;
diff --git a/arch/mips/kernel/mips-r2-to-r6-emul.c b/arch/mips/kernel/mips-r2-to-r6-emul.c
new file mode 100644
index 0000000..64d17e4
--- /dev/null
+++ b/arch/mips/kernel/mips-r2-to-r6-emul.c
@@ -0,0 +1,2378 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2014 Imagination Technologies Ltd.
+ * Author: Leonid Yegoshin <Leonid.Yegoshin@imgtec.com>
+ * Author: Markos Chandras <markos.chandras@imgtec.com>
+ *
+ *      MIPS R2 user space instruction emulator for MIPS R6
+ *
+ */
+#include <linux/bug.h>
+#include <linux/compiler.h>
+#include <linux/debugfs.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/ptrace.h>
+#include <linux/seq_file.h>
+
+#include <asm/asm.h>
+#include <asm/branch.h>
+#include <asm/break.h>
+#include <asm/fpu.h>
+#include <asm/fpu_emulator.h>
+#include <asm/inst.h>
+#include <asm/mips-r2-to-r6-emul.h>
+#include <asm/local.h>
+#include <asm/ptrace.h>
+#include <asm/uaccess.h>
+
+#ifdef CONFIG_64BIT
+#define ADDIU	"daddiu "
+#define INS	"dins "
+#define EXT	"dext "
+#else
+#define ADDIU	"addiu "
+#define INS	"ins "
+#define EXT	"ext "
+#endif /* CONFIG_64BIT */
+
+#define SB	"sb "
+#define LB	"lb "
+#define LL	"ll "
+#define SC	"sc "
+
+DEFINE_PER_CPU(struct mips_r2_emulator_stats, mipsr2emustats);
+DEFINE_PER_CPU(struct mips_r2_emulator_stats, mipsr2bdemustats);
+DEFINE_PER_CPU(struct mips_r2br_emulator_stats, mipsr2bremustats);
+
+extern const unsigned int fpucondbit[8];
+
+#define MIPS_R2_EMUL_TOTAL_PASS	10
+
+int mipsr2_emulation = 0;
+
+static int __init mipsr2emu_enable(char *s)
+{
+	mipsr2_emulation = 1;
+
+	pr_info("MIPS R2-to-R6 Emulator Enabled!");
+
+	return 1;
+}
+__setup("mipsr2emu", mipsr2emu_enable);
+
+/**
+ * mipsr6_emul - Emulate some frequent R2/R5/R6 instructions in delay slot
+ * for performance instead of the traditional way of using a stack trampoline
+ * which is rather slow.
+ * @regs: Process register set
+ * @ir: Instruction
+ */
+static inline int mipsr6_emul(struct pt_regs *regs, u32 ir)
+{
+	switch (MIPSInst_OPCODE(ir)) {
+	case addiu_op:
+		if (MIPSInst_RT(ir))
+			regs->regs[MIPSInst_RT(ir)] =
+				(s32)regs->regs[MIPSInst_RS(ir)] +
+				(s32)MIPSInst_SIMM(ir);
+		return 0;
+	case daddiu_op:
+		if (config_enabled(CONFIG_32BIT))
+			break;
+
+		if (MIPSInst_RT(ir))
+			regs->regs[MIPSInst_RT(ir)] =
+				(s64)regs->regs[MIPSInst_RS(ir)] +
+				(s64)MIPSInst_SIMM(ir);
+		return 0;
+	case lwc1_op:
+	case swc1_op:
+	case cop1_op:
+	case cop1x_op:
+		/* FPU instructions in delay slot */
+		return -SIGFPE;
+	case spec_op:
+		switch (MIPSInst_FUNC(ir)) {
+		case or_op:
+			if (MIPSInst_RD(ir))
+				regs->regs[MIPSInst_RD(ir)] =
+					regs->regs[MIPSInst_RS(ir)] |
+					regs->regs[MIPSInst_RT(ir)];
+			return 0;
+		case sll_op:
+			if (MIPSInst_RS(ir))
+				break;
+
+			if (MIPSInst_RD(ir))
+				regs->regs[MIPSInst_RD(ir)] =
+					(s32)(((u32)regs->regs[MIPSInst_RT(ir)]) <<
+						MIPSInst_FD(ir));
+			return 0;
+		case srl_op:
+			if (MIPSInst_RS(ir))
+				break;
+
+			if (MIPSInst_RD(ir))
+				regs->regs[MIPSInst_RD(ir)] =
+					(s32)(((u32)regs->regs[MIPSInst_RT(ir)]) >>
+						MIPSInst_FD(ir));
+			return 0;
+		case addu_op:
+			if (MIPSInst_FD(ir))
+				break;
+
+			if (MIPSInst_RD(ir))
+				regs->regs[MIPSInst_RD(ir)] =
+					(s32)((u32)regs->regs[MIPSInst_RS(ir)] +
+					      (u32)regs->regs[MIPSInst_RT(ir)]);
+			return 0;
+		case subu_op:
+			if (MIPSInst_FD(ir))
+				break;
+
+			if (MIPSInst_RD(ir))
+				regs->regs[MIPSInst_RD(ir)] =
+					(s32)((u32)regs->regs[MIPSInst_RS(ir)] -
+					      (u32)regs->regs[MIPSInst_RT(ir)]);
+			return 0;
+		case dsll_op:
+			if (config_enabled(CONFIG_32BIT) || MIPSInst_RS(ir))
+				break;
+
+			if (MIPSInst_RD(ir))
+				regs->regs[MIPSInst_RD(ir)] =
+					(s64)(((u64)regs->regs[MIPSInst_RT(ir)]) <<
+						MIPSInst_FD(ir));
+			return 0;
+		case dsrl_op:
+			if (config_enabled(CONFIG_32BIT) || MIPSInst_RS(ir))
+				break;
+
+			if (MIPSInst_RD(ir))
+				regs->regs[MIPSInst_RD(ir)] =
+					(s64)(((u64)regs->regs[MIPSInst_RT(ir)]) >>
+						MIPSInst_FD(ir));
+			return 0;
+		case daddu_op:
+			if (config_enabled(CONFIG_32BIT) || MIPSInst_FD(ir))
+				break;
+
+			if (MIPSInst_RD(ir))
+				regs->regs[MIPSInst_RD(ir)] =
+					(u64)regs->regs[MIPSInst_RS(ir)] +
+					(u64)regs->regs[MIPSInst_RT(ir)];
+			return 0;
+		case dsubu_op:
+			if (config_enabled(CONFIG_32BIT) || MIPSInst_FD(ir))
+				break;
+
+			if (MIPSInst_RD(ir))
+				regs->regs[MIPSInst_RD(ir)] =
+					(s64)((u64)regs->regs[MIPSInst_RS(ir)] -
+					      (u64)regs->regs[MIPSInst_RT(ir)]);
+			return 0;
+		}
+		break;
+	default:
+		pr_debug("No fastpath BD emulation for instruction 0x%08x (op: %02x)\n",
+			 ir, MIPSInst_OPCODE(ir));
+	}
+
+	return SIGILL;
+}
+
+/**
+ * movt_func - Emulate a MOVT instruction
+ * @regs: Process register set
+ * @ir: Instruction
+ *
+ * Returns 0 since it always succeeds.
+ */
+static int movf_func(struct pt_regs *regs, u32 ir)
+{
+	u32 csr;
+	u32 cond;
+
+	csr = current->thread.fpu.fcr31;
+	cond = fpucondbit[MIPSInst_RT(ir) >> 2];
+	if (((csr & cond) == 0) && MIPSInst_RD(ir))
+		regs->regs[MIPSInst_RD(ir)] = regs->regs[MIPSInst_RS(ir)];
+	MIPS_R2_STATS(movs);
+	return 0;
+}
+
+/**
+ * movt_func - Emulate a MOVT instruction
+ * @regs: Process register set
+ * @ir: Instruction
+ *
+ * Returns 0 since it always succeeds.
+ */
+static int movt_func(struct pt_regs *regs, u32 ir)
+{
+	u32 csr;
+	u32 cond;
+
+	csr = current->thread.fpu.fcr31;
+	cond = fpucondbit[MIPSInst_RT(ir) >> 2];
+
+	if (((csr & cond) != 0) && MIPSInst_RD(ir))
+		regs->regs[MIPSInst_RD(ir)] = regs->regs[MIPSInst_RS(ir)];
+
+	MIPS_R2_STATS(movs);
+
+	return 0;
+}
+
+/**
+ * jr_func - Emulate a JR instruction.
+ * @pt_regs: Process register set
+ * @ir: Instruction
+ *
+ * Returns SIGILL if JR was in delay slot, SIGEMT if we
+ * can't compute the EPC, SIGSEGV if we can't access the
+ * userland instruction or 0 on success.
+ */
+static int jr_func(struct pt_regs *regs, u32 ir)
+{
+	int err;
+	unsigned long cepc, epc, nepc;
+	u32 nir;
+
+	if (delay_slot(regs))
+		return SIGILL;
+
+	/* EPC after the RI/JR instruction */
+	nepc = regs->cp0_epc;
+	/* Roll back to the reserved R2 JR instruction */
+	regs->cp0_epc -= 4;
+	epc = regs->cp0_epc;
+	err = __compute_return_epc(regs);
+
+	if (err < 0)
+		return SIGEMT;
+
+
+	/* Computed EPC */
+	cepc = regs->cp0_epc;
+
+	/* Get DS instruction */
+	err = __get_user(nir, (u32 __user *)nepc);
+	if (err)
+		return SIGSEGV;
+
+	MIPS_R2BR_STATS(jrs);
+
+	/* If nir == 0(NOP), then nothing else to do */
+	if (nir) {
+		/*
+		 * Negative err means FPU instruction in BD-slot,
+		 * Zero err means 'BD-slot emulation done'
+		 * For anything else we go back to trampoline emulation.
+		 */
+		err = mipsr6_emul(regs, nir);
+		if (err > 0) {
+			regs->cp0_epc = nepc;
+			err = mips_dsemul(regs, nir, cepc);
+			if (err == SIGILL)
+				err = SIGEMT;
+			MIPS_R2_STATS(dsemul);
+		}
+	}
+
+	return err;
+}
+
+/**
+ * movz_func - Emulate a MOVZ instruction
+ * @regs: Process register set
+ * @ir: Instruction
+ *
+ * Returns 0 since it always succeeds.
+ */
+static int movz_func(struct pt_regs *regs, u32 ir)
+{
+	if (((regs->regs[MIPSInst_RT(ir)]) == 0) && MIPSInst_RD(ir))
+		regs->regs[MIPSInst_RD(ir)] = regs->regs[MIPSInst_RS(ir)];
+	MIPS_R2_STATS(movs);
+
+	return 0;
+}
+
+/**
+ * movn_func - Emulate a MOVZ instruction
+ * @regs: Process register set
+ * @ir: Instruction
+ *
+ * Returns 0 since it always succeeds.
+ */
+static int movn_func(struct pt_regs *regs, u32 ir)
+{
+	if (((regs->regs[MIPSInst_RT(ir)]) != 0) && MIPSInst_RD(ir))
+		regs->regs[MIPSInst_RD(ir)] = regs->regs[MIPSInst_RS(ir)];
+	MIPS_R2_STATS(movs);
+
+	return 0;
+}
+
+/**
+ * mfhi_func - Emulate a MFHI instruction
+ * @regs: Process register set
+ * @ir: Instruction
+ *
+ * Returns 0 since it always succeeds.
+ */
+static int mfhi_func(struct pt_regs *regs, u32 ir)
+{
+	if (MIPSInst_RD(ir))
+		regs->regs[MIPSInst_RD(ir)] = regs->hi;
+
+	MIPS_R2_STATS(hilo);
+
+	return 0;
+}
+
+/**
+ * mthi_func - Emulate a MTHI instruction
+ * @regs: Process register set
+ * @ir: Instruction
+ *
+ * Returns 0 since it always succeeds.
+ */
+static int mthi_func(struct pt_regs *regs, u32 ir)
+{
+	regs->hi = regs->regs[MIPSInst_RS(ir)];
+
+	MIPS_R2_STATS(hilo);
+
+	return 0;
+}
+
+/**
+ * mflo_func - Emulate a MFLO instruction
+ * @regs: Process register set
+ * @ir: Instruction
+ *
+ * Returns 0 since it always succeeds.
+ */
+static int mflo_func(struct pt_regs *regs, u32 ir)
+{
+	if (MIPSInst_RD(ir))
+		regs->regs[MIPSInst_RD(ir)] = regs->lo;
+
+	MIPS_R2_STATS(hilo);
+
+	return 0;
+}
+
+/**
+ * mtlo_func - Emulate a MTLO instruction
+ * @regs: Process register set
+ * @ir: Instruction
+ *
+ * Returns 0 since it always succeeds.
+ */
+static int mtlo_func(struct pt_regs *regs, u32 ir)
+{
+	regs->lo = regs->regs[MIPSInst_RS(ir)];
+
+	MIPS_R2_STATS(hilo);
+
+	return 0;
+}
+
+/**
+ * mult_func - Emulate a MULT instruction
+ * @regs: Process register set
+ * @ir: Instruction
+ *
+ * Returns 0 since it always succeeds.
+ */
+static int mult_func(struct pt_regs *regs, u32 ir)
+{
+	s64 res;
+	s32 rt, rs;
+
+	rt = regs->regs[MIPSInst_RT(ir)];
+	rs = regs->regs[MIPSInst_RS(ir)];
+	res = (s64)rt * (s64)rs;
+
+	rs = res;
+	regs->lo = (s64)rs;
+	rt = res >> 32;
+	res = (s64)rt;
+	regs->hi = res;
+
+	MIPS_R2_STATS(muls);
+
+	return 0;
+}
+
+/**
+ * multu_func - Emulate a MULTU instruction
+ * @regs: Process register set
+ * @ir: Instruction
+ *
+ * Returns 0 since it always succeeds.
+ */
+static int multu_func(struct pt_regs *regs, u32 ir)
+{
+	u64 res;
+	u32 rt, rs;
+
+	rt = regs->regs[MIPSInst_RT(ir)];
+	rs = regs->regs[MIPSInst_RS(ir)];
+	res = (u64)rt * (u64)rs;
+	rt = res;
+	regs->lo = (s64)rt;
+	regs->hi = (s64)(res >> 32);
+
+	MIPS_R2_STATS(muls);
+
+	return 0;
+}
+
+/**
+ * div_func - Emulate a DIV instruction
+ * @regs: Process register set
+ * @ir: Instruction
+ *
+ * Returns 0 since it always succeeds.
+ */
+static int div_func(struct pt_regs *regs, u32 ir)
+{
+	s32 rt, rs;
+
+	rt = regs->regs[MIPSInst_RT(ir)];
+	rs = regs->regs[MIPSInst_RS(ir)];
+
+	regs->lo = (s64)(rs / rt);
+	regs->hi = (s64)(rs % rt);
+
+	MIPS_R2_STATS(divs);
+
+	return 0;
+}
+
+/**
+ * divu_func - Emulate a DIVU instruction
+ * @regs: Process register set
+ * @ir: Instruction
+ *
+ * Returns 0 since it always succeeds.
+ */
+static int divu_func(struct pt_regs *regs, u32 ir)
+{
+	u32 rt, rs;
+
+	rt = regs->regs[MIPSInst_RT(ir)];
+	rs = regs->regs[MIPSInst_RS(ir)];
+
+	regs->lo = (s64)(rs / rt);
+	regs->hi = (s64)(rs % rt);
+
+	MIPS_R2_STATS(divs);
+
+	return 0;
+}
+
+/**
+ * dmult_func - Emulate a DMULT instruction
+ * @regs: Process register set
+ * @ir: Instruction
+ *
+ * Returns 0 on success or SIGILL for 32-bit kernels.
+ */
+static int dmult_func(struct pt_regs *regs, u32 ir)
+{
+	s64 res;
+	s64 rt, rs;
+
+	if (config_enabled(CONFIG_32BIT))
+		return SIGILL;
+
+	rt = regs->regs[MIPSInst_RT(ir)];
+	rs = regs->regs[MIPSInst_RS(ir)];
+	res = rt * rs;
+
+	regs->lo = res;
+	__asm__ __volatile__(
+		"dmuh %0, %1, %2\t\n"
+		: "=r"(res)
+		: "r"(rt), "r"(rs));
+
+	regs->hi = res;
+
+	MIPS_R2_STATS(muls);
+
+	return 0;
+}
+
+/**
+ * dmultu_func - Emulate a DMULTU instruction
+ * @regs: Process register set
+ * @ir: Instruction
+ *
+ * Returns 0 on success or SIGILL for 32-bit kernels.
+ */
+static int dmultu_func(struct pt_regs *regs, u32 ir)
+{
+	u64 res;
+	u64 rt, rs;
+
+	if (config_enabled(CONFIG_32BIT))
+		return SIGILL;
+
+	rt = regs->regs[MIPSInst_RT(ir)];
+	rs = regs->regs[MIPSInst_RS(ir)];
+	res = rt * rs;
+
+	regs->lo = res;
+	__asm__ __volatile__(
+		"dmuhu %0, %1, %2\t\n"
+		: "=r"(res)
+		: "r"(rt), "r"(rs));
+
+	regs->hi = res;
+
+	MIPS_R2_STATS(muls);
+
+	return 0;
+}
+
+/**
+ * ddiv_func - Emulate a DDIV instruction
+ * @regs: Process register set
+ * @ir: Instruction
+ *
+ * Returns 0 on success or SIGILL for 32-bit kernels.
+ */
+static int ddiv_func(struct pt_regs *regs, u32 ir)
+{
+	s64 rt, rs;
+
+	if (config_enabled(CONFIG_32BIT))
+		return SIGILL;
+
+	rt = regs->regs[MIPSInst_RT(ir)];
+	rs = regs->regs[MIPSInst_RS(ir)];
+
+	regs->lo = rs / rt;
+	regs->hi = rs % rt;
+
+	MIPS_R2_STATS(divs);
+
+	return 0;
+}
+
+/**
+ * ddivu_func - Emulate a DDIVU instruction
+ * @regs: Process register set
+ * @ir: Instruction
+ *
+ * Returns 0 on success or SIGILL for 32-bit kernels.
+ */
+static int ddivu_func(struct pt_regs *regs, u32 ir)
+{
+	u64 rt, rs;
+
+	if (config_enabled(CONFIG_32BIT))
+		return SIGILL;
+
+	rt = regs->regs[MIPSInst_RT(ir)];
+	rs = regs->regs[MIPSInst_RS(ir)];
+
+	regs->lo = rs / rt;
+	regs->hi = rs % rt;
+
+	MIPS_R2_STATS(divs);
+
+	return 0;
+}
+
+/* R6 removed instructions for the SPECIAL opcode */
+static struct r2_decoder_table spec_op_table[] = {
+	{ 0xfc1ff83f, 0x00000008, jr_func },
+	{ 0xfc00ffff, 0x00000018, mult_func },
+	{ 0xfc00ffff, 0x00000019, multu_func },
+	{ 0xfc00ffff, 0x0000001c, dmult_func },
+	{ 0xfc00ffff, 0x0000001d, dmultu_func },
+	{ 0xffff07ff, 0x00000010, mfhi_func },
+	{ 0xfc1fffff, 0x00000011, mthi_func },
+	{ 0xffff07ff, 0x00000012, mflo_func },
+	{ 0xfc1fffff, 0x00000013, mtlo_func },
+	{ 0xfc0307ff, 0x00000001, movf_func },
+	{ 0xfc0307ff, 0x00010001, movt_func },
+	{ 0xfc0007ff, 0x0000000a, movz_func },
+	{ 0xfc0007ff, 0x0000000b, movn_func },
+	{ 0xfc00ffff, 0x0000001a, div_func },
+	{ 0xfc00ffff, 0x0000001b, divu_func },
+	{ 0xfc00ffff, 0x0000001e, ddiv_func },
+	{ 0xfc00ffff, 0x0000001f, ddivu_func },
+	{}
+};
+
+/**
+ * madd_func - Emulate a MADD instruction
+ * @regs: Process register set
+ * @ir: Instruction
+ *
+ * Returns 0 since it always succeeds.
+ */
+static int madd_func(struct pt_regs *regs, u32 ir)
+{
+	s64 res;
+	s32 rt, rs;
+
+	rt = regs->regs[MIPSInst_RT(ir)];
+	rs = regs->regs[MIPSInst_RS(ir)];
+	res = (s64)rt * (s64)rs;
+	rt = regs->hi;
+	rs = regs->lo;
+	res += ((((s64)rt) << 32) | (u32)rs);
+
+	rt = res;
+	regs->lo = (s64)rt;
+	rs = res >> 32;
+	regs->hi = (s64)rs;
+
+	MIPS_R2_STATS(dsps);
+
+	return 0;
+}
+
+/**
+ * maddu_func - Emulate a MADDU instruction
+ * @regs: Process register set
+ * @ir: Instruction
+ *
+ * Returns 0 since it always succeeds.
+ */
+static int maddu_func(struct pt_regs *regs, u32 ir)
+{
+	u64 res;
+	u32 rt, rs;
+
+	rt = regs->regs[MIPSInst_RT(ir)];
+	rs = regs->regs[MIPSInst_RS(ir)];
+	res = (u64)rt * (u64)rs;
+	rt = regs->hi;
+	rs = regs->lo;
+	res += ((((s64)rt) << 32) | (u32)rs);
+
+	rt = res;
+	regs->lo = (s64)rt;
+	rs = res >> 32;
+	regs->hi = (s64)rs;
+
+	MIPS_R2_STATS(dsps);
+
+	return 0;
+}
+
+/**
+ * msub_func - Emulate a MSUB instruction
+ * @regs: Process register set
+ * @ir: Instruction
+ *
+ * Returns 0 since it always succeeds.
+ */
+static int msub_func(struct pt_regs *regs, u32 ir)
+{
+	s64 res;
+	s32 rt, rs;
+
+	rt = regs->regs[MIPSInst_RT(ir)];
+	rs = regs->regs[MIPSInst_RS(ir)];
+	res = (s64)rt * (s64)rs;
+	rt = regs->hi;
+	rs = regs->lo;
+	res = ((((s64)rt) << 32) | (u32)rs) - res;
+
+	rt = res;
+	regs->lo = (s64)rt;
+	rs = res >> 32;
+	regs->hi = (s64)rs;
+
+	MIPS_R2_STATS(dsps);
+
+	return 0;
+}
+
+/**
+ * msubu_func - Emulate a MSUBU instruction
+ * @regs: Process register set
+ * @ir: Instruction
+ *
+ * Returns 0 since it always succeeds.
+ */
+static int msubu_func(struct pt_regs *regs, u32 ir)
+{
+	u64 res;
+	u32 rt, rs;
+
+	rt = regs->regs[MIPSInst_RT(ir)];
+	rs = regs->regs[MIPSInst_RS(ir)];
+	res = (u64)rt * (u64)rs;
+	rt = regs->hi;
+	rs = regs->lo;
+	res = ((((s64)rt) << 32) | (u32)rs) - res;
+
+	rt = res;
+	regs->lo = (s64)rt;
+	rs = res >> 32;
+	regs->hi = (s64)rs;
+
+	MIPS_R2_STATS(dsps);
+
+	return 0;
+}
+
+/**
+ * mul_func - Emulate a MUL instruction
+ * @regs: Process register set
+ * @ir: Instruction
+ *
+ * Returns 0 since it always succeeds.
+ */
+static int mul_func(struct pt_regs *regs, u32 ir)
+{
+	s64 res;
+	s32 rt, rs;
+
+	if (!MIPSInst_RD(ir))
+		return 0;
+	rt = regs->regs[MIPSInst_RT(ir)];
+	rs = regs->regs[MIPSInst_RS(ir)];
+	res = (s64)rt * (s64)rs;
+
+	rs = res;
+	regs->regs[MIPSInst_RD(ir)] = (s64)rs;
+
+	MIPS_R2_STATS(muls);
+
+	return 0;
+}
+
+/**
+ * clz_func - Emulate a CLZ instruction
+ * @regs: Process register set
+ * @ir: Instruction
+ *
+ * Returns 0 since it always succeeds.
+ */
+static int clz_func(struct pt_regs *regs, u32 ir)
+{
+	u32 res;
+	u32 rs;
+
+	if (!MIPSInst_RD(ir))
+		return 0;
+
+	rs = regs->regs[MIPSInst_RS(ir)];
+	__asm__ __volatile__("clz %0, %1" : "=r"(res) : "r"(rs));
+	regs->regs[MIPSInst_RD(ir)] = res;
+
+	MIPS_R2_STATS(bops);
+
+	return 0;
+}
+
+/**
+ * clo_func - Emulate a CLO instruction
+ * @regs: Process register set
+ * @ir: Instruction
+ *
+ * Returns 0 since it always succeeds.
+ */
+
+static int clo_func(struct pt_regs *regs, u32 ir)
+{
+	u32 res;
+	u32 rs;
+
+	if (!MIPSInst_RD(ir))
+		return 0;
+
+	rs = regs->regs[MIPSInst_RS(ir)];
+	__asm__ __volatile__("clo %0, %1" : "=r"(res) : "r"(rs));
+	regs->regs[MIPSInst_RD(ir)] = res;
+
+	MIPS_R2_STATS(bops);
+
+	return 0;
+}
+
+/**
+ * dclz_func - Emulate a DCLZ instruction
+ * @regs: Process register set
+ * @ir: Instruction
+ *
+ * Returns 0 since it always succeeds.
+ */
+static int dclz_func(struct pt_regs *regs, u32 ir)
+{
+	u64 res;
+	u64 rs;
+
+	if (config_enabled(CONFIG_32BIT))
+		return SIGILL;
+
+	if (!MIPSInst_RD(ir))
+		return 0;
+
+	rs = regs->regs[MIPSInst_RS(ir)];
+	__asm__ __volatile__("dclz %0, %1" : "=r"(res) : "r"(rs));
+	regs->regs[MIPSInst_RD(ir)] = res;
+
+	MIPS_R2_STATS(bops);
+
+	return 0;
+}
+
+/**
+ * dclo_func - Emulate a DCLO instruction
+ * @regs: Process register set
+ * @ir: Instruction
+ *
+ * Returns 0 since it always succeeds.
+ */
+static int dclo_func(struct pt_regs *regs, u32 ir)
+{
+	u64 res;
+	u64 rs;
+
+	if (config_enabled(CONFIG_32BIT))
+		return SIGILL;
+
+	if (!MIPSInst_RD(ir))
+		return 0;
+
+	rs = regs->regs[MIPSInst_RS(ir)];
+	__asm__ __volatile__("dclo %0, %1" : "=r"(res) : "r"(rs));
+	regs->regs[MIPSInst_RD(ir)] = res;
+
+	MIPS_R2_STATS(bops);
+
+	return 0;
+}
+
+/* R6 removed instructions for the SPECIAL2 opcode */
+static struct r2_decoder_table spec2_op_table[] = {
+	{ 0xfc00ffff, 0x70000000, madd_func },
+	{ 0xfc00ffff, 0x70000001, maddu_func },
+	{ 0xfc0007ff, 0x70000002, mul_func },
+	{ 0xfc00ffff, 0x70000004, msub_func },
+	{ 0xfc00ffff, 0x70000005, msubu_func },
+	{ 0xfc0007ff, 0x70000020, clz_func },
+	{ 0xfc0007ff, 0x70000021, clo_func },
+	{ 0xfc0007ff, 0x70000024, dclz_func },
+	{ 0xfc0007ff, 0x70000025, dclo_func },
+	{ }
+};
+
+static inline int mipsr2_find_op_func(struct pt_regs *regs, u32 inst,
+				      struct r2_decoder_table *table)
+{
+	struct r2_decoder_table *p;
+	int err;
+
+	for (p = table; p->func; p++) {
+		if ((inst & p->mask) == p->code) {
+			err = (p->func)(regs, inst);
+			return err;
+		}
+	}
+	return SIGILL;
+}
+
+/**
+ * mipsr2_decoder: Decode and emulate a MIPS R2 instruction
+ * @regs: Process register set
+ * @inst: Instruction to decode and emulate
+ */
+int mipsr2_decoder(struct pt_regs *regs, u32 inst)
+{
+	int err = 0;
+	unsigned long vaddr;
+	u32 nir;
+	unsigned long cpc, epc, nepc, r31, res, rs, rt;
+
+	void __user *fault_addr = NULL;
+	int pass = 0;
+
+repeat:
+	r31 = regs->regs[31];
+	epc = regs->cp0_epc;
+	err = compute_return_epc(regs);
+	if (err < 0) {
+		BUG();
+		return SIGEMT;
+	}
+	pr_debug("Emulating the 0x%08x R2 instruction @ 0x%08lx (pass=%d))\n",
+		 inst, epc, pass);
+
+	switch (MIPSInst_OPCODE(inst)) {
+	case spec_op:
+		err = mipsr2_find_op_func(regs, inst, spec_op_table);
+		if (err < 0) {
+			/* FPU instruction under JR */
+			regs->cp0_cause |= CAUSEF_BD;
+			goto fpu_emul;
+		}
+		break;
+	case spec2_op:
+		err = mipsr2_find_op_func(regs, inst, spec2_op_table);
+		break;
+	case bcond_op:
+		rt = MIPSInst_RT(inst);
+		rs = MIPSInst_RS(inst);
+		switch (rt) {
+		case tgei_op:
+			if ((long)regs->regs[rs] >= MIPSInst_SIMM(inst))
+				do_trap_or_bp(regs, 0, "TGEI");
+
+			MIPS_R2_STATS(traps);
+
+			break;
+		case tgeiu_op:
+			if (regs->regs[rs] >= MIPSInst_UIMM(inst))
+				do_trap_or_bp(regs, 0, "TGEIU");
+
+			MIPS_R2_STATS(traps);
+
+			break;
+		case tlti_op:
+			if ((long)regs->regs[rs] < MIPSInst_SIMM(inst))
+				do_trap_or_bp(regs, 0, "TLTI");
+
+			MIPS_R2_STATS(traps);
+
+			break;
+		case tltiu_op:
+			if (regs->regs[rs] < MIPSInst_UIMM(inst))
+				do_trap_or_bp(regs, 0, "TLTIU");
+
+			MIPS_R2_STATS(traps);
+
+			break;
+		case teqi_op:
+			if (regs->regs[rs] == MIPSInst_SIMM(inst))
+				do_trap_or_bp(regs, 0, "TEQI");
+
+			MIPS_R2_STATS(traps);
+
+			break;
+		case tnei_op:
+			if (regs->regs[rs] != MIPSInst_SIMM(inst))
+				do_trap_or_bp(regs, 0, "TNEI");
+
+			MIPS_R2_STATS(traps);
+
+			break;
+		case bltzl_op:
+		case bgezl_op:
+		case bltzall_op:
+		case bgezall_op:
+			if (delay_slot(regs)) {
+				err = SIGILL;
+				break;
+			}
+			regs->regs[31] = r31;
+			regs->cp0_epc = epc;
+			err = __compute_return_epc(regs);
+			if (err < 0)
+				return SIGEMT;
+			if (err != BRANCH_LIKELY_TAKEN)
+				break;
+			cpc = regs->cp0_epc;
+			nepc = epc + 4;
+			err = __get_user(nir, (u32 __user *)nepc);
+			if (err) {
+				err = SIGSEGV;
+				break;
+			}
+			/*
+			 * This will probably be optimized away when
+			 * CONFIG_DEBUG_FS is not enabled
+			 */
+			switch (rt) {
+			case bltzl_op:
+				MIPS_R2BR_STATS(bltzl);
+				break;
+			case bgezl_op:
+				MIPS_R2BR_STATS(bgezl);
+				break;
+			case bltzall_op:
+				MIPS_R2BR_STATS(bltzall);
+				break;
+			case bgezall_op:
+				MIPS_R2BR_STATS(bgezall);
+				break;
+			}
+
+			switch (MIPSInst_OPCODE(nir)) {
+			case cop1_op:
+			case cop1x_op:
+			case lwc1_op:
+			case swc1_op:
+				regs->cp0_cause |= CAUSEF_BD;
+				goto fpu_emul;
+			}
+			if (nir) {
+				err = mipsr6_emul(regs, nir);
+				if (err > 0) {
+					err = mips_dsemul(regs, nir, cpc);
+					if (err == SIGILL)
+						err = SIGEMT;
+					MIPS_R2_STATS(dsemul);
+				}
+			}
+			break;
+		case bltzal_op:
+		case bgezal_op:
+			if (delay_slot(regs)) {
+				err = SIGILL;
+				break;
+			}
+			regs->regs[31] = r31;
+			regs->cp0_epc = epc;
+			err = __compute_return_epc(regs);
+			if (err < 0)
+				return SIGEMT;
+			cpc = regs->cp0_epc;
+			nepc = epc + 4;
+			err = __get_user(nir, (u32 __user *)nepc);
+			if (err) {
+				err = SIGSEGV;
+				break;
+			}
+			/*
+			 * This will probably be optimized away when
+			 * CONFIG_DEBUG_FS is not enabled
+			 */
+			switch (rt) {
+			case bltzal_op:
+				MIPS_R2BR_STATS(bltzal);
+				break;
+			case bgezal_op:
+				MIPS_R2BR_STATS(bgezal);
+				break;
+			}
+
+			switch (MIPSInst_OPCODE(nir)) {
+			case cop1_op:
+			case cop1x_op:
+			case lwc1_op:
+			case swc1_op:
+				regs->cp0_cause |= CAUSEF_BD;
+				goto fpu_emul;
+			}
+			if (nir) {
+				err = mipsr6_emul(regs, nir);
+				if (err > 0) {
+					err = mips_dsemul(regs, nir, cpc);
+					if (err == SIGILL)
+						err = SIGEMT;
+					MIPS_R2_STATS(dsemul);
+				}
+			}
+			break;
+		default:
+			regs->regs[31] = r31;
+			regs->cp0_epc = epc;
+			err = SIGILL;
+			break;
+		}
+		break;
+
+	case beql_op:
+	case bnel_op:
+	case blezl_op:
+	case bgtzl_op:
+		if (delay_slot(regs)) {
+			err = SIGILL;
+			break;
+		}
+		regs->regs[31] = r31;
+		regs->cp0_epc = epc;
+		err = __compute_return_epc(regs);
+		if (err < 0)
+			return SIGEMT;
+		if (err != BRANCH_LIKELY_TAKEN)
+			break;
+		cpc = regs->cp0_epc;
+		nepc = epc + 4;
+		err = __get_user(nir, (u32 __user *)nepc);
+		if (err) {
+			err = SIGSEGV;
+			break;
+		}
+		/*
+		 * This will probably be optimized away when
+		 * CONFIG_DEBUG_FS is not enabled
+		 */
+		switch (MIPSInst_OPCODE(inst)) {
+		case beql_op:
+			MIPS_R2BR_STATS(beql);
+			break;
+		case bnel_op:
+			MIPS_R2BR_STATS(bnel);
+			break;
+		case blezl_op:
+			MIPS_R2BR_STATS(blezl);
+			break;
+		case bgtzl_op:
+			MIPS_R2BR_STATS(bgtzl);
+			break;
+		}
+
+		switch (MIPSInst_OPCODE(nir)) {
+		case cop1_op:
+		case cop1x_op:
+		case lwc1_op:
+		case swc1_op:
+			regs->cp0_cause |= CAUSEF_BD;
+			goto fpu_emul;
+		}
+		if (nir) {
+			err = mipsr6_emul(regs, nir);
+			if (err > 0) {
+				err = mips_dsemul(regs, nir, cpc);
+				if (err == SIGILL)
+					err = SIGEMT;
+				MIPS_R2_STATS(dsemul);
+			}
+		}
+		break;
+	case lwc1_op:
+	case swc1_op:
+	case cop1_op:
+	case cop1x_op:
+fpu_emul:
+		regs->regs[31] = r31;
+		regs->cp0_epc = epc;
+		if (!used_math()) {     /* First time FPU user.  */
+			err = init_fpu();
+			set_used_math();
+		}
+		lose_fpu(1);    /* Save FPU state for the emulator. */
+
+		err = fpu_emulator_cop1Handler(regs, &current->thread.fpu, 0,
+					       &fault_addr);
+
+		/*
+		 * this is a tricky issue - lose_fpu() uses LL/SC atomics
+		 * if FPU is owned and effectively cancels user level LL/SC.
+		 * So, it could be logical to don't restore FPU ownership here.
+		 * But the sequence of multiple FPU instructions is much much
+		 * more often than LL-FPU-SC and I prefer loop here until
+		 * next scheduler cycle cancels FPU ownership
+		 */
+		own_fpu(1);	/* Restore FPU state. */
+
+		if (err)
+			current->thread.cp0_baduaddr = (unsigned long)fault_addr;
+
+		MIPS_R2_STATS(fpus);
+
+		break;
+
+	case lwl_op:
+		rt = regs->regs[MIPSInst_RT(inst)];
+		vaddr = regs->regs[MIPSInst_RS(inst)] + MIPSInst_SIMM(inst);
+		if (!access_ok(VERIFY_READ, vaddr, 4)) {
+			current->thread.cp0_baduaddr = vaddr;
+			err = SIGSEGV;
+			break;
+		}
+		__asm__ __volatile__(
+			"	.set	push\n"
+			"	.set	reorder\n"
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+			"1:"	LB	"%1, 0(%2)\n"
+				INS	"%0, %1, 24, 8\n"
+			"	andi	%1, %2, 0x3\n"
+			"	beq	$0, %1, 9f\n"
+				ADDIU	"%2, %2, -1\n"
+			"2:"	LB	"%1, 0(%2)\n"
+				INS	"%0, %1, 16, 8\n"
+			"	andi	%1, %2, 0x3\n"
+			"	beq	$0, %1, 9f\n"
+				ADDIU	"%2, %2, -1\n"
+			"3:"	LB	"%1, 0(%2)\n"
+				INS	"%0, %1, 8, 8\n"
+			"	andi	%1, %2, 0x3\n"
+			"	beq	$0, %1, 9f\n"
+				ADDIU	"%2, %2, -1\n"
+			"4:"	LB	"%1, 0(%2)\n"
+				INS	"%0, %1, 0, 8\n"
+#else /* !CONFIG_CPU_LITTLE_ENDIAN */
+			"1:"	LB	"%1, 0(%2)\n"
+				INS	"%0, %1, 24, 8\n"
+				ADDIU	"%2, %2, 1\n"
+			"	andi	%1, %2, 0x3\n"
+			"	beq	$0, %1, 9f\n"
+			"2:"	LB	"%1, 0(%2)\n"
+				INS	"%0, %1, 16, 8\n"
+				ADDIU	"%2, %2, 1\n"
+			"	andi	%1, %2, 0x3\n"
+			"	beq	$0, %1, 9f\n"
+			"3:"	LB	"%1, 0(%2)\n"
+				INS	"%0, %1, 8, 8\n"
+				ADDIU	"%2, %2, 1\n"
+			"	andi	%1, %2, 0x3\n"
+			"	beq	$0, %1, 9f\n"
+			"4:"	LB	"%1, 0(%2)\n"
+				INS	"%0, %1, 0, 8\n"
+#endif /* CONFIG_CPU_LITTLE_ENDIAN */
+			"9:	sll	%0, %0, 0\n"
+			"10:\n"
+			"	.insn\n"
+			"	.section	.fixup,\"ax\"\n"
+			"8:	li	%3,%4\n"
+			"	j	10b\n"
+			"	.previous\n"
+			"	.section	__ex_table,\"a\"\n"
+			"	.word	1b,8b\n"
+			"	.word	2b,8b\n"
+			"	.word	3b,8b\n"
+			"	.word	4b,8b\n"
+			"	.previous\n"
+			"	.set	pop\n"
+			: "+&r"(rt), "=&r"(rs),
+			  "+&r"(vaddr), "+&r"(err)
+			: "i"(SIGSEGV));
+
+		if (MIPSInst_RT(inst) && !err)
+			regs->regs[MIPSInst_RT(inst)] = rt;
+
+		MIPS_R2_STATS(loads);
+
+		break;
+
+	case lwr_op:
+		rt = regs->regs[MIPSInst_RT(inst)];
+		vaddr = regs->regs[MIPSInst_RS(inst)] + MIPSInst_SIMM(inst);
+		if (!access_ok(VERIFY_READ, vaddr, 4)) {
+			current->thread.cp0_baduaddr = vaddr;
+			err = SIGSEGV;
+			break;
+		}
+		__asm__ __volatile__(
+			"       .set	push\n"
+			"       .set	reorder\n"
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+			"1:"    LB	"%1, 0(%2)\n"
+				INS	"%0, %1, 0, 8\n"
+				ADDIU	"%2, %2, 1\n"
+			"       andi	%1, %2, 0x3\n"
+			"       beq	$0, %1, 9f\n"
+			"2:"    LB	"%1, 0(%2)\n"
+				INS	"%0, %1, 8, 8\n"
+				ADDIU	"%2, %2, 1\n"
+			"       andi	%1, %2, 0x3\n"
+			"       beq	$0, %1, 9f\n"
+			"3:"    LB	"%1, 0(%2)\n"
+				INS	"%0, %1, 16, 8\n"
+				ADDIU	"%2, %2, 1\n"
+			"       andi	%1, %2, 0x3\n"
+			"       beq	$0, %1, 9f\n"
+			"4:"    LB	"%1, 0(%2)\n"
+				INS	"%0, %1, 24, 8\n"
+			"       sll	%0, %0, 0\n"
+#else /* !CONFIG_CPU_LITTLE_ENDIAN */
+			"1:"    LB	"%1, 0(%2)\n"
+				INS	"%0, %1, 0, 8\n"
+			"       andi	%1, %2, 0x3\n"
+			"       beq	$0, %1, 9f\n"
+				ADDIU	"%2, %2, -1\n"
+			"2:"    LB	"%1, 0(%2)\n"
+				INS	"%0, %1, 8, 8\n"
+			"       andi	%1, %2, 0x3\n"
+			"       beq	$0, %1, 9f\n"
+				ADDIU	"%2, %2, -1\n"
+			"3:"    LB	"%1, 0(%2)\n"
+				INS	"%0, %1, 16, 8\n"
+			"       andi	%1, %2, 0x3\n"
+			"       beq	$0, %1, 9f\n"
+				ADDIU	"%2, %2, -1\n"
+			"4:"    LB	"%1, 0(%2)\n"
+				INS	"%0, %1, 24, 8\n"
+			"       sll	%0, %0, 0\n"
+#endif /* CONFIG_CPU_LITTLE_ENDIAN */
+			"9:\n"
+			"10:\n"
+			"	.insn\n"
+			"	.section	.fixup,\"ax\"\n"
+			"8:	li	%3,%4\n"
+			"	j	10b\n"
+			"       .previous\n"
+			"	.section	__ex_table,\"a\"\n"
+			"	.word	1b,8b\n"
+			"	.word	2b,8b\n"
+			"	.word	3b,8b\n"
+			"	.word	4b,8b\n"
+			"	.previous\n"
+			"	.set	pop\n"
+			: "+&r"(rt), "=&r"(rs),
+			  "+&r"(vaddr), "+&r"(err)
+			: "i"(SIGSEGV));
+		if (MIPSInst_RT(inst) && !err)
+			regs->regs[MIPSInst_RT(inst)] = rt;
+
+		MIPS_R2_STATS(loads);
+
+		break;
+
+	case swl_op:
+		rt = regs->regs[MIPSInst_RT(inst)];
+		vaddr = regs->regs[MIPSInst_RS(inst)] + MIPSInst_SIMM(inst);
+		if (!access_ok(VERIFY_WRITE, vaddr, 4)) {
+			current->thread.cp0_baduaddr = vaddr;
+			err = SIGSEGV;
+			break;
+		}
+		__asm__ __volatile__(
+			"	.set	push\n"
+			"	.set	reorder\n"
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+				EXT	"%1, %0, 24, 8\n"
+			"1:"	SB	"%1, 0(%2)\n"
+			"	andi	%1, %2, 0x3\n"
+			"	beq	$0, %1, 9f\n"
+				ADDIU	"%2, %2, -1\n"
+				EXT	"%1, %0, 16, 8\n"
+			"2:"	SB	"%1, 0(%2)\n"
+			"	andi	%1, %2, 0x3\n"
+			"	beq	$0, %1, 9f\n"
+				ADDIU	"%2, %2, -1\n"
+				EXT	"%1, %0, 8, 8\n"
+			"3:"	SB	"%1, 0(%2)\n"
+			"	andi	%1, %2, 0x3\n"
+			"	beq	$0, %1, 9f\n"
+				ADDIU	"%2, %2, -1\n"
+				EXT	"%1, %0, 0, 8\n"
+			"4:"	SB	"%1, 0(%2)\n"
+#else /* !CONFIG_CPU_LITTLE_ENDIAN */
+				EXT	"%1, %0, 24, 8\n"
+			"1:"	SB	"%1, 0(%2)\n"
+				ADDIU	"%2, %2, 1\n"
+			"	andi	%1, %2, 0x3\n"
+			"	beq	$0, %1, 9f\n"
+				EXT	"%1, %0, 16, 8\n"
+			"2:"	SB	"%1, 0(%2)\n"
+				ADDIU	"%2, %2, 1\n"
+			"	andi	%1, %2, 0x3\n"
+			"	beq	$0, %1, 9f\n"
+				EXT	"%1, %0, 8, 8\n"
+			"3:"	SB	"%1, 0(%2)\n"
+				ADDIU	"%2, %2, 1\n"
+			"	andi	%1, %2, 0x3\n"
+			"	beq	$0, %1, 9f\n"
+				EXT	"%1, %0, 0, 8\n"
+			"4:"	SB	"%1, 0(%2)\n"
+#endif /* CONFIG_CPU_LITTLE_ENDIAN */
+			"9:\n"
+			"	.insn\n"
+			"       .section        .fixup,\"ax\"\n"
+			"8:	li	%3,%4\n"
+			"	j	9b\n"
+			"	.previous\n"
+			"	.section        __ex_table,\"a\"\n"
+			"	.word	1b,8b\n"
+			"	.word	2b,8b\n"
+			"	.word	3b,8b\n"
+			"	.word	4b,8b\n"
+			"	.previous\n"
+			"	.set	pop\n"
+			: "+&r"(rt), "=&r"(rs),
+			  "+&r"(vaddr), "+&r"(err)
+			: "i"(SIGSEGV)
+			: "memory");
+
+		MIPS_R2_STATS(stores);
+
+		break;
+
+	case swr_op:
+		rt = regs->regs[MIPSInst_RT(inst)];
+		vaddr = regs->regs[MIPSInst_RS(inst)] + MIPSInst_SIMM(inst);
+		if (!access_ok(VERIFY_WRITE, vaddr, 4)) {
+			current->thread.cp0_baduaddr = vaddr;
+			err = SIGSEGV;
+			break;
+		}
+		__asm__ __volatile__(
+			"	.set	push\n"
+			"	.set	reorder\n"
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+				EXT	"%1, %0, 0, 8\n"
+			"1:"	SB	"%1, 0(%2)\n"
+				ADDIU	"%2, %2, 1\n"
+			"	andi	%1, %2, 0x3\n"
+			"	beq	$0, %1, 9f\n"
+				EXT	"%1, %0, 8, 8\n"
+			"2:"	SB	"%1, 0(%2)\n"
+				ADDIU	"%2, %2, 1\n"
+			"	andi	%1, %2, 0x3\n"
+			"	beq	$0, %1, 9f\n"
+				EXT	"%1, %0, 16, 8\n"
+			"3:"	SB	"%1, 0(%2)\n"
+				ADDIU	"%2, %2, 1\n"
+			"	andi	%1, %2, 0x3\n"
+			"	beq	$0, %1, 9f\n"
+				EXT	"%1, %0, 24, 8\n"
+			"4:"	SB	"%1, 0(%2)\n"
+#else /* !CONFIG_CPU_LITTLE_ENDIAN */
+				EXT	"%1, %0, 0, 8\n"
+			"1:"	SB	"%1, 0(%2)\n"
+			"	andi	%1, %2, 0x3\n"
+			"	beq	$0, %1, 9f\n"
+				ADDIU	"%2, %2, -1\n"
+				EXT	"%1, %0, 8, 8\n"
+			"2:"	SB	"%1, 0(%2)\n"
+			"	andi	%1, %2, 0x3\n"
+			"	beq	$0, %1, 9f\n"
+				ADDIU	"%2, %2, -1\n"
+				EXT	"%1, %0, 16, 8\n"
+			"3:"	SB	"%1, 0(%2)\n"
+			"	andi	%1, %2, 0x3\n"
+			"	beq	$0, %1, 9f\n"
+				ADDIU	"%2, %2, -1\n"
+				EXT	"%1, %0, 24, 8\n"
+			"4:"	SB	"%1, 0(%2)\n"
+#endif /* CONFIG_CPU_LITTLE_ENDIAN */
+			"9:\n"
+			"	.insn\n"
+			"	.section        .fixup,\"ax\"\n"
+			"8:	li	%3,%4\n"
+			"	j	9b\n"
+			"	.previous\n"
+			"	.section        __ex_table,\"a\"\n"
+			"	.word	1b,8b\n"
+			"	.word	2b,8b\n"
+			"	.word	3b,8b\n"
+			"	.word	4b,8b\n"
+			"	.previous\n"
+			"	.set	pop\n"
+			: "+&r"(rt), "=&r"(rs),
+			  "+&r"(vaddr), "+&r"(err)
+			: "i"(SIGSEGV)
+			: "memory");
+
+		MIPS_R2_STATS(stores);
+
+		break;
+
+	case ldl_op:
+		if (config_enabled(CONFIG_32BIT)) {
+		    err = SIGILL;
+		    break;
+		}
+
+		rt = regs->regs[MIPSInst_RT(inst)];
+		vaddr = regs->regs[MIPSInst_RS(inst)] + MIPSInst_SIMM(inst);
+		if (!access_ok(VERIFY_READ, vaddr, 8)) {
+			current->thread.cp0_baduaddr = vaddr;
+			err = SIGSEGV;
+			break;
+		}
+		__asm__ __volatile__(
+			"	.set    push\n"
+			"	.set    reorder\n"
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+			"1:	lb	%1, 0(%2)\n"
+			"	dinsu	%0, %1, 56, 8\n"
+			"	andi	%1, %2, 0x7\n"
+			"	beq	$0, %1, 9f\n"
+			"	daddiu	%2, %2, -1\n"
+			"2:	lb	%1, 0(%2)\n"
+			"	dinsu	%0, %1, 48, 8\n"
+			"	andi	%1, %2, 0x7\n"
+			"	beq	$0, %1, 9f\n"
+			"	daddiu	%2, %2, -1\n"
+			"3:	lb	%1, 0(%2)\n"
+			"	dinsu	%0, %1, 40, 8\n"
+			"	andi	%1, %2, 0x7\n"
+			"	beq	$0, %1, 9f\n"
+			"	daddiu	%2, %2, -1\n"
+			"4:	lb	%1, 0(%2)\n"
+			"	dinsu	%0, %1, 32, 8\n"
+			"	andi	%1, %2, 0x7\n"
+			"	beq	$0, %1, 9f\n"
+			"	daddiu	%2, %2, -1\n"
+			"5:	lb	%1, 0(%2)\n"
+			"	dins	%0, %1, 24, 8\n"
+			"	andi	%1, %2, 0x7\n"
+			"	beq	$0, %1, 9f\n"
+			"	daddiu	%2, %2, -1\n"
+			"6:	lb	%1, 0(%2)\n"
+			"	dins	%0, %1, 16, 8\n"
+			"	andi	%1, %2, 0x7\n"
+			"	beq	$0, %1, 9f\n"
+			"	daddiu	%2, %2, -1\n"
+			"7:	lb	%1, 0(%2)\n"
+			"	dins	%0, %1, 8, 8\n"
+			"	andi	%1, %2, 0x7\n"
+			"	beq	$0, %1, 9f\n"
+			"	daddiu	%2, %2, -1\n"
+			"0:	lb	%1, 0(%2)\n"
+			"	dins	%0, %1, 0, 8\n"
+#else /* !CONFIG_CPU_LITTLE_ENDIAN */
+			"1:	lb	%1, 0(%2)\n"
+			"	dinsu	%0, %1, 56, 8\n"
+			"	daddiu	%2, %2, 1\n"
+			"	andi	%1, %2, 0x7\n"
+			"	beq	$0, %1, 9f\n"
+			"2:	lb	%1, 0(%2)\n"
+			"	dinsu	%0, %1, 48, 8\n"
+			"	daddiu	%2, %2, 1\n"
+			"	andi	%1, %2, 0x7\n"
+			"	beq	$0, %1, 9f\n"
+			"3:	lb	%1, 0(%2)\n"
+			"	dinsu	%0, %1, 40, 8\n"
+			"	daddiu  %2, %2, 1\n"
+			"	andi	%1, %2, 0x7\n"
+			"	beq	$0, %1, 9f\n"
+			"4:	lb	%1, 0(%2)\n"
+			"	dinsu	%0, %1, 32, 8\n"
+			"	daddiu	%2, %2, 1\n"
+			"	andi	%1, %2, 0x7\n"
+			"	beq	$0, %1, 9f\n"
+			"5:	lb	%1, 0(%2)\n"
+			"	dins	%0, %1, 24, 8\n"
+			"	daddiu	%2, %2, 1\n"
+			"	andi	%1, %2, 0x7\n"
+			"	beq	$0, %1, 9f\n"
+			"6:	lb	%1, 0(%2)\n"
+			"	dins	%0, %1, 16, 8\n"
+			"	daddiu	%2, %2, 1\n"
+			"	andi	%1, %2, 0x7\n"
+			"	beq	$0, %1, 9f\n"
+			"7:	lb	%1, 0(%2)\n"
+			"	dins	%0, %1, 8, 8\n"
+			"	daddiu	%2, %2, 1\n"
+			"	andi	%1, %2, 0x7\n"
+			"	beq	$0, %1, 9f\n"
+			"0:	lb	%1, 0(%2)\n"
+			"	dins	%0, %1, 0, 8\n"
+#endif /* CONFIG_CPU_LITTLE_ENDIAN */
+			"9:\n"
+			"	.insn\n"
+			"	.section        .fixup,\"ax\"\n"
+			"8:	li	%3,%4\n"
+			"	j	9b\n"
+			"	.previous\n"
+			"	.section        __ex_table,\"a\"\n"
+			"	.word	1b,8b\n"
+			"	.word	2b,8b\n"
+			"	.word	3b,8b\n"
+			"	.word	4b,8b\n"
+			"	.word	5b,8b\n"
+			"	.word	6b,8b\n"
+			"	.word	7b,8b\n"
+			"	.word	0b,8b\n"
+			"	.previous\n"
+			"	.set	pop\n"
+			: "+&r"(rt), "=&r"(rs),
+			  "+&r"(vaddr), "+&r"(err)
+			: "i"(SIGSEGV));
+		if (MIPSInst_RT(inst) && !err)
+			regs->regs[MIPSInst_RT(inst)] = rt;
+
+		MIPS_R2_STATS(loads);
+		break;
+
+	case ldr_op:
+		if (config_enabled(CONFIG_32BIT)) {
+		    err = SIGILL;
+		    break;
+		}
+
+		rt = regs->regs[MIPSInst_RT(inst)];
+		vaddr = regs->regs[MIPSInst_RS(inst)] + MIPSInst_SIMM(inst);
+		if (!access_ok(VERIFY_READ, vaddr, 8)) {
+			current->thread.cp0_baduaddr = vaddr;
+			err = SIGSEGV;
+			break;
+		}
+		__asm__ __volatile__(
+			"	.set    push\n"
+			"	.set    reorder\n"
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+			"1:	lb      %1, 0(%2)\n"
+			"	dins   %0, %1, 0, 8\n"
+			"	daddiu  %2, %2, 1\n"
+			"	andi    %1, %2, 0x7\n"
+			"	beq     $0, %1, 9f\n"
+			"2:	lb      %1, 0(%2)\n"
+			"	dins   %0, %1, 8, 8\n"
+			"	daddiu  %2, %2, 1\n"
+			"	andi    %1, %2, 0x7\n"
+			"	beq     $0, %1, 9f\n"
+			"3:	lb      %1, 0(%2)\n"
+			"	dins   %0, %1, 16, 8\n"
+			"	daddiu  %2, %2, 1\n"
+			"	andi    %1, %2, 0x7\n"
+			"	beq     $0, %1, 9f\n"
+			"4:	lb      %1, 0(%2)\n"
+			"	dins   %0, %1, 24, 8\n"
+			"	daddiu  %2, %2, 1\n"
+			"	andi    %1, %2, 0x7\n"
+			"	beq     $0, %1, 9f\n"
+			"5:	lb      %1, 0(%2)\n"
+			"	dinsu    %0, %1, 32, 8\n"
+			"	daddiu  %2, %2, 1\n"
+			"	andi    %1, %2, 0x7\n"
+			"	beq     $0, %1, 9f\n"
+			"6:	lb      %1, 0(%2)\n"
+			"	dinsu    %0, %1, 40, 8\n"
+			"	daddiu  %2, %2, 1\n"
+			"	andi    %1, %2, 0x7\n"
+			"	beq     $0, %1, 9f\n"
+			"7:	lb      %1, 0(%2)\n"
+			"	dinsu    %0, %1, 48, 8\n"
+			"	daddiu  %2, %2, 1\n"
+			"	andi    %1, %2, 0x7\n"
+			"	beq     $0, %1, 9f\n"
+			"0:	lb      %1, 0(%2)\n"
+			"	dinsu    %0, %1, 56, 8\n"
+#else /* !CONFIG_CPU_LITTLE_ENDIAN */
+			"1:	lb      %1, 0(%2)\n"
+			"	dins   %0, %1, 0, 8\n"
+			"	andi    %1, %2, 0x7\n"
+			"	beq     $0, %1, 9f\n"
+			"	daddiu  %2, %2, -1\n"
+			"2:	lb      %1, 0(%2)\n"
+			"	dins   %0, %1, 8, 8\n"
+			"	andi    %1, %2, 0x7\n"
+			"	beq     $0, %1, 9f\n"
+			"	daddiu  %2, %2, -1\n"
+			"3:	lb      %1, 0(%2)\n"
+			"	dins   %0, %1, 16, 8\n"
+			"	andi    %1, %2, 0x7\n"
+			"	beq     $0, %1, 9f\n"
+			"	daddiu  %2, %2, -1\n"
+			"4:	lb      %1, 0(%2)\n"
+			"	dins   %0, %1, 24, 8\n"
+			"	andi    %1, %2, 0x7\n"
+			"	beq     $0, %1, 9f\n"
+			"	daddiu  %2, %2, -1\n"
+			"5:	lb      %1, 0(%2)\n"
+			"	dinsu    %0, %1, 32, 8\n"
+			"	andi    %1, %2, 0x7\n"
+			"	beq     $0, %1, 9f\n"
+			"	daddiu  %2, %2, -1\n"
+			"6:	lb      %1, 0(%2)\n"
+			"	dinsu    %0, %1, 40, 8\n"
+			"	andi    %1, %2, 0x7\n"
+			"	beq     $0, %1, 9f\n"
+			"	daddiu  %2, %2, -1\n"
+			"7:	lb      %1, 0(%2)\n"
+			"	dinsu    %0, %1, 48, 8\n"
+			"	andi    %1, %2, 0x7\n"
+			"	beq     $0, %1, 9f\n"
+			"	daddiu  %2, %2, -1\n"
+			"0:	lb      %1, 0(%2)\n"
+			"	dinsu    %0, %1, 56, 8\n"
+#endif /* CONFIG_CPU_LITTLE_ENDIAN */
+			"9:\n"
+			"	.insn\n"
+			"	.section        .fixup,\"ax\"\n"
+			"8:	li     %3,%4\n"
+			"	j      9b\n"
+			"	.previous\n"
+			"	.section        __ex_table,\"a\"\n"
+			"	.word  1b,8b\n"
+			"	.word  2b,8b\n"
+			"	.word  3b,8b\n"
+			"	.word  4b,8b\n"
+			"	.word  5b,8b\n"
+			"	.word  6b,8b\n"
+			"	.word  7b,8b\n"
+			"	.word  0b,8b\n"
+			"	.previous\n"
+			"	.set    pop\n"
+			: "+&r"(rt), "=&r"(rs),
+			  "+&r"(vaddr), "+&r"(err)
+			: "i"(SIGSEGV));
+		if (MIPSInst_RT(inst) && !err)
+			regs->regs[MIPSInst_RT(inst)] = rt;
+
+		MIPS_R2_STATS(loads);
+		break;
+
+	case sdl_op:
+		if (config_enabled(CONFIG_32BIT)) {
+		    err = SIGILL;
+		    break;
+		}
+
+		rt = regs->regs[MIPSInst_RT(inst)];
+		vaddr = regs->regs[MIPSInst_RS(inst)] + MIPSInst_SIMM(inst);
+		if (!access_ok(VERIFY_WRITE, vaddr, 8)) {
+			current->thread.cp0_baduaddr = vaddr;
+			err = SIGSEGV;
+			break;
+		}
+		__asm__ __volatile__(
+			"	.set	push\n"
+			"	.set	reorder\n"
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+			"	dextu	%1, %0, 56, 8\n"
+			"1:	sb	%1, 0(%2)\n"
+			"	andi	%1, %2, 0x7\n"
+			"	beq	$0, %1, 9f\n"
+			"	daddiu	%2, %2, -1\n"
+			"	dextu	%1, %0, 48, 8\n"
+			"2:	sb	%1, 0(%2)\n"
+			"	andi	%1, %2, 0x7\n"
+			"	beq	$0, %1, 9f\n"
+			"	daddiu	%2, %2, -1\n"
+			"	dextu	%1, %0, 40, 8\n"
+			"3:	sb	%1, 0(%2)\n"
+			"	andi	%1, %2, 0x7\n"
+			"	beq	$0, %1, 9f\n"
+			"	daddiu	%2, %2, -1\n"
+			"	dextu	%1, %0, 32, 8\n"
+			"4:	sb	%1, 0(%2)\n"
+			"	andi	%1, %2, 0x7\n"
+			"	beq	$0, %1, 9f\n"
+			"	daddiu	%2, %2, -1\n"
+			"	dext	%1, %0, 24, 8\n"
+			"5:	sb	%1, 0(%2)\n"
+			"	andi	%1, %2, 0x7\n"
+			"	beq	$0, %1, 9f\n"
+			"	daddiu	%2, %2, -1\n"
+			"	dext	%1, %0, 16, 8\n"
+			"6:	sb	%1, 0(%2)\n"
+			"	andi	%1, %2, 0x7\n"
+			"	beq	$0, %1, 9f\n"
+			"	daddiu	%2, %2, -1\n"
+			"	dext	%1, %0, 8, 8\n"
+			"7:	sb	%1, 0(%2)\n"
+			"	andi	%1, %2, 0x7\n"
+			"	beq	$0, %1, 9f\n"
+			"	daddiu	%2, %2, -1\n"
+			"	dext	%1, %0, 0, 8\n"
+			"0:	sb	%1, 0(%2)\n"
+#else /* !CONFIG_CPU_LITTLE_ENDIAN */
+			"	dextu	%1, %0, 56, 8\n"
+			"1:	sb	%1, 0(%2)\n"
+			"	daddiu	%2, %2, 1\n"
+			"	andi	%1, %2, 0x7\n"
+			"	beq	$0, %1, 9f\n"
+			"	dextu	%1, %0, 48, 8\n"
+			"2:	sb	%1, 0(%2)\n"
+			"	daddiu	%2, %2, 1\n"
+			"	andi	%1, %2, 0x7\n"
+			"	beq	$0, %1, 9f\n"
+			"	dextu	%1, %0, 40, 8\n"
+			"3:	sb	%1, 0(%2)\n"
+			"	daddiu	%2, %2, 1\n"
+			"	andi	%1, %2, 0x7\n"
+			"	beq	$0, %1, 9f\n"
+			"	dextu	%1, %0, 32, 8\n"
+			"4:	sb	%1, 0(%2)\n"
+			"	daddiu	%2, %2, 1\n"
+			"	andi	%1, %2, 0x7\n"
+			"	beq	$0, %1, 9f\n"
+			"	dext	%1, %0, 24, 8\n"
+			"5:	sb	%1, 0(%2)\n"
+			"	daddiu	%2, %2, 1\n"
+			"	andi	%1, %2, 0x7\n"
+			"	beq	$0, %1, 9f\n"
+			"	dext	%1, %0, 16, 8\n"
+			"6:	sb	%1, 0(%2)\n"
+			"	daddiu	%2, %2, 1\n"
+			"	andi	%1, %2, 0x7\n"
+			"	beq	$0, %1, 9f\n"
+			"	dext	%1, %0, 8, 8\n"
+			"7:	sb	%1, 0(%2)\n"
+			"	daddiu	%2, %2, 1\n"
+			"	andi	%1, %2, 0x7\n"
+			"	beq	$0, %1, 9f\n"
+			"	dext	%1, %0, 0, 8\n"
+			"0:	sb	%1, 0(%2)\n"
+#endif /* CONFIG_CPU_LITTLE_ENDIAN */
+			"9:\n"
+			"	.insn\n"
+			"	.section        .fixup,\"ax\"\n"
+			"8:	li	%3,%4\n"
+			"	j	9b\n"
+			"	.previous\n"
+			"	.section        __ex_table,\"a\"\n"
+			"	.word	1b,8b\n"
+			"	.word	2b,8b\n"
+			"	.word	3b,8b\n"
+			"	.word	4b,8b\n"
+			"	.word	5b,8b\n"
+			"	.word	6b,8b\n"
+			"	.word	7b,8b\n"
+			"	.word	0b,8b\n"
+			"	.previous\n"
+			"	.set	pop\n"
+			: "+&r"(rt), "=&r"(rs),
+			  "+&r"(vaddr), "+&r"(err)
+			: "i"(SIGSEGV)
+			: "memory");
+
+		MIPS_R2_STATS(stores);
+		break;
+
+	case sdr_op:
+		if (config_enabled(CONFIG_32BIT)) {
+		    err = SIGILL;
+		    break;
+		}
+
+		rt = regs->regs[MIPSInst_RT(inst)];
+		vaddr = regs->regs[MIPSInst_RS(inst)] + MIPSInst_SIMM(inst);
+		if (!access_ok(VERIFY_WRITE, vaddr, 8)) {
+			current->thread.cp0_baduaddr = vaddr;
+			err = SIGSEGV;
+			break;
+		}
+		__asm__ __volatile__(
+			"       .set	push\n"
+			"       .set	reorder\n"
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+			"       dext	%1, %0, 0, 8\n"
+			"1:     sb	%1, 0(%2)\n"
+			"       daddiu	%2, %2, 1\n"
+			"       andi	%1, %2, 0x7\n"
+			"       beq	$0, %1, 9f\n"
+			"       dext	%1, %0, 8, 8\n"
+			"2:     sb	%1, 0(%2)\n"
+			"       daddiu	%2, %2, 1\n"
+			"       andi	%1, %2, 0x7\n"
+			"       beq	$0, %1, 9f\n"
+			"       dext	%1, %0, 16, 8\n"
+			"3:     sb	%1, 0(%2)\n"
+			"       daddiu	%2, %2, 1\n"
+			"       andi	%1, %2, 0x7\n"
+			"       beq	$0, %1, 9f\n"
+			"       dext	%1, %0, 24, 8\n"
+			"4:     sb	%1, 0(%2)\n"
+			"       daddiu	%2, %2, 1\n"
+			"       andi	%1, %2, 0x7\n"
+			"       beq	$0, %1, 9f\n"
+			"       dextu	%1, %0, 32, 8\n"
+			"5:     sb	%1, 0(%2)\n"
+			"       daddiu	%2, %2, 1\n"
+			"       andi	%1, %2, 0x7\n"
+			"       beq	$0, %1, 9f\n"
+			"       dextu	%1, %0, 40, 8\n"
+			"6:     sb	%1, 0(%2)\n"
+			"       daddiu	%2, %2, 1\n"
+			"       andi	%1, %2, 0x7\n"
+			"       beq	$0, %1, 9f\n"
+			"       dextu	%1, %0, 48, 8\n"
+			"7:     sb	%1, 0(%2)\n"
+			"       daddiu	%2, %2, 1\n"
+			"       andi	%1, %2, 0x7\n"
+			"       beq	$0, %1, 9f\n"
+			"       dextu	%1, %0, 56, 8\n"
+			"0:     sb	%1, 0(%2)\n"
+#else /* !CONFIG_CPU_LITTLE_ENDIAN */
+			"       dext	%1, %0, 0, 8\n"
+			"1:     sb	%1, 0(%2)\n"
+			"       andi	%1, %2, 0x7\n"
+			"       beq	$0, %1, 9f\n"
+			"       daddiu	%2, %2, -1\n"
+			"       dext	%1, %0, 8, 8\n"
+			"2:     sb	%1, 0(%2)\n"
+			"       andi	%1, %2, 0x7\n"
+			"       beq	$0, %1, 9f\n"
+			"       daddiu	%2, %2, -1\n"
+			"       dext	%1, %0, 16, 8\n"
+			"3:     sb	%1, 0(%2)\n"
+			"       andi	%1, %2, 0x7\n"
+			"       beq	$0, %1, 9f\n"
+			"       daddiu	%2, %2, -1\n"
+			"       dext	%1, %0, 24, 8\n"
+			"4:     sb	%1, 0(%2)\n"
+			"       andi	%1, %2, 0x7\n"
+			"       beq	$0, %1, 9f\n"
+			"       daddiu	%2, %2, -1\n"
+			"       dextu	%1, %0, 32, 8\n"
+			"5:     sb	%1, 0(%2)\n"
+			"       andi	%1, %2, 0x7\n"
+			"       beq	$0, %1, 9f\n"
+			"       daddiu	%2, %2, -1\n"
+			"       dextu	%1, %0, 40, 8\n"
+			"6:     sb	%1, 0(%2)\n"
+			"       andi	%1, %2, 0x7\n"
+			"       beq	$0, %1, 9f\n"
+			"       daddiu	%2, %2, -1\n"
+			"       dextu	%1, %0, 48, 8\n"
+			"7:     sb	%1, 0(%2)\n"
+			"       andi	%1, %2, 0x7\n"
+			"       beq	$0, %1, 9f\n"
+			"       daddiu	%2, %2, -1\n"
+			"       dextu	%1, %0, 56, 8\n"
+			"0:     sb	%1, 0(%2)\n"
+#endif /* CONFIG_CPU_LITTLE_ENDIAN */
+			"9:\n"
+			"       .insn\n"
+			"       .section        .fixup,\"ax\"\n"
+			"8:     li	%3,%4\n"
+			"       j	9b\n"
+			"       .previous\n"
+			"       .section        __ex_table,\"a\"\n"
+			"       .word	1b,8b\n"
+			"       .word	2b,8b\n"
+			"       .word	3b,8b\n"
+			"       .word	4b,8b\n"
+			"       .word	5b,8b\n"
+			"       .word	6b,8b\n"
+			"       .word	7b,8b\n"
+			"       .word	0b,8b\n"
+			"       .previous\n"
+			"       .set	pop\n"
+			: "+&r"(rt), "=&r"(rs),
+			  "+&r"(vaddr), "+&r"(err)
+			: "i"(SIGSEGV)
+			: "memory");
+
+		MIPS_R2_STATS(stores);
+
+		break;
+	case ll_op:
+		vaddr = regs->regs[MIPSInst_RS(inst)] + MIPSInst_SIMM(inst);
+		if (vaddr & 0x3) {
+			current->thread.cp0_baduaddr = vaddr;
+			err = SIGBUS;
+			break;
+		}
+		if (!access_ok(VERIFY_READ, vaddr, 4)) {
+			current->thread.cp0_baduaddr = vaddr;
+			err = SIGBUS;
+			break;
+		}
+
+		if (!cpu_has_rw_llb) {
+			/*
+			 * An LL/SC block can't be safely emulated without
+			 * a Config5/LLB availability. So it's probably time to
+			 * kill our process before things get any worse. This is
+			 * because Config5/LLB allows us to use ERETNC so that
+			 * the LLAddr/LLB bit is not cleared when we return from
+			 * an exception. MIPS R2 LL/SC instructions trap with an
+			 * RI exception so once we emulate them here, we return
+			 * back to userland with ERETNC. That preserves the
+			 * LLAddr/LLB so the subsequent SC instruction will
+			 * succeed preserving the atomic semantics of the LL/SC
+			 * block. Without that, there is no safe way to emulate
+			 * an LL/SC block in MIPSR2 userland.
+			 */
+			pr_err("Can't emulate MIPSR2 LL/SC without Config5/LLB\n");
+			err = SIGKILL;
+			break;
+		}
+
+		__asm__ __volatile__(
+			"1:\n"
+			"ll	%0, 0(%2)\n"
+			"2:\n"
+			".insn\n"
+			".section        .fixup,\"ax\"\n"
+			"3:\n"
+			"li	%1, %3\n"
+			"j	2b\n"
+			".previous\n"
+			".section        __ex_table,\"a\"\n"
+			".word  1b, 3b\n"
+			".previous\n"
+			: "=&r"(res), "+&r"(err)
+			: "r"(vaddr), "i"(SIGSEGV)
+			: "memory");
+
+		if (MIPSInst_RT(inst) && !err)
+			regs->regs[MIPSInst_RT(inst)] = res;
+		MIPS_R2_STATS(llsc);
+
+		break;
+
+	case sc_op:
+		vaddr = regs->regs[MIPSInst_RS(inst)] + MIPSInst_SIMM(inst);
+		if (vaddr & 0x3) {
+			current->thread.cp0_baduaddr = vaddr;
+			err = SIGBUS;
+			break;
+		}
+		if (!access_ok(VERIFY_WRITE, vaddr, 4)) {
+			current->thread.cp0_baduaddr = vaddr;
+			err = SIGBUS;
+			break;
+		}
+
+		if (!cpu_has_rw_llb) {
+			/*
+			 * An LL/SC block can't be safely emulated without
+			 * a Config5/LLB availability. So it's probably time to
+			 * kill our process before things get any worse. This is
+			 * because Config5/LLB allows us to use ERETNC so that
+			 * the LLAddr/LLB bit is not cleared when we return from
+			 * an exception. MIPS R2 LL/SC instructions trap with an
+			 * RI exception so once we emulate them here, we return
+			 * back to userland with ERETNC. That preserves the
+			 * LLAddr/LLB so the subsequent SC instruction will
+			 * succeed preserving the atomic semantics of the LL/SC
+			 * block. Without that, there is no safe way to emulate
+			 * an LL/SC block in MIPSR2 userland.
+			 */
+			pr_err("Can't emulate MIPSR2 LL/SC without Config5/LLB\n");
+			err = SIGKILL;
+			break;
+		}
+
+		res = regs->regs[MIPSInst_RT(inst)];
+
+		__asm__ __volatile__(
+			"1:\n"
+			"sc	%0, 0(%2)\n"
+			"2:\n"
+			".insn\n"
+			".section        .fixup,\"ax\"\n"
+			"3:\n"
+			"li	%1, %3\n"
+			"j	2b\n"
+			".previous\n"
+			".section        __ex_table,\"a\"\n"
+			".word	1b, 3b\n"
+			".previous\n"
+			: "+&r"(res), "+&r"(err)
+			: "r"(vaddr), "i"(SIGSEGV));
+
+		if (MIPSInst_RT(inst) && !err)
+			regs->regs[MIPSInst_RT(inst)] = res;
+
+		MIPS_R2_STATS(llsc);
+
+		break;
+
+	case lld_op:
+		if (config_enabled(CONFIG_32BIT)) {
+		    err = SIGILL;
+		    break;
+		}
+
+		vaddr = regs->regs[MIPSInst_RS(inst)] + MIPSInst_SIMM(inst);
+		if (vaddr & 0x7) {
+			current->thread.cp0_baduaddr = vaddr;
+			err = SIGBUS;
+			break;
+		}
+		if (!access_ok(VERIFY_READ, vaddr, 8)) {
+			current->thread.cp0_baduaddr = vaddr;
+			err = SIGBUS;
+			break;
+		}
+
+		if (!cpu_has_rw_llb) {
+			/*
+			 * An LL/SC block can't be safely emulated without
+			 * a Config5/LLB availability. So it's probably time to
+			 * kill our process before things get any worse. This is
+			 * because Config5/LLB allows us to use ERETNC so that
+			 * the LLAddr/LLB bit is not cleared when we return from
+			 * an exception. MIPS R2 LL/SC instructions trap with an
+			 * RI exception so once we emulate them here, we return
+			 * back to userland with ERETNC. That preserves the
+			 * LLAddr/LLB so the subsequent SC instruction will
+			 * succeed preserving the atomic semantics of the LL/SC
+			 * block. Without that, there is no safe way to emulate
+			 * an LL/SC block in MIPSR2 userland.
+			 */
+			pr_err("Can't emulate MIPSR2 LL/SC without Config5/LLB\n");
+			err = SIGKILL;
+			break;
+		}
+
+		__asm__ __volatile__(
+			"1:\n"
+			"lld	%0, 0(%2)\n"
+			"2:\n"
+			".insn\n"
+			".section        .fixup,\"ax\"\n"
+			"3:\n"
+			"li	%1, %3\n"
+			"j	2b\n"
+			".previous\n"
+			".section        __ex_table,\"a\"\n"
+			".word  1b, 3b\n"
+			".previous\n"
+			: "=&r"(res), "+&r"(err)
+			: "r"(vaddr), "i"(SIGSEGV)
+			: "memory");
+		if (MIPSInst_RT(inst) && !err)
+			regs->regs[MIPSInst_RT(inst)] = res;
+
+		MIPS_R2_STATS(llsc);
+
+		break;
+
+	case scd_op:
+		if (config_enabled(CONFIG_32BIT)) {
+		    err = SIGILL;
+		    break;
+		}
+
+		vaddr = regs->regs[MIPSInst_RS(inst)] + MIPSInst_SIMM(inst);
+		if (vaddr & 0x7) {
+			current->thread.cp0_baduaddr = vaddr;
+			err = SIGBUS;
+			break;
+		}
+		if (!access_ok(VERIFY_WRITE, vaddr, 8)) {
+			current->thread.cp0_baduaddr = vaddr;
+			err = SIGBUS;
+			break;
+		}
+
+		if (!cpu_has_rw_llb) {
+			/*
+			 * An LL/SC block can't be safely emulated without
+			 * a Config5/LLB availability. So it's probably time to
+			 * kill our process before things get any worse. This is
+			 * because Config5/LLB allows us to use ERETNC so that
+			 * the LLAddr/LLB bit is not cleared when we return from
+			 * an exception. MIPS R2 LL/SC instructions trap with an
+			 * RI exception so once we emulate them here, we return
+			 * back to userland with ERETNC. That preserves the
+			 * LLAddr/LLB so the subsequent SC instruction will
+			 * succeed preserving the atomic semantics of the LL/SC
+			 * block. Without that, there is no safe way to emulate
+			 * an LL/SC block in MIPSR2 userland.
+			 */
+			pr_err("Can't emulate MIPSR2 LL/SC without Config5/LLB\n");
+			err = SIGKILL;
+			break;
+		}
+
+		res = regs->regs[MIPSInst_RT(inst)];
+
+		__asm__ __volatile__(
+			"1:\n"
+			"scd	%0, 0(%2)\n"
+			"2:\n"
+			".insn\n"
+			".section        .fixup,\"ax\"\n"
+			"3:\n"
+			"li	%1, %3\n"
+			"j	2b\n"
+			".previous\n"
+			".section        __ex_table,\"a\"\n"
+			".word	1b, 3b\n"
+			".previous\n"
+			: "+&r"(res), "+&r"(err)
+			: "r"(vaddr), "i"(SIGSEGV));
+
+		if (MIPSInst_RT(inst) && !err)
+			regs->regs[MIPSInst_RT(inst)] = res;
+
+		MIPS_R2_STATS(llsc);
+
+		break;
+	case pref_op:
+		/* skip it */
+		break;
+	default:
+		err = SIGILL;
+	}
+
+	/*
+	 * Lets not return to userland just yet. It's constly and
+	 * it's likely we have more R2 instructions to emulate
+	 */
+	if (!err && (pass++ < MIPS_R2_EMUL_TOTAL_PASS)) {
+		regs->cp0_cause &= ~CAUSEF_BD;
+		err = get_user(inst, (u32 __user *)regs->cp0_epc);
+		if (!err)
+			goto repeat;
+
+		if (err < 0)
+			err = SIGSEGV;
+	}
+
+	if (err && (err != SIGEMT)) {
+		regs->regs[31] = r31;
+		regs->cp0_epc = epc;
+	}
+
+	/* Likely a MIPS R6 compatible instruction */
+	if (pass && (err == SIGILL))
+		err = 0;
+
+	return err;
+}
+
+#ifdef CONFIG_DEBUG_FS
+
+static int mipsr2_stats_show(struct seq_file *s, void *unused)
+{
+
+	seq_printf(s, "Instruction\tTotal\tBDslot\n------------------------------\n");
+	seq_printf(s, "movs\t\t%ld\t%ld\n",
+		   (unsigned long)__this_cpu_read(mipsr2emustats.movs),
+		   (unsigned long)__this_cpu_read(mipsr2bdemustats.movs));
+	seq_printf(s, "hilo\t\t%ld\t%ld\n",
+		   (unsigned long)__this_cpu_read(mipsr2emustats.hilo),
+		   (unsigned long)__this_cpu_read(mipsr2bdemustats.hilo));
+	seq_printf(s, "muls\t\t%ld\t%ld\n",
+		   (unsigned long)__this_cpu_read(mipsr2emustats.muls),
+		   (unsigned long)__this_cpu_read(mipsr2bdemustats.muls));
+	seq_printf(s, "divs\t\t%ld\t%ld\n",
+		   (unsigned long)__this_cpu_read(mipsr2emustats.divs),
+		   (unsigned long)__this_cpu_read(mipsr2bdemustats.divs));
+	seq_printf(s, "dsps\t\t%ld\t%ld\n",
+		   (unsigned long)__this_cpu_read(mipsr2emustats.dsps),
+		   (unsigned long)__this_cpu_read(mipsr2bdemustats.dsps));
+	seq_printf(s, "bops\t\t%ld\t%ld\n",
+		   (unsigned long)__this_cpu_read(mipsr2emustats.bops),
+		   (unsigned long)__this_cpu_read(mipsr2bdemustats.bops));
+	seq_printf(s, "traps\t\t%ld\t%ld\n",
+		   (unsigned long)__this_cpu_read(mipsr2emustats.traps),
+		   (unsigned long)__this_cpu_read(mipsr2bdemustats.traps));
+	seq_printf(s, "fpus\t\t%ld\t%ld\n",
+		   (unsigned long)__this_cpu_read(mipsr2emustats.fpus),
+		   (unsigned long)__this_cpu_read(mipsr2bdemustats.fpus));
+	seq_printf(s, "loads\t\t%ld\t%ld\n",
+		   (unsigned long)__this_cpu_read(mipsr2emustats.loads),
+		   (unsigned long)__this_cpu_read(mipsr2bdemustats.loads));
+	seq_printf(s, "stores\t\t%ld\t%ld\n",
+		   (unsigned long)__this_cpu_read(mipsr2emustats.stores),
+		   (unsigned long)__this_cpu_read(mipsr2bdemustats.stores));
+	seq_printf(s, "llsc\t\t%ld\t%ld\n",
+		   (unsigned long)__this_cpu_read(mipsr2emustats.llsc),
+		   (unsigned long)__this_cpu_read(mipsr2bdemustats.llsc));
+	seq_printf(s, "dsemul\t\t%ld\t%ld\n",
+		   (unsigned long)__this_cpu_read(mipsr2emustats.dsemul),
+		   (unsigned long)__this_cpu_read(mipsr2bdemustats.dsemul));
+	seq_printf(s, "jr\t\t%ld\n",
+		   (unsigned long)__this_cpu_read(mipsr2bremustats.jrs));
+	seq_printf(s, "bltzl\t\t%ld\n",
+		   (unsigned long)__this_cpu_read(mipsr2bremustats.bltzl));
+	seq_printf(s, "bgezl\t\t%ld\n",
+		   (unsigned long)__this_cpu_read(mipsr2bremustats.bgezl));
+	seq_printf(s, "bltzll\t\t%ld\n",
+		   (unsigned long)__this_cpu_read(mipsr2bremustats.bltzll));
+	seq_printf(s, "bgezll\t\t%ld\n",
+		   (unsigned long)__this_cpu_read(mipsr2bremustats.bgezll));
+	seq_printf(s, "bltzal\t\t%ld\n",
+		   (unsigned long)__this_cpu_read(mipsr2bremustats.bltzal));
+	seq_printf(s, "bgezal\t\t%ld\n",
+		   (unsigned long)__this_cpu_read(mipsr2bremustats.bgezal));
+	seq_printf(s, "beql\t\t%ld\n",
+		   (unsigned long)__this_cpu_read(mipsr2bremustats.beql));
+	seq_printf(s, "bnel\t\t%ld\n",
+		   (unsigned long)__this_cpu_read(mipsr2bremustats.bnel));
+	seq_printf(s, "blezl\t\t%ld\n",
+		   (unsigned long)__this_cpu_read(mipsr2bremustats.blezl));
+	seq_printf(s, "bgtzl\t\t%ld\n",
+		   (unsigned long)__this_cpu_read(mipsr2bremustats.bgtzl));
+
+	return 0;
+}
+
+static int mipsr2_stats_clear_show(struct seq_file *s, void *unused)
+{
+	mipsr2_stats_show(s, unused);
+
+	__this_cpu_write((mipsr2emustats).movs, 0);
+	__this_cpu_write((mipsr2bdemustats).movs, 0);
+	__this_cpu_write((mipsr2emustats).hilo, 0);
+	__this_cpu_write((mipsr2bdemustats).hilo, 0);
+	__this_cpu_write((mipsr2emustats).muls, 0);
+	__this_cpu_write((mipsr2bdemustats).muls, 0);
+	__this_cpu_write((mipsr2emustats).divs, 0);
+	__this_cpu_write((mipsr2bdemustats).divs, 0);
+	__this_cpu_write((mipsr2emustats).dsps, 0);
+	__this_cpu_write((mipsr2bdemustats).dsps, 0);
+	__this_cpu_write((mipsr2emustats).bops, 0);
+	__this_cpu_write((mipsr2bdemustats).bops, 0);
+	__this_cpu_write((mipsr2emustats).traps, 0);
+	__this_cpu_write((mipsr2bdemustats).traps, 0);
+	__this_cpu_write((mipsr2emustats).fpus, 0);
+	__this_cpu_write((mipsr2bdemustats).fpus, 0);
+	__this_cpu_write((mipsr2emustats).loads, 0);
+	__this_cpu_write((mipsr2bdemustats).loads, 0);
+	__this_cpu_write((mipsr2emustats).stores, 0);
+	__this_cpu_write((mipsr2bdemustats).stores, 0);
+	__this_cpu_write((mipsr2emustats).llsc, 0);
+	__this_cpu_write((mipsr2bdemustats).llsc, 0);
+	__this_cpu_write((mipsr2emustats).dsemul, 0);
+	__this_cpu_write((mipsr2bdemustats).dsemul, 0);
+	__this_cpu_write((mipsr2bremustats).jrs, 0);
+	__this_cpu_write((mipsr2bremustats).bltzl, 0);
+	__this_cpu_write((mipsr2bremustats).bgezl, 0);
+	__this_cpu_write((mipsr2bremustats).bltzll, 0);
+	__this_cpu_write((mipsr2bremustats).bgezll, 0);
+	__this_cpu_write((mipsr2bremustats).bltzal, 0);
+	__this_cpu_write((mipsr2bremustats).bgezal, 0);
+	__this_cpu_write((mipsr2bremustats).beql, 0);
+	__this_cpu_write((mipsr2bremustats).bnel, 0);
+	__this_cpu_write((mipsr2bremustats).blezl, 0);
+	__this_cpu_write((mipsr2bremustats).bgtzl, 0);
+
+	return 0;
+}
+
+static int mipsr2_stats_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, mipsr2_stats_show, inode->i_private);
+}
+
+static int mipsr2_stats_clear_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, mipsr2_stats_clear_show, inode->i_private);
+}
+
+static const struct file_operations mipsr2_emul_fops = {
+	.open                   = mipsr2_stats_open,
+	.read			= seq_read,
+	.llseek			= seq_lseek,
+	.release		= single_release,
+};
+
+static const struct file_operations mipsr2_clear_fops = {
+	.open                   = mipsr2_stats_clear_open,
+	.read			= seq_read,
+	.llseek			= seq_lseek,
+	.release		= single_release,
+};
+
+
+static int __init mipsr2_init_debugfs(void)
+{
+	extern struct dentry	*mips_debugfs_dir;
+	struct dentry		*mipsr2_emul;
+
+	if (!mips_debugfs_dir)
+		return -ENODEV;
+
+	mipsr2_emul = debugfs_create_file("r2_emul_stats", S_IRUGO,
+					  mips_debugfs_dir, NULL,
+					  &mipsr2_emul_fops);
+	if (!mipsr2_emul)
+		return -ENOMEM;
+
+	mipsr2_emul = debugfs_create_file("r2_emul_stats_clear", S_IRUGO,
+					  mips_debugfs_dir, NULL,
+					  &mipsr2_clear_fops);
+	if (!mipsr2_emul)
+		return -ENOMEM;
+
+	return 0;
+}
+
+device_initcall(mipsr2_init_debugfs);
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/arch/mips/kernel/mips_ksyms.c b/arch/mips/kernel/mips_ksyms.c
index 17eaf0c..291af0b 100644
--- a/arch/mips/kernel/mips_ksyms.c
+++ b/arch/mips/kernel/mips_ksyms.c
@@ -14,6 +14,8 @@
 #include <linux/mm.h>
 #include <asm/uaccess.h>
 #include <asm/ftrace.h>
+#include <asm/fpu.h>
+#include <asm/msa.h>
 
 extern void *__bzero(void *__s, size_t __count);
 extern long __strncpy_from_kernel_nocheck_asm(char *__to,
@@ -32,6 +34,14 @@
 extern long __strnlen_user_asm(const char *s);
 
 /*
+ * Core architecture code
+ */
+EXPORT_SYMBOL_GPL(_save_fp);
+#ifdef CONFIG_CPU_HAS_MSA
+EXPORT_SYMBOL_GPL(_save_msa);
+#endif
+
+/*
  * String functions
  */
 EXPORT_SYMBOL(memset);
@@ -67,11 +77,13 @@
 EXPORT_SYMBOL(__strnlen_user_nocheck_asm);
 EXPORT_SYMBOL(__strnlen_user_asm);
 
+#ifndef CONFIG_CPU_MIPSR6
 EXPORT_SYMBOL(csum_partial);
 EXPORT_SYMBOL(csum_partial_copy_nocheck);
 EXPORT_SYMBOL(__csum_partial_copy_kernel);
 EXPORT_SYMBOL(__csum_partial_copy_to_user);
 EXPORT_SYMBOL(__csum_partial_copy_from_user);
+#endif
 
 EXPORT_SYMBOL(invalid_pte_table);
 #ifdef CONFIG_FUNCTION_TRACER
diff --git a/arch/mips/kernel/octeon_switch.S b/arch/mips/kernel/octeon_switch.S
index f654768..423ae83 100644
--- a/arch/mips/kernel/octeon_switch.S
+++ b/arch/mips/kernel/octeon_switch.S
@@ -31,15 +31,11 @@
 	/*
 	 * check if we need to save FPU registers
 	 */
-	PTR_L	t3, TASK_THREAD_INFO(a0)
-	LONG_L	t0, TI_FLAGS(t3)
-	li	t1, _TIF_USEDFPU
-	and	t2, t0, t1
-	beqz	t2, 1f
-	nor	t1, zero, t1
-
-	and	t0, t0, t1
-	LONG_S	t0, TI_FLAGS(t3)
+	.set push
+	.set noreorder
+	beqz	a3, 1f
+	 PTR_L	t3, TASK_THREAD_INFO(a0)
+	.set pop
 
 	/*
 	 * clear saved user stack CU1 bit
@@ -56,36 +52,9 @@
 	.set pop
 1:
 
-	/* check if we need to save COP2 registers */
-	PTR_L	t2, TASK_THREAD_INFO(a0)
-	LONG_L	t0, ST_OFF(t2)
-	bbit0	t0, 30, 1f
-
-	/* Disable COP2 in the stored process state */
-	li	t1, ST0_CU2
-	xor	t0, t1
-	LONG_S	t0, ST_OFF(t2)
-
-	/* Enable COP2 so we can save it */
-	mfc0	t0, CP0_STATUS
-	or	t0, t1
-	mtc0	t0, CP0_STATUS
-
-	/* Save COP2 */
-	daddu	a0, THREAD_CP2
-	jal octeon_cop2_save
-	dsubu	a0, THREAD_CP2
-
-	/* Disable COP2 now that we are done */
-	mfc0	t0, CP0_STATUS
-	li	t1, ST0_CU2
-	xor	t0, t1
-	mtc0	t0, CP0_STATUS
-
-1:
 #if CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE > 0
 	/* Check if we need to store CVMSEG state */
-	mfc0	t0, $11,7	/* CvmMemCtl */
+	dmfc0	t0, $11,7	/* CvmMemCtl */
 	bbit0	t0, 6, 3f	/* Is user access enabled? */
 
 	/* Store the CVMSEG state */
@@ -109,9 +78,9 @@
 	.set reorder
 
 	/* Disable access to CVMSEG */
-	mfc0	t0, $11,7	/* CvmMemCtl */
+	dmfc0	t0, $11,7	/* CvmMemCtl */
 	xori	t0, t0, 0x40	/* Bit 6 is CVMSEG user enable */
-	mtc0	t0, $11,7	/* CvmMemCtl */
+	dmtc0	t0, $11,7	/* CvmMemCtl */
 #endif
 3:
 
@@ -147,6 +116,8 @@
  * void octeon_cop2_save(struct octeon_cop2_state *a0)
  */
 	.align	7
+	.set push
+	.set noreorder
 	LEAF(octeon_cop2_save)
 
 	dmfc0	t9, $9,7	/* CvmCtl register. */
@@ -157,17 +128,17 @@
 	dmfc2	t2, 0x0200
 	sd	t0, OCTEON_CP2_CRC_IV(a0)
 	sd	t1, OCTEON_CP2_CRC_LENGTH(a0)
-	sd	t2, OCTEON_CP2_CRC_POLY(a0)
 	/* Skip next instructions if CvmCtl[NODFA_CP2] set */
 	bbit1	t9, 28, 1f
+	 sd	t2, OCTEON_CP2_CRC_POLY(a0)
 
 	/* Save the LLM state */
 	dmfc2	t0, 0x0402
 	dmfc2	t1, 0x040A
 	sd	t0, OCTEON_CP2_LLM_DAT(a0)
-	sd	t1, OCTEON_CP2_LLM_DAT+8(a0)
 
 1:	bbit1	t9, 26, 3f	/* done if CvmCtl[NOCRYPTO] set */
+	 sd	t1, OCTEON_CP2_LLM_DAT+8(a0)
 
 	/* Save the COP2 crypto state */
 	/* this part is mostly common to both pass 1 and later revisions */
@@ -198,18 +169,20 @@
 	sd	t2, OCTEON_CP2_AES_KEY+16(a0)
 	dmfc2	t2, 0x0101
 	sd	t3, OCTEON_CP2_AES_KEY+24(a0)
-	mfc0	t3, $15,0	/* Get the processor ID register */
+	mfc0	v0, $15,0	/* Get the processor ID register */
 	sd	t0, OCTEON_CP2_AES_KEYLEN(a0)
-	li	t0, 0x000d0000	/* This is the processor ID of Octeon Pass1 */
+	li	v1, 0x000d0000	/* This is the processor ID of Octeon Pass1 */
 	sd	t1, OCTEON_CP2_AES_RESULT(a0)
-	sd	t2, OCTEON_CP2_AES_RESULT+8(a0)
 	/* Skip to the Pass1 version of the remainder of the COP2 state */
-	beq	t3, t0, 2f
+	beq	v0, v1, 2f
+	 sd	t2, OCTEON_CP2_AES_RESULT+8(a0)
 
 	/* the non-pass1 state when !CvmCtl[NOCRYPTO] */
 	dmfc2	t1, 0x0240
 	dmfc2	t2, 0x0241
+	ori	v1, v1, 0x9500 /* lowest OCTEON III PrId*/
 	dmfc2	t3, 0x0242
+	subu	v1, v0, v1 /* prid - lowest OCTEON III PrId */
 	dmfc2	t0, 0x0243
 	sd	t1, OCTEON_CP2_HSH_DATW(a0)
 	dmfc2	t1, 0x0244
@@ -262,8 +235,16 @@
 	sd	t1, OCTEON_CP2_GFM_MULT+8(a0)
 	sd	t2, OCTEON_CP2_GFM_POLY(a0)
 	sd	t3, OCTEON_CP2_GFM_RESULT(a0)
-	sd	t0, OCTEON_CP2_GFM_RESULT+8(a0)
+	bltz	v1, 4f
+	 sd	t0, OCTEON_CP2_GFM_RESULT+8(a0)
+	/* OCTEON III things*/
+	dmfc2	t0, 0x024F
+	dmfc2	t1, 0x0050
+	sd	t0, OCTEON_CP2_SHA3(a0)
+	sd	t1, OCTEON_CP2_SHA3+8(a0)
+4:
 	jr	ra
+	 nop
 
 2:	/* pass 1 special stuff when !CvmCtl[NOCRYPTO] */
 	dmfc2	t3, 0x0040
@@ -289,7 +270,9 @@
 
 3:	/* pass 1 or CvmCtl[NOCRYPTO] set */
 	jr	ra
+	 nop
 	END(octeon_cop2_save)
+	.set pop
 
 /*
  * void octeon_cop2_restore(struct octeon_cop2_state *a0)
@@ -354,9 +337,9 @@
 	ld	t2, OCTEON_CP2_AES_RESULT+8(a0)
 	mfc0	t3, $15,0	/* Get the processor ID register */
 	dmtc2	t0, 0x0110
-	li	t0, 0x000d0000	/* This is the processor ID of Octeon Pass1 */
+	li	v0, 0x000d0000	/* This is the processor ID of Octeon Pass1 */
 	dmtc2	t1, 0x0100
-	bne	t0, t3, 3f	/* Skip the next stuff for non-pass1 */
+	bne	v0, t3, 3f	/* Skip the next stuff for non-pass1 */
 	 dmtc2	t2, 0x0101
 
 	/* this code is specific for pass 1 */
@@ -384,6 +367,7 @@
 
 3:	/* this is post-pass1 code */
 	ld	t2, OCTEON_CP2_HSH_DATW(a0)
+	ori	v0, v0, 0x9500 /* lowest OCTEON III PrId*/
 	ld	t0, OCTEON_CP2_HSH_DATW+8(a0)
 	ld	t1, OCTEON_CP2_HSH_DATW+16(a0)
 	dmtc2	t2, 0x0240
@@ -437,9 +421,15 @@
 	dmtc2	t2, 0x0259
 	ld	t2, OCTEON_CP2_GFM_RESULT+8(a0)
 	dmtc2	t0, 0x025E
+	subu	v0, t3, v0 /* prid - lowest OCTEON III PrId */
 	dmtc2	t1, 0x025A
-	dmtc2	t2, 0x025B
-
+	bltz	v0, done_restore
+	 dmtc2	t2, 0x025B
+	/* OCTEON III things*/
+	ld	t0, OCTEON_CP2_SHA3(a0)
+	ld	t1, OCTEON_CP2_SHA3+8(a0)
+	dmtc2	t0, 0x0051
+	dmtc2	t1, 0x0050
 done_restore:
 	jr	ra
 	 nop
@@ -450,18 +440,23 @@
  * void octeon_mult_save()
  * sp is assumed to point to a struct pt_regs
  *
- * NOTE: This is called in SAVE_SOME in stackframe.h. It can only
- *	 safely modify k0 and k1.
+ * NOTE: This is called in SAVE_TEMP in stackframe.h. It can
+ *       safely modify v1,k0, k1,$10-$15, and $24.  It will
+ *	 be overwritten with a processor specific version of the code.
  */
-	.align	7
+	.p2align 7
 	.set push
 	.set noreorder
 	LEAF(octeon_mult_save)
-	dmfc0	k0, $9,7	/* CvmCtl register. */
-	bbit1	k0, 27, 1f	/* Skip CvmCtl[NOMUL] */
+	jr	ra
 	 nop
+	.space 30 * 4, 0
+octeon_mult_save_end:
+	EXPORT(octeon_mult_save_end)
+	END(octeon_mult_save)
 
-	/* Save the multiplier state */
+	LEAF(octeon_mult_save2)
+	/* Save the multiplier state OCTEON II and earlier*/
 	v3mulu	k0, $0, $0
 	v3mulu	k1, $0, $0
 	sd	k0, PT_MTP(sp)	      /* PT_MTP	   has P0 */
@@ -476,44 +471,107 @@
 	sd	k0, PT_MPL+8(sp)      /* PT_MPL+8  has MPL1 */
 	jr	ra
 	 sd	k1, PT_MPL+16(sp)     /* PT_MPL+16 has MPL2 */
+octeon_mult_save2_end:
+	EXPORT(octeon_mult_save2_end)
+	END(octeon_mult_save2)
 
-1:	/* Resume here if CvmCtl[NOMUL] */
+	LEAF(octeon_mult_save3)
+	/* Save the multiplier state OCTEON III */
+	v3mulu	$10, $0, $0		/* read P0 */
+	v3mulu	$11, $0, $0		/* read P1 */
+	v3mulu	$12, $0, $0		/* read P2 */
+	sd	$10, PT_MTP+(0*8)(sp)	/* store P0 */
+	v3mulu	$10, $0, $0		/* read P3 */
+	sd	$11, PT_MTP+(1*8)(sp)	/*  store P1 */
+	v3mulu	$11, $0, $0		/* read P4 */
+	sd	$12, PT_MTP+(2*8)(sp)	/* store P2 */
+	ori	$13, $0, 1
+	v3mulu	$12, $0, $0		/* read P5 */
+	sd	$10, PT_MTP+(3*8)(sp)	/* store P3 */
+	v3mulu	$13, $13, $0		/* P4-P0 = MPL5-MPL1, $13 = MPL0 */
+	sd	$11, PT_MTP+(4*8)(sp)	/* store P4 */
+	v3mulu	$10, $0, $0		/* read MPL1 */
+	sd	$12, PT_MTP+(5*8)(sp)	/* store P5 */
+	v3mulu	$11, $0, $0		/* read MPL2 */
+	sd	$13, PT_MPL+(0*8)(sp)	/* store MPL0 */
+	v3mulu	$12, $0, $0		/* read MPL3 */
+	sd	$10, PT_MPL+(1*8)(sp)	/* store MPL1 */
+	v3mulu	$10, $0, $0		/* read MPL4 */
+	sd	$11, PT_MPL+(2*8)(sp)	/* store MPL2 */
+	v3mulu	$11, $0, $0		/* read MPL5 */
+	sd	$12, PT_MPL+(3*8)(sp)	/* store MPL3 */
+	sd	$10, PT_MPL+(4*8)(sp)	/* store MPL4 */
 	jr	ra
-	END(octeon_mult_save)
+	 sd	$11, PT_MPL+(5*8)(sp)	/* store MPL5 */
+octeon_mult_save3_end:
+	EXPORT(octeon_mult_save3_end)
+	END(octeon_mult_save3)
 	.set pop
 
 /*
  * void octeon_mult_restore()
  * sp is assumed to point to a struct pt_regs
  *
- * NOTE: This is called in RESTORE_SOME in stackframe.h.
+ * NOTE: This is called in RESTORE_TEMP in stackframe.h.
  */
-	.align	7
+	.p2align 7
 	.set push
 	.set noreorder
 	LEAF(octeon_mult_restore)
-	dmfc0	k1, $9,7		/* CvmCtl register. */
-	ld	v0, PT_MPL(sp)		/* MPL0 */
-	ld	v1, PT_MPL+8(sp)	/* MPL1 */
-	ld	k0, PT_MPL+16(sp)	/* MPL2 */
-	bbit1	k1, 27, 1f		/* Skip CvmCtl[NOMUL] */
-	/* Normally falls through, so no time wasted here */
-	nop
-
-	/* Restore the multiplier state */
-	ld	k1, PT_MTP+16(sp)	/* P2 */
-	MTM0	v0			/* MPL0 */
-	ld	v0, PT_MTP+8(sp)	/* P1 */
-	MTM1	v1			/* MPL1 */
-	ld	v1, PT_MTP(sp)		/* P0 */
-	MTM2	k0			/* MPL2 */
-	MTP2	k1			/* P2 */
-	MTP1	v0			/* P1 */
-	jr	ra
-	 MTP0	v1			/* P0 */
-
-1:	/* Resume here if CvmCtl[NOMUL] */
 	jr	ra
 	 nop
+	.space 30 * 4, 0
+octeon_mult_restore_end:
+	EXPORT(octeon_mult_restore_end)
 	END(octeon_mult_restore)
+
+	LEAF(octeon_mult_restore2)
+	ld	v0, PT_MPL(sp)        	/* MPL0 */
+	ld	v1, PT_MPL+8(sp)      	/* MPL1 */
+	ld	k0, PT_MPL+16(sp)     	/* MPL2 */
+	/* Restore the multiplier state */
+	ld	k1, PT_MTP+16(sp)     	/* P2 */
+	mtm0	v0			/* MPL0 */
+	ld	v0, PT_MTP+8(sp)	/* P1 */
+	mtm1	v1			/* MPL1 */
+	ld	v1, PT_MTP(sp)   	/* P0 */
+	mtm2	k0			/* MPL2 */
+	mtp2	k1			/* P2 */
+	mtp1	v0			/* P1 */
+	jr	ra
+	 mtp0	v1			/* P0 */
+octeon_mult_restore2_end:
+	EXPORT(octeon_mult_restore2_end)
+	END(octeon_mult_restore2)
+
+	LEAF(octeon_mult_restore3)
+	ld	$12, PT_MPL+(0*8)(sp)	/* read MPL0 */
+	ld	$13, PT_MPL+(3*8)(sp)	/* read MPL3 */
+	ld	$10, PT_MPL+(1*8)(sp)	/* read MPL1 */
+	ld	$11, PT_MPL+(4*8)(sp)	/* read MPL4 */
+	.word	0x718d0008
+	/* mtm0	$12, $13		   restore MPL0 and MPL3 */
+	ld	$12, PT_MPL+(2*8)(sp)	/* read MPL2 */
+	.word	0x714b000c
+	/* mtm1	$10, $11		   restore MPL1 and MPL4 */
+	ld	$13, PT_MPL+(5*8)(sp)	/* read MPL5 */
+	ld	$10, PT_MTP+(0*8)(sp)	/* read P0 */
+	ld	$11, PT_MTP+(3*8)(sp)	/* read P3 */
+	.word	0x718d000d
+	/* mtm2	$12, $13		   restore MPL2 and MPL5 */
+	ld	$12, PT_MTP+(1*8)(sp)	/* read P1 */
+	.word	0x714b0009
+	/* mtp0	$10, $11		   restore P0 and P3 */
+	ld	$13, PT_MTP+(4*8)(sp)	/* read P4 */
+	ld	$10, PT_MTP+(2*8)(sp)	/* read P2 */
+	ld	$11, PT_MTP+(5*8)(sp)	/* read P5 */
+	.word	0x718d000a
+	/* mtp1	$12, $13		   restore P1 and P4 */
+	jr	ra
+	.word	0x714b000b
+	/* mtp2	$10, $11		   restore P2 and P5 */
+
+octeon_mult_restore3_end:
+	EXPORT(octeon_mult_restore3_end)
+	END(octeon_mult_restore3)
 	.set pop
diff --git a/arch/mips/kernel/proc.c b/arch/mips/kernel/proc.c
index 097fc8d..130af7d 100644
--- a/arch/mips/kernel/proc.c
+++ b/arch/mips/kernel/proc.c
@@ -82,7 +82,9 @@
 		seq_printf(m, "]\n");
 	}
 
-	seq_printf(m, "isa\t\t\t: mips1");
+	seq_printf(m, "isa\t\t\t:"); 
+	if (cpu_has_mips_r1)
+		seq_printf(m, " mips1");
 	if (cpu_has_mips_2)
 		seq_printf(m, "%s", " mips2");
 	if (cpu_has_mips_3)
@@ -95,10 +97,14 @@
 		seq_printf(m, "%s", " mips32r1");
 	if (cpu_has_mips32r2)
 		seq_printf(m, "%s", " mips32r2");
+	if (cpu_has_mips32r6)
+		seq_printf(m, "%s", " mips32r6");
 	if (cpu_has_mips64r1)
 		seq_printf(m, "%s", " mips64r1");
 	if (cpu_has_mips64r2)
 		seq_printf(m, "%s", " mips64r2");
+	if (cpu_has_mips64r6)
+		seq_printf(m, "%s", " mips64r6");
 	seq_printf(m, "\n");
 
 	seq_printf(m, "ASEs implemented\t:");
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index 85bff5d..bf85cc1 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -25,6 +25,7 @@
 #include <linux/completion.h>
 #include <linux/kallsyms.h>
 #include <linux/random.h>
+#include <linux/prctl.h>
 
 #include <asm/asm.h>
 #include <asm/bootinfo.h>
@@ -562,3 +563,98 @@
 {
 	smp_call_function(arch_dump_stack, NULL, 1);
 }
+
+int mips_get_process_fp_mode(struct task_struct *task)
+{
+	int value = 0;
+
+	if (!test_tsk_thread_flag(task, TIF_32BIT_FPREGS))
+		value |= PR_FP_MODE_FR;
+	if (test_tsk_thread_flag(task, TIF_HYBRID_FPREGS))
+		value |= PR_FP_MODE_FRE;
+
+	return value;
+}
+
+int mips_set_process_fp_mode(struct task_struct *task, unsigned int value)
+{
+	const unsigned int known_bits = PR_FP_MODE_FR | PR_FP_MODE_FRE;
+	unsigned long switch_count;
+	struct task_struct *t;
+
+	/* Check the value is valid */
+	if (value & ~known_bits)
+		return -EOPNOTSUPP;
+
+	/* Avoid inadvertently triggering emulation */
+	if ((value & PR_FP_MODE_FR) && cpu_has_fpu &&
+	    !(current_cpu_data.fpu_id & MIPS_FPIR_F64))
+		return -EOPNOTSUPP;
+	if ((value & PR_FP_MODE_FRE) && cpu_has_fpu && !cpu_has_fre)
+		return -EOPNOTSUPP;
+
+	/* FR = 0 not supported in MIPS R6 */
+	if (!(value & PR_FP_MODE_FR) && cpu_has_fpu && cpu_has_mips_r6)
+		return -EOPNOTSUPP;
+
+	/* Save FP & vector context, then disable FPU & MSA */
+	if (task->signal == current->signal)
+		lose_fpu(1);
+
+	/* Prevent any threads from obtaining live FP context */
+	atomic_set(&task->mm->context.fp_mode_switching, 1);
+	smp_mb__after_atomic();
+
+	/*
+	 * If there are multiple online CPUs then wait until all threads whose
+	 * FP mode is about to change have been context switched. This approach
+	 * allows us to only worry about whether an FP mode switch is in
+	 * progress when FP is first used in a tasks time slice. Pretty much all
+	 * of the mode switch overhead can thus be confined to cases where mode
+	 * switches are actually occuring. That is, to here. However for the
+	 * thread performing the mode switch it may take a while...
+	 */
+	if (num_online_cpus() > 1) {
+		spin_lock_irq(&task->sighand->siglock);
+
+		for_each_thread(task, t) {
+			if (t == current)
+				continue;
+
+			switch_count = t->nvcsw + t->nivcsw;
+
+			do {
+				spin_unlock_irq(&task->sighand->siglock);
+				cond_resched();
+				spin_lock_irq(&task->sighand->siglock);
+			} while ((t->nvcsw + t->nivcsw) == switch_count);
+		}
+
+		spin_unlock_irq(&task->sighand->siglock);
+	}
+
+	/*
+	 * There are now no threads of the process with live FP context, so it
+	 * is safe to proceed with the FP mode switch.
+	 */
+	for_each_thread(task, t) {
+		/* Update desired FP register width */
+		if (value & PR_FP_MODE_FR) {
+			clear_tsk_thread_flag(t, TIF_32BIT_FPREGS);
+		} else {
+			set_tsk_thread_flag(t, TIF_32BIT_FPREGS);
+			clear_tsk_thread_flag(t, TIF_MSA_CTX_LIVE);
+		}
+
+		/* Update desired FP single layout */
+		if (value & PR_FP_MODE_FRE)
+			set_tsk_thread_flag(t, TIF_HYBRID_FPREGS);
+		else
+			clear_tsk_thread_flag(t, TIF_HYBRID_FPREGS);
+	}
+
+	/* Allow threads to use FP again */
+	atomic_set(&task->mm->context.fp_mode_switching, 0);
+
+	return 0;
+}
diff --git a/arch/mips/kernel/r4k_fpu.S b/arch/mips/kernel/r4k_fpu.S
index 6c160c6..676c503 100644
--- a/arch/mips/kernel/r4k_fpu.S
+++ b/arch/mips/kernel/r4k_fpu.S
@@ -34,7 +34,7 @@
 	.endm
 
 	.set	noreorder
-	.set	arch=r4000
+	.set	MIPS_ISA_ARCH_LEVEL_RAW
 
 LEAF(_save_fp_context)
 	.set	push
@@ -42,7 +42,8 @@
 	cfc1	t1, fcr31
 	.set	pop
 
-#if defined(CONFIG_64BIT) || defined(CONFIG_CPU_MIPS32_R2)
+#if defined(CONFIG_64BIT) || defined(CONFIG_CPU_MIPS32_R2) || \
+		defined(CONFIG_CPU_MIPS32_R6)
 	.set	push
 	SET_HARDFLOAT
 #ifdef CONFIG_CPU_MIPS32_R2
@@ -105,10 +106,12 @@
 	SET_HARDFLOAT
 	cfc1	t1, fcr31
 
+#ifndef CONFIG_CPU_MIPS64_R6
 	mfc0	t0, CP0_STATUS
 	sll	t0, t0, 5
 	bgez	t0, 1f			# skip storing odd if FR=0
 	 nop
+#endif
 
 	/* Store the 16 odd double precision registers */
 	EX      sdc1 $f1, SC32_FPREGS+8(a0)
@@ -163,7 +166,8 @@
 LEAF(_restore_fp_context)
 	EX	lw t1, SC_FPC_CSR(a0)
 
-#if defined(CONFIG_64BIT) || defined(CONFIG_CPU_MIPS32_R2)
+#if defined(CONFIG_64BIT) || defined(CONFIG_CPU_MIPS32_R2)  || \
+		defined(CONFIG_CPU_MIPS32_R6)
 	.set	push
 	SET_HARDFLOAT
 #ifdef CONFIG_CPU_MIPS32_R2
@@ -223,10 +227,12 @@
 	SET_HARDFLOAT
 	EX	lw t1, SC32_FPC_CSR(a0)
 
+#ifndef CONFIG_CPU_MIPS64_R6
 	mfc0	t0, CP0_STATUS
 	sll	t0, t0, 5
 	bgez	t0, 1f			# skip loading odd if FR=0
 	 nop
+#endif
 
 	EX      ldc1 $f1, SC32_FPREGS+8(a0)
 	EX      ldc1 $f3, SC32_FPREGS+24(a0)
diff --git a/arch/mips/kernel/r4k_switch.S b/arch/mips/kernel/r4k_switch.S
index 64591e6..3b1a36f 100644
--- a/arch/mips/kernel/r4k_switch.S
+++ b/arch/mips/kernel/r4k_switch.S
@@ -115,7 +115,8 @@
  * Save a thread's fp context.
  */
 LEAF(_save_fp)
-#if defined(CONFIG_64BIT) || defined(CONFIG_CPU_MIPS32_R2)
+#if defined(CONFIG_64BIT) || defined(CONFIG_CPU_MIPS32_R2) || \
+		defined(CONFIG_CPU_MIPS32_R6)
 	mfc0	t0, CP0_STATUS
 #endif
 	fpu_save_double a0 t0 t1		# clobbers t1
@@ -126,7 +127,8 @@
  * Restore a thread's fp context.
  */
 LEAF(_restore_fp)
-#if defined(CONFIG_64BIT) || defined(CONFIG_CPU_MIPS32_R2)
+#if defined(CONFIG_64BIT) || defined(CONFIG_CPU_MIPS32_R2) || \
+		defined(CONFIG_CPU_MIPS32_R6)
 	mfc0	t0, CP0_STATUS
 #endif
 	fpu_restore_double a0 t0 t1		# clobbers t1
@@ -240,9 +242,9 @@
 	mtc1	t1, $f30
 	mtc1	t1, $f31
 
-#ifdef CONFIG_CPU_MIPS32_R2
+#if defined(CONFIG_CPU_MIPS32_R2) || defined(CONFIG_CPU_MIPS32_R6)
 	.set    push
-	.set    mips32r2
+	.set    MIPS_ISA_LEVEL_RAW
 	.set	fp=64
 	sll     t0, t0, 5			# is Status.FR set?
 	bgez    t0, 1f				# no: skip setting upper 32b
@@ -280,9 +282,9 @@
 	mthc1   t1, $f30
 	mthc1   t1, $f31
 1:	.set    pop
-#endif /* CONFIG_CPU_MIPS32_R2 */
+#endif /* CONFIG_CPU_MIPS32_R2 || CONFIG_CPU_MIPS32_R6 */
 #else
-	.set	arch=r4000
+	.set	MIPS_ISA_ARCH_LEVEL_RAW
 	dmtc1	t1, $f0
 	dmtc1	t1, $f2
 	dmtc1	t1, $f4
diff --git a/arch/mips/kernel/spram.c b/arch/mips/kernel/spram.c
index 67f2495..d1168d7 100644
--- a/arch/mips/kernel/spram.c
+++ b/arch/mips/kernel/spram.c
@@ -208,6 +208,7 @@
 	case CPU_INTERAPTIV:
 	case CPU_PROAPTIV:
 	case CPU_P5600:
+	case CPU_QEMU_GENERIC:
 		config0 = read_c0_config();
 		/* FIXME: addresses are Malta specific */
 		if (config0 & (1<<24)) {
diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c
index 604b558..53a7ef9 100644
--- a/arch/mips/kernel/syscall.c
+++ b/arch/mips/kernel/syscall.c
@@ -136,7 +136,7 @@
 		: "memory");
 	} else if (cpu_has_llsc) {
 		__asm__ __volatile__ (
-		"	.set	arch=r4000				\n"
+		"	.set	"MIPS_ISA_ARCH_LEVEL"			\n"
 		"	li	%[err], 0				\n"
 		"1:	ll	%[old], (%[addr])			\n"
 		"	move	%[tmp], %[new]				\n"
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index c3b41e2..33984c0 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -46,6 +46,7 @@
 #include <asm/fpu.h>
 #include <asm/fpu_emulator.h>
 #include <asm/idle.h>
+#include <asm/mips-r2-to-r6-emul.h>
 #include <asm/mipsregs.h>
 #include <asm/mipsmtregs.h>
 #include <asm/module.h>
@@ -837,7 +838,7 @@
 	exception_exit(prev_state);
 }
 
-static void do_trap_or_bp(struct pt_regs *regs, unsigned int code,
+void do_trap_or_bp(struct pt_regs *regs, unsigned int code,
 	const char *str)
 {
 	siginfo_t info;
@@ -1027,7 +1028,34 @@
 	unsigned int opcode = 0;
 	int status = -1;
 
+	/*
+	 * Avoid any kernel code. Just emulate the R2 instruction
+	 * as quickly as possible.
+	 */
+	if (mipsr2_emulation && cpu_has_mips_r6 &&
+	    likely(user_mode(regs))) {
+		if (likely(get_user(opcode, epc) >= 0)) {
+			status = mipsr2_decoder(regs, opcode);
+			switch (status) {
+			case 0:
+			case SIGEMT:
+				task_thread_info(current)->r2_emul_return = 1;
+				return;
+			case SIGILL:
+				goto no_r2_instr;
+			default:
+				process_fpemu_return(status,
+						     &current->thread.cp0_baduaddr);
+				task_thread_info(current)->r2_emul_return = 1;
+				return;
+			}
+		}
+	}
+
+no_r2_instr:
+
 	prev_state = exception_enter();
+
 	if (notify_die(DIE_RI, "RI Fault", regs, 0, regs_to_trapnr(regs),
 		       SIGILL) == NOTIFY_STOP)
 		goto out;
@@ -1134,10 +1162,29 @@
 	return NOTIFY_OK;
 }
 
+static int wait_on_fp_mode_switch(atomic_t *p)
+{
+	/*
+	 * The FP mode for this task is currently being switched. That may
+	 * involve modifications to the format of this tasks FP context which
+	 * make it unsafe to proceed with execution for the moment. Instead,
+	 * schedule some other task.
+	 */
+	schedule();
+	return 0;
+}
+
 static int enable_restore_fp_context(int msa)
 {
 	int err, was_fpu_owner, prior_msa;
 
+	/*
+	 * If an FP mode switch is currently underway, wait for it to
+	 * complete before proceeding.
+	 */
+	wait_on_atomic_t(&current->mm->context.fp_mode_switching,
+			 wait_on_fp_mode_switch, TASK_KILLABLE);
+
 	if (!used_math()) {
 		/* First time FP context user. */
 		preempt_disable();
@@ -1541,6 +1588,7 @@
 	case CPU_INTERAPTIV:
 	case CPU_PROAPTIV:
 	case CPU_P5600:
+	case CPU_QEMU_GENERIC:
 		{
 #define ERRCTL_PE	0x80000000
 #define ERRCTL_L2P	0x00800000
@@ -1630,7 +1678,7 @@
 	printk("Decoded c0_cacheerr: %s cache fault in %s reference.\n",
 	       reg_val & (1<<30) ? "secondary" : "primary",
 	       reg_val & (1<<31) ? "data" : "insn");
-	if (cpu_has_mips_r2 &&
+	if ((cpu_has_mips_r2_r6) &&
 	    ((current_cpu_data.processor_id & 0xff0000) == PRID_COMP_MIPS)) {
 		pr_err("Error bits: %s%s%s%s%s%s%s%s\n",
 			reg_val & (1<<29) ? "ED " : "",
@@ -1670,7 +1718,7 @@
 	unsigned int reg_val;
 
 	/* For the moment, report the problem and hang. */
-	if (cpu_has_mips_r2 &&
+	if ((cpu_has_mips_r2_r6) &&
 	    ((current_cpu_data.processor_id & 0xff0000) == PRID_COMP_MIPS)) {
 		pr_err("FTLB error exception, cp0_ecc=0x%08x:\n",
 		       read_c0_ecc());
@@ -1959,7 +2007,7 @@
 {
 	unsigned int hwrena = cpu_hwrena_impl_bits;
 
-	if (cpu_has_mips_r2)
+	if (cpu_has_mips_r2_r6)
 		hwrena |= 0x0000000f;
 
 	if (!noulri && cpu_has_userlocal)
@@ -2003,7 +2051,7 @@
 	 *  o read IntCtl.IPTI to determine the timer interrupt
 	 *  o read IntCtl.IPPCI to determine the performance counter interrupt
 	 */
-	if (cpu_has_mips_r2) {
+	if (cpu_has_mips_r2_r6) {
 		cp0_compare_irq_shift = CAUSEB_TI - CAUSEB_IP;
 		cp0_compare_irq = (read_c0_intctl() >> INTCTLB_IPTI) & 7;
 		cp0_perfcount_irq = (read_c0_intctl() >> INTCTLB_IPPCI) & 7;
@@ -2094,7 +2142,7 @@
 #else
         ebase = CKSEG0;
 #endif
-		if (cpu_has_mips_r2)
+		if (cpu_has_mips_r2_r6)
 			ebase += (read_c0_ebase() & 0x3ffff000);
 	}
 
diff --git a/arch/mips/kernel/unaligned.c b/arch/mips/kernel/unaligned.c
index e11906d..bbb6969 100644
--- a/arch/mips/kernel/unaligned.c
+++ b/arch/mips/kernel/unaligned.c
@@ -129,6 +129,7 @@
 			: "=&r" (value), "=r" (res)         \
 			: "r" (addr), "i" (-EFAULT));
 
+#ifndef CONFIG_CPU_MIPSR6
 #define     LoadW(addr, value, res)   \
 		__asm__ __volatile__ (                      \
 			"1:\t"user_lwl("%0", "(%2)")"\n"    \
@@ -146,6 +147,39 @@
 			".previous"                         \
 			: "=&r" (value), "=r" (res)         \
 			: "r" (addr), "i" (-EFAULT));
+#else
+/* MIPSR6 has no lwl instruction */
+#define     LoadW(addr, value, res) \
+		__asm__ __volatile__ (			    \
+			".set\tpush\n"			    \
+			".set\tnoat\n\t"		    \
+			"1:"user_lb("%0", "0(%2)")"\n\t"    \
+			"2:"user_lbu("$1", "1(%2)")"\n\t"   \
+			"sll\t%0, 0x8\n\t"		    \
+			"or\t%0, $1\n\t"		    \
+			"3:"user_lbu("$1", "2(%2)")"\n\t"   \
+			"sll\t%0, 0x8\n\t"		    \
+			"or\t%0, $1\n\t"		    \
+			"4:"user_lbu("$1", "3(%2)")"\n\t"   \
+			"sll\t%0, 0x8\n\t"		    \
+			"or\t%0, $1\n\t"		    \
+			"li\t%1, 0\n"			    \
+			".set\tpop\n"			    \
+			"10:\n\t"			    \
+			".insn\n\t"			    \
+			".section\t.fixup,\"ax\"\n\t"	    \
+			"11:\tli\t%1, %3\n\t"		    \
+			"j\t10b\n\t"			    \
+			".previous\n\t"			    \
+			".section\t__ex_table,\"a\"\n\t"    \
+			STR(PTR)"\t1b, 11b\n\t"		    \
+			STR(PTR)"\t2b, 11b\n\t"		    \
+			STR(PTR)"\t3b, 11b\n\t"		    \
+			STR(PTR)"\t4b, 11b\n\t"		    \
+			".previous"			    \
+			: "=&r" (value), "=r" (res)	    \
+			: "r" (addr), "i" (-EFAULT));
+#endif /* CONFIG_CPU_MIPSR6 */
 
 #define     LoadHWU(addr, value, res) \
 		__asm__ __volatile__ (                      \
@@ -169,6 +203,7 @@
 			: "=&r" (value), "=r" (res)         \
 			: "r" (addr), "i" (-EFAULT));
 
+#ifndef CONFIG_CPU_MIPSR6
 #define     LoadWU(addr, value, res)  \
 		__asm__ __volatile__ (                      \
 			"1:\t"user_lwl("%0", "(%2)")"\n"    \
@@ -206,6 +241,87 @@
 			".previous"                         \
 			: "=&r" (value), "=r" (res)         \
 			: "r" (addr), "i" (-EFAULT));
+#else
+/* MIPSR6 has not lwl and ldl instructions */
+#define	    LoadWU(addr, value, res) \
+		__asm__ __volatile__ (			    \
+			".set\tpush\n\t"		    \
+			".set\tnoat\n\t"		    \
+			"1:"user_lbu("%0", "0(%2)")"\n\t"   \
+			"2:"user_lbu("$1", "1(%2)")"\n\t"   \
+			"sll\t%0, 0x8\n\t"		    \
+			"or\t%0, $1\n\t"		    \
+			"3:"user_lbu("$1", "2(%2)")"\n\t"   \
+			"sll\t%0, 0x8\n\t"		    \
+			"or\t%0, $1\n\t"		    \
+			"4:"user_lbu("$1", "3(%2)")"\n\t"   \
+			"sll\t%0, 0x8\n\t"		    \
+			"or\t%0, $1\n\t"		    \
+			"li\t%1, 0\n"			    \
+			".set\tpop\n"			    \
+			"10:\n\t"			    \
+			".insn\n\t"			    \
+			".section\t.fixup,\"ax\"\n\t"	    \
+			"11:\tli\t%1, %3\n\t"		    \
+			"j\t10b\n\t"			    \
+			".previous\n\t"			    \
+			".section\t__ex_table,\"a\"\n\t"    \
+			STR(PTR)"\t1b, 11b\n\t"		    \
+			STR(PTR)"\t2b, 11b\n\t"		    \
+			STR(PTR)"\t3b, 11b\n\t"		    \
+			STR(PTR)"\t4b, 11b\n\t"		    \
+			".previous"			    \
+			: "=&r" (value), "=r" (res)	    \
+			: "r" (addr), "i" (-EFAULT));
+
+#define     LoadDW(addr, value, res)  \
+		__asm__ __volatile__ (			    \
+			".set\tpush\n\t"		    \
+			".set\tnoat\n\t"		    \
+			"1:lb\t%0, 0(%2)\n\t"    	    \
+			"2:lbu\t $1, 1(%2)\n\t"   	    \
+			"dsll\t%0, 0x8\n\t"		    \
+			"or\t%0, $1\n\t"		    \
+			"3:lbu\t$1, 2(%2)\n\t"   	    \
+			"dsll\t%0, 0x8\n\t"		    \
+			"or\t%0, $1\n\t"		    \
+			"4:lbu\t$1, 3(%2)\n\t"   	    \
+			"dsll\t%0, 0x8\n\t"		    \
+			"or\t%0, $1\n\t"		    \
+			"5:lbu\t$1, 4(%2)\n\t"   	    \
+			"dsll\t%0, 0x8\n\t"		    \
+			"or\t%0, $1\n\t"		    \
+			"6:lbu\t$1, 5(%2)\n\t"   	    \
+			"dsll\t%0, 0x8\n\t"		    \
+			"or\t%0, $1\n\t"		    \
+			"7:lbu\t$1, 6(%2)\n\t"   	    \
+			"dsll\t%0, 0x8\n\t"		    \
+			"or\t%0, $1\n\t"		    \
+			"8:lbu\t$1, 7(%2)\n\t"   	    \
+			"dsll\t%0, 0x8\n\t"		    \
+			"or\t%0, $1\n\t"		    \
+			"li\t%1, 0\n"			    \
+			".set\tpop\n\t"			    \
+			"10:\n\t"			    \
+			".insn\n\t"			    \
+			".section\t.fixup,\"ax\"\n\t"	    \
+			"11:\tli\t%1, %3\n\t"		    \
+			"j\t10b\n\t"			    \
+			".previous\n\t"			    \
+			".section\t__ex_table,\"a\"\n\t"    \
+			STR(PTR)"\t1b, 11b\n\t"		    \
+			STR(PTR)"\t2b, 11b\n\t"		    \
+			STR(PTR)"\t3b, 11b\n\t"		    \
+			STR(PTR)"\t4b, 11b\n\t"		    \
+			STR(PTR)"\t5b, 11b\n\t"		    \
+			STR(PTR)"\t6b, 11b\n\t"		    \
+			STR(PTR)"\t7b, 11b\n\t"		    \
+			STR(PTR)"\t8b, 11b\n\t"		    \
+			".previous"			    \
+			: "=&r" (value), "=r" (res)	    \
+			: "r" (addr), "i" (-EFAULT));
+#endif /* CONFIG_CPU_MIPSR6 */
+
 
 #define     StoreHW(addr, value, res) \
 		__asm__ __volatile__ (                      \
@@ -228,6 +344,7 @@
 			: "=r" (res)                        \
 			: "r" (value), "r" (addr), "i" (-EFAULT));
 
+#ifndef CONFIG_CPU_MIPSR6
 #define     StoreW(addr, value, res)  \
 		__asm__ __volatile__ (                      \
 			"1:\t"user_swl("%1", "(%2)")"\n"    \
@@ -263,9 +380,82 @@
 			".previous"                         \
 		: "=r" (res)                                \
 		: "r" (value), "r" (addr), "i" (-EFAULT));
-#endif
+#else
+/* MIPSR6 has no swl and sdl instructions */
+#define     StoreW(addr, value, res)  \
+		__asm__ __volatile__ (                      \
+			".set\tpush\n\t"		    \
+			".set\tnoat\n\t"		    \
+			"1:"user_sb("%1", "3(%2)")"\n\t"    \
+			"srl\t$1, %1, 0x8\n\t"		    \
+			"2:"user_sb("$1", "2(%2)")"\n\t"    \
+			"srl\t$1, $1,  0x8\n\t"		    \
+			"3:"user_sb("$1", "1(%2)")"\n\t"    \
+			"srl\t$1, $1, 0x8\n\t"		    \
+			"4:"user_sb("$1", "0(%2)")"\n\t"    \
+			".set\tpop\n\t"			    \
+			"li\t%0, 0\n"			    \
+			"10:\n\t"			    \
+			".insn\n\t"			    \
+			".section\t.fixup,\"ax\"\n\t"	    \
+			"11:\tli\t%0, %3\n\t"		    \
+			"j\t10b\n\t"			    \
+			".previous\n\t"			    \
+			".section\t__ex_table,\"a\"\n\t"    \
+			STR(PTR)"\t1b, 11b\n\t"		    \
+			STR(PTR)"\t2b, 11b\n\t"		    \
+			STR(PTR)"\t3b, 11b\n\t"		    \
+			STR(PTR)"\t4b, 11b\n\t"		    \
+			".previous"			    \
+		: "=&r" (res)			    	    \
+		: "r" (value), "r" (addr), "i" (-EFAULT)    \
+		: "memory");
 
-#ifdef __LITTLE_ENDIAN
+#define     StoreDW(addr, value, res) \
+		__asm__ __volatile__ (                      \
+			".set\tpush\n\t"		    \
+			".set\tnoat\n\t"		    \
+			"1:sb\t%1, 7(%2)\n\t"    	    \
+			"dsrl\t$1, %1, 0x8\n\t"		    \
+			"2:sb\t$1, 6(%2)\n\t"    	    \
+			"dsrl\t$1, $1, 0x8\n\t"		    \
+			"3:sb\t$1, 5(%2)\n\t"    	    \
+			"dsrl\t$1, $1, 0x8\n\t"		    \
+			"4:sb\t$1, 4(%2)\n\t"    	    \
+			"dsrl\t$1, $1, 0x8\n\t"		    \
+			"5:sb\t$1, 3(%2)\n\t"    	    \
+			"dsrl\t$1, $1, 0x8\n\t"		    \
+			"6:sb\t$1, 2(%2)\n\t"    	    \
+			"dsrl\t$1, $1, 0x8\n\t"		    \
+			"7:sb\t$1, 1(%2)\n\t"    	    \
+			"dsrl\t$1, $1, 0x8\n\t"		    \
+			"8:sb\t$1, 0(%2)\n\t"    	    \
+			"dsrl\t$1, $1, 0x8\n\t"		    \
+			".set\tpop\n\t"			    \
+			"li\t%0, 0\n"			    \
+			"10:\n\t"			    \
+			".insn\n\t"			    \
+			".section\t.fixup,\"ax\"\n\t"	    \
+			"11:\tli\t%0, %3\n\t"		    \
+			"j\t10b\n\t"			    \
+			".previous\n\t"			    \
+			".section\t__ex_table,\"a\"\n\t"    \
+			STR(PTR)"\t1b, 11b\n\t"		    \
+			STR(PTR)"\t2b, 11b\n\t"		    \
+			STR(PTR)"\t3b, 11b\n\t"		    \
+			STR(PTR)"\t4b, 11b\n\t"		    \
+			STR(PTR)"\t5b, 11b\n\t"		    \
+			STR(PTR)"\t6b, 11b\n\t"		    \
+			STR(PTR)"\t7b, 11b\n\t"		    \
+			STR(PTR)"\t8b, 11b\n\t"		    \
+			".previous"			    \
+		: "=&r" (res)			    	    \
+		: "r" (value), "r" (addr), "i" (-EFAULT)    \
+		: "memory");
+#endif /* CONFIG_CPU_MIPSR6 */
+
+#else /* __BIG_ENDIAN */
+
 #define     LoadHW(addr, value, res)  \
 		__asm__ __volatile__ (".set\tnoat\n"        \
 			"1:\t"user_lb("%0", "1(%2)")"\n"    \
@@ -286,6 +476,7 @@
 			: "=&r" (value), "=r" (res)         \
 			: "r" (addr), "i" (-EFAULT));
 
+#ifndef CONFIG_CPU_MIPSR6
 #define     LoadW(addr, value, res)   \
 		__asm__ __volatile__ (                      \
 			"1:\t"user_lwl("%0", "3(%2)")"\n"   \
@@ -303,6 +494,40 @@
 			".previous"                         \
 			: "=&r" (value), "=r" (res)         \
 			: "r" (addr), "i" (-EFAULT));
+#else
+/* MIPSR6 has no lwl instruction */
+#define     LoadW(addr, value, res) \
+		__asm__ __volatile__ (			    \
+			".set\tpush\n"			    \
+			".set\tnoat\n\t"		    \
+			"1:"user_lb("%0", "3(%2)")"\n\t"    \
+			"2:"user_lbu("$1", "2(%2)")"\n\t"   \
+			"sll\t%0, 0x8\n\t"		    \
+			"or\t%0, $1\n\t"		    \
+			"3:"user_lbu("$1", "1(%2)")"\n\t"   \
+			"sll\t%0, 0x8\n\t"		    \
+			"or\t%0, $1\n\t"		    \
+			"4:"user_lbu("$1", "0(%2)")"\n\t"   \
+			"sll\t%0, 0x8\n\t"		    \
+			"or\t%0, $1\n\t"		    \
+			"li\t%1, 0\n"			    \
+			".set\tpop\n"			    \
+			"10:\n\t"			    \
+			".insn\n\t"			    \
+			".section\t.fixup,\"ax\"\n\t"	    \
+			"11:\tli\t%1, %3\n\t"		    \
+			"j\t10b\n\t"			    \
+			".previous\n\t"			    \
+			".section\t__ex_table,\"a\"\n\t"    \
+			STR(PTR)"\t1b, 11b\n\t"		    \
+			STR(PTR)"\t2b, 11b\n\t"		    \
+			STR(PTR)"\t3b, 11b\n\t"		    \
+			STR(PTR)"\t4b, 11b\n\t"		    \
+			".previous"			    \
+			: "=&r" (value), "=r" (res)	    \
+			: "r" (addr), "i" (-EFAULT));
+#endif /* CONFIG_CPU_MIPSR6 */
+
 
 #define     LoadHWU(addr, value, res) \
 		__asm__ __volatile__ (                      \
@@ -326,6 +551,7 @@
 			: "=&r" (value), "=r" (res)         \
 			: "r" (addr), "i" (-EFAULT));
 
+#ifndef CONFIG_CPU_MIPSR6
 #define     LoadWU(addr, value, res)  \
 		__asm__ __volatile__ (                      \
 			"1:\t"user_lwl("%0", "3(%2)")"\n"   \
@@ -363,6 +589,86 @@
 			".previous"                         \
 			: "=&r" (value), "=r" (res)         \
 			: "r" (addr), "i" (-EFAULT));
+#else
+/* MIPSR6 has not lwl and ldl instructions */
+#define	    LoadWU(addr, value, res) \
+		__asm__ __volatile__ (			    \
+			".set\tpush\n\t"		    \
+			".set\tnoat\n\t"		    \
+			"1:"user_lbu("%0", "3(%2)")"\n\t"   \
+			"2:"user_lbu("$1", "2(%2)")"\n\t"   \
+			"sll\t%0, 0x8\n\t"		    \
+			"or\t%0, $1\n\t"		    \
+			"3:"user_lbu("$1", "1(%2)")"\n\t"   \
+			"sll\t%0, 0x8\n\t"		    \
+			"or\t%0, $1\n\t"		    \
+			"4:"user_lbu("$1", "0(%2)")"\n\t"   \
+			"sll\t%0, 0x8\n\t"		    \
+			"or\t%0, $1\n\t"		    \
+			"li\t%1, 0\n"			    \
+			".set\tpop\n"			    \
+			"10:\n\t"			    \
+			".insn\n\t"			    \
+			".section\t.fixup,\"ax\"\n\t"	    \
+			"11:\tli\t%1, %3\n\t"		    \
+			"j\t10b\n\t"			    \
+			".previous\n\t"			    \
+			".section\t__ex_table,\"a\"\n\t"    \
+			STR(PTR)"\t1b, 11b\n\t"		    \
+			STR(PTR)"\t2b, 11b\n\t"		    \
+			STR(PTR)"\t3b, 11b\n\t"		    \
+			STR(PTR)"\t4b, 11b\n\t"		    \
+			".previous"			    \
+			: "=&r" (value), "=r" (res)	    \
+			: "r" (addr), "i" (-EFAULT));
+
+#define     LoadDW(addr, value, res)  \
+		__asm__ __volatile__ (			    \
+			".set\tpush\n\t"		    \
+			".set\tnoat\n\t"		    \
+			"1:lb\t%0, 7(%2)\n\t"    	    \
+			"2:lbu\t$1, 6(%2)\n\t"   	    \
+			"dsll\t%0, 0x8\n\t"		    \
+			"or\t%0, $1\n\t"		    \
+			"3:lbu\t$1, 5(%2)\n\t"   	    \
+			"dsll\t%0, 0x8\n\t"		    \
+			"or\t%0, $1\n\t"		    \
+			"4:lbu\t$1, 4(%2)\n\t"   	    \
+			"dsll\t%0, 0x8\n\t"		    \
+			"or\t%0, $1\n\t"		    \
+			"5:lbu\t$1, 3(%2)\n\t"   	    \
+			"dsll\t%0, 0x8\n\t"		    \
+			"or\t%0, $1\n\t"		    \
+			"6:lbu\t$1, 2(%2)\n\t"   	    \
+			"dsll\t%0, 0x8\n\t"		    \
+			"or\t%0, $1\n\t"		    \
+			"7:lbu\t$1, 1(%2)\n\t"   	    \
+			"dsll\t%0, 0x8\n\t"		    \
+			"or\t%0, $1\n\t"		    \
+			"8:lbu\t$1, 0(%2)\n\t"   	    \
+			"dsll\t%0, 0x8\n\t"		    \
+			"or\t%0, $1\n\t"		    \
+			"li\t%1, 0\n"			    \
+			".set\tpop\n\t"			    \
+			"10:\n\t"			    \
+			".insn\n\t"			    \
+			".section\t.fixup,\"ax\"\n\t"	    \
+			"11:\tli\t%1, %3\n\t"		    \
+			"j\t10b\n\t"			    \
+			".previous\n\t"			    \
+			".section\t__ex_table,\"a\"\n\t"    \
+			STR(PTR)"\t1b, 11b\n\t"		    \
+			STR(PTR)"\t2b, 11b\n\t"		    \
+			STR(PTR)"\t3b, 11b\n\t"		    \
+			STR(PTR)"\t4b, 11b\n\t"		    \
+			STR(PTR)"\t5b, 11b\n\t"		    \
+			STR(PTR)"\t6b, 11b\n\t"		    \
+			STR(PTR)"\t7b, 11b\n\t"		    \
+			STR(PTR)"\t8b, 11b\n\t"		    \
+			".previous"			    \
+			: "=&r" (value), "=r" (res)	    \
+			: "r" (addr), "i" (-EFAULT));
+#endif /* CONFIG_CPU_MIPSR6 */
 
 #define     StoreHW(addr, value, res) \
 		__asm__ __volatile__ (                      \
@@ -384,7 +690,7 @@
 			".previous"                         \
 			: "=r" (res)                        \
 			: "r" (value), "r" (addr), "i" (-EFAULT));
-
+#ifndef CONFIG_CPU_MIPSR6
 #define     StoreW(addr, value, res)  \
 		__asm__ __volatile__ (                      \
 			"1:\t"user_swl("%1", "3(%2)")"\n"   \
@@ -420,6 +726,79 @@
 			".previous"                         \
 		: "=r" (res)                                \
 		: "r" (value), "r" (addr), "i" (-EFAULT));
+#else
+/* MIPSR6 has no swl and sdl instructions */
+#define     StoreW(addr, value, res)  \
+		__asm__ __volatile__ (                      \
+			".set\tpush\n\t"		    \
+			".set\tnoat\n\t"		    \
+			"1:"user_sb("%1", "0(%2)")"\n\t"    \
+			"srl\t$1, %1, 0x8\n\t"		    \
+			"2:"user_sb("$1", "1(%2)")"\n\t"    \
+			"srl\t$1, $1,  0x8\n\t"		    \
+			"3:"user_sb("$1", "2(%2)")"\n\t"    \
+			"srl\t$1, $1, 0x8\n\t"		    \
+			"4:"user_sb("$1", "3(%2)")"\n\t"    \
+			".set\tpop\n\t"			    \
+			"li\t%0, 0\n"			    \
+			"10:\n\t"			    \
+			".insn\n\t"			    \
+			".section\t.fixup,\"ax\"\n\t"	    \
+			"11:\tli\t%0, %3\n\t"		    \
+			"j\t10b\n\t"			    \
+			".previous\n\t"			    \
+			".section\t__ex_table,\"a\"\n\t"    \
+			STR(PTR)"\t1b, 11b\n\t"		    \
+			STR(PTR)"\t2b, 11b\n\t"		    \
+			STR(PTR)"\t3b, 11b\n\t"		    \
+			STR(PTR)"\t4b, 11b\n\t"		    \
+			".previous"			    \
+		: "=&r" (res)			    	    \
+		: "r" (value), "r" (addr), "i" (-EFAULT)    \
+		: "memory");
+
+#define     StoreDW(addr, value, res) \
+		__asm__ __volatile__ (                      \
+			".set\tpush\n\t"		    \
+			".set\tnoat\n\t"		    \
+			"1:sb\t%1, 0(%2)\n\t"    	    \
+			"dsrl\t$1, %1, 0x8\n\t"		    \
+			"2:sb\t$1, 1(%2)\n\t"    	    \
+			"dsrl\t$1, $1, 0x8\n\t"		    \
+			"3:sb\t$1, 2(%2)\n\t"    	    \
+			"dsrl\t$1, $1, 0x8\n\t"		    \
+			"4:sb\t$1, 3(%2)\n\t"    	    \
+			"dsrl\t$1, $1, 0x8\n\t"		    \
+			"5:sb\t$1, 4(%2)\n\t"    	    \
+			"dsrl\t$1, $1, 0x8\n\t"		    \
+			"6:sb\t$1, 5(%2)\n\t"    	    \
+			"dsrl\t$1, $1, 0x8\n\t"		    \
+			"7:sb\t$1, 6(%2)\n\t"    	    \
+			"dsrl\t$1, $1, 0x8\n\t"		    \
+			"8:sb\t$1, 7(%2)\n\t"    	    \
+			"dsrl\t$1, $1, 0x8\n\t"		    \
+			".set\tpop\n\t"			    \
+			"li\t%0, 0\n"			    \
+			"10:\n\t"			    \
+			".insn\n\t"			    \
+			".section\t.fixup,\"ax\"\n\t"	    \
+			"11:\tli\t%0, %3\n\t"		    \
+			"j\t10b\n\t"			    \
+			".previous\n\t"			    \
+			".section\t__ex_table,\"a\"\n\t"    \
+			STR(PTR)"\t1b, 11b\n\t"		    \
+			STR(PTR)"\t2b, 11b\n\t"		    \
+			STR(PTR)"\t3b, 11b\n\t"		    \
+			STR(PTR)"\t4b, 11b\n\t"		    \
+			STR(PTR)"\t5b, 11b\n\t"		    \
+			STR(PTR)"\t6b, 11b\n\t"		    \
+			STR(PTR)"\t7b, 11b\n\t"		    \
+			STR(PTR)"\t8b, 11b\n\t"		    \
+			".previous"			    \
+		: "=&r" (res)			    	    \
+		: "r" (value), "r" (addr), "i" (-EFAULT)    \
+		: "memory");
+#endif /* CONFIG_CPU_MIPSR6 */
 #endif
 
 static void emulate_load_store_insn(struct pt_regs *regs,
@@ -703,10 +1082,13 @@
 			break;
 		return;
 
+#ifndef CONFIG_CPU_MIPSR6
 	/*
 	 * COP2 is available to implementor for application specific use.
 	 * It's up to applications to register a notifier chain and do
 	 * whatever they have to do, including possible sending of signals.
+	 *
+	 * This instruction has been reallocated in Release 6
 	 */
 	case lwc2_op:
 		cu2_notifier_call_chain(CU2_LWC2_OP, regs);
@@ -723,7 +1105,7 @@
 	case sdc2_op:
 		cu2_notifier_call_chain(CU2_SDC2_OP, regs);
 		break;
-
+#endif
 	default:
 		/*
 		 * Pheeee...  We encountered an yet unknown instruction or
diff --git a/arch/mips/lib/Makefile b/arch/mips/lib/Makefile
index eeddc58..1e9e900 100644
--- a/arch/mips/lib/Makefile
+++ b/arch/mips/lib/Makefile
@@ -8,6 +8,7 @@
 
 obj-y			+= iomap.o
 obj-$(CONFIG_PCI)	+= iomap-pci.o
+lib-$(CONFIG_GENERIC_CSUM)	:= $(filter-out csum_partial.o, $(lib-y))
 
 obj-$(CONFIG_CPU_GENERIC_DUMP_TLB) += dump_tlb.o
 obj-$(CONFIG_CPU_R3000)		+= r3k_dump_tlb.o
diff --git a/arch/mips/lib/memcpy.S b/arch/mips/lib/memcpy.S
index 5d3238a..9245e17 100644
--- a/arch/mips/lib/memcpy.S
+++ b/arch/mips/lib/memcpy.S
@@ -293,9 +293,14 @@
 	 and	t0, src, ADDRMASK
 	PREFS(	0, 2*32(src) )
 	PREFD(	1, 2*32(dst) )
+#ifndef CONFIG_CPU_MIPSR6
 	bnez	t1, .Ldst_unaligned\@
 	 nop
 	bnez	t0, .Lsrc_unaligned_dst_aligned\@
+#else
+	or	t0, t0, t1
+	bnez	t0, .Lcopy_unaligned_bytes\@
+#endif
 	/*
 	 * use delay slot for fall-through
 	 * src and dst are aligned; need to compute rem
@@ -376,6 +381,7 @@
 	bne	rem, len, 1b
 	.set	noreorder
 
+#ifndef CONFIG_CPU_MIPSR6
 	/*
 	 * src and dst are aligned, need to copy rem bytes (rem < NBYTES)
 	 * A loop would do only a byte at a time with possible branch
@@ -477,6 +483,7 @@
 	bne	len, rem, 1b
 	.set	noreorder
 
+#endif /* !CONFIG_CPU_MIPSR6 */
 .Lcopy_bytes_checklen\@:
 	beqz	len, .Ldone\@
 	 nop
@@ -504,6 +511,22 @@
 .Ldone\@:
 	jr	ra
 	 nop
+
+#ifdef CONFIG_CPU_MIPSR6
+.Lcopy_unaligned_bytes\@:
+1:
+	COPY_BYTE(0)
+	COPY_BYTE(1)
+	COPY_BYTE(2)
+	COPY_BYTE(3)
+	COPY_BYTE(4)
+	COPY_BYTE(5)
+	COPY_BYTE(6)
+	COPY_BYTE(7)
+	ADD	src, src, 8
+	b	1b
+	 ADD	dst, dst, 8
+#endif /* CONFIG_CPU_MIPSR6 */
 	.if __memcpy == 1
 	END(memcpy)
 	.set __memcpy, 0
diff --git a/arch/mips/lib/memset.S b/arch/mips/lib/memset.S
index c8fe6b1..b8e63fd 100644
--- a/arch/mips/lib/memset.S
+++ b/arch/mips/lib/memset.S
@@ -111,6 +111,7 @@
 	.set		at
 #endif
 
+#ifndef CONFIG_CPU_MIPSR6
 	R10KCBARRIER(0(ra))
 #ifdef __MIPSEB__
 	EX(LONG_S_L, a1, (a0), .Lfirst_fixup\@)	/* make word/dword aligned */
@@ -120,6 +121,30 @@
 	PTR_SUBU	a0, t0			/* long align ptr */
 	PTR_ADDU	a2, t0			/* correct size */
 
+#else /* CONFIG_CPU_MIPSR6 */
+#define STORE_BYTE(N)				\
+	EX(sb, a1, N(a0), .Lbyte_fixup\@);	\
+	beqz		t0, 0f;			\
+	PTR_ADDU	t0, 1;
+
+	PTR_ADDU	a2, t0			/* correct size */
+	PTR_ADDU	t0, 1
+	STORE_BYTE(0)
+	STORE_BYTE(1)
+#if LONGSIZE == 4
+	EX(sb, a1, 2(a0), .Lbyte_fixup\@)
+#else
+	STORE_BYTE(2)
+	STORE_BYTE(3)
+	STORE_BYTE(4)
+	STORE_BYTE(5)
+	EX(sb, a1, 6(a0), .Lbyte_fixup\@)
+#endif
+0:
+	ori		a0, STORMASK
+	xori		a0, STORMASK
+	PTR_ADDIU	a0, STORSIZE
+#endif /* CONFIG_CPU_MIPSR6 */
 1:	ori		t1, a2, 0x3f		/* # of full blocks */
 	xori		t1, 0x3f
 	beqz		t1, .Lmemset_partial\@	/* no block to fill */
@@ -159,6 +184,7 @@
 	andi		a2, STORMASK		/* At most one long to go */
 
 	beqz		a2, 1f
+#ifndef CONFIG_CPU_MIPSR6
 	PTR_ADDU	a0, a2			/* What's left */
 	R10KCBARRIER(0(ra))
 #ifdef __MIPSEB__
@@ -166,6 +192,22 @@
 #else
 	EX(LONG_S_L, a1, -1(a0), .Llast_fixup\@)
 #endif
+#else
+	PTR_SUBU	t0, $0, a2
+	PTR_ADDIU	t0, 1
+	STORE_BYTE(0)
+	STORE_BYTE(1)
+#if LONGSIZE == 4
+	EX(sb, a1, 2(a0), .Lbyte_fixup\@)
+#else
+	STORE_BYTE(2)
+	STORE_BYTE(3)
+	STORE_BYTE(4)
+	STORE_BYTE(5)
+	EX(sb, a1, 6(a0), .Lbyte_fixup\@)
+#endif
+0:
+#endif
 1:	jr		ra
 	move		a2, zero
 
@@ -186,6 +228,11 @@
 	.hidden __memset
 	.endif
 
+.Lbyte_fixup\@:
+	PTR_SUBU	a2, $0, t0
+	jr		ra
+	 PTR_ADDIU	a2, 1
+
 .Lfirst_fixup\@:
 	jr	ra
 	nop
diff --git a/arch/mips/lib/mips-atomic.c b/arch/mips/lib/mips-atomic.c
index be777d9..272af8a 100644
--- a/arch/mips/lib/mips-atomic.c
+++ b/arch/mips/lib/mips-atomic.c
@@ -15,7 +15,7 @@
 #include <linux/export.h>
 #include <linux/stringify.h>
 
-#ifndef CONFIG_CPU_MIPSR2
+#if !defined(CONFIG_CPU_MIPSR2) && !defined(CONFIG_CPU_MIPSR6)
 
 /*
  * For cli() we have to insert nops to make sure that the new value
diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c
index 9dfcd7f..b30bf65 100644
--- a/arch/mips/math-emu/cp1emu.c
+++ b/arch/mips/math-emu/cp1emu.c
@@ -48,6 +48,7 @@
 #include <asm/processor.h>
 #include <asm/fpu_emulator.h>
 #include <asm/fpu.h>
+#include <asm/mips-r2-to-r6-emul.h>
 
 #include "ieee754.h"
 
@@ -68,7 +69,7 @@
 #define modeindex(v) ((v) & FPU_CSR_RM)
 
 /* convert condition code register number to csr bit */
-static const unsigned int fpucondbit[8] = {
+const unsigned int fpucondbit[8] = {
 	FPU_CSR_COND0,
 	FPU_CSR_COND1,
 	FPU_CSR_COND2,
@@ -448,6 +449,9 @@
 				dec_insn.next_pc_inc;
 			/* Fall through */
 		case jr_op:
+			/* For R6, JR already emulated in jalr_op */
+			if (NO_R6EMU && insn.r_format.opcode == jr_op)
+				break;
 			*contpc = regs->regs[insn.r_format.rs];
 			return 1;
 		}
@@ -456,12 +460,18 @@
 		switch (insn.i_format.rt) {
 		case bltzal_op:
 		case bltzall_op:
+			if (NO_R6EMU && (insn.i_format.rs ||
+			    insn.i_format.rt == bltzall_op))
+				break;
+
 			regs->regs[31] = regs->cp0_epc +
 				dec_insn.pc_inc +
 				dec_insn.next_pc_inc;
 			/* Fall through */
-		case bltz_op:
 		case bltzl_op:
+			if (NO_R6EMU)
+				break;
+		case bltz_op:
 			if ((long)regs->regs[insn.i_format.rs] < 0)
 				*contpc = regs->cp0_epc +
 					dec_insn.pc_inc +
@@ -473,12 +483,18 @@
 			return 1;
 		case bgezal_op:
 		case bgezall_op:
+			if (NO_R6EMU && (insn.i_format.rs ||
+			    insn.i_format.rt == bgezall_op))
+				break;
+
 			regs->regs[31] = regs->cp0_epc +
 				dec_insn.pc_inc +
 				dec_insn.next_pc_inc;
 			/* Fall through */
-		case bgez_op:
 		case bgezl_op:
+			if (NO_R6EMU)
+				break;
+		case bgez_op:
 			if ((long)regs->regs[insn.i_format.rs] >= 0)
 				*contpc = regs->cp0_epc +
 					dec_insn.pc_inc +
@@ -505,8 +521,10 @@
 		/* Set microMIPS mode bit: XOR for jalx. */
 		*contpc ^= bit;
 		return 1;
-	case beq_op:
 	case beql_op:
+		if (NO_R6EMU)
+			break;
+	case beq_op:
 		if (regs->regs[insn.i_format.rs] ==
 		    regs->regs[insn.i_format.rt])
 			*contpc = regs->cp0_epc +
@@ -517,8 +535,10 @@
 				dec_insn.pc_inc +
 				dec_insn.next_pc_inc;
 		return 1;
-	case bne_op:
 	case bnel_op:
+		if (NO_R6EMU)
+			break;
+	case bne_op:
 		if (regs->regs[insn.i_format.rs] !=
 		    regs->regs[insn.i_format.rt])
 			*contpc = regs->cp0_epc +
@@ -529,8 +549,34 @@
 				dec_insn.pc_inc +
 				dec_insn.next_pc_inc;
 		return 1;
-	case blez_op:
 	case blezl_op:
+		if (NO_R6EMU)
+			break;
+	case blez_op:
+
+		/*
+		 * Compact branches for R6 for the
+		 * blez and blezl opcodes.
+		 * BLEZ  | rs = 0 | rt != 0  == BLEZALC
+		 * BLEZ  | rs = rt != 0      == BGEZALC
+		 * BLEZ  | rs != 0 | rt != 0 == BGEUC
+		 * BLEZL | rs = 0 | rt != 0  == BLEZC
+		 * BLEZL | rs = rt != 0      == BGEZC
+		 * BLEZL | rs != 0 | rt != 0 == BGEC
+		 *
+		 * For real BLEZ{,L}, rt is always 0.
+		 */
+		if (cpu_has_mips_r6 && insn.i_format.rt) {
+			if ((insn.i_format.opcode == blez_op) &&
+			    ((!insn.i_format.rs && insn.i_format.rt) ||
+			     (insn.i_format.rs == insn.i_format.rt)))
+				regs->regs[31] = regs->cp0_epc +
+					dec_insn.pc_inc;
+			*contpc = regs->cp0_epc + dec_insn.pc_inc +
+				dec_insn.next_pc_inc;
+
+			return 1;
+		}
 		if ((long)regs->regs[insn.i_format.rs] <= 0)
 			*contpc = regs->cp0_epc +
 				dec_insn.pc_inc +
@@ -540,8 +586,35 @@
 				dec_insn.pc_inc +
 				dec_insn.next_pc_inc;
 		return 1;
-	case bgtz_op:
 	case bgtzl_op:
+		if (NO_R6EMU)
+			break;
+	case bgtz_op:
+		/*
+		 * Compact branches for R6 for the
+		 * bgtz and bgtzl opcodes.
+		 * BGTZ  | rs = 0 | rt != 0  == BGTZALC
+		 * BGTZ  | rs = rt != 0      == BLTZALC
+		 * BGTZ  | rs != 0 | rt != 0 == BLTUC
+		 * BGTZL | rs = 0 | rt != 0  == BGTZC
+		 * BGTZL | rs = rt != 0      == BLTZC
+		 * BGTZL | rs != 0 | rt != 0 == BLTC
+		 *
+		 * *ZALC varint for BGTZ &&& rt != 0
+		 * For real GTZ{,L}, rt is always 0.
+		 */
+		if (cpu_has_mips_r6 && insn.i_format.rt) {
+			if ((insn.i_format.opcode == blez_op) &&
+			    ((!insn.i_format.rs && insn.i_format.rt) ||
+			     (insn.i_format.rs == insn.i_format.rt)))
+				regs->regs[31] = regs->cp0_epc +
+					dec_insn.pc_inc;
+			*contpc = regs->cp0_epc + dec_insn.pc_inc +
+				dec_insn.next_pc_inc;
+
+			return 1;
+		}
+
 		if ((long)regs->regs[insn.i_format.rs] > 0)
 			*contpc = regs->cp0_epc +
 				dec_insn.pc_inc +
@@ -551,6 +624,16 @@
 				dec_insn.pc_inc +
 				dec_insn.next_pc_inc;
 		return 1;
+	case cbcond0_op:
+	case cbcond1_op:
+		if (!cpu_has_mips_r6)
+			break;
+		if (insn.i_format.rt && !insn.i_format.rs)
+			regs->regs[31] = regs->cp0_epc + 4;
+		*contpc = regs->cp0_epc + dec_insn.pc_inc +
+			dec_insn.next_pc_inc;
+
+		return 1;
 #ifdef CONFIG_CPU_CAVIUM_OCTEON
 	case lwc2_op: /* This is bbit0 on Octeon */
 		if ((regs->regs[insn.i_format.rs] & (1ull<<insn.i_format.rt)) == 0)
@@ -576,9 +659,73 @@
 		else
 			*contpc = regs->cp0_epc + 8;
 		return 1;
+#else
+	case bc6_op:
+		/*
+		 * Only valid for MIPS R6 but we can still end up
+		 * here from a broken userland so just tell emulator
+		 * this is not a branch and let it break later on.
+		 */
+		if  (!cpu_has_mips_r6)
+			break;
+		*contpc = regs->cp0_epc + dec_insn.pc_inc +
+			dec_insn.next_pc_inc;
+
+		return 1;
+	case balc6_op:
+		if (!cpu_has_mips_r6)
+			break;
+		regs->regs[31] = regs->cp0_epc + 4;
+		*contpc = regs->cp0_epc + dec_insn.pc_inc +
+			dec_insn.next_pc_inc;
+
+		return 1;
+	case beqzcjic_op:
+		if (!cpu_has_mips_r6)
+			break;
+		*contpc = regs->cp0_epc + dec_insn.pc_inc +
+			dec_insn.next_pc_inc;
+
+		return 1;
+	case bnezcjialc_op:
+		if (!cpu_has_mips_r6)
+			break;
+		if (!insn.i_format.rs)
+			regs->regs[31] = regs->cp0_epc + 4;
+		*contpc = regs->cp0_epc + dec_insn.pc_inc +
+			dec_insn.next_pc_inc;
+
+		return 1;
 #endif
 	case cop0_op:
 	case cop1_op:
+		/* Need to check for R6 bc1nez and bc1eqz branches */
+		if (cpu_has_mips_r6 &&
+		    ((insn.i_format.rs == bc1eqz_op) ||
+		     (insn.i_format.rs == bc1nez_op))) {
+			bit = 0;
+			switch (insn.i_format.rs) {
+			case bc1eqz_op:
+				if (get_fpr32(&current->thread.fpu.fpr[insn.i_format.rt], 0) & 0x1)
+				    bit = 1;
+				break;
+			case bc1nez_op:
+				if (!(get_fpr32(&current->thread.fpu.fpr[insn.i_format.rt], 0) & 0x1))
+				    bit = 1;
+				break;
+			}
+			if (bit)
+				*contpc = regs->cp0_epc +
+					dec_insn.pc_inc +
+					(insn.i_format.simmediate << 2);
+			else
+				*contpc = regs->cp0_epc +
+					dec_insn.pc_inc +
+					dec_insn.next_pc_inc;
+
+			return 1;
+		}
+		/* R2/R6 compatible cop1 instruction. Fall through */
 	case cop2_op:
 	case cop1x_op:
 		if (insn.i_format.rs == bc_op) {
@@ -1414,14 +1561,14 @@
 		 * achieve full IEEE-754 accuracy - however this emulator does.
 		 */
 		case frsqrt_op:
-			if (!cpu_has_mips_4_5_r2)
+			if (!cpu_has_mips_4_5_r2_r6)
 				return SIGILL;
 
 			handler.u = fpemu_sp_rsqrt;
 			goto scopuop;
 
 		case frecip_op:
-			if (!cpu_has_mips_4_5_r2)
+			if (!cpu_has_mips_4_5_r2_r6)
 				return SIGILL;
 
 			handler.u = fpemu_sp_recip;
@@ -1616,13 +1763,13 @@
 		 * achieve full IEEE-754 accuracy - however this emulator does.
 		 */
 		case frsqrt_op:
-			if (!cpu_has_mips_4_5_r2)
+			if (!cpu_has_mips_4_5_r2_r6)
 				return SIGILL;
 
 			handler.u = fpemu_dp_rsqrt;
 			goto dcopuop;
 		case frecip_op:
-			if (!cpu_has_mips_4_5_r2)
+			if (!cpu_has_mips_4_5_r2_r6)
 				return SIGILL;
 
 			handler.u = fpemu_dp_recip;
diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
index dd261df..3f80596 100644
--- a/arch/mips/mm/c-r4k.c
+++ b/arch/mips/mm/c-r4k.c
@@ -794,7 +794,7 @@
 		__asm__ __volatile__ (
 			".set push\n\t"
 			".set noat\n\t"
-			".set mips3\n\t"
+			".set "MIPS_ISA_LEVEL"\n\t"
 #ifdef CONFIG_32BIT
 			"la	$at,1f\n\t"
 #endif
@@ -1255,6 +1255,7 @@
 	case CPU_P5600:
 	case CPU_PROAPTIV:
 	case CPU_M5150:
+	case CPU_QEMU_GENERIC:
 		if (!(read_c0_config7() & MIPS_CONF7_IAR) &&
 		    (c->icache.waysize > PAGE_SIZE))
 			c->icache.flags |= MIPS_CACHE_ALIASES;
@@ -1472,7 +1473,8 @@
 
 	default:
 		if (c->isa_level & (MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M32R2 |
-				    MIPS_CPU_ISA_M64R1 | MIPS_CPU_ISA_M64R2)) {
+				    MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R1 |
+				    MIPS_CPU_ISA_M64R2 | MIPS_CPU_ISA_M64R6)) {
 #ifdef CONFIG_MIPS_CPU_SCACHE
 			if (mips_sc_init ()) {
 				scache_size = c->scache.ways * c->scache.sets * c->scache.linesz;
diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c
index 70ab5d6..7ff8637 100644
--- a/arch/mips/mm/fault.c
+++ b/arch/mips/mm/fault.c
@@ -14,6 +14,7 @@
 #include <linux/string.h>
 #include <linux/types.h>
 #include <linux/ptrace.h>
+#include <linux/ratelimit.h>
 #include <linux/mman.h>
 #include <linux/mm.h>
 #include <linux/smp.h>
@@ -28,6 +29,8 @@
 #include <asm/highmem.h>		/* For VMALLOC_END */
 #include <linux/kdebug.h>
 
+int show_unhandled_signals = 1;
+
 /*
  * This routine handles page faults.  It determines the address,
  * and the problem, and then passes it off to one of the appropriate
@@ -44,6 +47,8 @@
 	int fault;
 	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
+	static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10);
+
 #if 0
 	printk("Cpu%d[%s:%d:%0*lx:%ld:%0*lx]\n", raw_smp_processor_id(),
 	       current->comm, current->pid, field, address, write,
@@ -203,15 +208,21 @@
 	if (user_mode(regs)) {
 		tsk->thread.cp0_badvaddr = address;
 		tsk->thread.error_code = write;
-#if 0
-		printk("do_page_fault() #2: sending SIGSEGV to %s for "
-		       "invalid %s\n%0*lx (epc == %0*lx, ra == %0*lx)\n",
-		       tsk->comm,
-		       write ? "write access to" : "read access from",
-		       field, address,
-		       field, (unsigned long) regs->cp0_epc,
-		       field, (unsigned long) regs->regs[31]);
-#endif
+		if (show_unhandled_signals &&
+		    unhandled_signal(tsk, SIGSEGV) &&
+		    __ratelimit(&ratelimit_state)) {
+			pr_info("\ndo_page_fault(): sending SIGSEGV to %s for invalid %s %0*lx",
+				tsk->comm,
+				write ? "write access to" : "read access from",
+				field, address);
+			pr_info("epc = %0*lx in", field,
+				(unsigned long) regs->cp0_epc);
+			print_vma_addr(" ", regs->cp0_epc);
+			pr_info("ra  = %0*lx in", field,
+				(unsigned long) regs->regs[31]);
+			print_vma_addr(" ", regs->regs[31]);
+			pr_info("\n");
+		}
 		info.si_signo = SIGSEGV;
 		info.si_errno = 0;
 		/* info.si_code has been set above */
diff --git a/arch/mips/mm/page.c b/arch/mips/mm/page.c
index b611102..3f85f92 100644
--- a/arch/mips/mm/page.c
+++ b/arch/mips/mm/page.c
@@ -72,6 +72,20 @@
 #define cpu_is_r4600_v1_x()	((read_c0_prid() & 0xfffffff0) == 0x00002010)
 #define cpu_is_r4600_v2_x()	((read_c0_prid() & 0xfffffff0) == 0x00002020)
 
+/*
+ * R6 has a limited offset of the pref instruction.
+ * Skip it if the offset is more than 9 bits.
+ */
+#define _uasm_i_pref(a, b, c, d)		\
+do {						\
+	if (cpu_has_mips_r6) {			\
+		if (c <= 0xff && c >= -0x100)	\
+			uasm_i_pref(a, b, c, d);\
+	} else {				\
+		uasm_i_pref(a, b, c, d);	\
+	}					\
+} while(0)
+
 static int pref_bias_clear_store;
 static int pref_bias_copy_load;
 static int pref_bias_copy_store;
@@ -178,7 +192,15 @@
 			pref_bias_copy_load = 256;
 			pref_bias_copy_store = 128;
 			pref_src_mode = Pref_LoadStreamed;
-			pref_dst_mode = Pref_PrepareForStore;
+			if (cpu_has_mips_r6)
+				/*
+				 * Bit 30 (Pref_PrepareForStore) has been
+				 * removed from MIPS R6. Use bit 5
+				 * (Pref_StoreStreamed).
+				 */
+				pref_dst_mode = Pref_StoreStreamed;
+			else
+				pref_dst_mode = Pref_PrepareForStore;
 			break;
 		}
 	} else {
@@ -214,7 +236,7 @@
 		return;
 
 	if (pref_bias_clear_store) {
-		uasm_i_pref(buf, pref_dst_mode, pref_bias_clear_store + off,
+		_uasm_i_pref(buf, pref_dst_mode, pref_bias_clear_store + off,
 			    A0);
 	} else if (cache_line_size == (half_clear_loop_size << 1)) {
 		if (cpu_has_cache_cdex_s) {
@@ -357,7 +379,7 @@
 		return;
 
 	if (pref_bias_copy_load)
-		uasm_i_pref(buf, pref_src_mode, pref_bias_copy_load + off, A1);
+		_uasm_i_pref(buf, pref_src_mode, pref_bias_copy_load + off, A1);
 }
 
 static inline void build_copy_store_pref(u32 **buf, int off)
@@ -366,7 +388,7 @@
 		return;
 
 	if (pref_bias_copy_store) {
-		uasm_i_pref(buf, pref_dst_mode, pref_bias_copy_store + off,
+		_uasm_i_pref(buf, pref_dst_mode, pref_bias_copy_store + off,
 			    A0);
 	} else if (cache_line_size == (half_copy_loop_size << 1)) {
 		if (cpu_has_cache_cdex_s) {
diff --git a/arch/mips/mm/sc-mips.c b/arch/mips/mm/sc-mips.c
index 99eb8fa..4ceafd1 100644
--- a/arch/mips/mm/sc-mips.c
+++ b/arch/mips/mm/sc-mips.c
@@ -81,6 +81,7 @@
 	case CPU_PROAPTIV:
 	case CPU_P5600:
 	case CPU_BMIPS5000:
+	case CPU_QEMU_GENERIC:
 		if (config2 & (1 << 12))
 			return 0;
 	}
@@ -104,7 +105,8 @@
 
 	/* Ignore anything but MIPSxx processors */
 	if (!(c->isa_level & (MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M32R2 |
-			      MIPS_CPU_ISA_M64R1 | MIPS_CPU_ISA_M64R2)))
+			      MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R1 |
+			      MIPS_CPU_ISA_M64R2 | MIPS_CPU_ISA_M64R6)))
 		return 0;
 
 	/* Does this MIPS32/MIPS64 CPU have a config2 register? */
diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c
index 30639a6..b2afa49 100644
--- a/arch/mips/mm/tlb-r4k.c
+++ b/arch/mips/mm/tlb-r4k.c
@@ -485,13 +485,11 @@
 		 * Enable the no read, no exec bits, and enable large virtual
 		 * address.
 		 */
-		u32 pg = PG_RIE | PG_XIE;
 #ifdef CONFIG_64BIT
-		pg |= PG_ELPA;
+		set_c0_pagegrain(PG_RIE | PG_XIE | PG_ELPA);
+#else
+		set_c0_pagegrain(PG_RIE | PG_XIE);
 #endif
-		if (cpu_has_rixiex)
-			pg |= PG_IEC;
-		write_c0_pagegrain(pg);
 	}
 
 	temp_tlb_entry = current_cpu_data.tlbsize - 1;
diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index 3978a3d..d75ff73 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -501,7 +501,7 @@
 	case tlb_indexed: tlbw = uasm_i_tlbwi; break;
 	}
 
-	if (cpu_has_mips_r2) {
+	if (cpu_has_mips_r2_exec_hazard) {
 		/*
 		 * The architecture spec says an ehb is required here,
 		 * but a number of cores do not have the hazard and
@@ -514,6 +514,7 @@
 		case CPU_PROAPTIV:
 		case CPU_P5600:
 		case CPU_M5150:
+		case CPU_QEMU_GENERIC:
 			break;
 
 		default:
@@ -1952,7 +1953,7 @@
 
 		switch (current_cpu_type()) {
 		default:
-			if (cpu_has_mips_r2) {
+			if (cpu_has_mips_r2_exec_hazard) {
 				uasm_i_ehb(&p);
 
 		case CPU_CAVIUM_OCTEON:
@@ -2019,7 +2020,7 @@
 
 		switch (current_cpu_type()) {
 		default:
-			if (cpu_has_mips_r2) {
+			if (cpu_has_mips_r2_exec_hazard) {
 				uasm_i_ehb(&p);
 
 		case CPU_CAVIUM_OCTEON:
diff --git a/arch/mips/mm/uasm-micromips.c b/arch/mips/mm/uasm-micromips.c
index 8399ddf..d78178d 100644
--- a/arch/mips/mm/uasm-micromips.c
+++ b/arch/mips/mm/uasm-micromips.c
@@ -38,14 +38,6 @@
 	 | (e) << RE_SH						\
 	 | (f) << FUNC_SH)
 
-/* Define these when we are not the ISA the kernel is being compiled with. */
-#ifndef CONFIG_CPU_MICROMIPS
-#define MM_uasm_i_b(buf, off) ISAOPC(_beq)(buf, 0, 0, off)
-#define MM_uasm_i_beqz(buf, rs, off) ISAOPC(_beq)(buf, rs, 0, off)
-#define MM_uasm_i_beqzl(buf, rs, off) ISAOPC(_beql)(buf, rs, 0, off)
-#define MM_uasm_i_bnez(buf, rs, off) ISAOPC(_bne)(buf, rs, 0, off)
-#endif
-
 #include "uasm.c"
 
 static struct insn insn_table_MM[] = {
diff --git a/arch/mips/mm/uasm-mips.c b/arch/mips/mm/uasm-mips.c
index 8e02291..b4a83789 100644
--- a/arch/mips/mm/uasm-mips.c
+++ b/arch/mips/mm/uasm-mips.c
@@ -38,13 +38,13 @@
 	 | (e) << RE_SH						\
 	 | (f) << FUNC_SH)
 
-/* Define these when we are not the ISA the kernel is being compiled with. */
-#ifdef CONFIG_CPU_MICROMIPS
-#define CL_uasm_i_b(buf, off) ISAOPC(_beq)(buf, 0, 0, off)
-#define CL_uasm_i_beqz(buf, rs, off) ISAOPC(_beq)(buf, rs, 0, off)
-#define CL_uasm_i_beqzl(buf, rs, off) ISAOPC(_beql)(buf, rs, 0, off)
-#define CL_uasm_i_bnez(buf, rs, off) ISAOPC(_bne)(buf, rs, 0, off)
-#endif
+/* This macro sets the non-variable bits of an R6 instruction. */
+#define M6(a, b, c, d, e)					\
+	((a) << OP_SH						\
+	 | (b) << RS_SH						\
+	 | (c) << RT_SH						\
+	 | (d) << SIMM9_SH					\
+	 | (e) << FUNC_SH)
 
 #include "uasm.c"
 
@@ -62,7 +62,11 @@
 	{ insn_bltzl, M(bcond_op, 0, bltzl_op, 0, 0, 0), RS | BIMM },
 	{ insn_bltz, M(bcond_op, 0, bltz_op, 0, 0, 0), RS | BIMM },
 	{ insn_bne, M(bne_op, 0, 0, 0, 0, 0), RS | RT | BIMM },
+#ifndef CONFIG_CPU_MIPSR6
 	{ insn_cache,  M(cache_op, 0, 0, 0, 0, 0),  RS | RT | SIMM },
+#else
+	{ insn_cache,  M6(cache_op, 0, 0, 0, cache6_op),  RS | RT | SIMM9 },
+#endif
 	{ insn_daddiu, M(daddiu_op, 0, 0, 0, 0, 0), RS | RT | SIMM },
 	{ insn_daddu, M(spec_op, 0, 0, 0, 0, daddu_op), RS | RT | RD },
 	{ insn_dinsm, M(spec3_op, 0, 0, 0, 0, dinsm_op), RS | RT | RD | RE },
@@ -85,13 +89,22 @@
 	{ insn_jal,  M(jal_op, 0, 0, 0, 0, 0),	JIMM },
 	{ insn_jalr,  M(spec_op, 0, 0, 0, 0, jalr_op), RS | RD },
 	{ insn_j,  M(j_op, 0, 0, 0, 0, 0),  JIMM },
+#ifndef CONFIG_CPU_MIPSR6
 	{ insn_jr,  M(spec_op, 0, 0, 0, 0, jr_op),  RS },
+#else
+	{ insn_jr,  M(spec_op, 0, 0, 0, 0, jalr_op),  RS },
+#endif
 	{ insn_lb, M(lb_op, 0, 0, 0, 0, 0), RS | RT | SIMM },
 	{ insn_ld,  M(ld_op, 0, 0, 0, 0, 0),  RS | RT | SIMM },
 	{ insn_ldx, M(spec3_op, 0, 0, 0, ldx_op, lx_op), RS | RT | RD },
 	{ insn_lh,  M(lh_op, 0, 0, 0, 0, 0),  RS | RT | SIMM },
+#ifndef CONFIG_CPU_MIPSR6
 	{ insn_lld,  M(lld_op, 0, 0, 0, 0, 0),	RS | RT | SIMM },
 	{ insn_ll,  M(ll_op, 0, 0, 0, 0, 0),  RS | RT | SIMM },
+#else
+	{ insn_lld,  M6(spec3_op, 0, 0, 0, lld6_op),  RS | RT | SIMM9 },
+	{ insn_ll,  M6(spec3_op, 0, 0, 0, ll6_op),  RS | RT | SIMM9 },
+#endif
 	{ insn_lui,  M(lui_op, 0, 0, 0, 0, 0),	RT | SIMM },
 	{ insn_lw,  M(lw_op, 0, 0, 0, 0, 0),  RS | RT | SIMM },
 	{ insn_lwx, M(spec3_op, 0, 0, 0, lwx_op, lx_op), RS | RT | RD },
@@ -104,11 +117,20 @@
 	{ insn_mul, M(spec2_op, 0, 0, 0, 0, mul_op), RS | RT | RD},
 	{ insn_ori,  M(ori_op, 0, 0, 0, 0, 0),	RS | RT | UIMM },
 	{ insn_or,  M(spec_op, 0, 0, 0, 0, or_op),  RS | RT | RD },
+#ifndef CONFIG_CPU_MIPSR6
 	{ insn_pref,  M(pref_op, 0, 0, 0, 0, 0),  RS | RT | SIMM },
+#else
+	{ insn_pref,  M6(spec3_op, 0, 0, 0, pref6_op),  RS | RT | SIMM9 },
+#endif
 	{ insn_rfe,  M(cop0_op, cop_op, 0, 0, 0, rfe_op),  0 },
 	{ insn_rotr,  M(spec_op, 1, 0, 0, 0, srl_op),  RT | RD | RE },
+#ifndef CONFIG_CPU_MIPSR6
 	{ insn_scd,  M(scd_op, 0, 0, 0, 0, 0),	RS | RT | SIMM },
 	{ insn_sc,  M(sc_op, 0, 0, 0, 0, 0),  RS | RT | SIMM },
+#else
+	{ insn_scd,  M6(spec3_op, 0, 0, 0, scd6_op),  RS | RT | SIMM9 },
+	{ insn_sc,  M6(spec3_op, 0, 0, 0, sc6_op),  RS | RT | SIMM9 },
+#endif
 	{ insn_sd,  M(sd_op, 0, 0, 0, 0, 0),  RS | RT | SIMM },
 	{ insn_sll,  M(spec_op, 0, 0, 0, 0, sll_op),  RT | RD | RE },
 	{ insn_sllv,  M(spec_op, 0, 0, 0, 0, sllv_op),  RS | RT | RD },
@@ -198,6 +220,8 @@
 		op |= build_set(va_arg(ap, u32));
 	if (ip->fields & SCIMM)
 		op |= build_scimm(va_arg(ap, u32));
+	if (ip->fields & SIMM9)
+		op |= build_scimm9(va_arg(ap, u32));
 	va_end(ap);
 
 	**buf = op;
diff --git a/arch/mips/mm/uasm.c b/arch/mips/mm/uasm.c
index 4adf302..319051c 100644
--- a/arch/mips/mm/uasm.c
+++ b/arch/mips/mm/uasm.c
@@ -24,7 +24,8 @@
 	JIMM = 0x080,
 	FUNC = 0x100,
 	SET = 0x200,
-	SCIMM = 0x400
+	SCIMM = 0x400,
+	SIMM9 = 0x800,
 };
 
 #define OP_MASK		0x3f
@@ -41,6 +42,8 @@
 #define FUNC_SH		0
 #define SET_MASK	0x7
 #define SET_SH		0
+#define SIMM9_SH	7
+#define SIMM9_MASK	0x1ff
 
 enum opcode {
 	insn_invalid,
@@ -116,6 +119,14 @@
 	return (arg & SCIMM_MASK) << SCIMM_SH;
 }
 
+static inline u32 build_scimm9(s32 arg)
+{
+	WARN((arg > 0xff || arg < -0x100),
+	       KERN_WARNING "Micro-assembler field overflow\n");
+
+	return (arg & SIMM9_MASK) << SIMM9_SH;
+}
+
 static inline u32 build_func(u32 arg)
 {
 	WARN(arg & ~FUNC_MASK, KERN_WARNING "Micro-assembler field overflow\n");
@@ -330,7 +341,7 @@
 void ISAFUNC(uasm_i_pref)(u32 **buf, unsigned int a, signed int b,
 			    unsigned int c)
 {
-	if (OCTEON_IS_MODEL(OCTEON_CN63XX_PASS1_X) && a <= 24 && a != 5)
+	if (CAVIUM_OCTEON_DCACHE_PREFETCH_WAR && a <= 24 && a != 5)
 		/*
 		 * As per erratum Core-14449, replace prefetches 0-4,
 		 * 6-24 with 'pref 28'.
diff --git a/arch/mips/mti-sead3/sead3-time.c b/arch/mips/mti-sead3/sead3-time.c
index ec1dd24..e1d6989 100644
--- a/arch/mips/mti-sead3/sead3-time.c
+++ b/arch/mips/mti-sead3/sead3-time.c
@@ -72,7 +72,7 @@
 int get_c0_perfcount_int(void)
 {
 	if (gic_present)
-		return gic_get_c0_compare_int();
+		return gic_get_c0_perfcount_int();
 	if (cp0_perfcount_irq >= 0)
 		return MIPS_CPU_IRQ_BASE + cp0_perfcount_irq;
 	return -1;
diff --git a/arch/mips/pci/pci-bcm1480.c b/arch/mips/pci/pci-bcm1480.c
index f2355e3..f97e169 100644
--- a/arch/mips/pci/pci-bcm1480.c
+++ b/arch/mips/pci/pci-bcm1480.c
@@ -173,8 +173,8 @@
 }
 
 struct pci_ops bcm1480_pci_ops = {
-	.read = bcm1480_pcibios_read,
-	.write = bcm1480_pcibios_write,
+	.read	= bcm1480_pcibios_read,
+	.write	= bcm1480_pcibios_write,
 };
 
 static struct resource bcm1480_mem_resource = {
diff --git a/arch/mips/pci/pci-octeon.c b/arch/mips/pci/pci-octeon.c
index bedb72b..a04af55 100644
--- a/arch/mips/pci/pci-octeon.c
+++ b/arch/mips/pci/pci-octeon.c
@@ -327,8 +327,8 @@
 
 
 static struct pci_ops octeon_pci_ops = {
-	.read = octeon_read_config,
-	.write = octeon_write_config,
+	.read	= octeon_read_config,
+	.write	= octeon_write_config,
 };
 
 static struct resource octeon_pci_mem_resource = {
diff --git a/arch/mips/pci/pcie-octeon.c b/arch/mips/pci/pcie-octeon.c
index eb4a17b..1bb0b2b 100644
--- a/arch/mips/pci/pcie-octeon.c
+++ b/arch/mips/pci/pcie-octeon.c
@@ -1792,8 +1792,8 @@
 }
 
 static struct pci_ops octeon_pcie0_ops = {
-	.read = octeon_pcie0_read_config,
-	.write = octeon_pcie0_write_config,
+	.read	= octeon_pcie0_read_config,
+	.write	= octeon_pcie0_write_config,
 };
 
 static struct resource octeon_pcie0_mem_resource = {
@@ -1813,8 +1813,8 @@
 };
 
 static struct pci_ops octeon_pcie1_ops = {
-	.read = octeon_pcie1_read_config,
-	.write = octeon_pcie1_write_config,
+	.read	= octeon_pcie1_read_config,
+	.write	= octeon_pcie1_write_config,
 };
 
 static struct resource octeon_pcie1_mem_resource = {
@@ -1834,8 +1834,8 @@
 };
 
 static struct pci_ops octeon_dummy_ops = {
-	.read = octeon_dummy_read_config,
-	.write = octeon_dummy_write_config,
+	.read	= octeon_dummy_read_config,
+	.write	= octeon_dummy_write_config,
 };
 
 static struct resource octeon_dummy_mem_resource = {
diff --git a/arch/mips/sgi-ip22/ip22-gio.c b/arch/mips/sgi-ip22/ip22-gio.c
index 8f1b86d..cdf1876 100644
--- a/arch/mips/sgi-ip22/ip22-gio.c
+++ b/arch/mips/sgi-ip22/ip22-gio.c
@@ -152,28 +152,6 @@
 	return 0;
 }
 
-static int gio_device_suspend(struct device *dev, pm_message_t state)
-{
-	struct gio_device *gio_dev = to_gio_device(dev);
-	struct gio_driver *drv = to_gio_driver(dev->driver);
-	int error = 0;
-
-	if (dev->driver && drv->suspend)
-		error = drv->suspend(gio_dev, state);
-	return error;
-}
-
-static int gio_device_resume(struct device *dev)
-{
-	struct gio_device *gio_dev = to_gio_device(dev);
-	struct gio_driver *drv = to_gio_driver(dev->driver);
-	int error = 0;
-
-	if (dev->driver && drv->resume)
-		error = drv->resume(gio_dev);
-	return error;
-}
-
 static void gio_device_shutdown(struct device *dev)
 {
 	struct gio_device *gio_dev = to_gio_device(dev);
@@ -400,8 +378,6 @@
 	.match	   = gio_bus_match,
 	.probe	   = gio_device_probe,
 	.remove	   = gio_device_remove,
-	.suspend   = gio_device_suspend,
-	.resume	   = gio_device_resume,
 	.shutdown  = gio_device_shutdown,
 	.uevent	   = gio_device_uevent,
 };
diff --git a/arch/mips/sgi-ip27/ip27-reset.c b/arch/mips/sgi-ip27/ip27-reset.c
index ac37e54..e44a15d 100644
--- a/arch/mips/sgi-ip27/ip27-reset.c
+++ b/arch/mips/sgi-ip27/ip27-reset.c
@@ -8,6 +8,7 @@
  * Copyright (C) 1997, 1998, 1999, 2000, 06 by Ralf Baechle
  * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
  */
+#include <linux/compiler.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/timer.h>
@@ -25,9 +26,9 @@
 #include <asm/sn/gda.h>
 #include <asm/sn/sn0/hub.h>
 
-void machine_restart(char *command) __attribute__((noreturn));
-void machine_halt(void) __attribute__((noreturn));
-void machine_power_off(void) __attribute__((noreturn));
+void machine_restart(char *command) __noreturn;
+void machine_halt(void) __noreturn;
+void machine_power_off(void) __noreturn;
 
 #define noreturn while(1);				/* Silence gcc.	 */
 
diff --git a/arch/mips/sgi-ip32/ip32-reset.c b/arch/mips/sgi-ip32/ip32-reset.c
index 1f823da..44b3470 100644
--- a/arch/mips/sgi-ip32/ip32-reset.c
+++ b/arch/mips/sgi-ip32/ip32-reset.c
@@ -8,6 +8,7 @@
  * Copyright (C) 2003 Guido Guenther <agx@sigxcpu.org>
  */
 
+#include <linux/compiler.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
@@ -35,9 +36,9 @@
 static struct timer_list power_timer, blink_timer, debounce_timer;
 static int has_panicked, shuting_down;
 
-static void ip32_machine_restart(char *command) __attribute__((noreturn));
-static void ip32_machine_halt(void) __attribute__((noreturn));
-static void ip32_machine_power_off(void) __attribute__((noreturn));
+static void ip32_machine_restart(char *command) __noreturn;
+static void ip32_machine_halt(void) __noreturn;
+static void ip32_machine_power_off(void) __noreturn;
 
 static void ip32_machine_restart(char *cmd)
 {
diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c
index 1daa7ca..9acdc08 100644
--- a/drivers/irqchip/irq-mips-gic.c
+++ b/drivers/irqchip/irq-mips-gic.c
@@ -192,14 +192,6 @@
 	}
 }
 
-unsigned int gic_get_timer_pending(void)
-{
-	unsigned int vpe_pending;
-
-	vpe_pending = gic_read(GIC_REG(VPE_LOCAL, GIC_VPE_PEND));
-	return vpe_pending & GIC_VPE_PEND_TIMER_MSK;
-}
-
 static void gic_bind_eic_interrupt(int irq, int set)
 {
 	/* Convert irq vector # to hw int # */
diff --git a/include/linux/irqchip/mips-gic.h b/include/linux/irqchip/mips-gic.h
index 420f77b..e6a6aac 100644
--- a/include/linux/irqchip/mips-gic.h
+++ b/include/linux/irqchip/mips-gic.h
@@ -243,7 +243,6 @@
 extern void gic_send_ipi(unsigned int intr);
 extern unsigned int plat_ipi_call_int_xlate(unsigned int);
 extern unsigned int plat_ipi_resched_int_xlate(unsigned int);
-extern unsigned int gic_get_timer_pending(void);
 extern int gic_get_c0_compare_int(void);
 extern int gic_get_c0_perfcount_int(void);
 #endif /* __LINUX_IRQCHIP_MIPS_GIC_H */
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 89f6350..31891d9 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -185,4 +185,9 @@
 #define PR_MPX_ENABLE_MANAGEMENT  43
 #define PR_MPX_DISABLE_MANAGEMENT 44
 
+#define PR_SET_FP_MODE		45
+#define PR_GET_FP_MODE		46
+# define PR_FP_MODE_FR		(1 << 0)	/* 64b FP registers */
+# define PR_FP_MODE_FRE		(1 << 1)	/* 32b compatibility */
+
 #endif /* _LINUX_PRCTL_H */
diff --git a/kernel/sys.c b/kernel/sys.c
index ea9c881..667b2e6 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -97,6 +97,12 @@
 #ifndef MPX_DISABLE_MANAGEMENT
 # define MPX_DISABLE_MANAGEMENT(a)	(-EINVAL)
 #endif
+#ifndef GET_FP_MODE
+# define GET_FP_MODE(a)		(-EINVAL)
+#endif
+#ifndef SET_FP_MODE
+# define SET_FP_MODE(a,b)	(-EINVAL)
+#endif
 
 /*
  * this is where the system-wide overflow UID and GID are defined, for
@@ -2219,6 +2225,12 @@
 			return -EINVAL;
 		error = MPX_DISABLE_MANAGEMENT(me);
 		break;
+	case PR_SET_FP_MODE:
+		error = SET_FP_MODE(me, arg2);
+		break;
+	case PR_GET_FP_MODE:
+		error = GET_FP_MODE(me);
+		break;
 	default:
 		error = -EINVAL;
 		break;