Merge tag 'sh-for-4.8' of git://git.libc.org/linux-sh

Pull arch/sh updates from Rich Felker:
 "These changes improve device tree support (including builtin DTB), add
  support for the J-Core J2 processor, an open source synthesizable
  reimplementation of the SH-2 ISA, resolve a longstanding sigcontext
  ABI mismatch issue, and fix various bugs including nommu-specific
  issues and minor regressions introduced in 4.6.

  The J-Core arch support is included here but to be usable it needs
  drivers that are waiting on approval/inclusion from their subsystem
  maintainers"

* tag 'sh-for-4.8' of git://git.libc.org/linux-sh: (23 commits)
  sh: add device tree source for J2 FPGA on Mimas v2 board
  sh: add defconfig for J-Core J2
  sh: use common clock framework with device tree boards
  sh: system call wire up
  sh: Delete unnecessary checks before the function call "mempool_destroy"
  sh: do not perform IPI-based cache flush except on boards that need it
  sh: add SMP support for J2
  sh: SMP support for SH2 entry.S
  sh: add working futex atomic ops on userspace addresses for smp
  sh: add J2 atomics using the cas.l instruction
  sh: add AT_HWCAP flag for J-Core cas.l instruction
  sh: add support for J-Core J2 processor
  sh: fix build regression with CONFIG_OF && !CONFIG_OF_FLATTREE
  sh: allow clocksource drivers to register sched_clock backends
  sh: make heartbeat driver explicitly non-modular
  sh: make board-secureedge5410 explicitly non-modular
  sh: make mm/asids-debugfs explicitly non-modular
  sh: make time.c explicitly non-modular
  sh: fix futex/robust_list on nommu models
  sh: disable aliased page logic on NOMMU models
  ...
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 0d5f3a9..ee08695 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -38,6 +38,7 @@
 	select GENERIC_IDLE_POLL_SETUP
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_CMOS_UPDATE if SH_SH03 || SH_DREAMCAST
+	select GENERIC_SCHED_CLOCK
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
 	select HAVE_MOD_ARCH_SPECIFIC if DWARF_UNWINDER
@@ -45,6 +46,7 @@
 	select OLD_SIGSUSPEND
 	select OLD_SIGACTION
 	select HAVE_ARCH_AUDITSYSCALL
+	select HAVE_FUTEX_CMPXCHG if FUTEX
 	select HAVE_NMI
 	help
 	  The SuperH is a RISC processor targeted for use in embedded systems
@@ -184,6 +186,12 @@
 	select CPU_SH2
 	select UNCACHED_MAPPING
 
+config CPU_J2
+	bool
+	select CPU_SH2
+	select OF
+	select OF_EARLY_FLATTREE
+
 config CPU_SH3
 	bool
 	select CPU_HAS_INTEVT
@@ -250,6 +258,12 @@
 	select CPU_SH2
 	select SYS_SUPPORTS_SH_CMT
 
+config CPU_SUBTYPE_J2
+	bool "Support J2 processor"
+	select CPU_J2
+	select SYS_SUPPORTS_SMP
+	select GENERIC_CLOCKEVENTS_BROADCAST if SMP
+
 # SH-2A Processor Support
 
 config CPU_SUBTYPE_SH7201
@@ -739,6 +753,26 @@
 
 menu "Boot options"
 
+config USE_BUILTIN_DTB
+	bool "Use builtin DTB"
+	default n
+	depends on SH_DEVICE_TREE
+	help
+	  Link a device tree blob for particular hardware into the kernel,
+	  suppressing use of the DTB pointer provided by the bootloader.
+	  This option should only be used with legacy bootloaders that are
+	  not capable of providing a DTB to the kernel, or for experimental
+	  hardware without stable device tree bindings.
+
+config BUILTIN_DTB_SOURCE
+	string "Source file for builtin DTB"
+	default ""
+	depends on USE_BUILTIN_DTB
+	help
+	  Base name (without suffix, relative to arch/sh/boot/dts) for the
+	  a DTS file that will be used to produce the DTB linked into the
+	  kernel.
+
 config ZERO_PAGE_OFFSET
 	hex
 	default "0x00010000" if PAGE_SIZE_64KB || SH_RTS7751R2D || \
diff --git a/arch/sh/Makefile b/arch/sh/Makefile
index bf5b3f5..0047666 100644
--- a/arch/sh/Makefile
+++ b/arch/sh/Makefile
@@ -31,6 +31,7 @@
 endif
 
 cflags-$(CONFIG_CPU_SH2)		:= $(call cc-option,-m2,)
+cflags-$(CONFIG_CPU_J2)			:= $(call cc-option,-mj2,)
 cflags-$(CONFIG_CPU_SH2A)		+= $(call cc-option,-m2a,) \
 					   $(call cc-option,-m2a-nofpu,) \
 					   $(call cc-option,-m4-nofpu,)
@@ -130,6 +131,8 @@
 core-y				+= arch/sh/kernel/ arch/sh/mm/ arch/sh/boards/
 core-$(CONFIG_SH_FPU_EMU)	+= arch/sh/math-emu/
 
+core-$(CONFIG_USE_BUILTIN_DTB)	+= arch/sh/boot/dts/
+
 # Mach groups
 machdir-$(CONFIG_SOLUTION_ENGINE)		+= mach-se
 machdir-$(CONFIG_SH_HP6XX)			+= mach-hp6xx
diff --git a/arch/sh/boards/Kconfig b/arch/sh/boards/Kconfig
index e0db046..e9c2c42 100644
--- a/arch/sh/boards/Kconfig
+++ b/arch/sh/boards/Kconfig
@@ -11,6 +11,7 @@
 	select OF
 	select OF_EARLY_FLATTREE
 	select CLKSRC_OF
+	select COMMON_CLK
 	select GENERIC_CALIBRATE_DELAY
 	help
 	  Select Board Described by Device Tree to build a kernel that
diff --git a/arch/sh/boards/board-secureedge5410.c b/arch/sh/boards/board-secureedge5410.c
index 98b3620..97ec67f 100644
--- a/arch/sh/boards/board-secureedge5410.c
+++ b/arch/sh/boards/board-secureedge5410.c
@@ -14,7 +14,6 @@
 #include <linux/interrupt.h>
 #include <linux/timer.h>
 #include <linux/delay.h>
-#include <linux/module.h>
 #include <linux/sched.h>
 #include <asm/machvec.h>
 #include <mach/secureedge5410.h>
@@ -49,7 +48,7 @@
 				irq);
 	return 0;
 }
-module_init(eraseconfig_init);
+device_initcall(eraseconfig_init);
 
 /*
  * Initialize IRQ setting
diff --git a/arch/sh/boards/of-generic.c b/arch/sh/boards/of-generic.c
index 911ffb9..1fb6d57 100644
--- a/arch/sh/boards/of-generic.c
+++ b/arch/sh/boards/of-generic.c
@@ -124,13 +124,22 @@
 
 static void __init sh_of_setup(char **cmdline_p)
 {
+	struct device_node *root;
+
+#ifdef CONFIG_USE_BUILTIN_DTB
+	unflatten_and_copy_device_tree();
+#else
 	unflatten_device_tree();
+#endif
 
 	board_time_init = sh_of_time_init;
 
-	sh_mv.mv_name = of_flat_dt_get_machine_name();
-	if (!sh_mv.mv_name)
-		sh_mv.mv_name = "Unknown SH model";
+	sh_mv.mv_name = "Unknown SH model";
+	root = of_find_node_by_path("/");
+	if (root) {
+		of_property_read_string(root, "model", &sh_mv.mv_name);
+		of_node_put(root);
+	}
 
 	sh_of_smp_probe();
 }
diff --git a/arch/sh/boot/dts/Makefile b/arch/sh/boot/dts/Makefile
new file mode 100644
index 0000000..e5ce3a0
--- /dev/null
+++ b/arch/sh/boot/dts/Makefile
@@ -0,0 +1,3 @@
+obj-$(CONFIG_USE_BUILTIN_DTB) += $(patsubst "%",%,$(CONFIG_BUILTIN_DTB_SOURCE)).dtb.o
+
+clean-files := *.dtb.S
diff --git a/arch/sh/boot/dts/j2_mimas_v2.dts b/arch/sh/boot/dts/j2_mimas_v2.dts
new file mode 100755
index 0000000..880de75
--- /dev/null
+++ b/arch/sh/boot/dts/j2_mimas_v2.dts
@@ -0,0 +1,96 @@
+/dts-v1/;
+
+/ {
+	compatible = "jcore,j2-soc";
+	model = "J2 FPGA SoC on Mimas v2 board";
+
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	interrupt-parent = <&aic>;
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			device_type = "cpu";
+			compatible = "jcore,j2";
+			reg = <0>;
+			clock-frequency = <50000000>;
+			d-cache-size = <8192>;
+			i-cache-size = <8192>;
+			d-cache-block-size = <16>;
+			i-cache-block-size = <16>;
+		};
+	};
+
+	memory@10000000 {
+		device_type = "memory";
+		reg = <0x10000000 0x4000000>;
+	};
+
+	aliases {
+		serial0 = &uart0;
+		spi0 = &spi0;
+	};
+
+	chosen {
+		stdout-path = "serial0";
+	};
+
+	soc@abcd0000 {
+		compatible = "simple-bus";
+		ranges = <0 0xabcd0000 0x100000>;
+
+		#address-cells = <1>;
+		#size-cells = <1>;
+
+		aic: interrupt-controller@200 {
+			compatible = "jcore,aic1";
+			reg = <0x200 0x10>;
+			interrupt-controller;
+			#interrupt-cells = <1>;
+		};
+
+		cache-controller@c0 {
+			compatible = "jcore,cache";
+			reg = <0xc0 4>;
+		};
+
+		timer@200 {
+			compatible = "jcore,pit";
+			reg = <0x200 0x30>;
+			interrupts = <0x48>;
+		};
+
+		spi0: spi@40 {
+			compatible = "jcore,spi2";
+
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			spi-max-frequency = <25000000>;
+
+			reg = <0x40 0x8>;
+
+			sdcard@0 {
+				compatible = "mmc-spi-slot";
+				reg = <0>;
+				spi-max-frequency = <25000000>;
+				voltage-ranges = <3200 3400>;
+				mode = <0>;
+			};
+		};
+
+		uart0: serial@100 {
+			clock-frequency = <125000000>;
+			compatible = "xlnx,xps-uartlite-1.00.a";
+			current-speed = <19200>;
+			device_type = "serial";
+			interrupts = <0x12>;
+			port-number = <0>;
+			reg = <0x100 0x10>;
+		};
+	};
+};
diff --git a/arch/sh/configs/j2_defconfig b/arch/sh/configs/j2_defconfig
new file mode 100644
index 0000000..94d1eca
--- /dev/null
+++ b/arch/sh/configs/j2_defconfig
@@ -0,0 +1,40 @@
+CONFIG_SMP=y
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_CPU_SUBTYPE_J2=y
+CONFIG_MEMORY_START=0x10000000
+CONFIG_MEMORY_SIZE=0x04000000
+CONFIG_CPU_BIG_ENDIAN=y
+CONFIG_SH_DEVICE_TREE=y
+CONFIG_HZ_100=y
+CONFIG_CMDLINE_OVERWRITE=y
+CONFIG_CMDLINE="console=ttyUL0 earlycon"
+CONFIG_BINFMT_ELF_FDPIC=y
+CONFIG_BINFMT_FLAT=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_NETDEVICES=y
+CONFIG_SERIAL_UARTLITE=y
+CONFIG_SERIAL_UARTLITE_CONSOLE=y
+CONFIG_I2C=y
+CONFIG_SPI=y
+CONFIG_SPI_JCORE=y
+CONFIG_WATCHDOG=y
+CONFIG_MMC=y
+CONFIG_MMC_SPI=y
+CONFIG_CLKSRC_JCORE_PIT=y
+CONFIG_JCORE_AIC=y
+CONFIG_EXT4_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_FAT_DEFAULT_IOCHARSET="ascii"
+CONFIG_FAT_DEFAULT_UTF8=y
+CONFIG_NLS_DEFAULT="utf8"
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ASCII=y
+CONFIG_NLS_UTF8=y
diff --git a/arch/sh/drivers/heartbeat.c b/arch/sh/drivers/heartbeat.c
index 7efc9c3..49bace4 100644
--- a/arch/sh/drivers/heartbeat.c
+++ b/arch/sh/drivers/heartbeat.c
@@ -19,7 +19,6 @@
  * for more details.
  */
 #include <linux/init.h>
-#include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/sched.h>
 #include <linux/timer.h>
@@ -139,26 +138,11 @@
 	return mod_timer(&hd->timer, jiffies + 1);
 }
 
-static int heartbeat_drv_remove(struct platform_device *pdev)
-{
-	struct heartbeat_data *hd = platform_get_drvdata(pdev);
-
-	del_timer_sync(&hd->timer);
-	iounmap(hd->base);
-
-	platform_set_drvdata(pdev, NULL);
-
-	if (!pdev->dev.platform_data)
-		kfree(hd);
-
-	return 0;
-}
-
 static struct platform_driver heartbeat_driver = {
 	.probe		= heartbeat_drv_probe,
-	.remove		= heartbeat_drv_remove,
 	.driver		= {
-		.name	= DRV_NAME,
+		.name			= DRV_NAME,
+		.suppress_bind_attrs	= true,
 	},
 };
 
@@ -167,14 +151,4 @@
 	printk(KERN_NOTICE DRV_NAME ": version %s loaded\n", DRV_VERSION);
 	return platform_driver_register(&heartbeat_driver);
 }
-
-static void __exit heartbeat_exit(void)
-{
-	platform_driver_unregister(&heartbeat_driver);
-}
-module_init(heartbeat_init);
-module_exit(heartbeat_exit);
-
-MODULE_VERSION(DRV_VERSION);
-MODULE_AUTHOR("Paul Mundt");
-MODULE_LICENSE("GPL v2");
+device_initcall(heartbeat_init);
diff --git a/arch/sh/include/asm/atomic.h b/arch/sh/include/asm/atomic.h
index c399e1c..8a7bd80 100644
--- a/arch/sh/include/asm/atomic.h
+++ b/arch/sh/include/asm/atomic.h
@@ -1,6 +1,12 @@
 #ifndef __ASM_SH_ATOMIC_H
 #define __ASM_SH_ATOMIC_H
 
+#if defined(CONFIG_CPU_J2)
+
+#include <asm-generic/atomic.h>
+
+#else
+
 /*
  * Atomic operations that C can't guarantee us.  Useful for
  * resource counting etc..
@@ -63,4 +69,6 @@
 	return c;
 }
 
+#endif /* CONFIG_CPU_J2 */
+
 #endif /* __ASM_SH_ATOMIC_H */
diff --git a/arch/sh/include/asm/barrier.h b/arch/sh/include/asm/barrier.h
index 8a84e05..3c30b6e 100644
--- a/arch/sh/include/asm/barrier.h
+++ b/arch/sh/include/asm/barrier.h
@@ -29,6 +29,11 @@
 #define wmb()		mb()
 #define ctrl_barrier()	__icbi(PAGE_OFFSET)
 #else
+#if defined(CONFIG_CPU_J2) && defined(CONFIG_SMP)
+#define __smp_mb()	do { int tmp = 0; __asm__ __volatile__ ("cas.l %0,%0,@%1" : "+r"(tmp) : "z"(&tmp) : "memory", "t"); } while(0)
+#define __smp_rmb()	__smp_mb()
+#define __smp_wmb()	__smp_mb()
+#endif
 #define ctrl_barrier()	__asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop")
 #endif
 
diff --git a/arch/sh/include/asm/bitops-cas.h b/arch/sh/include/asm/bitops-cas.h
new file mode 100644
index 0000000..88f793c
--- /dev/null
+++ b/arch/sh/include/asm/bitops-cas.h
@@ -0,0 +1,93 @@
+#ifndef __ASM_SH_BITOPS_CAS_H
+#define __ASM_SH_BITOPS_CAS_H
+
+static inline unsigned __bo_cas(volatile unsigned *p, unsigned old, unsigned new)
+{
+	__asm__ __volatile__("cas.l %1,%0,@r0"
+		: "+r"(new)
+		: "r"(old), "z"(p)
+		: "t", "memory" );
+	return new;
+}
+
+static inline void set_bit(int nr, volatile void *addr)
+{
+	unsigned mask, old;
+	volatile unsigned *a = addr;
+
+	a += nr >> 5;
+	mask = 1U << (nr & 0x1f);
+
+	do old = *a;
+	while (__bo_cas(a, old, old|mask) != old);
+}
+
+static inline void clear_bit(int nr, volatile void *addr)
+{
+	unsigned mask, old;
+	volatile unsigned *a = addr;
+
+	a += nr >> 5;
+	mask = 1U << (nr & 0x1f);
+
+	do old = *a;
+	while (__bo_cas(a, old, old&~mask) != old);
+}
+
+static inline void change_bit(int nr, volatile void *addr)
+{
+	unsigned mask, old;
+	volatile unsigned *a = addr;
+
+	a += nr >> 5;
+	mask = 1U << (nr & 0x1f);
+
+	do old = *a;
+	while (__bo_cas(a, old, old^mask) != old);
+}
+
+static inline int test_and_set_bit(int nr, volatile void *addr)
+{
+	unsigned mask, old;
+	volatile unsigned *a = addr;
+
+	a += nr >> 5;
+	mask = 1U << (nr & 0x1f);
+
+	do old = *a;
+	while (__bo_cas(a, old, old|mask) != old);
+
+	return !!(old & mask);
+}
+
+static inline int test_and_clear_bit(int nr, volatile void *addr)
+{
+	unsigned mask, old;
+	volatile unsigned *a = addr;
+
+	a += nr >> 5;
+	mask = 1U << (nr & 0x1f);
+
+	do old = *a;
+	while (__bo_cas(a, old, old&~mask) != old);
+
+	return !!(old & mask);
+}
+
+static inline int test_and_change_bit(int nr, volatile void *addr)
+{
+	unsigned mask, old;
+	volatile unsigned *a = addr;
+
+	a += nr >> 5;
+	mask = 1U << (nr & 0x1f);
+
+	do old = *a;
+	while (__bo_cas(a, old, old^mask) != old);
+
+	return !!(old & mask);
+}
+
+#include <asm-generic/bitops/non-atomic.h>
+
+#endif /* __ASM_SH_BITOPS_CAS_H */
diff --git a/arch/sh/include/asm/bitops.h b/arch/sh/include/asm/bitops.h
index fc8e652..a8699d6 100644
--- a/arch/sh/include/asm/bitops.h
+++ b/arch/sh/include/asm/bitops.h
@@ -18,6 +18,8 @@
 #include <asm/bitops-op32.h>
 #elif defined(CONFIG_CPU_SH4A)
 #include <asm/bitops-llsc.h>
+#elif defined(CONFIG_CPU_J2) && defined(CONFIG_SMP)
+#include <asm/bitops-cas.h>
 #else
 #include <asm-generic/bitops/atomic.h>
 #include <asm-generic/bitops/non-atomic.h>
diff --git a/arch/sh/include/asm/cmpxchg-cas.h b/arch/sh/include/asm/cmpxchg-cas.h
new file mode 100644
index 0000000..d0d8664
--- /dev/null
+++ b/arch/sh/include/asm/cmpxchg-cas.h
@@ -0,0 +1,24 @@
+#ifndef __ASM_SH_CMPXCHG_CAS_H
+#define __ASM_SH_CMPXCHG_CAS_H
+
+static inline unsigned long
+__cmpxchg_u32(volatile u32 *m, unsigned long old, unsigned long new)
+{
+	__asm__ __volatile__("cas.l %1,%0,@r0"
+		: "+r"(new)
+		: "r"(old), "z"(m)
+		: "t", "memory" );
+	return new;
+}
+
+static inline unsigned long xchg_u32(volatile u32 *m, unsigned long val)
+{
+	unsigned long old;
+	do old = *m;
+	while (__cmpxchg_u32(m, old, val) != old);
+	return old;
+}
+
+#include <asm/cmpxchg-xchg.h>
+
+#endif /* __ASM_SH_CMPXCHG_CAS_H */
diff --git a/arch/sh/include/asm/cmpxchg-xchg.h b/arch/sh/include/asm/cmpxchg-xchg.h
index 7219719..1e881f5 100644
--- a/arch/sh/include/asm/cmpxchg-xchg.h
+++ b/arch/sh/include/asm/cmpxchg-xchg.h
@@ -21,7 +21,7 @@
 	int off = (unsigned long)ptr % sizeof(u32);
 	volatile u32 *p = ptr - off;
 #ifdef __BIG_ENDIAN
-	int bitoff = (sizeof(u32) - 1 - off) * BITS_PER_BYTE;
+	int bitoff = (sizeof(u32) - size - off) * BITS_PER_BYTE;
 #else
 	int bitoff = off * BITS_PER_BYTE;
 #endif
diff --git a/arch/sh/include/asm/cmpxchg.h b/arch/sh/include/asm/cmpxchg.h
index 5225916..3dfe046 100644
--- a/arch/sh/include/asm/cmpxchg.h
+++ b/arch/sh/include/asm/cmpxchg.h
@@ -13,6 +13,8 @@
 #include <asm/cmpxchg-grb.h>
 #elif defined(CONFIG_CPU_SH4A)
 #include <asm/cmpxchg-llsc.h>
+#elif defined(CONFIG_CPU_J2) && defined(CONFIG_SMP)
+#include <asm/cmpxchg-cas.h>
 #else
 #include <asm/cmpxchg-irq.h>
 #endif
diff --git a/arch/sh/include/asm/futex-cas.h b/arch/sh/include/asm/futex-cas.h
new file mode 100644
index 0000000..267cb7a
--- /dev/null
+++ b/arch/sh/include/asm/futex-cas.h
@@ -0,0 +1,34 @@
+#ifndef __ASM_SH_FUTEX_CAS_H
+#define __ASM_SH_FUTEX_CAS_H
+
+static inline int atomic_futex_op_cmpxchg_inatomic(u32 *uval,
+						   u32 __user *uaddr,
+						   u32 oldval, u32 newval)
+{
+	int err = 0;
+	__asm__ __volatile__(
+		"1:\n\t"
+		"cas.l	%2, %1, @r0\n"
+		"2:\n\t"
+#ifdef CONFIG_MMU
+		".section	.fixup,\"ax\"\n"
+		"3:\n\t"
+		"mov.l	4f, %0\n\t"
+		"jmp	@%0\n\t"
+		" mov	%3, %0\n\t"
+		".balign	4\n"
+		"4:	.long	2b\n\t"
+		".previous\n"
+		".section	__ex_table,\"a\"\n\t"
+		".long	1b, 3b\n\t"
+		".previous"
+#endif
+		:"+r" (err), "+r" (newval)
+		:"r" (oldval), "i" (-EFAULT), "z" (uaddr)
+		:"t", "memory");
+	if (err) return err;
+	*uval = newval;
+	return 0;
+}
+
+#endif /* __ASM_SH_FUTEX_CAS_H */
diff --git a/arch/sh/include/asm/futex-irq.h b/arch/sh/include/asm/futex-irq.h
index 63d3312..ab01dbe 100644
--- a/arch/sh/include/asm/futex-irq.h
+++ b/arch/sh/include/asm/futex-irq.h
@@ -1,92 +1,6 @@
 #ifndef __ASM_SH_FUTEX_IRQ_H
 #define __ASM_SH_FUTEX_IRQ_H
 
-
-static inline int atomic_futex_op_xchg_set(int oparg, u32 __user *uaddr,
-					   int *oldval)
-{
-	unsigned long flags;
-	int ret;
-
-	local_irq_save(flags);
-
-	ret = get_user(*oldval, uaddr);
-	if (!ret)
-		ret = put_user(oparg, uaddr);
-
-	local_irq_restore(flags);
-
-	return ret;
-}
-
-static inline int atomic_futex_op_xchg_add(int oparg, u32 __user *uaddr,
-					   int *oldval)
-{
-	unsigned long flags;
-	int ret;
-
-	local_irq_save(flags);
-
-	ret = get_user(*oldval, uaddr);
-	if (!ret)
-		ret = put_user(*oldval + oparg, uaddr);
-
-	local_irq_restore(flags);
-
-	return ret;
-}
-
-static inline int atomic_futex_op_xchg_or(int oparg, u32 __user *uaddr,
-					  int *oldval)
-{
-	unsigned long flags;
-	int ret;
-
-	local_irq_save(flags);
-
-	ret = get_user(*oldval, uaddr);
-	if (!ret)
-		ret = put_user(*oldval | oparg, uaddr);
-
-	local_irq_restore(flags);
-
-	return ret;
-}
-
-static inline int atomic_futex_op_xchg_and(int oparg, u32 __user *uaddr,
-					   int *oldval)
-{
-	unsigned long flags;
-	int ret;
-
-	local_irq_save(flags);
-
-	ret = get_user(*oldval, uaddr);
-	if (!ret)
-		ret = put_user(*oldval & oparg, uaddr);
-
-	local_irq_restore(flags);
-
-	return ret;
-}
-
-static inline int atomic_futex_op_xchg_xor(int oparg, u32 __user *uaddr,
-					   int *oldval)
-{
-	unsigned long flags;
-	int ret;
-
-	local_irq_save(flags);
-
-	ret = get_user(*oldval, uaddr);
-	if (!ret)
-		ret = put_user(*oldval ^ oparg, uaddr);
-
-	local_irq_restore(flags);
-
-	return ret;
-}
-
 static inline int atomic_futex_op_cmpxchg_inatomic(u32 *uval,
 						   u32 __user *uaddr,
 						   u32 oldval, u32 newval)
diff --git a/arch/sh/include/asm/futex-llsc.h b/arch/sh/include/asm/futex-llsc.h
new file mode 100644
index 0000000..23591703
--- /dev/null
+++ b/arch/sh/include/asm/futex-llsc.h
@@ -0,0 +1,41 @@
+#ifndef __ASM_SH_FUTEX_LLSC_H
+#define __ASM_SH_FUTEX_LLSC_H
+
+static inline int atomic_futex_op_cmpxchg_inatomic(u32 *uval,
+						   u32 __user *uaddr,
+						   u32 oldval, u32 newval)
+{
+	int err = 0;
+	__asm__ __volatile__(
+		"synco\n"
+		"1:\n\t"
+		"movli.l	@%2, r0\n\t"
+		"mov	r0, %1\n\t"
+		"cmp/eq	%1, %4\n\t"
+		"bf	2f\n\t"
+		"mov	%5, r0\n\t"
+		"movco.l	r0, @%2\n\t"
+		"bf	1b\n"
+		"2:\n\t"
+		"synco\n\t"
+#ifdef CONFIG_MMU
+		".section	.fixup,\"ax\"\n"
+		"3:\n\t"
+		"mov.l	4f, %0\n\t"
+		"jmp	@%0\n\t"
+		" mov	%3, %0\n\t"
+		".balign	4\n"
+		"4:	.long	2b\n\t"
+		".previous\n"
+		".section	__ex_table,\"a\"\n\t"
+		".long	1b, 3b\n\t"
+		".previous"
+#endif
+		:"+r" (err), "=&r" (*uval)
+		:"r" (uaddr), "i" (-EFAULT), "r" (oldval), "r" (newval)
+		:"t", "memory", "r0");
+	if (err) return err;
+	return 0;
+}
+
+#endif /* __ASM_SH_FUTEX_LLSC_H */
diff --git a/arch/sh/include/asm/futex.h b/arch/sh/include/asm/futex.h
index 7be39a6..d007874 100644
--- a/arch/sh/include/asm/futex.h
+++ b/arch/sh/include/asm/futex.h
@@ -7,62 +7,15 @@
 #include <linux/uaccess.h>
 #include <asm/errno.h>
 
-/* XXX: UP variants, fix for SH-4A and SMP.. */
+#if !defined(CONFIG_SMP)
 #include <asm/futex-irq.h>
-
-static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
-{
-	int op = (encoded_op >> 28) & 7;
-	int cmp = (encoded_op >> 24) & 15;
-	int oparg = (encoded_op << 8) >> 20;
-	int cmparg = (encoded_op << 20) >> 20;
-	int oldval = 0, ret;
-
-	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-		oparg = 1 << oparg;
-
-	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
-		return -EFAULT;
-
-	pagefault_disable();
-
-	switch (op) {
-	case FUTEX_OP_SET:
-		ret = atomic_futex_op_xchg_set(oparg, uaddr, &oldval);
-		break;
-	case FUTEX_OP_ADD:
-		ret = atomic_futex_op_xchg_add(oparg, uaddr, &oldval);
-		break;
-	case FUTEX_OP_OR:
-		ret = atomic_futex_op_xchg_or(oparg, uaddr, &oldval);
-		break;
-	case FUTEX_OP_ANDN:
-		ret = atomic_futex_op_xchg_and(~oparg, uaddr, &oldval);
-		break;
-	case FUTEX_OP_XOR:
-		ret = atomic_futex_op_xchg_xor(oparg, uaddr, &oldval);
-		break;
-	default:
-		ret = -ENOSYS;
-		break;
-	}
-
-	pagefault_enable();
-
-	if (!ret) {
-		switch (cmp) {
-		case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
-		case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
-		case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
-		case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
-		case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
-		case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
-		default: ret = -ENOSYS;
-		}
-	}
-
-	return ret;
-}
+#elif defined(CONFIG_CPU_J2)
+#include <asm/futex-cas.h>
+#elif defined(CONFIG_CPU_SH4A)
+#include <asm/futex-llsc.h>
+#else
+#error SMP not supported on this configuration.
+#endif
 
 static inline int
 futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
@@ -74,5 +27,73 @@
 	return atomic_futex_op_cmpxchg_inatomic(uval, uaddr, oldval, newval);
 }
 
+static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
+{
+	int op = (encoded_op >> 28) & 7;
+	int cmp = (encoded_op >> 24) & 15;
+	u32 oparg = (encoded_op << 8) >> 20;
+	u32 cmparg = (encoded_op << 20) >> 20;
+	u32 oldval, newval, prev;
+	int ret;
+
+	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
+		oparg = 1 << oparg;
+
+	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
+		return -EFAULT;
+
+	pagefault_disable();
+
+	do {
+		if (op == FUTEX_OP_SET)
+			ret = oldval = 0;
+		else
+			ret = get_user(oldval, uaddr);
+
+		if (ret) break;
+
+		switch (op) {
+		case FUTEX_OP_SET:
+			newval = oparg;
+			break;
+		case FUTEX_OP_ADD:
+			newval = oldval + oparg;
+			break;
+		case FUTEX_OP_OR:
+			newval = oldval | oparg;
+			break;
+		case FUTEX_OP_ANDN:
+			newval = oldval & ~oparg;
+			break;
+		case FUTEX_OP_XOR:
+			newval = oldval ^ oparg;
+			break;
+		default:
+			ret = -ENOSYS;
+			break;
+		}
+
+		if (ret) break;
+
+		ret = futex_atomic_cmpxchg_inatomic(&prev, uaddr, oldval, newval);
+	} while (!ret && prev != oldval);
+
+	pagefault_enable();
+
+	if (!ret) {
+		switch (cmp) {
+		case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
+		case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
+		case FUTEX_OP_CMP_LT: ret = ((int)oldval < (int)cmparg); break;
+		case FUTEX_OP_CMP_GE: ret = ((int)oldval >= (int)cmparg); break;
+		case FUTEX_OP_CMP_LE: ret = ((int)oldval <= (int)cmparg); break;
+		case FUTEX_OP_CMP_GT: ret = ((int)oldval > (int)cmparg); break;
+		default: ret = -ENOSYS;
+		}
+	}
+
+	return ret;
+}
+
 #endif /* __KERNEL__ */
 #endif /* __ASM_SH_FUTEX_H */
diff --git a/arch/sh/include/asm/processor.h b/arch/sh/include/asm/processor.h
index 1506897..f9a0994 100644
--- a/arch/sh/include/asm/processor.h
+++ b/arch/sh/include/asm/processor.h
@@ -15,7 +15,7 @@
  */
 enum cpu_type {
 	/* SH-2 types */
-	CPU_SH7619,
+	CPU_SH7619, CPU_J2,
 
 	/* SH-2A types */
 	CPU_SH7201, CPU_SH7203, CPU_SH7206, CPU_SH7263, CPU_SH7264, CPU_SH7269,
diff --git a/arch/sh/include/asm/spinlock-cas.h b/arch/sh/include/asm/spinlock-cas.h
new file mode 100644
index 0000000..c46e8cc
--- /dev/null
+++ b/arch/sh/include/asm/spinlock-cas.h
@@ -0,0 +1,117 @@
+/*
+ * include/asm-sh/spinlock-cas.h
+ *
+ * Copyright (C) 2015 SEI
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#ifndef __ASM_SH_SPINLOCK_CAS_H
+#define __ASM_SH_SPINLOCK_CAS_H
+
+#include <asm/barrier.h>
+#include <asm/processor.h>
+
+static inline unsigned __sl_cas(volatile unsigned *p, unsigned old, unsigned new)
+{
+	__asm__ __volatile__("cas.l %1,%0,@r0"
+		: "+r"(new)
+		: "r"(old), "z"(p)
+		: "t", "memory" );
+	return new;
+}
+
+/*
+ * Your basic SMP spinlocks, allowing only a single CPU anywhere
+ */
+
+#define arch_spin_is_locked(x)		((x)->lock <= 0)
+#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
+
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+	smp_cond_load_acquire(&lock->lock, VAL > 0);
+}
+
+static inline void arch_spin_lock(arch_spinlock_t *lock)
+{
+	while (!__sl_cas(&lock->lock, 1, 0));
+}
+
+static inline void arch_spin_unlock(arch_spinlock_t *lock)
+{
+	__sl_cas(&lock->lock, 0, 1);
+}
+
+static inline int arch_spin_trylock(arch_spinlock_t *lock)
+{
+	return __sl_cas(&lock->lock, 1, 0);
+}
+
+/*
+ * Read-write spinlocks, allowing multiple readers but only one writer.
+ *
+ * NOTE! it is quite common to have readers in interrupts but no interrupt
+ * writers. For those circumstances we can "mix" irq-safe locks - any writer
+ * needs to get a irq-safe write-lock, but readers can get non-irqsafe
+ * read-locks.
+ */
+
+/**
+ * read_can_lock - would read_trylock() succeed?
+ * @lock: the rwlock in question.
+ */
+#define arch_read_can_lock(x)	((x)->lock > 0)
+
+/**
+ * write_can_lock - would write_trylock() succeed?
+ * @lock: the rwlock in question.
+ */
+#define arch_write_can_lock(x)	((x)->lock == RW_LOCK_BIAS)
+
+static inline void arch_read_lock(arch_rwlock_t *rw)
+{
+	unsigned old;
+	do old = rw->lock;
+	while (!old || __sl_cas(&rw->lock, old, old-1) != old);
+}
+
+static inline void arch_read_unlock(arch_rwlock_t *rw)
+{
+	unsigned old;
+	do old = rw->lock;
+	while (__sl_cas(&rw->lock, old, old+1) != old);
+}
+
+static inline void arch_write_lock(arch_rwlock_t *rw)
+{
+	while (__sl_cas(&rw->lock, RW_LOCK_BIAS, 0) != RW_LOCK_BIAS);
+}
+
+static inline void arch_write_unlock(arch_rwlock_t *rw)
+{
+	__sl_cas(&rw->lock, 0, RW_LOCK_BIAS);
+}
+
+static inline int arch_read_trylock(arch_rwlock_t *rw)
+{
+	unsigned old;
+	do old = rw->lock;
+	while (old && __sl_cas(&rw->lock, old, old-1) != old);
+	return !!old;
+}
+
+static inline int arch_write_trylock(arch_rwlock_t *rw)
+{
+	return __sl_cas(&rw->lock, RW_LOCK_BIAS, 0) == RW_LOCK_BIAS;
+}
+
+#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
+#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
+
+#define arch_spin_relax(lock)	cpu_relax()
+#define arch_read_relax(lock)	cpu_relax()
+#define arch_write_relax(lock)	cpu_relax()
+
+#endif /* __ASM_SH_SPINLOCK_CAS_H */
diff --git a/arch/sh/include/asm/spinlock-llsc.h b/arch/sh/include/asm/spinlock-llsc.h
new file mode 100644
index 0000000..cec7814
--- /dev/null
+++ b/arch/sh/include/asm/spinlock-llsc.h
@@ -0,0 +1,224 @@
+/*
+ * include/asm-sh/spinlock-llsc.h
+ *
+ * Copyright (C) 2002, 2003 Paul Mundt
+ * Copyright (C) 2006, 2007 Akio Idehara
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#ifndef __ASM_SH_SPINLOCK_LLSC_H
+#define __ASM_SH_SPINLOCK_LLSC_H
+
+#include <asm/barrier.h>
+#include <asm/processor.h>
+
+/*
+ * Your basic SMP spinlocks, allowing only a single CPU anywhere
+ */
+
+#define arch_spin_is_locked(x)		((x)->lock <= 0)
+#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
+
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+	smp_cond_load_acquire(&lock->lock, VAL > 0);
+}
+
+/*
+ * Simple spin lock operations.  There are two variants, one clears IRQ's
+ * on the local processor, one does not.
+ *
+ * We make no fairness assumptions.  They have a cost.
+ */
+static inline void arch_spin_lock(arch_spinlock_t *lock)
+{
+	unsigned long tmp;
+	unsigned long oldval;
+
+	__asm__ __volatile__ (
+		"1:						\n\t"
+		"movli.l	@%2, %0	! arch_spin_lock	\n\t"
+		"mov		%0, %1				\n\t"
+		"mov		#0, %0				\n\t"
+		"movco.l	%0, @%2				\n\t"
+		"bf		1b				\n\t"
+		"cmp/pl		%1				\n\t"
+		"bf		1b				\n\t"
+		: "=&z" (tmp), "=&r" (oldval)
+		: "r" (&lock->lock)
+		: "t", "memory"
+	);
+}
+
+static inline void arch_spin_unlock(arch_spinlock_t *lock)
+{
+	unsigned long tmp;
+
+	__asm__ __volatile__ (
+		"mov		#1, %0 ! arch_spin_unlock	\n\t"
+		"mov.l		%0, @%1				\n\t"
+		: "=&z" (tmp)
+		: "r" (&lock->lock)
+		: "t", "memory"
+	);
+}
+
+static inline int arch_spin_trylock(arch_spinlock_t *lock)
+{
+	unsigned long tmp, oldval;
+
+	__asm__ __volatile__ (
+		"1:						\n\t"
+		"movli.l	@%2, %0	! arch_spin_trylock	\n\t"
+		"mov		%0, %1				\n\t"
+		"mov		#0, %0				\n\t"
+		"movco.l	%0, @%2				\n\t"
+		"bf		1b				\n\t"
+		"synco						\n\t"
+		: "=&z" (tmp), "=&r" (oldval)
+		: "r" (&lock->lock)
+		: "t", "memory"
+	);
+
+	return oldval;
+}
+
+/*
+ * Read-write spinlocks, allowing multiple readers but only one writer.
+ *
+ * NOTE! it is quite common to have readers in interrupts but no interrupt
+ * writers. For those circumstances we can "mix" irq-safe locks - any writer
+ * needs to get a irq-safe write-lock, but readers can get non-irqsafe
+ * read-locks.
+ */
+
+/**
+ * read_can_lock - would read_trylock() succeed?
+ * @lock: the rwlock in question.
+ */
+#define arch_read_can_lock(x)	((x)->lock > 0)
+
+/**
+ * write_can_lock - would write_trylock() succeed?
+ * @lock: the rwlock in question.
+ */
+#define arch_write_can_lock(x)	((x)->lock == RW_LOCK_BIAS)
+
+static inline void arch_read_lock(arch_rwlock_t *rw)
+{
+	unsigned long tmp;
+
+	__asm__ __volatile__ (
+		"1:						\n\t"
+		"movli.l	@%1, %0	! arch_read_lock	\n\t"
+		"cmp/pl		%0				\n\t"
+		"bf		1b				\n\t"
+		"add		#-1, %0				\n\t"
+		"movco.l	%0, @%1				\n\t"
+		"bf		1b				\n\t"
+		: "=&z" (tmp)
+		: "r" (&rw->lock)
+		: "t", "memory"
+	);
+}
+
+static inline void arch_read_unlock(arch_rwlock_t *rw)
+{
+	unsigned long tmp;
+
+	__asm__ __volatile__ (
+		"1:						\n\t"
+		"movli.l	@%1, %0	! arch_read_unlock	\n\t"
+		"add		#1, %0				\n\t"
+		"movco.l	%0, @%1				\n\t"
+		"bf		1b				\n\t"
+		: "=&z" (tmp)
+		: "r" (&rw->lock)
+		: "t", "memory"
+	);
+}
+
+static inline void arch_write_lock(arch_rwlock_t *rw)
+{
+	unsigned long tmp;
+
+	__asm__ __volatile__ (
+		"1:						\n\t"
+		"movli.l	@%1, %0	! arch_write_lock	\n\t"
+		"cmp/hs		%2, %0				\n\t"
+		"bf		1b				\n\t"
+		"sub		%2, %0				\n\t"
+		"movco.l	%0, @%1				\n\t"
+		"bf		1b				\n\t"
+		: "=&z" (tmp)
+		: "r" (&rw->lock), "r" (RW_LOCK_BIAS)
+		: "t", "memory"
+	);
+}
+
+static inline void arch_write_unlock(arch_rwlock_t *rw)
+{
+	__asm__ __volatile__ (
+		"mov.l		%1, @%0 ! arch_write_unlock	\n\t"
+		:
+		: "r" (&rw->lock), "r" (RW_LOCK_BIAS)
+		: "t", "memory"
+	);
+}
+
+static inline int arch_read_trylock(arch_rwlock_t *rw)
+{
+	unsigned long tmp, oldval;
+
+	__asm__ __volatile__ (
+		"1:						\n\t"
+		"movli.l	@%2, %0	! arch_read_trylock	\n\t"
+		"mov		%0, %1				\n\t"
+		"cmp/pl		%0				\n\t"
+		"bf		2f				\n\t"
+		"add		#-1, %0				\n\t"
+		"movco.l	%0, @%2				\n\t"
+		"bf		1b				\n\t"
+		"2:						\n\t"
+		"synco						\n\t"
+		: "=&z" (tmp), "=&r" (oldval)
+		: "r" (&rw->lock)
+		: "t", "memory"
+	);
+
+	return (oldval > 0);
+}
+
+static inline int arch_write_trylock(arch_rwlock_t *rw)
+{
+	unsigned long tmp, oldval;
+
+	__asm__ __volatile__ (
+		"1:						\n\t"
+		"movli.l	@%2, %0	! arch_write_trylock	\n\t"
+		"mov		%0, %1				\n\t"
+		"cmp/hs		%3, %0				\n\t"
+		"bf		2f				\n\t"
+		"sub		%3, %0				\n\t"
+		"2:						\n\t"
+		"movco.l	%0, @%2				\n\t"
+		"bf		1b				\n\t"
+		"synco						\n\t"
+		: "=&z" (tmp), "=&r" (oldval)
+		: "r" (&rw->lock), "r" (RW_LOCK_BIAS)
+		: "t", "memory"
+	);
+
+	return (oldval > (RW_LOCK_BIAS - 1));
+}
+
+#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
+#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
+
+#define arch_spin_relax(lock)	cpu_relax()
+#define arch_read_relax(lock)	cpu_relax()
+#define arch_write_relax(lock)	cpu_relax()
+
+#endif /* __ASM_SH_SPINLOCK_LLSC_H */
diff --git a/arch/sh/include/asm/spinlock.h b/arch/sh/include/asm/spinlock.h
index 416834b..c2c61ea 100644
--- a/arch/sh/include/asm/spinlock.h
+++ b/arch/sh/include/asm/spinlock.h
@@ -11,222 +11,12 @@
 #ifndef __ASM_SH_SPINLOCK_H
 #define __ASM_SH_SPINLOCK_H
 
-/*
- * The only locking implemented here uses SH-4A opcodes. For others,
- * split this out as per atomic-*.h.
- */
-#ifndef CONFIG_CPU_SH4A
-#error "Need movli.l/movco.l for spinlocks"
+#if defined(CONFIG_CPU_SH4A)
+#include <asm/spinlock-llsc.h>
+#elif defined(CONFIG_CPU_J2)
+#include <asm/spinlock-cas.h>
+#else
+#error "The configured cpu type does not support spinlocks"
 #endif
 
-#include <asm/barrier.h>
-#include <asm/processor.h>
-
-/*
- * Your basic SMP spinlocks, allowing only a single CPU anywhere
- */
-
-#define arch_spin_is_locked(x)		((x)->lock <= 0)
-#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
-
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
-	smp_cond_load_acquire(&lock->lock, VAL > 0);
-}
-
-/*
- * Simple spin lock operations.  There are two variants, one clears IRQ's
- * on the local processor, one does not.
- *
- * We make no fairness assumptions.  They have a cost.
- */
-static inline void arch_spin_lock(arch_spinlock_t *lock)
-{
-	unsigned long tmp;
-	unsigned long oldval;
-
-	__asm__ __volatile__ (
-		"1:						\n\t"
-		"movli.l	@%2, %0	! arch_spin_lock	\n\t"
-		"mov		%0, %1				\n\t"
-		"mov		#0, %0				\n\t"
-		"movco.l	%0, @%2				\n\t"
-		"bf		1b				\n\t"
-		"cmp/pl		%1				\n\t"
-		"bf		1b				\n\t"
-		: "=&z" (tmp), "=&r" (oldval)
-		: "r" (&lock->lock)
-		: "t", "memory"
-	);
-}
-
-static inline void arch_spin_unlock(arch_spinlock_t *lock)
-{
-	unsigned long tmp;
-
-	__asm__ __volatile__ (
-		"mov		#1, %0 ! arch_spin_unlock	\n\t"
-		"mov.l		%0, @%1				\n\t"
-		: "=&z" (tmp)
-		: "r" (&lock->lock)
-		: "t", "memory"
-	);
-}
-
-static inline int arch_spin_trylock(arch_spinlock_t *lock)
-{
-	unsigned long tmp, oldval;
-
-	__asm__ __volatile__ (
-		"1:						\n\t"
-		"movli.l	@%2, %0	! arch_spin_trylock	\n\t"
-		"mov		%0, %1				\n\t"
-		"mov		#0, %0				\n\t"
-		"movco.l	%0, @%2				\n\t"
-		"bf		1b				\n\t"
-		"synco						\n\t"
-		: "=&z" (tmp), "=&r" (oldval)
-		: "r" (&lock->lock)
-		: "t", "memory"
-	);
-
-	return oldval;
-}
-
-/*
- * Read-write spinlocks, allowing multiple readers but only one writer.
- *
- * NOTE! it is quite common to have readers in interrupts but no interrupt
- * writers. For those circumstances we can "mix" irq-safe locks - any writer
- * needs to get a irq-safe write-lock, but readers can get non-irqsafe
- * read-locks.
- */
-
-/**
- * read_can_lock - would read_trylock() succeed?
- * @lock: the rwlock in question.
- */
-#define arch_read_can_lock(x)	((x)->lock > 0)
-
-/**
- * write_can_lock - would write_trylock() succeed?
- * @lock: the rwlock in question.
- */
-#define arch_write_can_lock(x)	((x)->lock == RW_LOCK_BIAS)
-
-static inline void arch_read_lock(arch_rwlock_t *rw)
-{
-	unsigned long tmp;
-
-	__asm__ __volatile__ (
-		"1:						\n\t"
-		"movli.l	@%1, %0	! arch_read_lock	\n\t"
-		"cmp/pl		%0				\n\t"
-		"bf		1b				\n\t"
-		"add		#-1, %0				\n\t"
-		"movco.l	%0, @%1				\n\t"
-		"bf		1b				\n\t"
-		: "=&z" (tmp)
-		: "r" (&rw->lock)
-		: "t", "memory"
-	);
-}
-
-static inline void arch_read_unlock(arch_rwlock_t *rw)
-{
-	unsigned long tmp;
-
-	__asm__ __volatile__ (
-		"1:						\n\t"
-		"movli.l	@%1, %0	! arch_read_unlock	\n\t"
-		"add		#1, %0				\n\t"
-		"movco.l	%0, @%1				\n\t"
-		"bf		1b				\n\t"
-		: "=&z" (tmp)
-		: "r" (&rw->lock)
-		: "t", "memory"
-	);
-}
-
-static inline void arch_write_lock(arch_rwlock_t *rw)
-{
-	unsigned long tmp;
-
-	__asm__ __volatile__ (
-		"1:						\n\t"
-		"movli.l	@%1, %0	! arch_write_lock	\n\t"
-		"cmp/hs		%2, %0				\n\t"
-		"bf		1b				\n\t"
-		"sub		%2, %0				\n\t"
-		"movco.l	%0, @%1				\n\t"
-		"bf		1b				\n\t"
-		: "=&z" (tmp)
-		: "r" (&rw->lock), "r" (RW_LOCK_BIAS)
-		: "t", "memory"
-	);
-}
-
-static inline void arch_write_unlock(arch_rwlock_t *rw)
-{
-	__asm__ __volatile__ (
-		"mov.l		%1, @%0 ! arch_write_unlock	\n\t"
-		:
-		: "r" (&rw->lock), "r" (RW_LOCK_BIAS)
-		: "t", "memory"
-	);
-}
-
-static inline int arch_read_trylock(arch_rwlock_t *rw)
-{
-	unsigned long tmp, oldval;
-
-	__asm__ __volatile__ (
-		"1:						\n\t"
-		"movli.l	@%2, %0	! arch_read_trylock	\n\t"
-		"mov		%0, %1				\n\t"
-		"cmp/pl		%0				\n\t"
-		"bf		2f				\n\t"
-		"add		#-1, %0				\n\t"
-		"movco.l	%0, @%2				\n\t"
-		"bf		1b				\n\t"
-		"2:						\n\t"
-		"synco						\n\t"
-		: "=&z" (tmp), "=&r" (oldval)
-		: "r" (&rw->lock)
-		: "t", "memory"
-	);
-
-	return (oldval > 0);
-}
-
-static inline int arch_write_trylock(arch_rwlock_t *rw)
-{
-	unsigned long tmp, oldval;
-
-	__asm__ __volatile__ (
-		"1:						\n\t"
-		"movli.l	@%2, %0	! arch_write_trylock	\n\t"
-		"mov		%0, %1				\n\t"
-		"cmp/hs		%3, %0				\n\t"
-		"bf		2f				\n\t"
-		"sub		%3, %0				\n\t"
-		"2:						\n\t"
-		"movco.l	%0, @%2				\n\t"
-		"bf		1b				\n\t"
-		"synco						\n\t"
-		: "=&z" (tmp), "=&r" (oldval)
-		: "r" (&rw->lock), "r" (RW_LOCK_BIAS)
-		: "t", "memory"
-	);
-
-	return (oldval > (RW_LOCK_BIAS - 1));
-}
-
-#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
-#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
-
-#define arch_spin_relax(lock)	cpu_relax()
-#define arch_read_relax(lock)	cpu_relax()
-#define arch_write_relax(lock)	cpu_relax()
-
 #endif /* __ASM_SH_SPINLOCK_H */
diff --git a/arch/sh/include/uapi/asm/cpu-features.h b/arch/sh/include/uapi/asm/cpu-features.h
index 694abe4..2f1bc85 100644
--- a/arch/sh/include/uapi/asm/cpu-features.h
+++ b/arch/sh/include/uapi/asm/cpu-features.h
@@ -22,5 +22,6 @@
 #define CPU_HAS_L2_CACHE	0x0080	/* Secondary cache / URAM */
 #define CPU_HAS_OP32		0x0100	/* 32-bit instruction support */
 #define CPU_HAS_PTEAEX		0x0200	/* PTE ASID Extension support */
+#define CPU_HAS_CAS_L		0x0400	/* cas.l atomic compare-and-swap */
 
 #endif /* __ASM_SH_CPU_FEATURES_H */
diff --git a/arch/sh/include/uapi/asm/sigcontext.h b/arch/sh/include/uapi/asm/sigcontext.h
index 8ce1435..faa5d08 100644
--- a/arch/sh/include/uapi/asm/sigcontext.h
+++ b/arch/sh/include/uapi/asm/sigcontext.h
@@ -25,8 +25,6 @@
 	unsigned long sc_mach;
 	unsigned long sc_macl;
 
-#if defined(__SH4__) || defined(CONFIG_CPU_SH4) || \
-    defined(__SH2A__) || defined(CONFIG_CPU_SH2A)
 	/* FPU registers */
 	unsigned long sc_fpregs[16];
 	unsigned long sc_xfpregs[16];
@@ -34,7 +32,6 @@
 	unsigned int sc_fpul;
 	unsigned int sc_ownedfp;
 #endif
-#endif
 };
 
 #endif /* __ASM_SH_SIGCONTEXT_H */
diff --git a/arch/sh/include/uapi/asm/unistd_32.h b/arch/sh/include/uapi/asm/unistd_32.h
index d13a1d6..c801bde 100644
--- a/arch/sh/include/uapi/asm/unistd_32.h
+++ b/arch/sh/include/uapi/asm/unistd_32.h
@@ -380,7 +380,21 @@
 #define __NR_process_vm_writev	366
 #define __NR_kcmp		367
 #define __NR_finit_module	368
+#define __NR_sched_getattr	369
+#define __NR_sched_setattr	370
+#define __NR_renameat2		371
+#define __NR_seccomp		372
+#define __NR_getrandom		373
+#define __NR_memfd_create	374
+#define __NR_bpf		375
+#define __NR_execveat		376
+#define __NR_userfaultfd	377
+#define __NR_membarrier		378
+#define __NR_mlock2		379
+#define __NR_copy_file_range	380
+#define __NR_preadv2		381
+#define __NR_pwritev2		382
 
-#define NR_syscalls 369
+#define NR_syscalls 383
 
 #endif /* __ASM_SH_UNISTD_32_H */
diff --git a/arch/sh/include/uapi/asm/unistd_64.h b/arch/sh/include/uapi/asm/unistd_64.h
index 47ebd5b..ce0cb35 100644
--- a/arch/sh/include/uapi/asm/unistd_64.h
+++ b/arch/sh/include/uapi/asm/unistd_64.h
@@ -400,7 +400,21 @@
 #define __NR_process_vm_writev	377
 #define __NR_kcmp		378
 #define __NR_finit_module	379
+#define __NR_sched_getattr	380
+#define __NR_sched_setattr	381
+#define __NR_renameat2		382
+#define __NR_seccomp		383
+#define __NR_getrandom		384
+#define __NR_memfd_create	385
+#define __NR_bpf		386
+#define __NR_execveat		387
+#define __NR_userfaultfd	388
+#define __NR_membarrier		389
+#define __NR_mlock2		390
+#define __NR_copy_file_range	391
+#define __NR_preadv2		392
+#define __NR_pwritev2		393
 
-#define NR_syscalls 380
+#define NR_syscalls 394
 
 #endif /* __ASM_SH_UNISTD_64_H */
diff --git a/arch/sh/kernel/cpu/clock.c b/arch/sh/kernel/cpu/clock.c
index 4187cf4..fca9b1e 100644
--- a/arch/sh/kernel/cpu/clock.c
+++ b/arch/sh/kernel/cpu/clock.c
@@ -24,11 +24,13 @@
 {
 	int ret;
 
+#ifndef CONFIG_COMMON_CLK
 	ret = arch_clk_init();
 	if (unlikely(ret)) {
 		pr_err("%s: CPU clock registration failed.\n", __func__);
 		return ret;
 	}
+#endif
 
 	if (sh_mv.mv_clk_init) {
 		ret = sh_mv.mv_clk_init();
@@ -39,11 +41,13 @@
 		}
 	}
 
+#ifndef CONFIG_COMMON_CLK
 	/* Kick the child clocks.. */
 	recalculate_root_clocks();
 
 	/* Enable the necessary init clocks */
 	clk_enable_init_clocks();
+#endif
 
 	return ret;
 }
diff --git a/arch/sh/kernel/cpu/init.c b/arch/sh/kernel/cpu/init.c
index 0d7360d..c8b3be1 100644
--- a/arch/sh/kernel/cpu/init.c
+++ b/arch/sh/kernel/cpu/init.c
@@ -106,7 +106,7 @@
 /*
  * Generic first-level cache init
  */
-#ifdef CONFIG_SUPERH32
+#if defined(CONFIG_SUPERH32) && !defined(CONFIG_CPU_J2)
 static void cache_init(void)
 {
 	unsigned long ccr, flags;
@@ -323,9 +323,13 @@
 	cache_init();
 
 	if (raw_smp_processor_id() == 0) {
+#ifdef CONFIG_MMU
 		shm_align_mask = max_t(unsigned long,
 				       current_cpu_data.dcache.way_size - 1,
 				       PAGE_SIZE - 1);
+#else
+		shm_align_mask = PAGE_SIZE - 1;
+#endif
 
 		/* Boot CPU sets the cache shape */
 		detect_cache_shape();
diff --git a/arch/sh/kernel/cpu/proc.c b/arch/sh/kernel/cpu/proc.c
index 9e6624c..4df4b28 100644
--- a/arch/sh/kernel/cpu/proc.c
+++ b/arch/sh/kernel/cpu/proc.c
@@ -27,6 +27,7 @@
 	[CPU_MXG]	= "MX-G",	[CPU_SH7723]	= "SH7723",
 	[CPU_SH7366]	= "SH7366",	[CPU_SH7724]	= "SH7724",
 	[CPU_SH7372]	= "SH7372",	[CPU_SH7734]	= "SH7734",
+	[CPU_J2]	= "J2",
 	[CPU_SH_NONE]	= "Unknown"
 };
 
diff --git a/arch/sh/kernel/cpu/sh2/Makefile b/arch/sh/kernel/cpu/sh2/Makefile
index f0f059a..904c428 100644
--- a/arch/sh/kernel/cpu/sh2/Makefile
+++ b/arch/sh/kernel/cpu/sh2/Makefile
@@ -5,3 +5,7 @@
 obj-y	:= ex.o probe.o entry.o
 
 obj-$(CONFIG_CPU_SUBTYPE_SH7619) += setup-sh7619.o clock-sh7619.o
+
+# SMP setup
+smp-$(CONFIG_CPU_J2)			:= smp-j2.o
+obj-$(CONFIG_SMP)			+= $(smp-y)
diff --git a/arch/sh/kernel/cpu/sh2/entry.S b/arch/sh/kernel/cpu/sh2/entry.S
index a150595..1ee0a6e 100644
--- a/arch/sh/kernel/cpu/sh2/entry.S
+++ b/arch/sh/kernel/cpu/sh2/entry.S
@@ -47,6 +47,13 @@
 	mov.l	r3,@-sp
 	cli
 	mov.l	$cpu_mode,r2
+#ifdef CONFIG_SMP
+	mov.l	$cpuid,r3
+	mov.l	@r3,r3
+	mov.l	@r3,r3
+	shll2	r3
+	add	r3,r2
+#endif
 	mov.l	@r2,r0
 	mov.l	@(5*4,r15),r3	! previous SR
 	or	r0,r3		! set MD
@@ -57,6 +64,13 @@
 	mov.l	__md_bit,r0
 	mov.l	r0,@r2		! enter kernel mode
 	mov.l	$current_thread_info,r2
+#ifdef CONFIG_SMP
+	mov.l	$cpuid,r0
+	mov.l	@r0,r0
+	mov.l	@r0,r0
+	shll2	r0
+	add	r0,r2
+#endif
 	mov.l	@r2,r2
 	mov	#(THREAD_SIZE >> 8),r0
 	shll8	r0
@@ -147,6 +161,11 @@
 	mov	#31,r8
 	cmp/hs	r8,r9
 	bt	trap_entry	! 64 > vec >= 31  is trap
+#ifdef CONFIG_CPU_J2
+	mov	#16,r8
+	cmp/hs	r8,r9
+	bt	interrupt_entry	! 31 > vec >= 16 is interrupt
+#endif
 
 	mov.l	4f,r8
 	mov	r9,r4
@@ -260,6 +279,13 @@
 	lds.l	@r0+,macl
 	mov	r15,r0
 	mov.l	$cpu_mode,r2
+#ifdef CONFIG_SMP
+	mov.l	$cpuid,r3
+	mov.l	@r3,r3
+	mov.l	@r3,r3
+	shll2	r3
+	add	r3,r2
+#endif
 	mov	#OFF_SR,r3
 	mov.l	@(r0,r3),r1
 	mov.l	__md_bit,r3
@@ -276,6 +302,13 @@
 	mov.l	r1,@r2				! set pc
 	get_current_thread_info r0, r1
 	mov.l	$current_thread_info,r1
+#ifdef CONFIG_SMP
+	mov.l	$cpuid,r3
+	mov.l	@r3,r3
+	mov.l	@r3,r3
+	shll2	r3
+	add	r3,r1
+#endif
 	mov.l	r0,@r1
 	mov.l	@r15+,r0
 	mov.l	@r15+,r1
@@ -303,19 +336,41 @@
 	.long	__current_thread_info
 $cpu_mode:	
 	.long	__cpu_mode
+#ifdef CONFIG_SMP
+$cpuid:
+	.long sh2_cpuid_addr
+#endif
 		
 ! common exception handler
 #include "../../entry-common.S"
+
+#ifdef CONFIG_NR_CPUS
+#define NR_CPUS CONFIG_NR_CPUS
+#else
+#define NR_CPUS 1
+#endif
 	
 	.data
 ! cpu operation mode 
 ! bit30 = MD (compatible SH3/4)
 __cpu_mode:
+	.rept	NR_CPUS
 	.long	0x40000000
+	.endr
+
+#ifdef CONFIG_SMP
+.global sh2_cpuid_addr
+sh2_cpuid_addr:
+	.long	dummy_cpuid
+dummy_cpuid:
+	.long	0
+#endif
 		
 	.section	.bss
 __current_thread_info:
+	.rept	NR_CPUS
 	.long	0
+	.endr
 
 ENTRY(exception_handling_table)
 	.space	4*32
diff --git a/arch/sh/kernel/cpu/sh2/probe.c b/arch/sh/kernel/cpu/sh2/probe.c
index 6c687ae..4205f6d 100644
--- a/arch/sh/kernel/cpu/sh2/probe.c
+++ b/arch/sh/kernel/cpu/sh2/probe.c
@@ -10,10 +10,27 @@
  * for more details.
  */
 #include <linux/init.h>
+#include <linux/of_fdt.h>
+#include <linux/smp.h>
+#include <linux/io.h>
 #include <asm/processor.h>
 #include <asm/cache.h>
 
-void cpu_probe(void)
+#if defined(CONFIG_CPU_J2)
+extern u32 __iomem *j2_ccr_base;
+static int __init scan_cache(unsigned long node, const char *uname,
+			     int depth, void *data)
+{
+	if (!of_flat_dt_is_compatible(node, "jcore,cache"))
+		return 0;
+
+	j2_ccr_base = (u32 __iomem *)of_flat_dt_translate_address(node);
+
+	return 1;
+}
+#endif
+
+void __ref cpu_probe(void)
 {
 #if defined(CONFIG_CPU_SUBTYPE_SH7619)
 	boot_cpu_data.type			= CPU_SH7619;
@@ -24,10 +41,30 @@
 	boot_cpu_data.dcache.linesz		= L1_CACHE_BYTES;
 	boot_cpu_data.dcache.flags		= 0;
 #endif
+
+#if defined(CONFIG_CPU_J2)
+	unsigned cpu = hard_smp_processor_id();
+	if (cpu == 0) of_scan_flat_dt(scan_cache, NULL);
+	if (j2_ccr_base) __raw_writel(0x80000303, j2_ccr_base + 4*cpu);
+	if (cpu != 0) return;
+	boot_cpu_data.type			= CPU_J2;
+
+	/* These defaults are appropriate for the original/current
+	 * J2 cache. Once there is a proper framework for getting cache
+	 * info from device tree, we should switch to that. */
+	boot_cpu_data.dcache.ways		= 1;
+	boot_cpu_data.dcache.sets		= 256;
+	boot_cpu_data.dcache.entry_shift	= 5;
+	boot_cpu_data.dcache.linesz		= 32;
+	boot_cpu_data.dcache.flags		= 0;
+
+	boot_cpu_data.flags |= CPU_HAS_CAS_L;
+#else
 	/*
 	 * SH-2 doesn't have separate caches
 	 */
 	boot_cpu_data.dcache.flags |= SH_CACHE_COMBINED;
+#endif
 	boot_cpu_data.icache = boot_cpu_data.dcache;
 	boot_cpu_data.family = CPU_FAMILY_SH2;
 }
diff --git a/arch/sh/kernel/cpu/sh2/smp-j2.c b/arch/sh/kernel/cpu/sh2/smp-j2.c
new file mode 100644
index 0000000..6ccd7e4
--- /dev/null
+++ b/arch/sh/kernel/cpu/sh2/smp-j2.c
@@ -0,0 +1,139 @@
+/*
+ * SMP support for J2 processor
+ *
+ * Copyright (C) 2015-2016 Smart Energy Instruments, Inc.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <asm/cmpxchg.h>
+
+DEFINE_PER_CPU(unsigned, j2_ipi_messages);
+
+extern u32 *sh2_cpuid_addr;
+static u32 *j2_ipi_trigger;
+static int j2_ipi_irq;
+
+static irqreturn_t j2_ipi_interrupt_handler(int irq, void *arg)
+{
+	unsigned cpu = hard_smp_processor_id();
+	volatile unsigned *pmsg = &per_cpu(j2_ipi_messages, cpu);
+	unsigned messages, i;
+
+	do messages = *pmsg;
+	while (cmpxchg(pmsg, messages, 0) != messages);
+
+	if (!messages) return IRQ_NONE;
+
+	for (i=0; i<SMP_MSG_NR; i++)
+		if (messages & (1U<<i))
+			smp_message_recv(i);
+
+	return IRQ_HANDLED;
+}
+
+static void j2_smp_setup(void)
+{
+}
+
+static void j2_prepare_cpus(unsigned int max_cpus)
+{
+	struct device_node *np;
+	unsigned i, max = 1;
+
+	np = of_find_compatible_node(NULL, NULL, "jcore,ipi-controller");
+	if (!np)
+		goto out;
+
+	j2_ipi_irq = irq_of_parse_and_map(np, 0);
+	j2_ipi_trigger = of_iomap(np, 0);
+	if (!j2_ipi_irq || !j2_ipi_trigger)
+		goto out;
+
+	np = of_find_compatible_node(NULL, NULL, "jcore,cpuid-mmio");
+	if (!np)
+		goto out;
+
+	sh2_cpuid_addr = of_iomap(np, 0);
+	if (!sh2_cpuid_addr)
+		goto out;
+
+	if (request_irq(j2_ipi_irq, j2_ipi_interrupt_handler, IRQF_PERCPU,
+			"ipi", (void *)j2_ipi_interrupt_handler) != 0)
+		goto out;
+
+	max = max_cpus;
+out:
+	/* Disable any cpus past max_cpus, or all secondaries if we didn't
+	 * get the necessary resources to support SMP. */
+	for (i=max; i<NR_CPUS; i++) {
+		set_cpu_possible(i, false);
+		set_cpu_present(i, false);
+	}
+}
+
+static void j2_start_cpu(unsigned int cpu, unsigned long entry_point)
+{
+	struct device_node *np;
+	u32 regs[2];
+	void __iomem *release, *initpc;
+
+	if (!cpu) return;
+
+	np = of_get_cpu_node(cpu, NULL);
+	if (!np) return;
+
+	if (of_property_read_u32_array(np, "cpu-release-addr", regs, 2)) return;
+	release = ioremap_nocache(regs[0], sizeof(u32));
+	initpc = ioremap_nocache(regs[1], sizeof(u32));
+
+	__raw_writel(entry_point, initpc);
+	__raw_writel(1, release);
+
+	iounmap(initpc);
+	iounmap(release);
+
+	pr_info("J2 SMP: requested start of cpu %u\n", cpu);
+}
+
+static unsigned int j2_smp_processor_id(void)
+{
+	return __raw_readl(sh2_cpuid_addr);
+}
+
+static void j2_send_ipi(unsigned int cpu, unsigned int message)
+{
+	volatile unsigned *pmsg;
+	unsigned old;
+	unsigned long val;
+
+	/* There is only one IPI interrupt shared by all messages, so
+	 * we keep a separate interrupt flag per message type in sw. */
+	pmsg = &per_cpu(j2_ipi_messages, cpu);
+	do old = *pmsg;
+	while (cmpxchg(pmsg, old, old|(1U<<message)) != old);
+
+	/* Generate the actual interrupt by writing to CCRn bit 28. */
+	val = __raw_readl(j2_ipi_trigger + cpu);
+	__raw_writel(val | (1U<<28), j2_ipi_trigger + cpu);
+}
+
+static struct plat_smp_ops j2_smp_ops = {
+	.smp_setup		= j2_smp_setup,
+	.prepare_cpus		= j2_prepare_cpus,
+	.start_cpu		= j2_start_cpu,
+	.smp_processor_id	= j2_smp_processor_id,
+	.send_ipi		= j2_send_ipi,
+	.cpu_die		= native_cpu_die,
+	.cpu_disable		= native_cpu_disable,
+	.play_dead		= native_play_dead,
+};
+
+CPU_METHOD_OF_DECLARE(j2_cpu_method, "jcore,spin-table", &j2_smp_ops);
diff --git a/arch/sh/kernel/dwarf.c b/arch/sh/kernel/dwarf.c
index 9d209a0..e1d751a 100644
--- a/arch/sh/kernel/dwarf.c
+++ b/arch/sh/kernel/dwarf.c
@@ -1009,10 +1009,8 @@
 	rbtree_postorder_for_each_entry_safe(cie, next_cie, &cie_root, node)
 		kfree(cie);
 
-	if (dwarf_reg_pool)
-		mempool_destroy(dwarf_reg_pool);
-	if (dwarf_frame_pool)
-		mempool_destroy(dwarf_frame_pool);
+	mempool_destroy(dwarf_reg_pool);
+	mempool_destroy(dwarf_frame_pool);
 	kmem_cache_destroy(dwarf_reg_cachep);
 	kmem_cache_destroy(dwarf_frame_cachep);
 }
diff --git a/arch/sh/kernel/head_32.S b/arch/sh/kernel/head_32.S
index 974bc15..4e352c3 100644
--- a/arch/sh/kernel/head_32.S
+++ b/arch/sh/kernel/head_32.S
@@ -67,7 +67,7 @@
 	ldc	r0, r6_bank
 #endif
 
-#ifdef CONFIG_OF
+#ifdef CONFIG_OF_FLATTREE
 	mov	r4, r12		! Store device tree blob pointer in r12
 #endif
 	
@@ -318,7 +318,7 @@
 10:		
 #endif
 
-#ifdef CONFIG_OF
+#ifdef CONFIG_OF_FLATTREE
 	mov.l	8f, r0		! Make flat device tree available early.
 	jsr	@r0
 	 mov	r12, r4
@@ -349,7 +349,7 @@
 5:	.long	start_kernel
 6:	.long	cpu_init
 7:	.long	init_thread_union
-#if defined(CONFIG_OF)
+#if defined(CONFIG_OF_FLATTREE)
 8:	.long	sh_fdt_init
 #endif
 
diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c
index 5d34605..e7b49d8 100644
--- a/arch/sh/kernel/setup.c
+++ b/arch/sh/kernel/setup.c
@@ -242,7 +242,7 @@
 {
 }
 
-#ifdef CONFIG_OF
+#ifdef CONFIG_OF_FLATTREE
 void __ref sh_fdt_init(phys_addr_t dt_phys)
 {
 	static int done = 0;
@@ -251,7 +251,11 @@
 	/* Avoid calling an __init function on secondary cpus. */
 	if (done) return;
 
+#ifdef CONFIG_USE_BUILTIN_DTB
+	dt_virt = __dtb_start;
+#else
 	dt_virt = phys_to_virt(dt_phys);
+#endif
 
 	if (!dt_virt || !early_init_dt_scan(dt_virt)) {
 		pr_crit("Error: invalid device tree blob"
diff --git a/arch/sh/kernel/syscalls_32.S b/arch/sh/kernel/syscalls_32.S
index 734234b..254bc22 100644
--- a/arch/sh/kernel/syscalls_32.S
+++ b/arch/sh/kernel/syscalls_32.S
@@ -386,3 +386,17 @@
 	.long sys_process_vm_writev
 	.long sys_kcmp
 	.long sys_finit_module
+	.long sys_sched_getattr
+	.long sys_sched_setattr		/* 370 */
+	.long sys_renameat2
+	.long sys_seccomp
+	.long sys_getrandom
+	.long sys_memfd_create
+	.long sys_bpf			/* 375 */
+	.long sys_execveat
+	.long sys_userfaultfd
+	.long sys_membarrier
+	.long sys_mlock2
+	.long sys_copy_file_range	/* 380 */
+	.long sys_preadv2
+	.long sys_pwritev2
diff --git a/arch/sh/kernel/syscalls_64.S b/arch/sh/kernel/syscalls_64.S
index 579fcb9..d6a27f7 100644
--- a/arch/sh/kernel/syscalls_64.S
+++ b/arch/sh/kernel/syscalls_64.S
@@ -406,3 +406,17 @@
 	.long sys_process_vm_writev
 	.long sys_kcmp
 	.long sys_finit_module
+	.long sys_sched_getattr		/* 380 */
+	.long sys_sched_setattr
+	.long sys_renameat2
+	.long sys_seccomp
+	.long sys_getrandom
+	.long sys_memfd_create		/* 385 */
+	.long sys_bpf
+	.long sys_execveat
+	.long sys_userfaultfd
+	.long sys_membarrier
+	.long sys_mlock2		/* 390 */
+	.long sys_copy_file_range
+	.long sys_preadv2
+	.long sys_pwritev2
diff --git a/arch/sh/kernel/time.c b/arch/sh/kernel/time.c
index a4a7862..fcd5e41 100644
--- a/arch/sh/kernel/time.c
+++ b/arch/sh/kernel/time.c
@@ -11,7 +11,6 @@
  * for more details.
  */
 #include <linux/kernel.h>
-#include <linux/module.h>
 #include <linux/init.h>
 #include <linux/profile.h>
 #include <linux/timex.h>
@@ -90,7 +89,7 @@
 
 	return PTR_ERR_OR_ZERO(pdev);
 }
-module_init(rtc_generic_init);
+device_initcall(rtc_generic_init);
 
 void (*board_time_init)(void);
 
diff --git a/arch/sh/mm/Makefile b/arch/sh/mm/Makefile
index cee6b99..92c3bd9 100644
--- a/arch/sh/mm/Makefile
+++ b/arch/sh/mm/Makefile
@@ -4,7 +4,8 @@
 
 obj-y			:= alignment.o cache.o init.o consistent.o mmap.o
 
-cacheops-$(CONFIG_CPU_SH2)		:= cache-sh2.o
+cacheops-$(CONFIG_CPU_J2)		:= cache-j2.o
+cacheops-$(CONFIG_CPU_SUBTYPE_SH7619)	:= cache-sh2.o
 cacheops-$(CONFIG_CPU_SH2A)		:= cache-sh2a.o
 cacheops-$(CONFIG_CPU_SH3)		:= cache-sh3.o
 cacheops-$(CONFIG_CPU_SH4)		:= cache-sh4.o flush-sh4.o
diff --git a/arch/sh/mm/asids-debugfs.c b/arch/sh/mm/asids-debugfs.c
index ecfc6b0..bf95fda 100644
--- a/arch/sh/mm/asids-debugfs.c
+++ b/arch/sh/mm/asids-debugfs.c
@@ -17,7 +17,6 @@
  * for more details.
  */
 #include <linux/init.h>
-#include <linux/module.h>
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
 #include <linux/spinlock.h>
@@ -70,6 +69,4 @@
 
 	return PTR_ERR_OR_ZERO(asids_dentry);
 }
-module_init(asids_debugfs_init);
-
-MODULE_LICENSE("GPL v2");
+device_initcall(asids_debugfs_init);
diff --git a/arch/sh/mm/cache-j2.c b/arch/sh/mm/cache-j2.c
new file mode 100644
index 0000000..391698b
--- /dev/null
+++ b/arch/sh/mm/cache-j2.c
@@ -0,0 +1,65 @@
+/*
+ * arch/sh/mm/cache-j2.c
+ *
+ * Copyright (C) 2015-2016 Smart Energy Instruments, Inc.
+ *
+ * Released under the terms of the GNU GPL v2.0.
+ */
+
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/cpumask.h>
+
+#include <asm/cache.h>
+#include <asm/addrspace.h>
+#include <asm/processor.h>
+#include <asm/cacheflush.h>
+#include <asm/io.h>
+
+#define ICACHE_ENABLE	0x1
+#define DCACHE_ENABLE	0x2
+#define CACHE_ENABLE	(ICACHE_ENABLE | DCACHE_ENABLE)
+#define ICACHE_FLUSH	0x100
+#define DCACHE_FLUSH	0x200
+#define CACHE_FLUSH	(ICACHE_FLUSH | DCACHE_FLUSH)
+
+u32 __iomem *j2_ccr_base;
+
+static void j2_flush_icache(void *args)
+{
+	unsigned cpu;
+	for_each_possible_cpu(cpu)
+		__raw_writel(CACHE_ENABLE | ICACHE_FLUSH, j2_ccr_base + cpu);
+}
+
+static void j2_flush_dcache(void *args)
+{
+	unsigned cpu;
+	for_each_possible_cpu(cpu)
+		__raw_writel(CACHE_ENABLE | DCACHE_FLUSH, j2_ccr_base + cpu);
+}
+
+static void j2_flush_both(void *args)
+{
+	unsigned cpu;
+	for_each_possible_cpu(cpu)
+		__raw_writel(CACHE_ENABLE | CACHE_FLUSH, j2_ccr_base + cpu);
+}
+
+void __init j2_cache_init(void)
+{
+	if (!j2_ccr_base)
+		return;
+
+	local_flush_cache_all = j2_flush_both;
+	local_flush_cache_mm = j2_flush_both;
+	local_flush_cache_dup_mm = j2_flush_both;
+	local_flush_cache_page = j2_flush_both;
+	local_flush_cache_range = j2_flush_both;
+	local_flush_dcache_page = j2_flush_dcache;
+	local_flush_icache_range = j2_flush_icache;
+	local_flush_icache_page = j2_flush_icache;
+	local_flush_cache_sigtramp = j2_flush_icache;
+
+	pr_info("Initial J2 CCR is %.8x\n", __raw_readl(j2_ccr_base));
+}
diff --git a/arch/sh/mm/cache.c b/arch/sh/mm/cache.c
index e58cfbf..36554a9 100644
--- a/arch/sh/mm/cache.c
+++ b/arch/sh/mm/cache.c
@@ -42,6 +42,8 @@
 {
 	preempt_disable();
 
+	/* Needing IPI for cross-core flush is SHX3-specific. */
+#ifdef CONFIG_CPU_SHX3
 	/*
 	 * It's possible that this gets called early on when IRQs are
 	 * still disabled due to ioremapping by the boot CPU, so don't
@@ -49,6 +51,7 @@
 	 */
 	if (num_online_cpus() > 1)
 		smp_call_function(func, info, wait);
+#endif
 
 	func(info);
 
@@ -244,7 +247,11 @@
 
 static void compute_alias(struct cache_info *c)
 {
+#ifdef CONFIG_MMU
 	c->alias_mask = ((c->sets - 1) << c->entry_shift) & ~(PAGE_SIZE - 1);
+#else
+	c->alias_mask = 0;
+#endif
 	c->n_aliases = c->alias_mask ? (c->alias_mask >> PAGE_SHIFT) + 1 : 0;
 }
 
@@ -305,7 +312,11 @@
 	if (unlikely(cache_disabled))
 		goto skip;
 
-	if (boot_cpu_data.family == CPU_FAMILY_SH2) {
+	if (boot_cpu_data.type == CPU_J2) {
+		extern void __weak j2_cache_init(void);
+
+		j2_cache_init();
+	} else if (boot_cpu_data.family == CPU_FAMILY_SH2) {
 		extern void __weak sh2_cache_init(void);
 
 		sh2_cache_init();