Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!
diff --git a/arch/sh64/Kconfig b/arch/sh64/Kconfig
new file mode 100644
index 0000000..76eb81f
--- /dev/null
+++ b/arch/sh64/Kconfig
@@ -0,0 +1,293 @@
+#
+# For a description of the syntax of this configuration file,
+# see Documentation/kbuild/config-language.txt.
+#
+
+mainmenu "Linux/SH64 Kernel Configuration"
+
+config SUPERH
+	bool
+	default y
+
+config SUPERH64
+	bool
+	default y
+
+config MMU
+	bool
+	default y
+
+config UID16
+	bool
+	default y
+
+config RWSEM_GENERIC_SPINLOCK
+	bool
+	default y
+
+config GENERIC_CALIBRATE_DELAY
+	bool
+	default y
+
+config LOG_BUF_SHIFT
+	int
+	default 14
+
+config RWSEM_XCHGADD_ALGORITHM
+	bool
+
+config GENERIC_ISA_DMA
+	bool
+
+source init/Kconfig
+
+menu "System type"
+
+choice
+	prompt "SuperH system type"
+	default SH_SIMULATOR
+
+config SH_GENERIC
+	bool "Generic"
+
+config SH_SIMULATOR
+	bool "Simulator"
+
+config SH_CAYMAN
+	bool "Cayman"
+
+config SH_ROMRAM
+	bool "ROM/RAM"
+
+config SH_HARP
+	bool "ST50-Harp"
+
+endchoice
+
+choice
+	prompt "Processor family"
+	default CPU_SH5
+
+config CPU_SH5
+	bool "SH-5"
+
+endchoice
+
+choice
+	prompt "Processor type"
+
+config CPU_SUBTYPE_SH5_101
+	bool "SH5-101"
+	depends on CPU_SH5
+
+config CPU_SUBTYPE_SH5_103
+	bool "SH5-103"
+	depends on CPU_SH5
+
+endchoice
+
+choice
+	prompt "Endianness"
+	default LITTLE_ENDIAN
+
+config LITTLE_ENDIAN
+	bool "Little-Endian"
+
+config BIG_ENDIAN
+	bool "Big-Endian"
+
+endchoice
+
+config SH_FPU
+	bool "FPU support"
+	default y
+
+config SH64_FPU_DENORM_FLUSH
+	depends on SH_FPU
+	bool "Flush floating point denorms to zero"
+
+choice
+	prompt "Page table levels"
+	default SH64_PGTABLE_2_LEVEL
+
+config SH64_PGTABLE_2_LEVEL
+	bool "2"
+
+config SH64_PGTABLE_3_LEVEL
+	bool "3"
+
+endchoice
+
+choice
+	prompt "HugeTLB page size"
+	depends on HUGETLB_PAGE && MMU
+	default HUGETLB_PAGE_SIZE_64K
+
+config HUGETLB_PAGE_SIZE_64K
+	bool "64K"
+
+config HUGETLB_PAGE_SIZE_1MB
+	bool "1MB"
+
+config HUGETLB_PAGE_SIZE_512MB
+	bool "512MB"
+
+endchoice
+
+config SH64_USER_MISALIGNED_FIXUP
+	bool "Fixup misaligned loads/stores occurring in user mode"
+
+comment "Memory options"
+
+config CACHED_MEMORY_OFFSET
+	hex "Cached Area Offset"
+	depends on SH_HARP || SH_CAYMAN || SH_SIMULATOR
+	default "20000000"
+
+config MEMORY_START
+	hex "Physical memory start address"
+	depends on SH_HARP || SH_CAYMAN || SH_SIMULATOR
+	default "80000000"
+
+config MEMORY_SIZE_IN_MB
+	int "Memory size (in MB)" if SH_HARP || SH_CAYMAN || SH_SIMULATOR
+	default "64" if SH_HARP || SH_CAYMAN
+	default "8" if SH_SIMULATOR
+
+comment "Cache options"
+
+config DCACHE_DISABLED
+	bool "DCache Disabling"
+	depends on SH_HARP || SH_CAYMAN || SH_SIMULATOR
+
+choice
+	prompt "DCache mode"
+	depends on !DCACHE_DISABLED && !SH_SIMULATOR
+	default DCACHE_WRITE_BACK
+
+config DCACHE_WRITE_BACK
+	bool "Write-back"
+
+config DCACHE_WRITE_THROUGH
+	bool "Write-through"
+
+endchoice
+
+config ICACHE_DISABLED
+	bool "ICache Disabling"
+	depends on SH_HARP || SH_CAYMAN || SH_SIMULATOR
+
+config PCIDEVICE_MEMORY_START
+	hex
+	depends on SH_HARP || SH_CAYMAN || SH_SIMULATOR
+	default "C0000000"
+
+config DEVICE_MEMORY_START
+	hex
+	depends on SH_HARP || SH_CAYMAN || SH_SIMULATOR
+	default "E0000000"
+
+config FLASH_MEMORY_START
+	hex "Flash memory/on-chip devices start address"
+	depends on SH_HARP || SH_CAYMAN || SH_SIMULATOR
+	default "00000000"
+
+config PCI_BLOCK_START
+	hex "PCI block start address"
+	depends on SH_HARP || SH_CAYMAN || SH_SIMULATOR
+	default "40000000"
+
+comment "CPU Subtype specific options"
+
+config SH64_ID2815_WORKAROUND
+	bool "Include workaround for SH5-101 cut2 silicon defect ID2815"
+
+comment "Misc options"
+config HEARTBEAT
+	bool "Heartbeat LED"
+
+config HDSP253_LED
+	bool "Support for HDSP-253 LED"
+	depends on SH_CAYMAN
+
+config SH_DMA
+	tristate "DMA controller (DMAC) support"
+
+config PREEMPT
+	bool "Preemptible Kernel (EXPERIMENTAL)"
+	depends on EXPERIMENTAL
+
+endmenu
+
+menu "Bus options (PCI, PCMCIA, EISA, MCA, ISA)"
+
+config ISA
+	bool
+
+config SBUS
+	bool
+
+config PCI
+	bool "PCI support"
+	help
+	  Find out whether you have a PCI motherboard. PCI is the name of a
+	  bus system, i.e. the way the CPU talks to the other stuff inside
+	  your box. Other bus systems are ISA, EISA, MicroChannel (MCA) or
+	  VESA. If you have PCI, say Y, otherwise N.
+
+	  The PCI-HOWTO, available from
+	  <http://www.tldp.org/docs.html#howto>, contains valuable
+	  information about which PCI hardware does work under Linux and which
+	  doesn't.
+
+config SH_PCIDMA_NONCOHERENT
+	bool "Cache and PCI noncoherent"
+	depends on PCI
+	default y
+	help
+	  Enable this option if your platform does not have a CPU cache which
+	  remains coherent with PCI DMA. It is safest to say 'Y', although you
+	  will see better performance if you can say 'N', because the PCI DMA
+	  code will not have to flush the CPU's caches. If you have a PCI host
+	  bridge integrated with your SH CPU, refer carefully to the chip specs
+	  to see if you can say 'N' here. Otherwise, leave it as 'Y'.
+
+source "drivers/pci/Kconfig"
+
+source "drivers/pcmcia/Kconfig"
+
+source "drivers/pci/hotplug/Kconfig"
+
+endmenu
+
+menu "Executable file formats"
+
+source "fs/Kconfig.binfmt"
+
+endmenu
+
+source "drivers/Kconfig"
+
+source "fs/Kconfig"
+
+source "arch/sh64/oprofile/Kconfig"
+
+source "arch/sh64/Kconfig.debug"
+
+source "security/Kconfig"
+
+source "crypto/Kconfig"
+
+source "lib/Kconfig"
+
+#
+# Use the generic interrupt handling code in kernel/irq/:
+#
+config GENERIC_HARDIRQS
+	bool
+	default y
+
+config GENERIC_IRQ_PROBE
+	bool
+	default y
+
diff --git a/arch/sh64/Kconfig.debug b/arch/sh64/Kconfig.debug
new file mode 100644
index 0000000..26d842c
--- /dev/null
+++ b/arch/sh64/Kconfig.debug
@@ -0,0 +1,44 @@
+menu "Kernel hacking"
+
+source "lib/Kconfig.debug"
+
+config EARLY_PRINTK
+	bool "Early SCIF console support"
+
+config DEBUG_KERNEL_WITH_GDB_STUB
+	bool "GDB Stub kernel debug"
+
+config SH64_PROC_TLB
+	bool "Debug: report TLB fill/purge activity through /proc/tlb"
+	depends on PROC_FS
+
+config SH64_PROC_ASIDS
+	bool "Debug: report ASIDs through /proc/asids"
+	depends on PROC_FS
+
+config SH64_SR_WATCH
+	bool "Debug: set SR.WATCH to enable hardware watchpoints and trace"
+
+config POOR_MANS_STRACE
+	bool "Debug: enable rudimentary strace facility"
+	help
+	  This option allows system calls to be traced to the console.  It also
+	  aids in detecting kernel stack underflow.  It is useful for debugging
+	  early-userland problems (e.g. init incurring fatal exceptions.)
+
+config SH_ALPHANUMERIC
+	bool "Enable debug outputs to on-board alphanumeric display"
+
+config SH_NO_BSS_INIT
+	bool "Avoid zeroing BSS (to speed-up startup on suitable platforms)"
+
+config FRAME_POINTER
+	bool "Compile the kernel with frame pointers"
+	default y if KGDB
+	help
+	  If you say Y here the resulting kernel image will be slightly larger
+	  and slower, but it will give very useful debugging information.
+	  If you don't debug the kernel, you can say N, but we may not be able
+	  to solve problems without frame pointers.
+
+endmenu
diff --git a/arch/sh64/Makefile b/arch/sh64/Makefile
new file mode 100644
index 0000000..b4fd8e1
--- /dev/null
+++ b/arch/sh64/Makefile
@@ -0,0 +1,116 @@
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 2000, 2001  Paolo Alberelli
+# Copyright (C) 2003, 2004  Paul Mundt
+#
+# This file is included by the global makefile so that you can add your own
+# architecture-specific flags and dependencies. Remember to do have actions
+# for "archclean" and "archdep" for cleaning up and making dependencies for
+# this architecture
+#
+
+cpu-y				:= -mb
+cpu-$(CONFIG_LITTLE_ENDIAN)	:= -ml
+
+cpu-$(CONFIG_CPU_SH5)		+= -m5-32media-nofpu
+
+ifdef CONFIG_LITTLE_ENDIAN
+LDFLAGS_vmlinux		+= --defsym 'jiffies=jiffies_64'
+LDFLAGS			+= -EL  -mshlelf32_linux
+else
+LDFLAGS_vmlinux		+= --defsym 'jiffies=jiffies_64+4'
+LDFLAGS			+= -EB  -mshelf32_linux
+endif
+
+# No requirements for endianess support from AFLAGS, 'as' always run through gcc
+AFLAGS		+= -m5 -isa=sh64 -traditional
+CFLAGS		+= $(cpu-y)
+
+LDFLAGS_vmlinux	+= --defsym phys_stext=_stext-$(CONFIG_CACHED_MEMORY_OFFSET) \
+		  --defsym phys_stext_shmedia=phys_stext+1 \
+		  -e phys_stext_shmedia
+
+OBJCOPYFLAGS	:= -O binary -R .note -R .comment -R .stab -R .stabstr -S
+
+#
+# arch/sh64/defconfig never had any hope of being
+# frequently updated, so use one that does
+#
+KBUILD_DEFCONFIG	:= cayman_defconfig
+
+ifdef LOADADDR
+LINKFLAGS     += -Ttext $(word 1,$(LOADADDR))
+endif
+
+machine-$(CONFIG_SH_CAYMAN)	:= cayman
+machine-$(CONFIG_SH_SIMULATOR)	:= sim
+machine-$(CONFIG_SH_HARP)	:= harp
+machine-$(CONFIG_SH_ROMRAM)	:= romram
+
+head-y := arch/$(ARCH)/kernel/head.o arch/$(ARCH)/kernel/init_task.o
+
+core-y	+= arch/sh64/kernel/ arch/sh64/mm/
+
+ifneq ($(machine-y),)
+core-y	+= arch/sh64/mach-$(machine-y)/
+endif
+
+LIBGCC := $(shell $(CC) $(CFLAGS) -print-libgcc-file-name)
+libs-y	+= arch/$(ARCH)/lib/ $(LIBGCC)
+
+drivers-$(CONFIG_OPROFILE)	+= arch/sh64/oprofile/
+
+boot := arch/$(ARCH)/boot
+
+zImage: vmlinux
+	$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
+
+compressed: zImage
+
+archclean:
+	$(Q)$(MAKE) $(clean)=$(boot)
+
+prepare: include/asm-$(ARCH)/asm-offsets.h arch/$(ARCH)/lib/syscalltab.h
+
+include/asm-$(ARCH)/asm-offsets.h: arch/$(ARCH)/kernel/asm-offsets.s \
+				   include/asm include/linux/version.h
+	$(call filechk,gen-asm-offsets)
+
+define filechk_gen-syscalltab
+       (set -e; \
+	echo "/*"; \
+	echo " * DO NOT MODIFY."; \
+	echo " *"; \
+	echo " * This file was generated by arch/$(ARCH)/Makefile"; \
+	echo " * Any changes will be reverted at build time."; \
+	echo " */"; \
+	echo ""; \
+	echo "#ifndef __SYSCALLTAB_H"; \
+	echo "#define __SYSCALLTAB_H"; \
+	echo ""; \
+	echo "#include <linux/kernel.h>"; \
+	echo ""; \
+	echo "struct syscall_info {"; \
+	echo "	const char *name;"; \
+	echo "} syscall_info_table[] = {"; \
+	sed -e '/^.*\.long /!d;s//    { "/;s/\(\([^/]*\)\/\)\{1\}.*/\2/; \
+		s/[ \t]*$$//g;s/$$/" },/;s/\("\)sys_/\1/g'; \
+	echo "};"; \
+	echo ""; \
+	echo "#define NUM_SYSCALL_INFO_ENTRIES	ARRAY_SIZE(syscall_info_table)"; \
+	echo ""; \
+	echo "#endif /* __SYSCALLTAB_H */" )
+endef
+
+arch/$(ARCH)/lib/syscalltab.h: arch/sh64/kernel/syscalls.S
+	$(call filechk,gen-syscalltab)
+
+CLEAN_FILES += include/asm-$(ARCH)/asm-offsets.h arch/$(ARCH)/lib/syscalltab.h
+
+define archhelp
+	@echo '  zImage 	           - Compressed kernel image (arch/sh64/boot/zImage)'
+endef
+
diff --git a/arch/sh64/boot/Makefile b/arch/sh64/boot/Makefile
new file mode 100644
index 0000000..fb71087
--- /dev/null
+++ b/arch/sh64/boot/Makefile
@@ -0,0 +1,20 @@
+#
+# arch/sh64/boot/Makefile
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 2002 Stuart Menefy
+#
+
+targets := zImage
+subdir- := compressed
+
+$(obj)/zImage: $(obj)/compressed/vmlinux FORCE
+	$(call if_changed,objcopy)
+	@echo 'Kernel: $@ is ready'
+
+$(obj)/compressed/vmlinux: FORCE
+	$(Q)$(MAKE) $(build)=$(obj)/compressed $@
+
diff --git a/arch/sh64/boot/compressed/Makefile b/arch/sh64/boot/compressed/Makefile
new file mode 100644
index 0000000..9cd2167
--- /dev/null
+++ b/arch/sh64/boot/compressed/Makefile
@@ -0,0 +1,46 @@
+#
+# linux/arch/sh64/boot/compressed/Makefile
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 2002 Stuart Menefy
+# Copyright (C) 2004 Paul Mundt
+#
+# create a compressed vmlinux image from the original vmlinux
+#
+
+targets		:= vmlinux vmlinux.bin vmlinux.bin.gz \
+		   head.o misc.o cache.o piggy.o vmlinux.lds
+
+EXTRA_AFLAGS	:= -traditional
+
+OBJECTS		:= $(obj)/head.o $(obj)/misc.o $(obj)/cache.o
+
+#
+# ZIMAGE_OFFSET is the load offset of the compression loader
+# (4M for the kernel plus 64K for this loader)
+#
+ZIMAGE_OFFSET = $(shell printf "0x%8x" $$[$(CONFIG_MEMORY_START)+0x400000+0x10000])
+
+LDFLAGS_vmlinux := -Ttext $(ZIMAGE_OFFSET) -e startup \
+		    -T $(obj)/../../kernel/vmlinux.lds \
+		    --no-warn-mismatch
+
+$(obj)/vmlinux: $(OBJECTS) $(obj)/piggy.o FORCE
+	$(call if_changed,ld)
+	@:
+
+$(obj)/vmlinux.bin: vmlinux FORCE
+	$(call if_changed,objcopy)
+
+$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
+	$(call if_changed,gzip)
+
+LDFLAGS_piggy.o := -r --format binary --oformat elf32-sh64-linux -T
+OBJCOPYFLAGS += -R .empty_zero_page
+
+$(obj)/piggy.o: $(obj)/vmlinux.lds $(obj)/vmlinux.bin.gz FORCE
+	$(call if_changed,ld)
+
diff --git a/arch/sh64/boot/compressed/cache.c b/arch/sh64/boot/compressed/cache.c
new file mode 100644
index 0000000..7087073
--- /dev/null
+++ b/arch/sh64/boot/compressed/cache.c
@@ -0,0 +1,39 @@
+/*
+ * arch/shmedia/boot/compressed/cache.c -- simple cache management functions
+ *
+ * Code extracted from sh-ipl+g, sh-stub.c, which has the copyright:
+ *
+ *   This is originally based on an m68k software stub written by Glenn
+ *   Engel at HP, but has changed quite a bit.
+ *
+ *   Modifications for the SH by Ben Lee and Steve Chamberlain
+ *
+****************************************************************************
+
+		THIS SOFTWARE IS NOT COPYRIGHTED
+
+   HP offers the following for use in the public domain.  HP makes no
+   warranty with regard to the software or it's performance and the
+   user accepts the software "AS IS" with all faults.
+
+   HP DISCLAIMS ANY WARRANTIES, EXPRESS OR IMPLIED, WITH REGARD
+   TO THIS SOFTWARE INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+   OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+
+****************************************************************************/
+
+#define CACHE_ENABLE      0
+#define CACHE_DISABLE     1
+
+int cache_control(unsigned int command)
+{
+	volatile unsigned int *p = (volatile unsigned int *) 0x80000000;
+	int i;
+
+	for (i = 0; i < (32 * 1024); i += 32) {
+		(void *) *p;
+		p += (32 / sizeof (int));
+	}
+
+	return 0;
+}
diff --git a/arch/sh64/boot/compressed/head.S b/arch/sh64/boot/compressed/head.S
new file mode 100644
index 0000000..82040b1
--- /dev/null
+++ b/arch/sh64/boot/compressed/head.S
@@ -0,0 +1,164 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * arch/shmedia/boot/compressed/head.S
+ *
+ * Copied from
+ *   arch/shmedia/kernel/head.S
+ * which carried the copyright:
+ *   Copyright (C) 2000, 2001  Paolo Alberelli
+ *
+ * Modification for compressed loader:
+ *   Copyright (C) 2002 Stuart Menefy (stuart.menefy@st.com)
+ */
+
+#include <linux/linkage.h>
+#include <asm/registers.h>
+#include <asm/cache.h>
+#include <asm/mmu_context.h>
+
+/*
+ * Fixed TLB entries to identity map the beginning of RAM
+ */
+#define MMUIR_TEXT_H	0x0000000000000003 | CONFIG_MEMORY_START
+			/* Enabled, Shared, ASID 0, Eff. Add. 0xA0000000 */
+#define MMUIR_TEXT_L	0x000000000000009a | CONFIG_MEMORY_START
+			/* 512 Mb, Cacheable (Write-back), execute, Not User, Ph. Add. */
+
+#define MMUDR_CACHED_H	0x0000000000000003 | CONFIG_MEMORY_START
+			/* Enabled, Shared, ASID 0, Eff. Add. 0xA0000000 */
+#define MMUDR_CACHED_L	0x000000000000015a | CONFIG_MEMORY_START
+			/* 512 Mb, Cacheable (Write-back), read/write, Not User, Ph. Add. */
+
+#define	ICCR0_INIT_VAL	ICCR0_ON | ICCR0_ICI		/* ICE + ICI */
+#define	ICCR1_INIT_VAL	ICCR1_NOLOCK			/* No locking */
+
+#if 1
+#define	OCCR0_INIT_VAL	OCCR0_ON | OCCR0_OCI | OCCR0_WB	/* OCE + OCI + WB */
+#else
+#define	OCCR0_INIT_VAL	OCCR0_OFF
+#endif
+#define	OCCR1_INIT_VAL	OCCR1_NOLOCK			/* No locking */
+
+	.text
+
+	.global	startup
+startup:
+	/*
+	 * Prevent speculative fetch on device memory due to
+	 * uninitialized target registers.
+	 * This must be executed before the first branch.
+	 */
+	ptabs/u	ZERO, tr0
+	ptabs/u	ZERO, tr1
+	ptabs/u	ZERO, tr2
+	ptabs/u	ZERO, tr3
+	ptabs/u	ZERO, tr4
+	ptabs/u	ZERO, tr5
+	ptabs/u	ZERO, tr6
+	ptabs/u	ZERO, tr7
+	synci
+
+	/*
+	 * Set initial TLB entries for cached and uncached regions.
+	 * Note: PTA/BLINK is PIC code, PTABS/BLINK isn't !
+	 */
+	/* Clear ITLBs */
+	pta	1f, tr1
+	movi	ITLB_FIXED, r21
+	movi	ITLB_LAST_VAR_UNRESTRICTED+TLB_STEP, r22
+1:	putcfg	r21, 0, ZERO		/* Clear MMUIR[n].PTEH.V */
+	addi	r21, TLB_STEP, r21
+        bne	r21, r22, tr1
+
+	/* Clear DTLBs */
+	pta	1f, tr1
+	movi	DTLB_FIXED, r21
+	movi	DTLB_LAST_VAR_UNRESTRICTED+TLB_STEP, r22
+1:	putcfg	r21, 0, ZERO		/* Clear MMUDR[n].PTEH.V */
+	addi	r21, TLB_STEP, r21
+        bne	r21, r22, tr1
+
+	/* Map one big (512Mb) page for ITLB */
+	movi	ITLB_FIXED, r21
+	movi	MMUIR_TEXT_L, r22	/* PTEL first */
+	putcfg	r21, 1, r22		/* Set MMUIR[0].PTEL */
+	movi	MMUIR_TEXT_H, r22	/* PTEH last */
+	putcfg	r21, 0, r22		/* Set MMUIR[0].PTEH */
+
+	/* Map one big CACHED (512Mb) page for DTLB */
+	movi	DTLB_FIXED, r21
+	movi	MMUDR_CACHED_L, r22	/* PTEL first */
+	putcfg	r21, 1, r22		/* Set MMUDR[0].PTEL */
+	movi	MMUDR_CACHED_H, r22	/* PTEH last */
+	putcfg	r21, 0, r22		/* Set MMUDR[0].PTEH */
+
+	/* ICache */
+	movi	ICCR_BASE, r21
+	movi	ICCR0_INIT_VAL, r22
+	movi	ICCR1_INIT_VAL, r23
+	putcfg	r21, ICCR_REG0, r22
+	putcfg	r21, ICCR_REG1, r23
+	synci
+
+	/* OCache */
+	movi	OCCR_BASE, r21
+	movi	OCCR0_INIT_VAL, r22
+	movi	OCCR1_INIT_VAL, r23
+	putcfg	r21, OCCR_REG0, r22
+	putcfg	r21, OCCR_REG1, r23
+	synco
+
+	/*
+	 * Enable the MMU.
+	 * From here-on code can be non-PIC.
+	 */
+	movi	SR_HARMLESS | SR_ENABLE_MMU, r22
+	putcon	r22, SSR
+	movi	1f, r22
+	putcon	r22, SPC
+	synco
+	rte				/* And now go into the hyperspace ... */
+1:					/* ... that's the next instruction ! */
+
+	/* Set initial stack pointer */
+	movi	datalabel stack_start, r0
+	ld.l	r0, 0, r15
+
+	/*
+	 * Clear bss
+	 */
+	pt	1f, tr1
+	movi	datalabel __bss_start, r22
+	movi	datalabel _end, r23
+1:	st.l	r22, 0, ZERO
+	addi	r22, 4, r22
+	bne	r22, r23, tr1
+
+	/*
+	 * Decompress the kernel.
+	 */
+	pt	decompress_kernel, tr0
+	blink	tr0, r18
+
+	/*
+	 * Disable the MMU.
+	 */
+	movi	SR_HARMLESS, r22
+	putcon	r22, SSR
+	movi	1f, r22
+	putcon	r22, SPC
+	synco
+	rte				/* And now go into the hyperspace ... */
+1:					/* ... that's the next instruction ! */
+
+	/* Jump into the decompressed kernel */
+	movi	datalabel (CONFIG_MEMORY_START + 0x2000)+1, r19
+	ptabs	r19, tr0
+	blink	tr0, r18
+
+	/* Shouldn't return here, but just in case, loop forever */
+	pt	1f, tr0
+1:	blink	tr0, ZERO
diff --git a/arch/sh64/boot/compressed/install.sh b/arch/sh64/boot/compressed/install.sh
new file mode 100644
index 0000000..90589f0
--- /dev/null
+++ b/arch/sh64/boot/compressed/install.sh
@@ -0,0 +1,56 @@
+#!/bin/sh
+#
+# arch/sh/boot/install.sh
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 1995 by Linus Torvalds
+#
+# Adapted from code in arch/i386/boot/Makefile by H. Peter Anvin
+# Adapted from code in arch/i386/boot/install.sh by Russell King
+# Adapted from code in arch/arm/boot/install.sh by Stuart Menefy
+#
+# "make install" script for sh architecture
+#
+# Arguments:
+#   $1 - kernel version
+#   $2 - kernel image file
+#   $3 - kernel map file
+#   $4 - default install path (blank if root directory)
+#
+
+# User may have a custom install script
+
+if [ -x /sbin/installkernel ]; then
+  exec /sbin/installkernel "$@"
+fi
+
+if [ "$2" = "zImage" ]; then
+# Compressed install
+  echo "Installing compressed kernel"
+  if [ -f $4/vmlinuz-$1 ]; then
+    mv $4/vmlinuz-$1 $4/vmlinuz.old
+  fi
+
+  if [ -f $4/System.map-$1 ]; then
+    mv $4/System.map-$1 $4/System.old
+  fi
+
+  cat $2 > $4/vmlinuz-$1
+  cp $3 $4/System.map-$1
+else
+# Normal install
+  echo "Installing normal kernel"
+  if [ -f $4/vmlinux-$1 ]; then
+    mv $4/vmlinux-$1 $4/vmlinux.old
+  fi
+
+  if [ -f $4/System.map ]; then
+    mv $4/System.map $4/System.old
+  fi
+
+  cat $2 > $4/vmlinux-$1
+  cp $3 $4/System.map
+fi
diff --git a/arch/sh64/boot/compressed/misc.c b/arch/sh64/boot/compressed/misc.c
new file mode 100644
index 0000000..89dbf45
--- /dev/null
+++ b/arch/sh64/boot/compressed/misc.c
@@ -0,0 +1,251 @@
+/*
+ * arch/shmedia/boot/compressed/misc.c
+ *
+ * This is a collection of several routines from gzip-1.0.3
+ * adapted for Linux.
+ *
+ * malloc by Hannu Savolainen 1993 and Matthias Urlichs 1994
+ *
+ * Adapted for SHmedia from sh by Stuart Menefy, May 2002
+ */
+
+#include <linux/config.h>
+#include <asm/uaccess.h>
+
+/* cache.c */
+#define CACHE_ENABLE      0
+#define CACHE_DISABLE     1
+int cache_control(unsigned int command);
+
+/*
+ * gzip declarations
+ */
+
+#define OF(args)  args
+#define STATIC static
+
+#undef memset
+#undef memcpy
+#define memzero(s, n)     memset ((s), 0, (n))
+
+typedef unsigned char uch;
+typedef unsigned short ush;
+typedef unsigned long ulg;
+
+#define WSIZE 0x8000		/* Window size must be at least 32k, */
+				/* and a power of two */
+
+static uch *inbuf;		/* input buffer */
+static uch window[WSIZE];	/* Sliding window buffer */
+
+static unsigned insize = 0;	/* valid bytes in inbuf */
+static unsigned inptr = 0;	/* index of next byte to be processed in inbuf */
+static unsigned outcnt = 0;	/* bytes in output buffer */
+
+/* gzip flag byte */
+#define ASCII_FLAG   0x01	/* bit 0 set: file probably ASCII text */
+#define CONTINUATION 0x02	/* bit 1 set: continuation of multi-part gzip file */
+#define EXTRA_FIELD  0x04	/* bit 2 set: extra field present */
+#define ORIG_NAME    0x08	/* bit 3 set: original file name present */
+#define COMMENT      0x10	/* bit 4 set: file comment present */
+#define ENCRYPTED    0x20	/* bit 5 set: file is encrypted */
+#define RESERVED     0xC0	/* bit 6,7:   reserved */
+
+#define get_byte()  (inptr < insize ? inbuf[inptr++] : fill_inbuf())
+
+/* Diagnostic functions */
+#ifdef DEBUG
+#  define Assert(cond,msg) {if(!(cond)) error(msg);}
+#  define Trace(x) fprintf x
+#  define Tracev(x) {if (verbose) fprintf x ;}
+#  define Tracevv(x) {if (verbose>1) fprintf x ;}
+#  define Tracec(c,x) {if (verbose && (c)) fprintf x ;}
+#  define Tracecv(c,x) {if (verbose>1 && (c)) fprintf x ;}
+#else
+#  define Assert(cond,msg)
+#  define Trace(x)
+#  define Tracev(x)
+#  define Tracevv(x)
+#  define Tracec(c,x)
+#  define Tracecv(c,x)
+#endif
+
+static int fill_inbuf(void);
+static void flush_window(void);
+static void error(char *m);
+static void gzip_mark(void **);
+static void gzip_release(void **);
+
+extern char input_data[];
+extern int input_len;
+
+static long bytes_out = 0;
+static uch *output_data;
+static unsigned long output_ptr = 0;
+
+static void *malloc(int size);
+static void free(void *where);
+static void error(char *m);
+static void gzip_mark(void **);
+static void gzip_release(void **);
+
+static void puts(const char *);
+
+extern int _text;		/* Defined in vmlinux.lds.S */
+extern int _end;
+static unsigned long free_mem_ptr;
+static unsigned long free_mem_end_ptr;
+
+#define HEAP_SIZE             0x10000
+
+#include "../../../../lib/inflate.c"
+
+static void *malloc(int size)
+{
+	void *p;
+
+	if (size < 0)
+		error("Malloc error\n");
+	if (free_mem_ptr == 0)
+		error("Memory error\n");
+
+	free_mem_ptr = (free_mem_ptr + 3) & ~3;	/* Align */
+
+	p = (void *) free_mem_ptr;
+	free_mem_ptr += size;
+
+	if (free_mem_ptr >= free_mem_end_ptr)
+		error("\nOut of memory\n");
+
+	return p;
+}
+
+static void free(void *where)
+{				/* Don't care */
+}
+
+static void gzip_mark(void **ptr)
+{
+	*ptr = (void *) free_mem_ptr;
+}
+
+static void gzip_release(void **ptr)
+{
+	free_mem_ptr = (long) *ptr;
+}
+
+void puts(const char *s)
+{
+}
+
+void *memset(void *s, int c, size_t n)
+{
+	int i;
+	char *ss = (char *) s;
+
+	for (i = 0; i < n; i++)
+		ss[i] = c;
+	return s;
+}
+
+void *memcpy(void *__dest, __const void *__src, size_t __n)
+{
+	int i;
+	char *d = (char *) __dest, *s = (char *) __src;
+
+	for (i = 0; i < __n; i++)
+		d[i] = s[i];
+	return __dest;
+}
+
+/* ===========================================================================
+ * Fill the input buffer. This is called only when the buffer is empty
+ * and at least one byte is really needed.
+ */
+static int fill_inbuf(void)
+{
+	if (insize != 0) {
+		error("ran out of input data\n");
+	}
+
+	inbuf = input_data;
+	insize = input_len;
+	inptr = 1;
+	return inbuf[0];
+}
+
+/* ===========================================================================
+ * Write the output window window[0..outcnt-1] and update crc and bytes_out.
+ * (Used for the decompressed data only.)
+ */
+static void flush_window(void)
+{
+	ulg c = crc;		/* temporary variable */
+	unsigned n;
+	uch *in, *out, ch;
+
+	in = window;
+	out = &output_data[output_ptr];
+	for (n = 0; n < outcnt; n++) {
+		ch = *out++ = *in++;
+		c = crc_32_tab[((int) c ^ ch) & 0xff] ^ (c >> 8);
+	}
+	crc = c;
+	bytes_out += (ulg) outcnt;
+	output_ptr += (ulg) outcnt;
+	outcnt = 0;
+	puts(".");
+}
+
+static void error(char *x)
+{
+	puts("\n\n");
+	puts(x);
+	puts("\n\n -- System halted");
+
+	while (1) ;		/* Halt */
+}
+
+#define STACK_SIZE (4096)
+long __attribute__ ((aligned(8))) user_stack[STACK_SIZE];
+long *stack_start = &user_stack[STACK_SIZE];
+
+void decompress_kernel(void)
+{
+	output_data = (uch *) (CONFIG_MEMORY_START + 0x2000);
+	free_mem_ptr = (unsigned long) &_end;
+	free_mem_end_ptr = free_mem_ptr + HEAP_SIZE;
+
+	makecrc();
+	puts("Uncompressing Linux... ");
+	cache_control(CACHE_ENABLE);
+	gunzip();
+	puts("\n");
+
+#if 0
+	/* When booting from ROM may want to do something like this if the
+	 * boot loader doesn't.
+	 */
+
+	/* Set up the parameters and command line */
+	{
+		volatile unsigned int *parambase =
+		    (int *) (CONFIG_MEMORY_START + 0x1000);
+
+		parambase[0] = 0x1;	/* MOUNT_ROOT_RDONLY */
+		parambase[1] = 0x0;	/* RAMDISK_FLAGS */
+		parambase[2] = 0x0200;	/* ORIG_ROOT_DEV */
+		parambase[3] = 0x0;	/* LOADER_TYPE */
+		parambase[4] = 0x0;	/* INITRD_START */
+		parambase[5] = 0x0;	/* INITRD_SIZE */
+		parambase[6] = 0;
+
+		strcpy((char *) ((int) parambase + 0x100),
+		       "console=ttySC0,38400");
+	}
+#endif
+
+	puts("Ok, booting the kernel.\n");
+
+	cache_control(CACHE_DISABLE);
+}
diff --git a/arch/sh64/boot/compressed/vmlinux.lds.S b/arch/sh64/boot/compressed/vmlinux.lds.S
new file mode 100644
index 0000000..15a737d
--- /dev/null
+++ b/arch/sh64/boot/compressed/vmlinux.lds.S
@@ -0,0 +1,65 @@
+/*
+ * ld script to make compressed SuperH/shmedia Linux kernel+decompression
+ *		bootstrap
+ * Modified by Stuart Menefy from arch/sh/vmlinux.lds.S written by Niibe Yutaka
+ */
+
+#include <linux/config.h>
+
+#ifdef CONFIG_LITTLE_ENDIAN
+/* OUTPUT_FORMAT("elf32-sh64l-linux", "elf32-sh64l-linux", "elf32-sh64l-linux") */
+#define NOP 0x6ff0fff0
+#else
+/* OUTPUT_FORMAT("elf32-sh64", "elf32-sh64", "elf32-sh64") */
+#define NOP 0xf0fff06f
+#endif
+
+OUTPUT_FORMAT("elf32-sh64-linux")
+OUTPUT_ARCH(sh)
+ENTRY(_start)
+
+#define ALIGNED_GAP(section, align) (((ADDR(section)+SIZEOF(section)+(align)-1) & ~((align)-1))-ADDR(section))
+#define FOLLOWING(section, align) AT (LOADADDR(section) + ALIGNED_GAP(section,align))
+
+SECTIONS
+{
+  _text = .;			/* Text and read-only data */
+
+  .text : {
+	*(.text)
+	*(.text64)
+	*(.text..SHmedia32)
+	*(.fixup)
+	*(.gnu.warning)
+	} = NOP
+  . = ALIGN(4);
+  .rodata : { *(.rodata) }
+
+  /* There is no 'real' reason for eight byte alignment, four would work
+   * as well, but gdb downloads much (*4) faster with this.
+   */
+  . = ALIGN(8);
+  .image : { *(.image) }
+  . = ALIGN(4);
+  _etext = .;			/* End of text section */
+
+  .data :			/* Data */
+	FOLLOWING(.image, 4)
+	{
+	_data = .;
+	*(.data)
+	}
+  _data_image = LOADADDR(.data);/* Address of data section in ROM */
+
+  _edata = .;			/* End of data section */
+
+  .stack : { stack = .;  _stack = .; }
+
+  . = ALIGN(4);
+  __bss_start = .;		/* BSS */
+  .bss : {
+	*(.bss)
+	}
+  . = ALIGN(4);
+  _end = . ;
+}
diff --git a/arch/sh64/configs/cayman_defconfig b/arch/sh64/configs/cayman_defconfig
new file mode 100644
index 0000000..48f2740
--- /dev/null
+++ b/arch/sh64/configs/cayman_defconfig
@@ -0,0 +1,837 @@
+#
+# Automatically generated make config: don't edit
+# Linux kernel version: 2.6.11
+# Fri Feb 25 18:14:31 2005
+#
+CONFIG_SUPERH=y
+CONFIG_SUPERH64=y
+CONFIG_MMU=y
+CONFIG_UID16=y
+CONFIG_RWSEM_GENERIC_SPINLOCK=y
+CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_LOG_BUF_SHIFT=14
+
+#
+# Code maturity level options
+#
+CONFIG_EXPERIMENTAL=y
+CONFIG_CLEAN_COMPILE=y
+CONFIG_BROKEN_ON_SMP=y
+
+#
+# General setup
+#
+CONFIG_LOCALVERSION=""
+CONFIG_SWAP=y
+# CONFIG_SYSVIPC is not set
+CONFIG_POSIX_MQUEUE=y
+# CONFIG_BSD_PROCESS_ACCT is not set
+CONFIG_SYSCTL=y
+# CONFIG_AUDIT is not set
+# CONFIG_HOTPLUG is not set
+CONFIG_KOBJECT_UEVENT=y
+# CONFIG_IKCONFIG is not set
+# CONFIG_EMBEDDED is not set
+CONFIG_KALLSYMS=y
+# CONFIG_KALLSYMS_ALL is not set
+# CONFIG_KALLSYMS_EXTRA_PASS is not set
+CONFIG_FUTEX=y
+CONFIG_EPOLL=y
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
+CONFIG_SHMEM=y
+CONFIG_CC_ALIGN_FUNCTIONS=0
+CONFIG_CC_ALIGN_LABELS=0
+CONFIG_CC_ALIGN_LOOPS=0
+CONFIG_CC_ALIGN_JUMPS=0
+# CONFIG_TINY_SHMEM is not set
+
+#
+# Loadable module support
+#
+# CONFIG_MODULES is not set
+
+#
+# System type
+#
+# CONFIG_SH_GENERIC is not set
+# CONFIG_SH_SIMULATOR is not set
+CONFIG_SH_CAYMAN=y
+# CONFIG_SH_ROMRAM is not set
+# CONFIG_SH_HARP is not set
+CONFIG_CPU_SH5=y
+CONFIG_CPU_SUBTYPE_SH5_101=y
+# CONFIG_CPU_SUBTYPE_SH5_103 is not set
+CONFIG_LITTLE_ENDIAN=y
+# CONFIG_BIG_ENDIAN is not set
+CONFIG_SH_FPU=y
+# CONFIG_SH64_FPU_DENORM_FLUSH is not set
+CONFIG_SH64_PGTABLE_2_LEVEL=y
+# CONFIG_SH64_PGTABLE_3_LEVEL is not set
+CONFIG_HUGETLB_PAGE_SIZE_64K=y
+# CONFIG_HUGETLB_PAGE_SIZE_1MB is not set
+# CONFIG_HUGETLB_PAGE_SIZE_512MB is not set
+CONFIG_SH64_USER_MISALIGNED_FIXUP=y
+
+#
+# Memory options
+#
+CONFIG_CACHED_MEMORY_OFFSET=0x20000000
+CONFIG_MEMORY_START=0x80000000
+CONFIG_MEMORY_SIZE_IN_MB=128
+
+#
+# Cache options
+#
+# CONFIG_DCACHE_DISABLED is not set
+CONFIG_DCACHE_WRITE_BACK=y
+# CONFIG_DCACHE_WRITE_THROUGH is not set
+# CONFIG_ICACHE_DISABLED is not set
+CONFIG_PCIDEVICE_MEMORY_START=C0000000
+CONFIG_DEVICE_MEMORY_START=E0000000
+CONFIG_FLASH_MEMORY_START=0x00000000
+CONFIG_PCI_BLOCK_START=0x40000000
+
+#
+# CPU Subtype specific options
+#
+CONFIG_SH64_ID2815_WORKAROUND=y
+
+#
+# Misc options
+#
+CONFIG_HEARTBEAT=y
+CONFIG_HDSP253_LED=y
+CONFIG_SH_DMA=y
+CONFIG_PREEMPT=y
+
+#
+# Bus options (PCI, PCMCIA, EISA, MCA, ISA)
+#
+CONFIG_SUPERHYWAY=y
+CONFIG_PCI=y
+CONFIG_SH_PCIDMA_NONCOHERENT=y
+CONFIG_PCI_LEGACY_PROC=y
+CONFIG_PCI_NAMES=y
+
+#
+# PCCARD (PCMCIA/CardBus) support
+#
+# CONFIG_PCCARD is not set
+
+#
+# PC-card bridges
+#
+
+#
+# PCI Hotplug Support
+#
+# CONFIG_HOTPLUG_PCI is not set
+
+#
+# Executable file formats
+#
+CONFIG_BINFMT_ELF=y
+# CONFIG_BINFMT_FLAT is not set
+# CONFIG_BINFMT_MISC is not set
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+CONFIG_STANDALONE=y
+CONFIG_PREVENT_FIRMWARE_BUILD=y
+# CONFIG_FW_LOADER is not set
+# CONFIG_DEBUG_DRIVER is not set
+
+#
+# Memory Technology Devices (MTD)
+#
+# CONFIG_MTD is not set
+
+#
+# Parallel port support
+#
+# CONFIG_PARPORT is not set
+
+#
+# Plug and Play support
+#
+
+#
+# Block devices
+#
+# CONFIG_BLK_DEV_FD is not set
+# CONFIG_BLK_CPQ_DA is not set
+# CONFIG_BLK_CPQ_CISS_DA is not set
+# CONFIG_BLK_DEV_DAC960 is not set
+# CONFIG_BLK_DEV_UMEM is not set
+# CONFIG_BLK_DEV_COW_COMMON is not set
+CONFIG_BLK_DEV_LOOP=y
+# CONFIG_BLK_DEV_CRYPTOLOOP is not set
+# CONFIG_BLK_DEV_NBD is not set
+# CONFIG_BLK_DEV_SX8 is not set
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=16
+CONFIG_BLK_DEV_RAM_SIZE=4096
+# CONFIG_BLK_DEV_INITRD is not set
+CONFIG_INITRAMFS_SOURCE=""
+# CONFIG_LBD is not set
+# CONFIG_CDROM_PKTCDVD is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+# CONFIG_ATA_OVER_ETH is not set
+
+#
+# ATA/ATAPI/MFM/RLL support
+#
+# CONFIG_IDE is not set
+
+#
+# SCSI device support
+#
+CONFIG_SCSI=y
+CONFIG_SCSI_PROC_FS=y
+
+#
+# SCSI support type (disk, tape, CD-ROM)
+#
+CONFIG_BLK_DEV_SD=y
+# CONFIG_CHR_DEV_ST is not set
+# CONFIG_CHR_DEV_OSST is not set
+# CONFIG_BLK_DEV_SR is not set
+# CONFIG_CHR_DEV_SG is not set
+
+#
+# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
+#
+CONFIG_SCSI_MULTI_LUN=y
+# CONFIG_SCSI_CONSTANTS is not set
+# CONFIG_SCSI_LOGGING is not set
+
+#
+# SCSI Transport Attributes
+#
+CONFIG_SCSI_SPI_ATTRS=y
+# CONFIG_SCSI_FC_ATTRS is not set
+# CONFIG_SCSI_ISCSI_ATTRS is not set
+
+#
+# SCSI low-level drivers
+#
+# CONFIG_BLK_DEV_3W_XXXX_RAID is not set
+# CONFIG_SCSI_3W_9XXX is not set
+# CONFIG_SCSI_ACARD is not set
+# CONFIG_SCSI_AACRAID is not set
+# CONFIG_SCSI_AIC7XXX is not set
+# CONFIG_SCSI_AIC7XXX_OLD is not set
+# CONFIG_SCSI_AIC79XX is not set
+# CONFIG_SCSI_DPT_I2O is not set
+# CONFIG_MEGARAID_NEWGEN is not set
+# CONFIG_MEGARAID_LEGACY is not set
+# CONFIG_SCSI_SATA is not set
+# CONFIG_SCSI_BUSLOGIC is not set
+# CONFIG_SCSI_DMX3191D is not set
+# CONFIG_SCSI_EATA is not set
+# CONFIG_SCSI_EATA_PIO is not set
+# CONFIG_SCSI_FUTURE_DOMAIN is not set
+# CONFIG_SCSI_GDTH is not set
+# CONFIG_SCSI_IPS is not set
+# CONFIG_SCSI_INITIO is not set
+# CONFIG_SCSI_INIA100 is not set
+CONFIG_SCSI_SYM53C8XX_2=y
+CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0
+CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16
+CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64
+# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set
+# CONFIG_SCSI_IPR is not set
+# CONFIG_SCSI_QLOGIC_ISP is not set
+# CONFIG_SCSI_QLOGIC_FC is not set
+# CONFIG_SCSI_QLOGIC_1280 is not set
+CONFIG_SCSI_QLA2XXX=y
+# CONFIG_SCSI_QLA21XX is not set
+# CONFIG_SCSI_QLA22XX is not set
+# CONFIG_SCSI_QLA2300 is not set
+# CONFIG_SCSI_QLA2322 is not set
+# CONFIG_SCSI_QLA6312 is not set
+# CONFIG_SCSI_DC395x is not set
+# CONFIG_SCSI_DC390T is not set
+# CONFIG_SCSI_NSP32 is not set
+# CONFIG_SCSI_DEBUG is not set
+
+#
+# Multi-device support (RAID and LVM)
+#
+# CONFIG_MD is not set
+
+#
+# Fusion MPT device support
+#
+# CONFIG_FUSION is not set
+
+#
+# IEEE 1394 (FireWire) support
+#
+# CONFIG_IEEE1394 is not set
+
+#
+# I2O device support
+#
+# CONFIG_I2O is not set
+
+#
+# Networking support
+#
+CONFIG_NET=y
+
+#
+# Networking options
+#
+CONFIG_PACKET=y
+# CONFIG_PACKET_MMAP is not set
+# CONFIG_NETLINK_DEV is not set
+CONFIG_UNIX=y
+# CONFIG_NET_KEY is not set
+CONFIG_INET=y
+# CONFIG_IP_MULTICAST is not set
+# CONFIG_IP_ADVANCED_ROUTER is not set
+CONFIG_IP_PNP=y
+# CONFIG_IP_PNP_DHCP is not set
+# CONFIG_IP_PNP_BOOTP is not set
+# CONFIG_IP_PNP_RARP is not set
+# CONFIG_NET_IPIP is not set
+# CONFIG_NET_IPGRE is not set
+# CONFIG_ARPD is not set
+# CONFIG_SYN_COOKIES is not set
+# CONFIG_INET_AH is not set
+# CONFIG_INET_ESP is not set
+# CONFIG_INET_IPCOMP is not set
+# CONFIG_INET_TUNNEL is not set
+CONFIG_IP_TCPDIAG=y
+# CONFIG_IP_TCPDIAG_IPV6 is not set
+# CONFIG_IPV6 is not set
+# CONFIG_NETFILTER is not set
+
+#
+# SCTP Configuration (EXPERIMENTAL)
+#
+# CONFIG_IP_SCTP is not set
+# CONFIG_ATM is not set
+# CONFIG_BRIDGE is not set
+# CONFIG_VLAN_8021Q is not set
+# CONFIG_DECNET is not set
+# CONFIG_LLC2 is not set
+# CONFIG_IPX is not set
+# CONFIG_ATALK is not set
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+# CONFIG_NET_DIVERT is not set
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+
+#
+# QoS and/or fair queueing
+#
+# CONFIG_NET_SCHED is not set
+# CONFIG_NET_CLS_ROUTE is not set
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+# CONFIG_NETPOLL is not set
+# CONFIG_NET_POLL_CONTROLLER is not set
+# CONFIG_HAMRADIO is not set
+# CONFIG_IRDA is not set
+# CONFIG_BT is not set
+CONFIG_NETDEVICES=y
+# CONFIG_DUMMY is not set
+# CONFIG_BONDING is not set
+# CONFIG_EQUALIZER is not set
+# CONFIG_TUN is not set
+
+#
+# ARCnet devices
+#
+# CONFIG_ARCNET is not set
+
+#
+# Ethernet (10 or 100Mbit)
+#
+CONFIG_NET_ETHERNET=y
+# CONFIG_MII is not set
+# CONFIG_STNIC is not set
+# CONFIG_HAPPYMEAL is not set
+# CONFIG_SUNGEM is not set
+# CONFIG_NET_VENDOR_3COM is not set
+
+#
+# Tulip family network device support
+#
+CONFIG_NET_TULIP=y
+# CONFIG_DE2104X is not set
+CONFIG_TULIP=y
+# CONFIG_TULIP_MWI is not set
+# CONFIG_TULIP_MMIO is not set
+# CONFIG_TULIP_NAPI is not set
+# CONFIG_DE4X5 is not set
+# CONFIG_WINBOND_840 is not set
+# CONFIG_DM9102 is not set
+# CONFIG_HP100 is not set
+CONFIG_NET_PCI=y
+# CONFIG_PCNET32 is not set
+# CONFIG_AMD8111_ETH is not set
+# CONFIG_ADAPTEC_STARFIRE is not set
+# CONFIG_B44 is not set
+# CONFIG_FORCEDETH is not set
+# CONFIG_DGRS is not set
+# CONFIG_EEPRO100 is not set
+# CONFIG_E100 is not set
+# CONFIG_FEALNX is not set
+# CONFIG_NATSEMI is not set
+# CONFIG_NE2K_PCI is not set
+# CONFIG_8139CP is not set
+# CONFIG_8139TOO is not set
+# CONFIG_SIS900 is not set
+# CONFIG_EPIC100 is not set
+# CONFIG_SUNDANCE is not set
+# CONFIG_TLAN is not set
+# CONFIG_VIA_RHINE is not set
+
+#
+# Ethernet (1000 Mbit)
+#
+# CONFIG_ACENIC is not set
+# CONFIG_DL2K is not set
+# CONFIG_E1000 is not set
+# CONFIG_NS83820 is not set
+# CONFIG_HAMACHI is not set
+# CONFIG_YELLOWFIN is not set
+# CONFIG_R8169 is not set
+# CONFIG_SK98LIN is not set
+# CONFIG_VIA_VELOCITY is not set
+# CONFIG_TIGON3 is not set
+
+#
+# Ethernet (10000 Mbit)
+#
+# CONFIG_IXGB is not set
+# CONFIG_S2IO is not set
+
+#
+# Token Ring devices
+#
+# CONFIG_TR is not set
+
+#
+# Wireless LAN (non-hamradio)
+#
+# CONFIG_NET_RADIO is not set
+
+#
+# Wan interfaces
+#
+# CONFIG_WAN is not set
+# CONFIG_FDDI is not set
+# CONFIG_HIPPI is not set
+# CONFIG_PPP is not set
+# CONFIG_SLIP is not set
+# CONFIG_NET_FC is not set
+# CONFIG_SHAPER is not set
+# CONFIG_NETCONSOLE is not set
+
+#
+# ISDN subsystem
+#
+# CONFIG_ISDN is not set
+
+#
+# Telephony Support
+#
+# CONFIG_PHONE is not set
+
+#
+# Input device support
+#
+CONFIG_INPUT=y
+
+#
+# Userland interfaces
+#
+CONFIG_INPUT_MOUSEDEV=y
+CONFIG_INPUT_MOUSEDEV_PSAUX=y
+CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
+CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
+# CONFIG_INPUT_JOYDEV is not set
+# CONFIG_INPUT_TSDEV is not set
+# CONFIG_INPUT_EVDEV is not set
+# CONFIG_INPUT_EVBUG is not set
+
+#
+# Input I/O drivers
+#
+# CONFIG_GAMEPORT is not set
+CONFIG_SOUND_GAMEPORT=y
+CONFIG_SERIO=y
+CONFIG_SERIO_I8042=y
+CONFIG_SERIO_SERPORT=y
+# CONFIG_SERIO_CT82C710 is not set
+# CONFIG_SERIO_PCIPS2 is not set
+CONFIG_SERIO_LIBPS2=y
+# CONFIG_SERIO_RAW is not set
+
+#
+# Input Device Drivers
+#
+CONFIG_INPUT_KEYBOARD=y
+CONFIG_KEYBOARD_ATKBD=y
+# CONFIG_KEYBOARD_SUNKBD is not set
+# CONFIG_KEYBOARD_LKKBD is not set
+# CONFIG_KEYBOARD_XTKBD is not set
+# CONFIG_KEYBOARD_NEWTON is not set
+CONFIG_INPUT_MOUSE=y
+CONFIG_MOUSE_PS2=y
+# CONFIG_MOUSE_SERIAL is not set
+# CONFIG_MOUSE_VSXXXAA is not set
+# CONFIG_INPUT_JOYSTICK is not set
+# CONFIG_INPUT_TOUCHSCREEN is not set
+# CONFIG_INPUT_MISC is not set
+
+#
+# Character devices
+#
+CONFIG_VT=y
+CONFIG_VT_CONSOLE=y
+CONFIG_HW_CONSOLE=y
+# CONFIG_SERIAL_NONSTANDARD is not set
+
+#
+# Serial drivers
+#
+# CONFIG_SERIAL_8250 is not set
+
+#
+# Non-8250 serial port support
+#
+CONFIG_SERIAL_SH_SCI=y
+CONFIG_SERIAL_SH_SCI_CONSOLE=y
+CONFIG_SERIAL_CORE=y
+CONFIG_SERIAL_CORE_CONSOLE=y
+CONFIG_UNIX98_PTYS=y
+CONFIG_LEGACY_PTYS=y
+CONFIG_LEGACY_PTY_COUNT=256
+
+#
+# IPMI
+#
+# CONFIG_IPMI_HANDLER is not set
+
+#
+# Watchdog Cards
+#
+CONFIG_WATCHDOG=y
+# CONFIG_WATCHDOG_NOWAYOUT is not set
+
+#
+# Watchdog Device Drivers
+#
+# CONFIG_SOFT_WATCHDOG is not set
+# CONFIG_SH_WDT is not set
+
+#
+# PCI-based Watchdog Cards
+#
+# CONFIG_PCIPCWATCHDOG is not set
+# CONFIG_WDTPCI is not set
+# CONFIG_RTC is not set
+# CONFIG_GEN_RTC is not set
+# CONFIG_DTLK is not set
+# CONFIG_R3964 is not set
+# CONFIG_APPLICOM is not set
+
+#
+# Ftape, the floppy tape device driver
+#
+# CONFIG_DRM is not set
+# CONFIG_RAW_DRIVER is not set
+
+#
+# I2C support
+#
+# CONFIG_I2C is not set
+
+#
+# Dallas's 1-wire bus
+#
+# CONFIG_W1 is not set
+
+#
+# Misc devices
+#
+
+#
+# Multimedia devices
+#
+# CONFIG_VIDEO_DEV is not set
+
+#
+# Digital Video Broadcasting Devices
+#
+# CONFIG_DVB is not set
+
+#
+# Graphics support
+#
+CONFIG_FB=y
+CONFIG_FB_MODE_HELPERS=y
+# CONFIG_FB_TILEBLITTING is not set
+# CONFIG_FB_CIRRUS is not set
+# CONFIG_FB_PM2 is not set
+# CONFIG_FB_CYBER2000 is not set
+# CONFIG_FB_ASILIANT is not set
+# CONFIG_FB_IMSTT is not set
+# CONFIG_FB_EPSON1355 is not set
+# CONFIG_FB_RIVA is not set
+# CONFIG_FB_MATROX is not set
+# CONFIG_FB_RADEON_OLD is not set
+# CONFIG_FB_RADEON is not set
+# CONFIG_FB_ATY128 is not set
+# CONFIG_FB_ATY is not set
+# CONFIG_FB_SAVAGE is not set
+# CONFIG_FB_SIS is not set
+# CONFIG_FB_NEOMAGIC is not set
+CONFIG_FB_KYRO=y
+# CONFIG_FB_3DFX is not set
+# CONFIG_FB_VOODOO1 is not set
+# CONFIG_FB_TRIDENT is not set
+# CONFIG_FB_VIRTUAL is not set
+
+#
+# Console display driver support
+#
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_DUMMY_CONSOLE=y
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_FONTS=y
+# CONFIG_FONT_8x8 is not set
+CONFIG_FONT_8x16=y
+# CONFIG_FONT_6x11 is not set
+# CONFIG_FONT_PEARL_8x8 is not set
+# CONFIG_FONT_ACORN_8x8 is not set
+# CONFIG_FONT_MINI_4x6 is not set
+# CONFIG_FONT_SUN8x16 is not set
+# CONFIG_FONT_SUN12x22 is not set
+
+#
+# Logo configuration
+#
+CONFIG_LOGO=y
+# CONFIG_LOGO_LINUX_MONO is not set
+# CONFIG_LOGO_LINUX_VGA16 is not set
+# CONFIG_LOGO_LINUX_CLUT224 is not set
+# CONFIG_LOGO_SUPERH_MONO is not set
+# CONFIG_LOGO_SUPERH_VGA16 is not set
+CONFIG_LOGO_SUPERH_CLUT224=y
+# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
+
+#
+# Sound
+#
+# CONFIG_SOUND is not set
+
+#
+# USB support
+#
+# CONFIG_USB is not set
+CONFIG_USB_ARCH_HAS_HCD=y
+CONFIG_USB_ARCH_HAS_OHCI=y
+
+#
+# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' may also be needed; see USB_STORAGE Help for more information
+#
+
+#
+# USB Gadget Support
+#
+# CONFIG_USB_GADGET is not set
+
+#
+# MMC/SD Card support
+#
+# CONFIG_MMC is not set
+
+#
+# InfiniBand support
+#
+# CONFIG_INFINIBAND is not set
+
+#
+# File systems
+#
+CONFIG_EXT2_FS=y
+# CONFIG_EXT2_FS_XATTR is not set
+CONFIG_EXT3_FS=y
+CONFIG_EXT3_FS_XATTR=y
+# CONFIG_EXT3_FS_POSIX_ACL is not set
+# CONFIG_EXT3_FS_SECURITY is not set
+CONFIG_JBD=y
+# CONFIG_JBD_DEBUG is not set
+CONFIG_FS_MBCACHE=y
+# CONFIG_REISERFS_FS is not set
+# CONFIG_JFS_FS is not set
+
+#
+# XFS support
+#
+# CONFIG_XFS_FS is not set
+CONFIG_MINIX_FS=y
+CONFIG_ROMFS_FS=y
+# CONFIG_QUOTA is not set
+CONFIG_DNOTIFY=y
+# CONFIG_AUTOFS_FS is not set
+# CONFIG_AUTOFS4_FS is not set
+
+#
+# CD-ROM/DVD Filesystems
+#
+# CONFIG_ISO9660_FS is not set
+# CONFIG_UDF_FS is not set
+
+#
+# DOS/FAT/NT Filesystems
+#
+# CONFIG_MSDOS_FS is not set
+# CONFIG_VFAT_FS is not set
+# CONFIG_NTFS_FS is not set
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_SYSFS=y
+# CONFIG_DEVFS_FS is not set
+# CONFIG_DEVPTS_FS_XATTR is not set
+CONFIG_TMPFS=y
+# CONFIG_TMPFS_XATTR is not set
+CONFIG_HUGETLBFS=y
+CONFIG_HUGETLB_PAGE=y
+CONFIG_RAMFS=y
+
+#
+# Miscellaneous filesystems
+#
+# CONFIG_ADFS_FS is not set
+# CONFIG_AFFS_FS is not set
+# CONFIG_HFS_FS is not set
+# CONFIG_HFSPLUS_FS is not set
+# CONFIG_BEFS_FS is not set
+# CONFIG_BFS_FS is not set
+# CONFIG_EFS_FS is not set
+# CONFIG_CRAMFS is not set
+# CONFIG_VXFS_FS is not set
+# CONFIG_HPFS_FS is not set
+# CONFIG_QNX4FS_FS is not set
+# CONFIG_SYSV_FS is not set
+# CONFIG_UFS_FS is not set
+
+#
+# Network File Systems
+#
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3=y
+# CONFIG_NFS_V4 is not set
+# CONFIG_NFS_DIRECTIO is not set
+# CONFIG_NFSD is not set
+CONFIG_ROOT_NFS=y
+CONFIG_LOCKD=y
+CONFIG_LOCKD_V4=y
+CONFIG_SUNRPC=y
+# CONFIG_RPCSEC_GSS_KRB5 is not set
+# CONFIG_RPCSEC_GSS_SPKM3 is not set
+# CONFIG_SMB_FS is not set
+# CONFIG_CIFS is not set
+# CONFIG_NCP_FS is not set
+# CONFIG_CODA_FS is not set
+# CONFIG_AFS_FS is not set
+
+#
+# Partition Types
+#
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_ACORN_PARTITION is not set
+# CONFIG_OSF_PARTITION is not set
+# CONFIG_AMIGA_PARTITION is not set
+# CONFIG_ATARI_PARTITION is not set
+# CONFIG_MAC_PARTITION is not set
+CONFIG_MSDOS_PARTITION=y
+# CONFIG_BSD_DISKLABEL is not set
+# CONFIG_MINIX_SUBPARTITION is not set
+# CONFIG_SOLARIS_X86_PARTITION is not set
+# CONFIG_UNIXWARE_DISKLABEL is not set
+# CONFIG_LDM_PARTITION is not set
+# CONFIG_SGI_PARTITION is not set
+# CONFIG_ULTRIX_PARTITION is not set
+# CONFIG_SUN_PARTITION is not set
+# CONFIG_EFI_PARTITION is not set
+
+#
+# Native Language Support
+#
+# CONFIG_NLS is not set
+
+#
+# Profiling support
+#
+# CONFIG_PROFILING is not set
+
+#
+# Kernel hacking
+#
+CONFIG_DEBUG_KERNEL=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_SCHEDSTATS=y
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_KOBJECT is not set
+CONFIG_DEBUG_FS=y
+CONFIG_FRAME_POINTER=y
+# CONFIG_EARLY_PRINTK is not set
+# CONFIG_DEBUG_KERNEL_WITH_GDB_STUB is not set
+CONFIG_SH64_PROC_TLB=y
+CONFIG_SH64_PROC_ASIDS=y
+CONFIG_SH64_SR_WATCH=y
+# CONFIG_POOR_MANS_STRACE is not set
+# CONFIG_SH_ALPHANUMERIC is not set
+# CONFIG_SH_NO_BSS_INIT is not set
+
+#
+# Security options
+#
+# CONFIG_KEYS is not set
+# CONFIG_SECURITY is not set
+
+#
+# Cryptographic options
+#
+# CONFIG_CRYPTO is not set
+
+#
+# Hardware crypto devices
+#
+
+#
+# Library routines
+#
+# CONFIG_CRC_CCITT is not set
+CONFIG_CRC32=y
+# CONFIG_LIBCRC32C is not set
+CONFIG_GENERIC_HARDIRQS=y
+CONFIG_GENERIC_IRQ_PROBE=y
diff --git a/arch/sh64/kernel/Makefile b/arch/sh64/kernel/Makefile
new file mode 100644
index 0000000..5816657
--- /dev/null
+++ b/arch/sh64/kernel/Makefile
@@ -0,0 +1,36 @@
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 2000, 2001  Paolo Alberelli
+# Copyright (C) 2003  Paul Mundt
+#
+# Makefile for the Linux sh64 kernel.
+#
+# Note! Dependencies are done automagically by 'make dep', which also
+# removes any old dependencies. DON'T put your own dependencies here
+# unless it's something special (ie not a .c file).
+#
+
+extra-y	:= head.o init_task.o vmlinux.lds
+
+obj-y	:= process.o signal.o entry.o traps.o irq.o irq_intc.o \
+	   ptrace.o setup.o time.o sys_sh64.o semaphore.o sh_ksyms.o \
+	   switchto.o syscalls.o
+
+obj-$(CONFIG_HEARTBEAT)		+= led.o
+obj-$(CONFIG_SH_ALPHANUMERIC)	+= alphanum.o
+obj-$(CONFIG_SH_DMA)		+= dma.o
+obj-$(CONFIG_SH_FPU)		+= fpu.o
+obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
+obj-$(CONFIG_KALLSYMS)		+= unwind.o
+obj-$(CONFIG_PCI)		+= pci-dma.o pcibios.o
+obj-$(CONFIG_MODULES)		+= module.o
+
+ifeq ($(CONFIG_PCI),y)
+obj-$(CONFIG_CPU_SH5)		+= pci_sh5.o
+endif
+
+USE_STANDARD_AS_RULE := true
+
diff --git a/arch/sh64/kernel/alphanum.c b/arch/sh64/kernel/alphanum.c
new file mode 100644
index 0000000..56d6f9f
--- /dev/null
+++ b/arch/sh64/kernel/alphanum.c
@@ -0,0 +1,45 @@
+/*
+ * arch/sh64/kernel/alpanum.c
+ *
+ * Copyright (C) 2002 Stuart Menefy <stuart.menefy@st.com>
+ *
+ * May be copied or modified under the terms of the GNU General Public
+ * License.  See linux/COPYING for more information.
+ *
+ * Machine-independent functions for handling 8-digit alphanumeric display
+ * (e.g. Agilent HDSP-253x)
+ */
+#include <linux/config.h>
+#include <linux/stddef.h>
+#include <linux/sched.h>
+
+void mach_alphanum(int pos, unsigned char val);
+void mach_led(int pos, int val);
+
+void print_seg(char *file, int line)
+{
+	int i;
+	unsigned int nibble;
+
+	for (i = 0; i < 5; i++) {
+		mach_alphanum(i, file[i]);
+	}
+
+	for (i = 0; i < 3; i++) {
+		nibble = ((line >> (i * 4)) & 0xf);
+		mach_alphanum(7 - i, nibble + ((nibble > 9) ? 55 : 48));
+	}
+}
+
+void print_seg_num(unsigned num)
+{
+	int i;
+	unsigned int nibble;
+
+	for (i = 0; i < 8; i++) {
+		nibble = ((num >> (i * 4)) & 0xf);
+
+		mach_alphanum(7 - i, nibble + ((nibble > 9) ? 55 : 48));
+	}
+}
+
diff --git a/arch/sh64/kernel/asm-offsets.c b/arch/sh64/kernel/asm-offsets.c
new file mode 100644
index 0000000..ca76537
--- /dev/null
+++ b/arch/sh64/kernel/asm-offsets.c
@@ -0,0 +1,33 @@
+/*
+ * This program is used to generate definitions needed by
+ * assembly language modules.
+ *
+ * We use the technique used in the OSF Mach kernel code:
+ * generate asm statements containing #defines,
+ * compile this file to assembler, and then extract the
+ * #defines from the assembly-language output.
+ */
+
+#include <linux/stddef.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <asm/thread_info.h>
+
+#define DEFINE(sym, val) \
+        asm volatile("\n->" #sym " %0 " #val : : "i" (val))
+
+#define BLANK() asm volatile("\n->" : : )
+
+int main(void)
+{
+	/* offsets into the thread_info struct */
+	DEFINE(TI_TASK,		offsetof(struct thread_info, task));
+	DEFINE(TI_EXEC_DOMAIN,	offsetof(struct thread_info, exec_domain));
+	DEFINE(TI_FLAGS,	offsetof(struct thread_info, flags));
+	DEFINE(TI_PRE_COUNT,	offsetof(struct thread_info, preempt_count));
+	DEFINE(TI_CPU,		offsetof(struct thread_info, cpu));
+	DEFINE(TI_ADDR_LIMIT,	offsetof(struct thread_info, addr_limit));
+	DEFINE(TI_RESTART_BLOCK,offsetof(struct thread_info, restart_block));
+
+	return 0;
+}
diff --git a/arch/sh64/kernel/dma.c b/arch/sh64/kernel/dma.c
new file mode 100644
index 0000000..09cd9f4
--- /dev/null
+++ b/arch/sh64/kernel/dma.c
@@ -0,0 +1,297 @@
+/*
+ * arch/sh64/kernel/dma.c
+ *
+ * DMA routines for the SH-5 DMAC.
+ *
+ * Copyright (C) 2003  Paul Mundt
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/types.h>
+#include <linux/irq.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <asm/hardware.h>
+#include <asm/dma.h>
+#include <asm/signal.h>
+#include <asm/errno.h>
+#include <asm/io.h>
+
+typedef struct {
+	unsigned long dev_addr;
+	unsigned long mem_addr;
+
+	unsigned int mode;
+	unsigned int count;
+} dma_info_t;
+
+static dma_info_t dma_info[MAX_DMA_CHANNELS];
+static DEFINE_SPINLOCK(dma_spin_lock);
+
+/* arch/sh64/kernel/irq_intc.c */
+extern void make_intc_irq(unsigned int irq);
+
+/* DMAC Interrupts */
+#define DMA_IRQ_DMTE0	18
+#define DMA_IRQ_DERR	22
+
+#define DMAC_COMMON_BASE	(dmac_base + 0x08)
+#define DMAC_SAR_BASE		(dmac_base + 0x10)
+#define DMAC_DAR_BASE		(dmac_base + 0x18)
+#define DMAC_COUNT_BASE		(dmac_base + 0x20)
+#define DMAC_CTRL_BASE		(dmac_base + 0x28)
+#define DMAC_STATUS_BASE	(dmac_base + 0x30)
+
+#define DMAC_SAR(n)	(DMAC_SAR_BASE    + ((n) * 0x28))
+#define DMAC_DAR(n)	(DMAC_DAR_BASE    + ((n) * 0x28))
+#define DMAC_COUNT(n)	(DMAC_COUNT_BASE  + ((n) * 0x28))
+#define DMAC_CTRL(n)	(DMAC_CTRL_BASE   + ((n) * 0x28))
+#define DMAC_STATUS(n)	(DMAC_STATUS_BASE + ((n) * 0x28))
+
+/* DMAC.COMMON Bit Definitions */
+#define DMAC_COMMON_PR	0x00000001	/* Priority */
+					/* Bits 1-2 Reserved */
+#define DMAC_COMMON_ME	0x00000008	/* Master Enable */
+#define DMAC_COMMON_NMI	0x00000010	/* NMI Flag */
+					/* Bits 5-6 Reserved */
+#define DMAC_COMMON_ER	0x00000780	/* Error Response */
+#define DMAC_COMMON_AAE	0x00007800	/* Address Alignment Error */
+					/* Bits 15-63 Reserved */
+
+/* DMAC.SAR Bit Definitions */
+#define DMAC_SAR_ADDR	0xffffffff	/* Source Address */
+
+/* DMAC.DAR Bit Definitions */
+#define DMAC_DAR_ADDR	0xffffffff	/* Destination Address */
+
+/* DMAC.COUNT Bit Definitions */
+#define DMAC_COUNT_CNT	0xffffffff	/* Transfer Count */
+
+/* DMAC.CTRL Bit Definitions */
+#define DMAC_CTRL_TS	0x00000007	/* Transfer Size */
+#define DMAC_CTRL_SI	0x00000018	/* Source Increment */
+#define DMAC_CTRL_DI	0x00000060	/* Destination Increment */
+#define DMAC_CTRL_RS	0x00000780	/* Resource Select */
+#define DMAC_CTRL_IE	0x00000800	/* Interrupt Enable */
+#define DMAC_CTRL_TE	0x00001000	/* Transfer Enable */
+					/* Bits 15-63 Reserved */
+
+/* DMAC.STATUS Bit Definitions */
+#define DMAC_STATUS_TE	0x00000001	/* Transfer End */
+#define DMAC_STATUS_AAE	0x00000002	/* Address Alignment Error */
+					/* Bits 2-63 Reserved */
+
+static unsigned long dmac_base;
+
+void set_dma_count(unsigned int chan, unsigned int count);
+void set_dma_addr(unsigned int chan, unsigned int addr);
+
+static irqreturn_t dma_mte(int irq, void *dev_id, struct pt_regs *regs)
+{
+	unsigned int chan = irq - DMA_IRQ_DMTE0;
+	dma_info_t *info = dma_info + chan;
+	u64 status;
+
+	if (info->mode & DMA_MODE_WRITE) {
+		sh64_out64(info->mem_addr & DMAC_SAR_ADDR, DMAC_SAR(chan));
+	} else {
+		sh64_out64(info->mem_addr & DMAC_DAR_ADDR, DMAC_DAR(chan));
+	}
+
+	set_dma_count(chan, info->count);
+
+	/* Clear the TE bit */
+	status = sh64_in64(DMAC_STATUS(chan));
+	status &= ~DMAC_STATUS_TE;
+	sh64_out64(status, DMAC_STATUS(chan));
+
+	return IRQ_HANDLED;
+}
+
+static struct irqaction irq_dmte = {
+	.handler	= dma_mte,
+	.flags		= SA_INTERRUPT,
+	.name		= "DMA MTE",
+};
+
+static irqreturn_t dma_err(int irq, void *dev_id, struct pt_regs *regs)
+{
+	u64 tmp;
+	u8 chan;
+
+	printk(KERN_NOTICE "DMAC: Got a DMA Error!\n");
+
+	tmp = sh64_in64(DMAC_COMMON_BASE);
+
+	/* Check for the type of error */
+	if ((chan = tmp & DMAC_COMMON_AAE)) {
+		/* It's an address alignment error.. */
+		printk(KERN_NOTICE "DMAC: Alignment error on channel %d, ", chan);
+
+		printk(KERN_NOTICE "SAR: 0x%08llx, DAR: 0x%08llx, COUNT: %lld\n",
+		       (sh64_in64(DMAC_SAR(chan)) & DMAC_SAR_ADDR),
+		       (sh64_in64(DMAC_DAR(chan)) & DMAC_DAR_ADDR),
+		       (sh64_in64(DMAC_COUNT(chan)) & DMAC_COUNT_CNT));
+
+	} else if ((chan = tmp & DMAC_COMMON_ER)) {
+		/* Something else went wrong.. */
+		printk(KERN_NOTICE "DMAC: Error on channel %d\n", chan);
+	}
+
+	/* Reset the ME bit to clear the interrupt */
+	tmp |= DMAC_COMMON_ME;
+	sh64_out64(tmp, DMAC_COMMON_BASE);
+
+	return IRQ_HANDLED;
+}
+
+static struct irqaction irq_derr = {
+	.handler	= dma_err,
+	.flags		= SA_INTERRUPT,
+	.name		= "DMA Error",
+};
+
+static inline unsigned long calc_xmit_shift(unsigned int chan)
+{
+	return sh64_in64(DMAC_CTRL(chan)) & 0x03;
+}
+
+void setup_dma(unsigned int chan, dma_info_t *info)
+{
+	unsigned int irq = DMA_IRQ_DMTE0 + chan;
+	dma_info_t *dma = dma_info + chan;
+
+	make_intc_irq(irq);
+	setup_irq(irq, &irq_dmte);
+	dma = info;
+}
+
+void enable_dma(unsigned int chan)
+{
+	u64 ctrl;
+
+	ctrl = sh64_in64(DMAC_CTRL(chan));
+	ctrl |= DMAC_CTRL_TE;
+	sh64_out64(ctrl, DMAC_CTRL(chan));
+}
+
+void disable_dma(unsigned int chan)
+{
+	u64 ctrl;
+
+	ctrl = sh64_in64(DMAC_CTRL(chan));
+	ctrl &= ~DMAC_CTRL_TE;
+	sh64_out64(ctrl, DMAC_CTRL(chan));
+}
+
+void set_dma_mode(unsigned int chan, char mode)
+{
+	dma_info_t *info = dma_info + chan;
+
+	info->mode = mode;
+
+	set_dma_addr(chan, info->mem_addr);
+	set_dma_count(chan, info->count);
+}
+
+void set_dma_addr(unsigned int chan, unsigned int addr)
+{
+	dma_info_t *info = dma_info + chan;
+	unsigned long sar, dar;
+
+	info->mem_addr = addr;
+	sar = (info->mode & DMA_MODE_WRITE) ? info->mem_addr : info->dev_addr;
+	dar = (info->mode & DMA_MODE_WRITE) ? info->dev_addr : info->mem_addr;
+
+	sh64_out64(sar & DMAC_SAR_ADDR, DMAC_SAR(chan));
+	sh64_out64(dar & DMAC_SAR_ADDR, DMAC_DAR(chan));
+}
+
+void set_dma_count(unsigned int chan, unsigned int count)
+{
+	dma_info_t *info = dma_info + chan;
+	u64 tmp;
+
+	info->count = count;
+
+	tmp = (info->count >> calc_xmit_shift(chan)) & DMAC_COUNT_CNT;
+
+	sh64_out64(tmp, DMAC_COUNT(chan));
+}
+
+unsigned long claim_dma_lock(void)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&dma_spin_lock, flags);
+
+	return flags;
+}
+
+void release_dma_lock(unsigned long flags)
+{
+	spin_unlock_irqrestore(&dma_spin_lock, flags);
+}
+
+int get_dma_residue(unsigned int chan)
+{
+	return sh64_in64(DMAC_COUNT(chan) << calc_xmit_shift(chan));
+}
+
+int __init init_dma(void)
+{
+	struct vcr_info vcr;
+	u64 tmp;
+
+	/* Remap the DMAC */
+	dmac_base = onchip_remap(PHYS_DMAC_BLOCK, 1024, "DMAC");
+	if (!dmac_base) {
+		printk(KERN_ERR "Unable to remap DMAC\n");
+		return -ENOMEM;
+	}
+
+	/* Report DMAC.VCR Info */
+	vcr = sh64_get_vcr_info(dmac_base);
+	printk("DMAC: Module ID: 0x%04x, Module version: 0x%04x\n",
+	       vcr.mod_id, vcr.mod_vers);
+
+	/* Set the ME bit */
+	tmp = sh64_in64(DMAC_COMMON_BASE);
+	tmp |= DMAC_COMMON_ME;
+	sh64_out64(tmp, DMAC_COMMON_BASE);
+
+	/* Enable the DMAC Error Interrupt */
+	make_intc_irq(DMA_IRQ_DERR);
+	setup_irq(DMA_IRQ_DERR, &irq_derr);
+
+	return 0;
+}
+
+static void __exit exit_dma(void)
+{
+	onchip_unmap(dmac_base);
+	free_irq(DMA_IRQ_DERR, 0);
+}
+
+module_init(init_dma);
+module_exit(exit_dma);
+
+MODULE_AUTHOR("Paul Mundt");
+MODULE_DESCRIPTION("DMA API for SH-5 DMAC");
+MODULE_LICENSE("GPL");
+
+EXPORT_SYMBOL(setup_dma);
+EXPORT_SYMBOL(claim_dma_lock);
+EXPORT_SYMBOL(release_dma_lock);
+EXPORT_SYMBOL(enable_dma);
+EXPORT_SYMBOL(disable_dma);
+EXPORT_SYMBOL(set_dma_mode);
+EXPORT_SYMBOL(set_dma_addr);
+EXPORT_SYMBOL(set_dma_count);
+EXPORT_SYMBOL(get_dma_residue);
+
diff --git a/arch/sh64/kernel/early_printk.c b/arch/sh64/kernel/early_printk.c
new file mode 100644
index 0000000..8c8a76e
--- /dev/null
+++ b/arch/sh64/kernel/early_printk.c
@@ -0,0 +1,105 @@
+/*
+ * arch/sh64/kernel/early_printk.c
+ *
+ * SH-5 Early SCIF console (cloned and hacked from sh implementation)
+ *
+ * Copyright (C) 2003, 2004  Paul Mundt <lethal@linux-sh.org>
+ * Copyright (C) 2002  M. R. Brown <mrbrown@0xd6.org>
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/console.h>
+#include <linux/tty.h>
+#include <linux/init.h>
+#include <asm/io.h>
+#include <asm/hardware.h>
+
+#define SCIF_BASE_ADDR	0x01030000
+#define SCIF_ADDR_SH5	PHYS_PERIPHERAL_BLOCK+SCIF_BASE_ADDR
+
+/*
+ * Fixed virtual address where SCIF is mapped (should already be done
+ * in arch/sh64/kernel/head.S!).
+ */
+#define SCIF_REG	0xfa030000
+
+enum {
+	SCIF_SCSMR2	= SCIF_REG + 0x00,
+	SCIF_SCBRR2	= SCIF_REG + 0x04,
+	SCIF_SCSCR2	= SCIF_REG + 0x08,
+	SCIF_SCFTDR2	= SCIF_REG + 0x0c,
+	SCIF_SCFSR2	= SCIF_REG + 0x10,
+	SCIF_SCFRDR2	= SCIF_REG + 0x14,
+	SCIF_SCFCR2	= SCIF_REG + 0x18,
+	SCIF_SCFDR2	= SCIF_REG + 0x1c,
+	SCIF_SCSPTR2	= SCIF_REG + 0x20,
+	SCIF_SCLSR2	= SCIF_REG + 0x24,
+};
+
+static void sh_console_putc(int c)
+{
+	while (!(ctrl_inw(SCIF_SCFSR2) & 0x20))
+		cpu_relax();
+
+	ctrl_outb(c, SCIF_SCFTDR2);
+	ctrl_outw((ctrl_inw(SCIF_SCFSR2) & 0x9f), SCIF_SCFSR2);
+
+	if (c == '\n')
+		sh_console_putc('\r');
+}
+
+static void sh_console_flush(void)
+{
+	ctrl_outw((ctrl_inw(SCIF_SCFSR2) & 0xbf), SCIF_SCFSR2);
+
+	while (!(ctrl_inw(SCIF_SCFSR2) & 0x40))
+		cpu_relax();
+
+	ctrl_outw((ctrl_inw(SCIF_SCFSR2) & 0xbf), SCIF_SCFSR2);
+}
+
+static void sh_console_write(struct console *con, const char *s, unsigned count)
+{
+	while (count-- > 0)
+		sh_console_putc(*s++);
+
+	sh_console_flush();
+}
+
+static int __init sh_console_setup(struct console *con, char *options)
+{
+	con->cflag = CREAD | HUPCL | CLOCAL | B19200 | CS8;
+
+	return 0;
+}
+
+static struct console sh_console = {
+	.name		= "scifcon",
+	.write		= sh_console_write,
+	.setup		= sh_console_setup,
+	.flags		= CON_PRINTBUFFER,
+	.index		= -1,
+};
+
+void __init enable_early_printk(void)
+{
+	ctrl_outb(0x2a, SCIF_SCBRR2);	/* 19200bps */
+
+	ctrl_outw(0x04, SCIF_SCFCR2);	/* Reset TFRST */
+	ctrl_outw(0x10, SCIF_SCFCR2);	/* TTRG0=1 */
+
+	ctrl_outw(0, SCIF_SCSPTR2);
+	ctrl_outw(0x60, SCIF_SCFSR2);
+	ctrl_outw(0, SCIF_SCLSR2);
+	ctrl_outw(0x30, SCIF_SCSCR2);
+
+	register_console(&sh_console);
+}
+
+void disable_early_printk(void)
+{
+	unregister_console(&sh_console);
+}
+
diff --git a/arch/sh64/kernel/entry.S b/arch/sh64/kernel/entry.S
new file mode 100644
index 0000000..2e2cfe2
--- /dev/null
+++ b/arch/sh64/kernel/entry.S
@@ -0,0 +1,2103 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * arch/sh64/kernel/entry.S
+ *
+ * Copyright (C) 2000, 2001  Paolo Alberelli
+ * Copyright (C) 2004, 2005  Paul Mundt
+ * Copyright (C) 2003, 2004 Richard Curnow
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/sys.h>
+
+#include <asm/processor.h>
+#include <asm/registers.h>
+#include <asm/unistd.h>
+#include <asm/thread_info.h>
+#include <asm/asm-offsets.h>
+
+/*
+ * SR fields.
+ */
+#define SR_ASID_MASK	0x00ff0000
+#define SR_FD_MASK	0x00008000
+#define SR_SS		0x08000000
+#define SR_BL		0x10000000
+#define SR_MD		0x40000000
+
+/*
+ * Event code.
+ */
+#define	EVENT_INTERRUPT		0
+#define	EVENT_FAULT_TLB		1
+#define	EVENT_FAULT_NOT_TLB	2
+#define	EVENT_DEBUG		3
+
+/* EXPEVT values */
+#define	RESET_CAUSE		0x20
+#define DEBUGSS_CAUSE		0x980
+
+/*
+ * Frame layout. Quad index.
+ */
+#define	FRAME_T(x)	FRAME_TBASE+(x*8)
+#define	FRAME_R(x)	FRAME_RBASE+(x*8)
+#define	FRAME_S(x)	FRAME_SBASE+(x*8)
+#define FSPC		0
+#define FSSR		1
+#define FSYSCALL_ID	2
+
+/* Arrange the save frame to be a multiple of 32 bytes long */
+#define FRAME_SBASE	0
+#define FRAME_RBASE	(FRAME_SBASE+(3*8))	/* SYSCALL_ID - SSR - SPC */
+#define FRAME_TBASE	(FRAME_RBASE+(63*8))	/* r0 - r62 */
+#define FRAME_PBASE	(FRAME_TBASE+(8*8))	/* tr0 -tr7 */
+#define	FRAME_SIZE	(FRAME_PBASE+(2*8))	/* pad0-pad1 */
+
+#define FP_FRAME_SIZE	FP_FRAME_BASE+(33*8)	/* dr0 - dr31 + fpscr */
+#define FP_FRAME_BASE	0
+
+#define	SAVED_R2	0*8
+#define	SAVED_R3	1*8
+#define	SAVED_R4	2*8
+#define	SAVED_R5	3*8
+#define	SAVED_R18	4*8
+#define	SAVED_R6	5*8
+#define	SAVED_TR0	6*8
+
+/* These are the registers saved in the TLB path that aren't saved in the first
+   level of the normal one. */
+#define	TLB_SAVED_R25	7*8
+#define	TLB_SAVED_TR1	8*8
+#define	TLB_SAVED_TR2	9*8
+#define	TLB_SAVED_TR3	10*8
+#define	TLB_SAVED_TR4	11*8
+/* Save R0/R1 : PT-migrating compiler currently dishounours -ffixed-r0 and -ffixed-r1 causing
+   breakage otherwise. */
+#define	TLB_SAVED_R0	12*8
+#define	TLB_SAVED_R1	13*8
+
+#define CLI()				\
+	getcon	SR, r6;			\
+	ori	r6, 0xf0, r6;		\
+	putcon	r6, SR;
+
+#define STI()				\
+	getcon	SR, r6;			\
+	andi	r6, ~0xf0, r6;		\
+	putcon	r6, SR;
+
+#ifdef CONFIG_PREEMPT
+#  define preempt_stop()	CLI()
+#else
+#  define preempt_stop()
+#  define resume_kernel		restore_all
+#endif
+
+	.section	.data, "aw"
+
+#define FAST_TLBMISS_STACK_CACHELINES 4
+#define FAST_TLBMISS_STACK_QUADWORDS (4*FAST_TLBMISS_STACK_CACHELINES)
+
+/* Register back-up area for all exceptions */
+	.balign	32
+	/* Allow for 16 quadwords to be pushed by fast tlbmiss handling
+	 * register saves etc. */
+	.fill FAST_TLBMISS_STACK_QUADWORDS, 8, 0x0
+/* This is 32 byte aligned by construction */
+/* Register back-up area for all exceptions */
+reg_save_area:
+	.quad	0
+	.quad	0
+	.quad	0
+	.quad	0
+
+	.quad	0
+	.quad	0
+	.quad	0
+	.quad	0
+
+	.quad	0
+	.quad	0
+	.quad	0
+	.quad	0
+
+	.quad	0
+	.quad   0
+
+/* Save area for RESVEC exceptions. We cannot use reg_save_area because of
+ * reentrancy. Note this area may be accessed via physical address.
+ * Align so this fits a whole single cache line, for ease of purging.
+ */
+	.balign 32,0,32
+resvec_save_area:
+	.quad	0
+	.quad	0
+	.quad	0
+	.quad	0
+	.quad	0
+	.balign 32,0,32
+
+/* Jump table of 3rd level handlers  */
+trap_jtable:
+	.long	do_exception_error		/* 0x000 */
+	.long	do_exception_error		/* 0x020 */
+	.long	tlb_miss_load				/* 0x040 */
+	.long	tlb_miss_store				/* 0x060 */
+	! ARTIFICIAL pseudo-EXPEVT setting
+	.long	do_debug_interrupt		/* 0x080 */
+	.long	tlb_miss_load				/* 0x0A0 */
+	.long	tlb_miss_store				/* 0x0C0 */
+	.long	do_address_error_load	/* 0x0E0 */
+	.long	do_address_error_store	/* 0x100 */
+#ifdef CONFIG_SH_FPU
+	.long	do_fpu_error		/* 0x120 */
+#else
+	.long	do_exception_error		/* 0x120 */
+#endif
+	.long	do_exception_error		/* 0x140 */
+	.long	system_call				/* 0x160 */
+	.long	do_reserved_inst		/* 0x180 */
+	.long	do_illegal_slot_inst	/* 0x1A0 */
+	.long	do_NMI			/* 0x1C0 */
+	.long	do_exception_error		/* 0x1E0 */
+	.rept 15
+		.long do_IRQ		/* 0x200 - 0x3C0 */
+	.endr
+	.long	do_exception_error		/* 0x3E0 */
+	.rept 32
+		.long do_IRQ		/* 0x400 - 0x7E0 */
+	.endr
+	.long	fpu_error_or_IRQA			/* 0x800 */
+	.long	fpu_error_or_IRQB			/* 0x820 */
+	.long	do_IRQ			/* 0x840 */
+	.long	do_IRQ			/* 0x860 */
+	.rept 6
+		.long do_exception_error	/* 0x880 - 0x920 */
+	.endr
+	.long	do_software_break_point	/* 0x940 */
+	.long	do_exception_error		/* 0x960 */
+	.long	do_single_step		/* 0x980 */
+
+	.rept 3
+		.long do_exception_error	/* 0x9A0 - 0x9E0 */
+	.endr
+	.long	do_IRQ			/* 0xA00 */
+	.long	do_IRQ			/* 0xA20 */
+	.long	itlb_miss_or_IRQ			/* 0xA40 */
+	.long	do_IRQ			/* 0xA60 */
+	.long	do_IRQ			/* 0xA80 */
+	.long	itlb_miss_or_IRQ			/* 0xAA0 */
+	.long	do_exception_error		/* 0xAC0 */
+	.long	do_address_error_exec	/* 0xAE0 */
+	.rept 8
+		.long do_exception_error	/* 0xB00 - 0xBE0 */
+	.endr
+	.rept 18
+		.long do_IRQ		/* 0xC00 - 0xE20 */
+	.endr
+
+	.section	.text64, "ax"
+
+/*
+ * --- Exception/Interrupt/Event Handling Section
+ */
+
+/*
+ * VBR and RESVEC blocks.
+ *
+ * First level handler for VBR-based exceptions.
+ *
+ * To avoid waste of space, align to the maximum text block size.
+ * This is assumed to be at most 128 bytes or 32 instructions.
+ * DO NOT EXCEED 32 instructions on the first level handlers !
+ *
+ * Also note that RESVEC is contained within the VBR block
+ * where the room left (1KB - TEXT_SIZE) allows placing
+ * the RESVEC block (at most 512B + TEXT_SIZE).
+ *
+ * So first (and only) level handler for RESVEC-based exceptions.
+ *
+ * Where the fault/interrupt is handled (not_a_tlb_miss, tlb_miss
+ * and interrupt) we are a lot tight with register space until
+ * saving onto the stack frame, which is done in handle_exception().
+ *
+ */
+
+#define	TEXT_SIZE 	128
+#define	BLOCK_SIZE 	1664 		/* Dynamic check, 13*128 */
+
+	.balign TEXT_SIZE
+LVBR_block:
+	.space	256, 0			/* Power-on class handler, */
+					/* not required here       */
+not_a_tlb_miss:
+	synco	/* TAKum03020 (but probably a good idea anyway.) */
+	/* Save original stack pointer into KCR1 */
+	putcon	SP, KCR1
+
+	/* Save other original registers into reg_save_area */
+        movi  reg_save_area, SP
+	st.q	SP, SAVED_R2, r2
+	st.q	SP, SAVED_R3, r3
+	st.q	SP, SAVED_R4, r4
+	st.q	SP, SAVED_R5, r5
+	st.q	SP, SAVED_R6, r6
+	st.q	SP, SAVED_R18, r18
+	gettr	tr0, r3
+	st.q	SP, SAVED_TR0, r3
+
+	/* Set args for Non-debug, Not a TLB miss class handler */
+	getcon	EXPEVT, r2
+	movi	ret_from_exception, r3
+	ori	r3, 1, r3
+	movi	EVENT_FAULT_NOT_TLB, r4
+	or	SP, ZERO, r5
+	getcon	KCR1, SP
+	pta	handle_exception, tr0
+	blink	tr0, ZERO
+
+	.balign 256
+	! VBR+0x200
+	nop
+	.balign 256
+	! VBR+0x300
+	nop
+	.balign 256
+	/*
+	 * Instead of the natural .balign 1024 place RESVEC here
+	 * respecting the final 1KB alignment.
+	 */
+	.balign TEXT_SIZE
+	/*
+	 * Instead of '.space 1024-TEXT_SIZE' place the RESVEC
+	 * block making sure the final alignment is correct.
+	 */
+tlb_miss:
+	synco	/* TAKum03020 (but probably a good idea anyway.) */
+	putcon	SP, KCR1
+	movi	reg_save_area, SP
+	/* SP is guaranteed 32-byte aligned. */
+	st.q	SP, TLB_SAVED_R0 , r0
+	st.q	SP, TLB_SAVED_R1 , r1
+	st.q	SP, SAVED_R2 , r2
+	st.q	SP, SAVED_R3 , r3
+	st.q	SP, SAVED_R4 , r4
+	st.q	SP, SAVED_R5 , r5
+	st.q	SP, SAVED_R6 , r6
+	st.q	SP, SAVED_R18, r18
+
+	/* Save R25 for safety; as/ld may want to use it to achieve the call to
+	 * the code in mm/tlbmiss.c */
+	st.q	SP, TLB_SAVED_R25, r25
+	gettr	tr0, r2
+	gettr	tr1, r3
+	gettr	tr2, r4
+	gettr	tr3, r5
+	gettr	tr4, r18
+	st.q	SP, SAVED_TR0 , r2
+	st.q	SP, TLB_SAVED_TR1 , r3
+	st.q	SP, TLB_SAVED_TR2 , r4
+	st.q	SP, TLB_SAVED_TR3 , r5
+	st.q	SP, TLB_SAVED_TR4 , r18
+
+	pt	do_fast_page_fault, tr0
+	getcon	SSR, r2
+	getcon	EXPEVT, r3
+	getcon	TEA, r4
+	shlri	r2, 30, r2
+	andi	r2, 1, r2	/* r2 = SSR.MD */
+	blink 	tr0, LINK
+
+	pt	fixup_to_invoke_general_handler, tr1
+
+	/* If the fast path handler fixed the fault, just drop through quickly
+	   to the restore code right away to return to the excepting context.
+	   */
+	beqi/u	r2, 0, tr1
+
+fast_tlb_miss_restore:
+	ld.q	SP, SAVED_TR0, r2
+	ld.q	SP, TLB_SAVED_TR1, r3
+	ld.q	SP, TLB_SAVED_TR2, r4
+
+	ld.q	SP, TLB_SAVED_TR3, r5
+	ld.q	SP, TLB_SAVED_TR4, r18
+
+	ptabs	r2, tr0
+	ptabs	r3, tr1
+	ptabs	r4, tr2
+	ptabs	r5, tr3
+	ptabs	r18, tr4
+
+	ld.q	SP, TLB_SAVED_R0, r0
+	ld.q	SP, TLB_SAVED_R1, r1
+	ld.q	SP, SAVED_R2, r2
+	ld.q	SP, SAVED_R3, r3
+	ld.q	SP, SAVED_R4, r4
+	ld.q	SP, SAVED_R5, r5
+	ld.q	SP, SAVED_R6, r6
+	ld.q	SP, SAVED_R18, r18
+	ld.q	SP, TLB_SAVED_R25, r25
+
+	getcon	KCR1, SP
+	rte
+	nop /* for safety, in case the code is run on sh5-101 cut1.x */
+
+fixup_to_invoke_general_handler:
+
+	/* OK, new method.  Restore stuff that's not expected to get saved into
+	   the 'first-level' reg save area, then just fall through to setting
+	   up the registers and calling the second-level handler. */
+
+	/* 2nd level expects r2,3,4,5,6,18,tr0 to be saved.  So we must restore
+	   r25,tr1-4 and save r6 to get into the right state.  */
+
+	ld.q	SP, TLB_SAVED_TR1, r3
+	ld.q	SP, TLB_SAVED_TR2, r4
+	ld.q	SP, TLB_SAVED_TR3, r5
+	ld.q	SP, TLB_SAVED_TR4, r18
+	ld.q	SP, TLB_SAVED_R25, r25
+
+	ld.q	SP, TLB_SAVED_R0, r0
+	ld.q	SP, TLB_SAVED_R1, r1
+
+	ptabs/u	r3, tr1
+	ptabs/u	r4, tr2
+	ptabs/u	r5, tr3
+	ptabs/u	r18, tr4
+
+	/* Set args for Non-debug, TLB miss class handler */
+	getcon	EXPEVT, r2
+	movi	ret_from_exception, r3
+	ori	r3, 1, r3
+	movi	EVENT_FAULT_TLB, r4
+	or	SP, ZERO, r5
+	getcon	KCR1, SP
+	pta	handle_exception, tr0
+	blink	tr0, ZERO
+
+/* NB TAKE GREAT CARE HERE TO ENSURE THAT THE INTERRUPT CODE
+   DOES END UP AT VBR+0x600 */
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+
+	.balign 256
+	/* VBR + 0x600 */
+
+interrupt:
+	synco	/* TAKum03020 (but probably a good idea anyway.) */
+	/* Save original stack pointer into KCR1 */
+	putcon	SP, KCR1
+
+	/* Save other original registers into reg_save_area */
+        movi  reg_save_area, SP
+	st.q	SP, SAVED_R2, r2
+	st.q	SP, SAVED_R3, r3
+	st.q	SP, SAVED_R4, r4
+	st.q	SP, SAVED_R5, r5
+	st.q	SP, SAVED_R6, r6
+	st.q	SP, SAVED_R18, r18
+	gettr	tr0, r3
+	st.q	SP, SAVED_TR0, r3
+
+	/* Set args for interrupt class handler */
+	getcon	INTEVT, r2
+	movi	ret_from_irq, r3
+	ori	r3, 1, r3
+	movi	EVENT_INTERRUPT, r4
+	or	SP, ZERO, r5
+	getcon	KCR1, SP
+	pta	handle_exception, tr0
+	blink	tr0, ZERO
+	.balign	TEXT_SIZE		/* let's waste the bare minimum */
+
+LVBR_block_end:				/* Marker. Used for total checking */
+
+	.balign 256
+LRESVEC_block:
+	/* Panic handler. Called with MMU off. Possible causes/actions:
+	 * - Reset:		Jump to program start.
+	 * - Single Step:	Turn off Single Step & return.
+	 * - Others:		Call panic handler, passing PC as arg.
+	 *			(this may need to be extended...)
+	 */
+reset_or_panic:
+	synco	/* TAKum03020 (but probably a good idea anyway.) */
+	putcon	SP, DCR
+	/* First save r0-1 and tr0, as we need to use these */
+	movi	resvec_save_area-CONFIG_CACHED_MEMORY_OFFSET, SP
+	st.q	SP, 0, r0
+	st.q	SP, 8, r1
+	gettr	tr0, r0
+	st.q	SP, 32, r0
+
+	/* Check cause */
+	getcon	EXPEVT, r0
+	movi	RESET_CAUSE, r1
+	sub	r1, r0, r1		/* r1=0 if reset */
+	movi	_stext-CONFIG_CACHED_MEMORY_OFFSET, r0
+	ori	r0, 1, r0
+	ptabs	r0, tr0
+	beqi	r1, 0, tr0		/* Jump to start address if reset */
+
+	getcon	EXPEVT, r0
+	movi	DEBUGSS_CAUSE, r1
+	sub	r1, r0, r1		/* r1=0 if single step */
+	pta	single_step_panic, tr0
+	beqi	r1, 0, tr0		/* jump if single step */
+
+	/* Now jump to where we save the registers. */
+	movi	panic_stash_regs-CONFIG_CACHED_MEMORY_OFFSET, r1
+	ptabs	r1, tr0
+	blink	tr0, r63
+
+single_step_panic:
+	/* We are in a handler with Single Step set. We need to resume the
+	 * handler, by turning on MMU & turning off Single Step. */
+	getcon	SSR, r0
+	movi	SR_MMU, r1
+	or	r0, r1, r0
+	movi	~SR_SS, r1
+	and	r0, r1, r0
+	putcon	r0, SSR
+	/* Restore EXPEVT, as the rte won't do this */
+	getcon	PEXPEVT, r0
+	putcon	r0, EXPEVT
+	/* Restore regs */
+	ld.q	SP, 32, r0
+	ptabs	r0, tr0
+	ld.q	SP, 0, r0
+	ld.q	SP, 8, r1
+	getcon	DCR, SP
+	synco
+	rte
+
+
+	.balign	256
+debug_exception:
+	synco	/* TAKum03020 (but probably a good idea anyway.) */
+	/*
+	 * Single step/software_break_point first level handler.
+	 * Called with MMU off, so the first thing we do is enable it
+	 * by doing an rte with appropriate SSR.
+	 */
+	putcon	SP, DCR
+	/* Save SSR & SPC, together with R0 & R1, as we need to use 2 regs. */
+	movi	resvec_save_area-CONFIG_CACHED_MEMORY_OFFSET, SP
+
+	/* With the MMU off, we are bypassing the cache, so purge any
+         * data that will be made stale by the following stores.
+         */
+	ocbp	SP, 0
+	synco
+
+	st.q	SP, 0, r0
+	st.q	SP, 8, r1
+	getcon	SPC, r0
+	st.q	SP, 16, r0
+	getcon	SSR, r0
+	st.q	SP, 24, r0
+
+	/* Enable MMU, block exceptions, set priv mode, disable single step */
+	movi	SR_MMU | SR_BL | SR_MD, r1
+	or	r0, r1, r0
+	movi	~SR_SS, r1
+	and	r0, r1, r0
+	putcon	r0, SSR
+	/* Force control to debug_exception_2 when rte is executed */
+	movi	debug_exeception_2, r0
+	ori	r0, 1, r0      /* force SHmedia, just in case */
+	putcon	r0, SPC
+	getcon	DCR, SP
+	synco
+	rte
+debug_exeception_2:
+	/* Restore saved regs */
+	putcon	SP, KCR1
+	movi	resvec_save_area, SP
+	ld.q	SP, 24, r0
+	putcon	r0, SSR
+	ld.q	SP, 16, r0
+	putcon	r0, SPC
+	ld.q	SP, 0, r0
+	ld.q	SP, 8, r1
+
+	/* Save other original registers into reg_save_area */
+        movi  reg_save_area, SP
+	st.q	SP, SAVED_R2, r2
+	st.q	SP, SAVED_R3, r3
+	st.q	SP, SAVED_R4, r4
+	st.q	SP, SAVED_R5, r5
+	st.q	SP, SAVED_R6, r6
+	st.q	SP, SAVED_R18, r18
+	gettr	tr0, r3
+	st.q	SP, SAVED_TR0, r3
+
+	/* Set args for debug class handler */
+	getcon	EXPEVT, r2
+	movi	ret_from_exception, r3
+	ori	r3, 1, r3
+	movi	EVENT_DEBUG, r4
+	or	SP, ZERO, r5
+	getcon	KCR1, SP
+	pta	handle_exception, tr0
+	blink	tr0, ZERO
+
+	.balign	256
+debug_interrupt:
+	/* !!! WE COME HERE IN REAL MODE !!! */
+	/* Hook-up debug interrupt to allow various debugging options to be
+	 * hooked into its handler. */
+	/* Save original stack pointer into KCR1 */
+	synco
+	putcon	SP, KCR1
+	movi	resvec_save_area-CONFIG_CACHED_MEMORY_OFFSET, SP
+	ocbp	SP, 0
+	ocbp	SP, 32
+	synco
+
+	/* Save other original registers into reg_save_area thru real addresses */
+	st.q	SP, SAVED_R2, r2
+	st.q	SP, SAVED_R3, r3
+	st.q	SP, SAVED_R4, r4
+	st.q	SP, SAVED_R5, r5
+	st.q	SP, SAVED_R6, r6
+	st.q	SP, SAVED_R18, r18
+	gettr	tr0, r3
+	st.q	SP, SAVED_TR0, r3
+
+	/* move (spc,ssr)->(pspc,pssr).  The rte will shift
+	   them back again, so that they look like the originals
+	   as far as the real handler code is concerned. */
+	getcon	spc, r6
+	putcon	r6, pspc
+	getcon	ssr, r6
+	putcon	r6, pssr
+
+	! construct useful SR for handle_exception
+	movi	3, r6
+	shlli	r6, 30, r6
+	getcon	sr, r18
+	or	r18, r6, r6
+	putcon	r6, ssr
+
+	! SSR is now the current SR with the MD and MMU bits set
+	! i.e. the rte will switch back to priv mode and put
+	! the mmu back on
+
+	! construct spc
+	movi	handle_exception, r18
+	ori	r18, 1, r18		! for safety (do we need this?)
+	putcon	r18, spc
+
+	/* Set args for Non-debug, Not a TLB miss class handler */
+
+	! EXPEVT==0x80 is unused, so 'steal' this value to put the
+	! debug interrupt handler in the vectoring table
+	movi	0x80, r2
+	movi	ret_from_exception, r3
+	ori	r3, 1, r3
+	movi	EVENT_FAULT_NOT_TLB, r4
+
+	or	SP, ZERO, r5
+	movi	CONFIG_CACHED_MEMORY_OFFSET, r6
+	add	r6, r5, r5
+	getcon	KCR1, SP
+
+	synco	! for safety
+	rte	! -> handle_exception, switch back to priv mode again
+
+LRESVEC_block_end:			/* Marker. Unused. */
+
+	.balign	TEXT_SIZE
+
+/*
+ * Second level handler for VBR-based exceptions. Pre-handler.
+ * In common to all stack-frame sensitive handlers.
+ *
+ * Inputs:
+ * (KCR0) Current [current task union]
+ * (KCR1) Original SP
+ * (r2)   INTEVT/EXPEVT
+ * (r3)   appropriate return address
+ * (r4)   Event (0 = interrupt, 1 = TLB miss fault, 2 = Not TLB miss fault, 3=debug)
+ * (r5)   Pointer to reg_save_area
+ * (SP)   Original SP
+ *
+ * Available registers:
+ * (r6)
+ * (r18)
+ * (tr0)
+ *
+ */
+handle_exception:
+	/* Common 2nd level handler. */
+
+	/* First thing we need an appropriate stack pointer */
+	getcon	SSR, r6
+	shlri	r6, 30, r6
+	andi	r6, 1, r6
+	pta	stack_ok, tr0
+	bne	r6, ZERO, tr0		/* Original stack pointer is fine */
+
+	/* Set stack pointer for user fault */
+	getcon	KCR0, SP
+	movi	THREAD_SIZE, r6		/* Point to the end */
+	add	SP, r6, SP
+
+stack_ok:
+
+/* DEBUG : check for underflow/overflow of the kernel stack */
+	pta	no_underflow, tr0
+	getcon  KCR0, r6
+	movi	1024, r18
+	add	r6, r18, r6
+	bge	SP, r6, tr0 	! ? below 1k from bottom of stack : danger zone
+
+/* Just panic to cause a crash. */
+bad_sp:
+	ld.b	r63, 0, r6
+	nop
+
+no_underflow:
+	pta	bad_sp, tr0
+	getcon	kcr0, r6
+	movi	THREAD_SIZE, r18
+	add	r18, r6, r6
+	bgt	SP, r6, tr0	! sp above the stack
+
+	/* Make some room for the BASIC frame. */
+	movi	-(FRAME_SIZE), r6
+	add	SP, r6, SP
+
+/* Could do this with no stalling if we had another spare register, but the
+   code below will be OK. */
+	ld.q	r5, SAVED_R2, r6
+	ld.q	r5, SAVED_R3, r18
+	st.q	SP, FRAME_R(2), r6
+	ld.q	r5, SAVED_R4, r6
+	st.q	SP, FRAME_R(3), r18
+	ld.q	r5, SAVED_R5, r18
+	st.q	SP, FRAME_R(4), r6
+	ld.q	r5, SAVED_R6, r6
+	st.q	SP, FRAME_R(5), r18
+	ld.q	r5, SAVED_R18, r18
+	st.q	SP, FRAME_R(6), r6
+	ld.q	r5, SAVED_TR0, r6
+	st.q	SP, FRAME_R(18), r18
+	st.q	SP, FRAME_T(0), r6
+
+	/* Keep old SP around */
+	getcon	KCR1, r6
+
+	/* Save the rest of the general purpose registers */
+	st.q	SP, FRAME_R(0), r0
+	st.q	SP, FRAME_R(1), r1
+	st.q	SP, FRAME_R(7), r7
+	st.q	SP, FRAME_R(8), r8
+	st.q	SP, FRAME_R(9), r9
+	st.q	SP, FRAME_R(10), r10
+	st.q	SP, FRAME_R(11), r11
+	st.q	SP, FRAME_R(12), r12
+	st.q	SP, FRAME_R(13), r13
+	st.q	SP, FRAME_R(14), r14
+
+	/* SP is somewhere else */
+	st.q	SP, FRAME_R(15), r6
+
+	st.q	SP, FRAME_R(16), r16
+	st.q	SP, FRAME_R(17), r17
+	/* r18 is saved earlier. */
+	st.q	SP, FRAME_R(19), r19
+	st.q	SP, FRAME_R(20), r20
+	st.q	SP, FRAME_R(21), r21
+	st.q	SP, FRAME_R(22), r22
+	st.q	SP, FRAME_R(23), r23
+	st.q	SP, FRAME_R(24), r24
+	st.q	SP, FRAME_R(25), r25
+	st.q	SP, FRAME_R(26), r26
+	st.q	SP, FRAME_R(27), r27
+	st.q	SP, FRAME_R(28), r28
+	st.q	SP, FRAME_R(29), r29
+	st.q	SP, FRAME_R(30), r30
+	st.q	SP, FRAME_R(31), r31
+	st.q	SP, FRAME_R(32), r32
+	st.q	SP, FRAME_R(33), r33
+	st.q	SP, FRAME_R(34), r34
+	st.q	SP, FRAME_R(35), r35
+	st.q	SP, FRAME_R(36), r36
+	st.q	SP, FRAME_R(37), r37
+	st.q	SP, FRAME_R(38), r38
+	st.q	SP, FRAME_R(39), r39
+	st.q	SP, FRAME_R(40), r40
+	st.q	SP, FRAME_R(41), r41
+	st.q	SP, FRAME_R(42), r42
+	st.q	SP, FRAME_R(43), r43
+	st.q	SP, FRAME_R(44), r44
+	st.q	SP, FRAME_R(45), r45
+	st.q	SP, FRAME_R(46), r46
+	st.q	SP, FRAME_R(47), r47
+	st.q	SP, FRAME_R(48), r48
+	st.q	SP, FRAME_R(49), r49
+	st.q	SP, FRAME_R(50), r50
+	st.q	SP, FRAME_R(51), r51
+	st.q	SP, FRAME_R(52), r52
+	st.q	SP, FRAME_R(53), r53
+	st.q	SP, FRAME_R(54), r54
+	st.q	SP, FRAME_R(55), r55
+	st.q	SP, FRAME_R(56), r56
+	st.q	SP, FRAME_R(57), r57
+	st.q	SP, FRAME_R(58), r58
+	st.q	SP, FRAME_R(59), r59
+	st.q	SP, FRAME_R(60), r60
+	st.q	SP, FRAME_R(61), r61
+	st.q	SP, FRAME_R(62), r62
+
+	/*
+	 * Save the S* registers.
+	 */
+	getcon	SSR, r61
+	st.q	SP, FRAME_S(FSSR), r61
+	getcon	SPC, r62
+	st.q	SP, FRAME_S(FSPC), r62
+	movi	-1, r62			/* Reset syscall_nr */
+	st.q	SP, FRAME_S(FSYSCALL_ID), r62
+
+	/* Save the rest of the target registers */
+	gettr	tr1, r6
+	st.q	SP, FRAME_T(1), r6
+	gettr	tr2, r6
+	st.q	SP, FRAME_T(2), r6
+	gettr	tr3, r6
+	st.q	SP, FRAME_T(3), r6
+	gettr	tr4, r6
+	st.q	SP, FRAME_T(4), r6
+	gettr	tr5, r6
+	st.q	SP, FRAME_T(5), r6
+	gettr	tr6, r6
+	st.q	SP, FRAME_T(6), r6
+	gettr	tr7, r6
+	st.q	SP, FRAME_T(7), r6
+
+	! setup FP so that unwinder can wind back through nested kernel mode
+	! exceptions
+	add	SP, ZERO, r14
+
+#ifdef CONFIG_POOR_MANS_STRACE
+	/* We've pushed all the registers now, so only r2-r4 hold anything
+	 * useful. Move them into callee save registers */
+	or	r2, ZERO, r28
+	or	r3, ZERO, r29
+	or	r4, ZERO, r30
+
+	/* Preserve r2 as the event code */
+	movi	evt_debug, r3
+	ori	r3, 1, r3
+	ptabs	r3, tr0
+
+	or	SP, ZERO, r6
+	getcon	TRA, r5
+	blink	tr0, LINK
+
+	or	r28, ZERO, r2
+	or	r29, ZERO, r3
+	or	r30, ZERO, r4
+#endif
+
+	/* For syscall and debug race condition, get TRA now */
+	getcon	TRA, r5
+
+	/* We are in a safe position to turn SR.BL off, but set IMASK=0xf
+	 * Also set FD, to catch FPU usage in the kernel.
+	 *
+	 * benedict.gaster@superh.com 29/07/2002
+	 *
+	 * On all SH5-101 revisions it is unsafe to raise the IMASK and at the
+	 * same time change BL from 1->0, as any pending interrupt of a level
+	 * higher than he previous value of IMASK will leak through and be
+	 * taken unexpectedly.
+	 *
+	 * To avoid this we raise the IMASK and then issue another PUTCON to
+	 * enable interrupts.
+         */
+	getcon	SR, r6
+	movi	SR_IMASK | SR_FD, r7
+	or	r6, r7, r6
+	putcon	r6, SR
+	movi	SR_UNBLOCK_EXC, r7
+	and	r6, r7, r6
+	putcon	r6, SR
+
+
+	/* Now call the appropriate 3rd level handler */
+	or	r3, ZERO, LINK
+	movi	trap_jtable, r3
+	shlri	r2, 3, r2
+	ldx.l	r2, r3, r3
+	shlri	r2, 2, r2
+	ptabs	r3, tr0
+	or	SP, ZERO, r3
+	blink	tr0, ZERO
+
+/*
+ * Second level handler for VBR-based exceptions. Post-handlers.
+ *
+ * Post-handlers for interrupts (ret_from_irq), exceptions
+ * (ret_from_exception) and common reentrance doors (restore_all
+ * to get back to the original context, ret_from_syscall loop to
+ * check kernel exiting).
+ *
+ * ret_with_reschedule and work_notifysig are an inner lables of
+ * the ret_from_syscall loop.
+ *
+ * In common to all stack-frame sensitive handlers.
+ *
+ * Inputs:
+ * (SP)   struct pt_regs *, original register's frame pointer (basic)
+ *
+ */
+	.global ret_from_irq
+ret_from_irq:
+#ifdef CONFIG_POOR_MANS_STRACE
+	pta	evt_debug_ret_from_irq, tr0
+	ori	SP, 0, r2
+	blink	tr0, LINK
+#endif
+	ld.q	SP, FRAME_S(FSSR), r6
+	shlri	r6, 30, r6
+	andi	r6, 1, r6
+	pta	resume_kernel, tr0
+	bne	r6, ZERO, tr0		/* no further checks */
+	STI()
+	pta	ret_with_reschedule, tr0
+	blink	tr0, ZERO		/* Do not check softirqs */
+
+	.global ret_from_exception
+ret_from_exception:
+	preempt_stop()
+
+#ifdef CONFIG_POOR_MANS_STRACE
+	pta	evt_debug_ret_from_exc, tr0
+	ori	SP, 0, r2
+	blink	tr0, LINK
+#endif
+
+	ld.q	SP, FRAME_S(FSSR), r6
+	shlri	r6, 30, r6
+	andi	r6, 1, r6
+	pta	resume_kernel, tr0
+	bne	r6, ZERO, tr0		/* no further checks */
+
+	/* Check softirqs */
+
+#ifdef CONFIG_PREEMPT
+	pta   ret_from_syscall, tr0
+	blink   tr0, ZERO
+
+resume_kernel:
+	pta	restore_all, tr0
+
+	getcon	KCR0, r6
+	ld.l	r6, TI_PRE_COUNT, r7
+	beq/u	r7, ZERO, tr0
+
+need_resched:
+	ld.l	r6, TI_FLAGS, r7
+	movi	(1 << TIF_NEED_RESCHED), r8
+	and	r8, r7, r8
+	bne	r8, ZERO, tr0
+
+	getcon	SR, r7
+	andi	r7, 0xf0, r7
+	bne	r7, ZERO, tr0
+
+	movi	((PREEMPT_ACTIVE >> 16) & 65535), r8
+	shori	(PREEMPT_ACTIVE & 65535), r8
+	st.l	r6, TI_PRE_COUNT, r8
+
+	STI()
+	movi	schedule, r7
+	ori	r7, 1, r7
+	ptabs	r7, tr1
+	blink	tr1, LINK
+
+	st.l	r6, TI_PRE_COUNT, ZERO
+	CLI()
+
+	pta	need_resched, tr1
+	blink	tr1, ZERO
+#endif
+
+	.global ret_from_syscall
+ret_from_syscall:
+
+ret_with_reschedule:
+	getcon	KCR0, r6		! r6 contains current_thread_info
+	ld.l	r6, TI_FLAGS, r7	! r7 contains current_thread_info->flags
+
+	! FIXME:!!!
+	! no handling of TIF_SYSCALL_TRACE yet!!
+
+	movi	(1 << TIF_NEED_RESCHED), r8
+	and	r8, r7, r8
+	pta	work_resched, tr0
+	bne	r8, ZERO, tr0
+
+	pta	restore_all, tr1
+
+	movi	(1 << TIF_SIGPENDING), r8
+	and	r8, r7, r8
+	pta	work_notifysig, tr0
+	bne	r8, ZERO, tr0
+
+	blink	tr1, ZERO
+
+work_resched:
+	pta	ret_from_syscall, tr0
+	gettr	tr0, LINK
+	movi	schedule, r6
+	ptabs	r6, tr0
+	blink	tr0, ZERO		/* Call schedule(), return on top */
+
+work_notifysig:
+	gettr	tr1, LINK
+
+	movi	do_signal, r6
+	ptabs	r6, tr0
+	or	SP, ZERO, r2
+	or	ZERO, ZERO, r3
+	blink	tr0, LINK	    /* Call do_signal(regs, 0), return here */
+
+restore_all:
+	/* Do prefetches */
+
+	ld.q	SP, FRAME_T(0), r6
+	ld.q	SP, FRAME_T(1), r7
+	ld.q	SP, FRAME_T(2), r8
+	ld.q	SP, FRAME_T(3), r9
+	ptabs	r6, tr0
+	ptabs	r7, tr1
+	ptabs	r8, tr2
+	ptabs	r9, tr3
+	ld.q	SP, FRAME_T(4), r6
+	ld.q	SP, FRAME_T(5), r7
+	ld.q	SP, FRAME_T(6), r8
+	ld.q	SP, FRAME_T(7), r9
+	ptabs	r6, tr4
+	ptabs	r7, tr5
+	ptabs	r8, tr6
+	ptabs	r9, tr7
+
+	ld.q	SP, FRAME_R(0), r0
+	ld.q	SP, FRAME_R(1), r1
+	ld.q	SP, FRAME_R(2), r2
+	ld.q	SP, FRAME_R(3), r3
+	ld.q	SP, FRAME_R(4), r4
+	ld.q	SP, FRAME_R(5), r5
+	ld.q	SP, FRAME_R(6), r6
+	ld.q	SP, FRAME_R(7), r7
+	ld.q	SP, FRAME_R(8), r8
+	ld.q	SP, FRAME_R(9), r9
+	ld.q	SP, FRAME_R(10), r10
+	ld.q	SP, FRAME_R(11), r11
+	ld.q	SP, FRAME_R(12), r12
+	ld.q	SP, FRAME_R(13), r13
+	ld.q	SP, FRAME_R(14), r14
+
+	ld.q	SP, FRAME_R(16), r16
+	ld.q	SP, FRAME_R(17), r17
+	ld.q	SP, FRAME_R(18), r18
+	ld.q	SP, FRAME_R(19), r19
+	ld.q	SP, FRAME_R(20), r20
+	ld.q	SP, FRAME_R(21), r21
+	ld.q	SP, FRAME_R(22), r22
+	ld.q	SP, FRAME_R(23), r23
+	ld.q	SP, FRAME_R(24), r24
+	ld.q	SP, FRAME_R(25), r25
+	ld.q	SP, FRAME_R(26), r26
+	ld.q	SP, FRAME_R(27), r27
+	ld.q	SP, FRAME_R(28), r28
+	ld.q	SP, FRAME_R(29), r29
+	ld.q	SP, FRAME_R(30), r30
+	ld.q	SP, FRAME_R(31), r31
+	ld.q	SP, FRAME_R(32), r32
+	ld.q	SP, FRAME_R(33), r33
+	ld.q	SP, FRAME_R(34), r34
+	ld.q	SP, FRAME_R(35), r35
+	ld.q	SP, FRAME_R(36), r36
+	ld.q	SP, FRAME_R(37), r37
+	ld.q	SP, FRAME_R(38), r38
+	ld.q	SP, FRAME_R(39), r39
+	ld.q	SP, FRAME_R(40), r40
+	ld.q	SP, FRAME_R(41), r41
+	ld.q	SP, FRAME_R(42), r42
+	ld.q	SP, FRAME_R(43), r43
+	ld.q	SP, FRAME_R(44), r44
+	ld.q	SP, FRAME_R(45), r45
+	ld.q	SP, FRAME_R(46), r46
+	ld.q	SP, FRAME_R(47), r47
+	ld.q	SP, FRAME_R(48), r48
+	ld.q	SP, FRAME_R(49), r49
+	ld.q	SP, FRAME_R(50), r50
+	ld.q	SP, FRAME_R(51), r51
+	ld.q	SP, FRAME_R(52), r52
+	ld.q	SP, FRAME_R(53), r53
+	ld.q	SP, FRAME_R(54), r54
+	ld.q	SP, FRAME_R(55), r55
+	ld.q	SP, FRAME_R(56), r56
+	ld.q	SP, FRAME_R(57), r57
+	ld.q	SP, FRAME_R(58), r58
+
+	getcon	SR, r59
+	movi	SR_BLOCK_EXC, r60
+	or	r59, r60, r59
+	putcon	r59, SR			/* SR.BL = 1, keep nesting out */
+	ld.q	SP, FRAME_S(FSSR), r61
+	ld.q	SP, FRAME_S(FSPC), r62
+	movi	SR_ASID_MASK, r60
+	and	r59, r60, r59
+	andc	r61, r60, r61		/* Clear out older ASID */
+	or	r59, r61, r61		/* Retain current ASID */
+	putcon	r61, SSR
+	putcon	r62, SPC
+
+	/* Ignore FSYSCALL_ID */
+
+	ld.q	SP, FRAME_R(59), r59
+	ld.q	SP, FRAME_R(60), r60
+	ld.q	SP, FRAME_R(61), r61
+	ld.q	SP, FRAME_R(62), r62
+
+	/* Last touch */
+	ld.q	SP, FRAME_R(15), SP
+	rte
+	nop
+
+/*
+ * Third level handlers for VBR-based exceptions. Adapting args to
+ * and/or deflecting to fourth level handlers.
+ *
+ * Fourth level handlers interface.
+ * Most are C-coded handlers directly pointed by the trap_jtable.
+ * (Third = Fourth level)
+ * Inputs:
+ * (r2)   fault/interrupt code, entry number (e.g. NMI = 14,
+ *	  IRL0-3 (0000) = 16, RTLBMISS = 2, SYSCALL = 11, etc ...)
+ * (r3)   struct pt_regs *, original register's frame pointer
+ * (r4)   Event (0 = interrupt, 1 = TLB miss fault, 2 = Not TLB miss fault)
+ * (r5)   TRA control register (for syscall/debug benefit only)
+ * (LINK) return address
+ * (SP)   = r3
+ *
+ * Kernel TLB fault handlers will get a slightly different interface.
+ * (r2)   struct pt_regs *, original register's frame pointer
+ * (r3)   writeaccess, whether it's a store fault as opposed to load fault
+ * (r4)   execaccess, whether it's a ITLB fault as opposed to DTLB fault
+ * (r5)   Effective Address of fault
+ * (LINK) return address
+ * (SP)   = r2
+ *
+ * fpu_error_or_IRQ? is a helper to deflect to the right cause.
+ *
+ */
+tlb_miss_load:
+	or	SP, ZERO, r2
+	or	ZERO, ZERO, r3		/* Read */
+	or	ZERO, ZERO, r4		/* Data */
+	getcon	TEA, r5
+	pta	call_do_page_fault, tr0
+	beq	ZERO, ZERO, tr0
+
+tlb_miss_store:
+	or	SP, ZERO, r2
+	movi	1, r3			/* Write */
+	or	ZERO, ZERO, r4		/* Data */
+	getcon	TEA, r5
+	pta	call_do_page_fault, tr0
+	beq	ZERO, ZERO, tr0
+
+itlb_miss_or_IRQ:
+	pta	its_IRQ, tr0
+	beqi/u	r4, EVENT_INTERRUPT, tr0
+	or	SP, ZERO, r2
+	or	ZERO, ZERO, r3		/* Read */
+	movi	1, r4			/* Text */
+	getcon	TEA, r5
+	/* Fall through */
+
+call_do_page_fault:
+	movi	do_page_fault, r6
+        ptabs	r6, tr0
+        blink	tr0, ZERO
+
+fpu_error_or_IRQA:
+	pta	its_IRQ, tr0
+	beqi/l	r4, EVENT_INTERRUPT, tr0
+#ifdef CONFIG_SH_FPU
+	movi	do_fpu_state_restore, r6
+#else
+	movi	do_exception_error, r6
+#endif
+	ptabs	r6, tr0
+	blink	tr0, ZERO
+
+fpu_error_or_IRQB:
+	pta	its_IRQ, tr0
+	beqi/l	r4, EVENT_INTERRUPT, tr0
+#ifdef CONFIG_SH_FPU
+	movi	do_fpu_state_restore, r6
+#else
+	movi	do_exception_error, r6
+#endif
+	ptabs	r6, tr0
+	blink	tr0, ZERO
+
+its_IRQ:
+	movi	do_IRQ, r6
+	ptabs	r6, tr0
+	blink	tr0, ZERO
+
+/*
+ * system_call/unknown_trap third level handler:
+ *
+ * Inputs:
+ * (r2)   fault/interrupt code, entry number (TRAP = 11)
+ * (r3)   struct pt_regs *, original register's frame pointer
+ * (r4)   Not used. Event (0=interrupt, 1=TLB miss fault, 2=Not TLB miss fault)
+ * (r5)   TRA Control Reg (0x00xyzzzz: x=1 SYSCALL, y = #args, z=nr)
+ * (SP)   = r3
+ * (LINK) return address: ret_from_exception
+ * (*r3)  Syscall parms: SC#, arg0, arg1, ..., arg5 in order (Saved r2/r7)
+ *
+ * Outputs:
+ * (*r3)  Syscall reply (Saved r2)
+ * (LINK) In case of syscall only it can be scrapped.
+ *        Common second level post handler will be ret_from_syscall.
+ *        Common (non-trace) exit point to that is syscall_ret (saving
+ *        result to r2). Common bad exit point is syscall_bad (returning
+ *        ENOSYS then saved to r2).
+ *
+ */
+
+unknown_trap:
+	/* Unknown Trap or User Trace */
+	movi	do_unknown_trapa, r6
+	ptabs	r6, tr0
+        ld.q    r3, FRAME_R(9), r2	/* r2 = #arg << 16 | syscall # */
+        andi    r2, 0x1ff, r2		/* r2 = syscall # */
+	blink	tr0, LINK
+
+	pta	syscall_ret, tr0
+	blink	tr0, ZERO
+
+        /* New syscall implementation*/
+system_call:
+	pta	unknown_trap, tr0
+        or      r5, ZERO, r4            /* TRA (=r5) -> r4 */
+        shlri   r4, 20, r4
+	bnei	r4, 1, tr0		/* unknown_trap if not 0x1yzzzz */
+
+        /* It's a system call */
+	st.q    r3, FRAME_S(FSYSCALL_ID), r5 	/* ID (0x1yzzzz) -> stack */
+	andi    r5, 0x1ff, r5			/* syscall # -> r5	  */
+
+	STI()
+
+	pta	syscall_allowed, tr0
+	movi	NR_syscalls - 1, r4	/* Last valid */
+	bgeu/l	r4, r5, tr0
+
+syscall_bad:
+	/* Return ENOSYS ! */
+	movi	-(ENOSYS), r2		/* Fall-through */
+
+	.global syscall_ret
+syscall_ret:
+	st.q	SP, FRAME_R(9), r2	/* Expecting SP back to BASIC frame */
+
+#ifdef CONFIG_POOR_MANS_STRACE
+	/* nothing useful in registers at this point */
+
+	movi	evt_debug2, r5
+	ori	r5, 1, r5
+	ptabs	r5, tr0
+	ld.q	SP, FRAME_R(9), r2
+	or	SP, ZERO, r3
+	blink	tr0, LINK
+#endif
+
+	ld.q	SP, FRAME_S(FSPC), r2
+	addi	r2, 4, r2		/* Move PC, being pre-execution event */
+	st.q	SP, FRAME_S(FSPC), r2
+	pta	ret_from_syscall, tr0
+	blink	tr0, ZERO
+
+
+/*  A different return path for ret_from_fork, because we now need
+ *  to call schedule_tail with the later kernels. Because prev is
+ *  loaded into r2 by switch_to() means we can just call it straight  away
+ */
+
+.global	ret_from_fork
+ret_from_fork:
+
+	movi	schedule_tail,r5
+	ori	r5, 1, r5
+	ptabs	r5, tr0
+	blink	tr0, LINK
+
+#ifdef CONFIG_POOR_MANS_STRACE
+	/* nothing useful in registers at this point */
+
+	movi	evt_debug2, r5
+	ori	r5, 1, r5
+	ptabs	r5, tr0
+	ld.q	SP, FRAME_R(9), r2
+	or	SP, ZERO, r3
+	blink	tr0, LINK
+#endif
+
+	ld.q	SP, FRAME_S(FSPC), r2
+	addi	r2, 4, r2		/* Move PC, being pre-execution event */
+	st.q	SP, FRAME_S(FSPC), r2
+	pta	ret_from_syscall, tr0
+	blink	tr0, ZERO
+
+
+
+syscall_allowed:
+	/* Use LINK to deflect the exit point, default is syscall_ret */
+	pta	syscall_ret, tr0
+	gettr	tr0, LINK
+	pta	syscall_notrace, tr0
+
+	getcon	KCR0, r2
+	ld.l	r2, TI_FLAGS, r4
+	movi	(1 << TIF_SYSCALL_TRACE), r6
+	and	r6, r4, r6
+	beq/l	r6, ZERO, tr0
+
+	/* Trace it by calling syscall_trace before and after */
+	movi	syscall_trace, r4
+	ptabs	r4, tr0
+	blink	tr0, LINK
+	/* Reload syscall number as r5 is trashed by syscall_trace */
+	ld.q	SP, FRAME_S(FSYSCALL_ID), r5
+	andi	r5, 0x1ff, r5
+
+	pta	syscall_ret_trace, tr0
+	gettr	tr0, LINK
+
+syscall_notrace:
+	/* Now point to the appropriate 4th level syscall handler */
+	movi	sys_call_table, r4
+	shlli	r5, 2, r5
+	ldx.l	r4, r5, r5
+	ptabs	r5, tr0
+
+	/* Prepare original args */
+	ld.q	SP, FRAME_R(2), r2
+	ld.q	SP, FRAME_R(3), r3
+	ld.q	SP, FRAME_R(4), r4
+	ld.q	SP, FRAME_R(5), r5
+	ld.q	SP, FRAME_R(6), r6
+	ld.q	SP, FRAME_R(7), r7
+
+	/* And now the trick for those syscalls requiring regs * ! */
+	or	SP, ZERO, r8
+
+	/* Call it */
+	blink	tr0, ZERO	/* LINK is already properly set */
+
+syscall_ret_trace:
+	/* We get back here only if under trace */
+	st.q	SP, FRAME_R(9), r2	/* Save return value */
+
+	movi	syscall_trace, LINK
+	ptabs	LINK, tr0
+	blink	tr0, LINK
+
+	/* This needs to be done after any syscall tracing */
+	ld.q	SP, FRAME_S(FSPC), r2
+	addi	r2, 4, r2	/* Move PC, being pre-execution event */
+	st.q	SP, FRAME_S(FSPC), r2
+
+	pta	ret_from_syscall, tr0
+	blink	tr0, ZERO		/* Resume normal return sequence */
+
+/*
+ * --- Switch to running under a particular ASID and return the previous ASID value
+ * --- The caller is assumed to have done a cli before calling this.
+ *
+ * Input r2 : new ASID
+ * Output r2 : old ASID
+ */
+
+	.global switch_and_save_asid
+switch_and_save_asid:
+	getcon	sr, r0
+	movi	255, r4
+	shlli 	r4, 16, r4	/* r4 = mask to select ASID */
+	and	r0, r4, r3	/* r3 = shifted old ASID */
+	andi	r2, 255, r2	/* mask down new ASID */
+	shlli	r2, 16, r2	/* align new ASID against SR.ASID */
+	andc	r0, r4, r0	/* efface old ASID from SR */
+	or	r0, r2, r0	/* insert the new ASID */
+	putcon	r0, ssr
+	movi	1f, r0
+	putcon	r0, spc
+	rte
+	nop
+1:
+	ptabs	LINK, tr0
+	shlri	r3, 16, r2	/* r2 = old ASID */
+	blink tr0, r63
+
+	.global	route_to_panic_handler
+route_to_panic_handler:
+	/* Switch to real mode, goto panic_handler, don't return.  Useful for
+	   last-chance debugging, e.g. if no output wants to go to the console.
+	   */
+
+	movi	panic_handler - CONFIG_CACHED_MEMORY_OFFSET, r1
+	ptabs	r1, tr0
+	pta	1f, tr1
+	gettr	tr1, r0
+	putcon	r0, spc
+	getcon	sr, r0
+	movi	1, r1
+	shlli	r1, 31, r1
+	andc	r0, r1, r0
+	putcon	r0, ssr
+	rte
+	nop
+1:	/* Now in real mode */
+	blink tr0, r63
+	nop
+
+	.global peek_real_address_q
+peek_real_address_q:
+	/* Two args:
+	   r2 : real mode address to peek
+	   r2(out) : result quadword
+
+	   This is provided as a cheapskate way of manipulating device
+	   registers for debugging (to avoid the need to onchip_remap the debug
+	   module, and to avoid the need to onchip_remap the watchpoint
+	   controller in a way that identity maps sufficient bits to avoid the
+	   SH5-101 cut2 silicon defect).
+
+	   This code is not performance critical
+	*/
+
+	add.l	r2, r63, r2	/* sign extend address */
+	getcon	sr, r0		/* r0 = saved original SR */
+	movi	1, r1
+	shlli	r1, 28, r1
+	or	r0, r1, r1	/* r0 with block bit set */
+	putcon	r1, sr		/* now in critical section */
+	movi	1, r36
+	shlli	r36, 31, r36
+	andc	r1, r36, r1	/* turn sr.mmu off in real mode section */
+
+	putcon	r1, ssr
+	movi	.peek0 - CONFIG_CACHED_MEMORY_OFFSET, r36 /* real mode target address */
+	movi	1f, r37		/* virtual mode return addr */
+	putcon	r36, spc
+
+	synco
+	rte
+	nop
+
+.peek0:	/* come here in real mode, don't touch caches!!
+           still in critical section (sr.bl==1) */
+	putcon	r0, ssr
+	putcon	r37, spc
+	/* Here's the actual peek.  If the address is bad, all bets are now off
+	 * what will happen (handlers invoked in real-mode = bad news) */
+	ld.q	r2, 0, r2
+	synco
+	rte	/* Back to virtual mode */
+	nop
+
+1:
+	ptabs	LINK, tr0
+	blink	tr0, r63
+
+	.global poke_real_address_q
+poke_real_address_q:
+	/* Two args:
+	   r2 : real mode address to poke
+	   r3 : quadword value to write.
+
+	   This is provided as a cheapskate way of manipulating device
+	   registers for debugging (to avoid the need to onchip_remap the debug
+	   module, and to avoid the need to onchip_remap the watchpoint
+	   controller in a way that identity maps sufficient bits to avoid the
+	   SH5-101 cut2 silicon defect).
+
+	   This code is not performance critical
+	*/
+
+	add.l	r2, r63, r2	/* sign extend address */
+	getcon	sr, r0		/* r0 = saved original SR */
+	movi	1, r1
+	shlli	r1, 28, r1
+	or	r0, r1, r1	/* r0 with block bit set */
+	putcon	r1, sr		/* now in critical section */
+	movi	1, r36
+	shlli	r36, 31, r36
+	andc	r1, r36, r1	/* turn sr.mmu off in real mode section */
+
+	putcon	r1, ssr
+	movi	.poke0-CONFIG_CACHED_MEMORY_OFFSET, r36 /* real mode target address */
+	movi	1f, r37		/* virtual mode return addr */
+	putcon	r36, spc
+
+	synco
+	rte
+	nop
+
+.poke0:	/* come here in real mode, don't touch caches!!
+           still in critical section (sr.bl==1) */
+	putcon	r0, ssr
+	putcon	r37, spc
+	/* Here's the actual poke.  If the address is bad, all bets are now off
+	 * what will happen (handlers invoked in real-mode = bad news) */
+	st.q	r2, 0, r3
+	synco
+	rte	/* Back to virtual mode */
+	nop
+
+1:
+	ptabs	LINK, tr0
+	blink	tr0, r63
+
+/*
+ * --- User Access Handling Section
+ */
+
+/*
+ * User Access support. It all moved to non inlined Assembler
+ * functions in here.
+ *
+ * __kernel_size_t __copy_user(void *__to, const void *__from,
+ *			       __kernel_size_t __n)
+ *
+ * Inputs:
+ * (r2)  target address
+ * (r3)  source address
+ * (r4)  size in bytes
+ *
+ * Ouputs:
+ * (*r2) target data
+ * (r2)  non-copied bytes
+ *
+ * If a fault occurs on the user pointer, bail out early and return the
+ * number of bytes not copied in r2.
+ * Strategy : for large blocks, call a real memcpy function which can
+ * move >1 byte at a time using unaligned ld/st instructions, and can
+ * manipulate the cache using prefetch + alloco to improve the speed
+ * further.  If a fault occurs in that function, just revert to the
+ * byte-by-byte approach used for small blocks; this is rare so the
+ * performance hit for that case does not matter.
+ *
+ * For small blocks it's not worth the overhead of setting up and calling
+ * the memcpy routine; do the copy a byte at a time.
+ *
+ */
+	.global	__copy_user
+__copy_user:
+	pta	__copy_user_byte_by_byte, tr1
+	movi	16, r0 ! this value is a best guess, should tune it by benchmarking
+	bge/u	r0, r4, tr1
+	pta copy_user_memcpy, tr0
+	addi	SP, -32, SP
+	/* Save arguments in case we have to fix-up unhandled page fault */
+	st.q	SP, 0, r2
+	st.q	SP, 8, r3
+	st.q	SP, 16, r4
+	st.q	SP, 24, r35 ! r35 is callee-save
+	/* Save LINK in a register to reduce RTS time later (otherwise
+	   ld SP,*,LINK;ptabs LINK;trn;blink trn,r63 becomes a critical path) */
+	ori	LINK, 0, r35
+	blink	tr0, LINK
+
+	/* Copy completed normally if we get back here */
+	ptabs	r35, tr0
+	ld.q	SP, 24, r35
+	/* don't restore r2-r4, pointless */
+	/* set result=r2 to zero as the copy must have succeeded. */
+	or	r63, r63, r2
+	addi	SP, 32, SP
+	blink	tr0, r63 ! RTS
+
+	.global __copy_user_fixup
+__copy_user_fixup:
+	/* Restore stack frame */
+	ori	r35, 0, LINK
+	ld.q	SP, 24, r35
+	ld.q	SP, 16, r4
+	ld.q	SP,  8, r3
+	ld.q	SP,  0, r2
+	addi	SP, 32, SP
+	/* Fall through to original code, in the 'same' state we entered with */
+
+/* The slow byte-by-byte method is used if the fast copy traps due to a bad
+   user address.  In that rare case, the speed drop can be tolerated. */
+__copy_user_byte_by_byte:
+	pta	___copy_user_exit, tr1
+	pta	___copy_user1, tr0
+	beq/u	r4, r63, tr1	/* early exit for zero length copy */
+	sub	r2, r3, r0
+	addi	r0, -1, r0
+
+___copy_user1:
+	ld.b	r3, 0, r5		/* Fault address 1 */
+
+	/* Could rewrite this to use just 1 add, but the second comes 'free'
+	   due to load latency */
+	addi	r3, 1, r3
+	addi	r4, -1, r4		/* No real fixup required */
+___copy_user2:
+	stx.b	r3, r0, r5		/* Fault address 2 */
+	bne     r4, ZERO, tr0
+
+___copy_user_exit:
+	or	r4, ZERO, r2
+	ptabs	LINK, tr0
+	blink	tr0, ZERO
+
+/*
+ * __kernel_size_t __clear_user(void *addr, __kernel_size_t size)
+ *
+ * Inputs:
+ * (r2)  target address
+ * (r3)  size in bytes
+ *
+ * Ouputs:
+ * (*r2) zero-ed target data
+ * (r2)  non-zero-ed bytes
+ */
+	.global	__clear_user
+__clear_user:
+	pta	___clear_user_exit, tr1
+	pta	___clear_user1, tr0
+	beq/u	r3, r63, tr1
+
+___clear_user1:
+	st.b	r2, 0, ZERO		/* Fault address */
+	addi	r2, 1, r2
+	addi	r3, -1, r3		/* No real fixup required */
+	bne     r3, ZERO, tr0
+
+___clear_user_exit:
+	or	r3, ZERO, r2
+	ptabs	LINK, tr0
+	blink	tr0, ZERO
+
+
+/*
+ * int __strncpy_from_user(unsigned long __dest, unsigned long __src,
+ *			   int __count)
+ *
+ * Inputs:
+ * (r2)  target address
+ * (r3)  source address
+ * (r4)  maximum size in bytes
+ *
+ * Ouputs:
+ * (*r2) copied data
+ * (r2)  -EFAULT (in case of faulting)
+ *       copied data (otherwise)
+ */
+	.global	__strncpy_from_user
+__strncpy_from_user:
+	pta	___strncpy_from_user1, tr0
+	pta	___strncpy_from_user_done, tr1
+	or	r4, ZERO, r5		/* r5 = original count */
+	beq/u	r4, r63, tr1		/* early exit if r4==0 */
+	movi	-(EFAULT), r6		/* r6 = reply, no real fixup */
+	or	ZERO, ZERO, r7		/* r7 = data, clear top byte of data */
+
+___strncpy_from_user1:
+	ld.b	r3, 0, r7		/* Fault address: only in reading */
+	st.b	r2, 0, r7
+	addi	r2, 1, r2
+	addi	r3, 1, r3
+	beq/u	ZERO, r7, tr1
+	addi	r4, -1, r4		/* return real number of copied bytes */
+	bne/l	ZERO, r4, tr0
+
+___strncpy_from_user_done:
+	sub	r5, r4, r6		/* If done, return copied */
+
+___strncpy_from_user_exit:
+	or	r6, ZERO, r2
+	ptabs	LINK, tr0
+	blink	tr0, ZERO
+
+/*
+ * extern long __strnlen_user(const char *__s, long __n)
+ *
+ * Inputs:
+ * (r2)  source address
+ * (r3)  source size in bytes
+ *
+ * Ouputs:
+ * (r2)  -EFAULT (in case of faulting)
+ *       string length (otherwise)
+ */
+	.global	__strnlen_user
+__strnlen_user:
+	pta	___strnlen_user_set_reply, tr0
+	pta	___strnlen_user1, tr1
+	or	ZERO, ZERO, r5		/* r5 = counter */
+	movi	-(EFAULT), r6		/* r6 = reply, no real fixup */
+	or	ZERO, ZERO, r7		/* r7 = data, clear top byte of data */
+	beq	r3, ZERO, tr0
+
+___strnlen_user1:
+	ldx.b	r2, r5, r7		/* Fault address: only in reading */
+	addi	r3, -1, r3		/* No real fixup */
+	addi	r5, 1, r5
+	beq	r3, ZERO, tr0
+	bne	r7, ZERO, tr1
+! The line below used to be active.  This meant led to a junk byte lying between each pair
+! of entries in the argv & envp structures in memory.  Whilst the program saw the right data
+! via the argv and envp arguments to main, it meant the 'flat' representation visible through
+! /proc/$pid/cmdline was corrupt, causing trouble with ps, for example.
+!	addi	r5, 1, r5		/* Include '\0' */
+
+___strnlen_user_set_reply:
+	or	r5, ZERO, r6		/* If done, return counter */
+
+___strnlen_user_exit:
+	or	r6, ZERO, r2
+	ptabs	LINK, tr0
+	blink	tr0, ZERO
+
+/*
+ * extern long __get_user_asm_?(void *val, long addr)
+ *
+ * Inputs:
+ * (r2)  dest address
+ * (r3)  source address (in User Space)
+ *
+ * Ouputs:
+ * (r2)  -EFAULT (faulting)
+ *       0 	 (not faulting)
+ */
+	.global	__get_user_asm_b
+__get_user_asm_b:
+	or	r2, ZERO, r4
+	movi	-(EFAULT), r2		/* r2 = reply, no real fixup */
+
+___get_user_asm_b1:
+	ld.b	r3, 0, r5		/* r5 = data */
+	st.b	r4, 0, r5
+	or	ZERO, ZERO, r2
+
+___get_user_asm_b_exit:
+	ptabs	LINK, tr0
+	blink	tr0, ZERO
+
+
+	.global	__get_user_asm_w
+__get_user_asm_w:
+	or	r2, ZERO, r4
+	movi	-(EFAULT), r2		/* r2 = reply, no real fixup */
+
+___get_user_asm_w1:
+	ld.w	r3, 0, r5		/* r5 = data */
+	st.w	r4, 0, r5
+	or	ZERO, ZERO, r2
+
+___get_user_asm_w_exit:
+	ptabs	LINK, tr0
+	blink	tr0, ZERO
+
+
+	.global	__get_user_asm_l
+__get_user_asm_l:
+	or	r2, ZERO, r4
+	movi	-(EFAULT), r2		/* r2 = reply, no real fixup */
+
+___get_user_asm_l1:
+	ld.l	r3, 0, r5		/* r5 = data */
+	st.l	r4, 0, r5
+	or	ZERO, ZERO, r2
+
+___get_user_asm_l_exit:
+	ptabs	LINK, tr0
+	blink	tr0, ZERO
+
+
+	.global	__get_user_asm_q
+__get_user_asm_q:
+	or	r2, ZERO, r4
+	movi	-(EFAULT), r2		/* r2 = reply, no real fixup */
+
+___get_user_asm_q1:
+	ld.q	r3, 0, r5		/* r5 = data */
+	st.q	r4, 0, r5
+	or	ZERO, ZERO, r2
+
+___get_user_asm_q_exit:
+	ptabs	LINK, tr0
+	blink	tr0, ZERO
+
+/*
+ * extern long __put_user_asm_?(void *pval, long addr)
+ *
+ * Inputs:
+ * (r2)  kernel pointer to value
+ * (r3)  dest address (in User Space)
+ *
+ * Ouputs:
+ * (r2)  -EFAULT (faulting)
+ *       0 	 (not faulting)
+ */
+	.global	__put_user_asm_b
+__put_user_asm_b:
+	ld.b	r2, 0, r4		/* r4 = data */
+	movi	-(EFAULT), r2		/* r2 = reply, no real fixup */
+
+___put_user_asm_b1:
+	st.b	r3, 0, r4
+	or	ZERO, ZERO, r2
+
+___put_user_asm_b_exit:
+	ptabs	LINK, tr0
+	blink	tr0, ZERO
+
+
+	.global	__put_user_asm_w
+__put_user_asm_w:
+	ld.w	r2, 0, r4		/* r4 = data */
+	movi	-(EFAULT), r2		/* r2 = reply, no real fixup */
+
+___put_user_asm_w1:
+	st.w	r3, 0, r4
+	or	ZERO, ZERO, r2
+
+___put_user_asm_w_exit:
+	ptabs	LINK, tr0
+	blink	tr0, ZERO
+
+
+	.global	__put_user_asm_l
+__put_user_asm_l:
+	ld.l	r2, 0, r4		/* r4 = data */
+	movi	-(EFAULT), r2		/* r2 = reply, no real fixup */
+
+___put_user_asm_l1:
+	st.l	r3, 0, r4
+	or	ZERO, ZERO, r2
+
+___put_user_asm_l_exit:
+	ptabs	LINK, tr0
+	blink	tr0, ZERO
+
+
+	.global	__put_user_asm_q
+__put_user_asm_q:
+	ld.q	r2, 0, r4		/* r4 = data */
+	movi	-(EFAULT), r2		/* r2 = reply, no real fixup */
+
+___put_user_asm_q1:
+	st.q	r3, 0, r4
+	or	ZERO, ZERO, r2
+
+___put_user_asm_q_exit:
+	ptabs	LINK, tr0
+	blink	tr0, ZERO
+
+panic_stash_regs:
+	/* The idea is : when we get an unhandled panic, we dump the registers
+	   to a known memory location, the just sit in a tight loop.
+	   This allows the human to look at the memory region through the GDB
+	   session (assuming the debug module's SHwy initiator isn't locked up
+	   or anything), to hopefully analyze the cause of the panic. */
+
+	/* On entry, former r15 (SP) is in DCR
+	   former r0  is at resvec_saved_area + 0
+	   former r1  is at resvec_saved_area + 8
+	   former tr0 is at resvec_saved_area + 32
+	   DCR is the only register whose value is lost altogether.
+	*/
+
+	movi	0xffffffff80000000, r0 ! phy of dump area
+	ld.q	SP, 0x000, r1	! former r0
+	st.q	r0,  0x000, r1
+	ld.q	SP, 0x008, r1	! former r1
+	st.q	r0,  0x008, r1
+	st.q	r0,  0x010, r2
+	st.q	r0,  0x018, r3
+	st.q	r0,  0x020, r4
+	st.q	r0,  0x028, r5
+	st.q	r0,  0x030, r6
+	st.q	r0,  0x038, r7
+	st.q	r0,  0x040, r8
+	st.q	r0,  0x048, r9
+	st.q	r0,  0x050, r10
+	st.q	r0,  0x058, r11
+	st.q	r0,  0x060, r12
+	st.q	r0,  0x068, r13
+	st.q	r0,  0x070, r14
+	getcon	dcr, r14
+	st.q	r0,  0x078, r14
+	st.q	r0,  0x080, r16
+	st.q	r0,  0x088, r17
+	st.q	r0,  0x090, r18
+	st.q	r0,  0x098, r19
+	st.q	r0,  0x0a0, r20
+	st.q	r0,  0x0a8, r21
+	st.q	r0,  0x0b0, r22
+	st.q	r0,  0x0b8, r23
+	st.q	r0,  0x0c0, r24
+	st.q	r0,  0x0c8, r25
+	st.q	r0,  0x0d0, r26
+	st.q	r0,  0x0d8, r27
+	st.q	r0,  0x0e0, r28
+	st.q	r0,  0x0e8, r29
+	st.q	r0,  0x0f0, r30
+	st.q	r0,  0x0f8, r31
+	st.q	r0,  0x100, r32
+	st.q	r0,  0x108, r33
+	st.q	r0,  0x110, r34
+	st.q	r0,  0x118, r35
+	st.q	r0,  0x120, r36
+	st.q	r0,  0x128, r37
+	st.q	r0,  0x130, r38
+	st.q	r0,  0x138, r39
+	st.q	r0,  0x140, r40
+	st.q	r0,  0x148, r41
+	st.q	r0,  0x150, r42
+	st.q	r0,  0x158, r43
+	st.q	r0,  0x160, r44
+	st.q	r0,  0x168, r45
+	st.q	r0,  0x170, r46
+	st.q	r0,  0x178, r47
+	st.q	r0,  0x180, r48
+	st.q	r0,  0x188, r49
+	st.q	r0,  0x190, r50
+	st.q	r0,  0x198, r51
+	st.q	r0,  0x1a0, r52
+	st.q	r0,  0x1a8, r53
+	st.q	r0,  0x1b0, r54
+	st.q	r0,  0x1b8, r55
+	st.q	r0,  0x1c0, r56
+	st.q	r0,  0x1c8, r57
+	st.q	r0,  0x1d0, r58
+	st.q	r0,  0x1d8, r59
+	st.q	r0,  0x1e0, r60
+	st.q	r0,  0x1e8, r61
+	st.q	r0,  0x1f0, r62
+	st.q	r0,  0x1f8, r63	! bogus, but for consistency's sake...
+
+	ld.q	SP, 0x020, r1  ! former tr0
+	st.q	r0,  0x200, r1
+	gettr	tr1, r1
+	st.q	r0,  0x208, r1
+	gettr	tr2, r1
+	st.q	r0,  0x210, r1
+	gettr	tr3, r1
+	st.q	r0,  0x218, r1
+	gettr	tr4, r1
+	st.q	r0,  0x220, r1
+	gettr	tr5, r1
+	st.q	r0,  0x228, r1
+	gettr	tr6, r1
+	st.q	r0,  0x230, r1
+	gettr	tr7, r1
+	st.q	r0,  0x238, r1
+
+	getcon	sr,  r1
+	getcon	ssr,  r2
+	getcon	pssr,  r3
+	getcon	spc,  r4
+	getcon	pspc,  r5
+	getcon	intevt,  r6
+	getcon	expevt,  r7
+	getcon	pexpevt,  r8
+	getcon	tra,  r9
+	getcon	tea,  r10
+	getcon	kcr0, r11
+	getcon	kcr1, r12
+	getcon	vbr,  r13
+	getcon	resvec,  r14
+
+	st.q	r0,  0x240, r1
+	st.q	r0,  0x248, r2
+	st.q	r0,  0x250, r3
+	st.q	r0,  0x258, r4
+	st.q	r0,  0x260, r5
+	st.q	r0,  0x268, r6
+	st.q	r0,  0x270, r7
+	st.q	r0,  0x278, r8
+	st.q	r0,  0x280, r9
+	st.q	r0,  0x288, r10
+	st.q	r0,  0x290, r11
+	st.q	r0,  0x298, r12
+	st.q	r0,  0x2a0, r13
+	st.q	r0,  0x2a8, r14
+
+	getcon	SPC,r2
+	getcon	SSR,r3
+	getcon	EXPEVT,r4
+	/* Prepare to jump to C - physical address */
+	movi	panic_handler-CONFIG_CACHED_MEMORY_OFFSET, r1
+	ori	r1, 1, r1
+	ptabs   r1, tr0
+	getcon	DCR, SP
+	blink	tr0, ZERO
+	nop
+	nop
+	nop
+	nop
+
+
+
+
+/*
+ * --- Signal Handling Section
+ */
+
+/*
+ * extern long long _sa_default_rt_restorer
+ * extern long long _sa_default_restorer
+ *
+ *		 or, better,
+ *
+ * extern void _sa_default_rt_restorer(void)
+ * extern void _sa_default_restorer(void)
+ *
+ * Code prototypes to do a sys_rt_sigreturn() or sys_sysreturn()
+ * from user space. Copied into user space by signal management.
+ * Both must be quad aligned and 2 quad long (4 instructions).
+ *
+ */
+	.balign 8
+	.global sa_default_rt_restorer
+sa_default_rt_restorer:
+	movi	0x10, r9
+	shori	__NR_rt_sigreturn, r9
+	trapa	r9
+	nop
+
+	.balign 8
+	.global sa_default_restorer
+sa_default_restorer:
+	movi	0x10, r9
+	shori	__NR_sigreturn, r9
+	trapa	r9
+	nop
+
+/*
+ * --- __ex_table Section
+ */
+
+/*
+ * User Access Exception Table.
+ */
+	.section	__ex_table,  "a"
+
+	.global asm_uaccess_start	/* Just a marker */
+asm_uaccess_start:
+
+	.long	___copy_user1, ___copy_user_exit
+	.long	___copy_user2, ___copy_user_exit
+	.long	___clear_user1, ___clear_user_exit
+	.long	___strncpy_from_user1, ___strncpy_from_user_exit
+	.long	___strnlen_user1, ___strnlen_user_exit
+	.long	___get_user_asm_b1, ___get_user_asm_b_exit
+	.long	___get_user_asm_w1, ___get_user_asm_w_exit
+	.long	___get_user_asm_l1, ___get_user_asm_l_exit
+	.long	___get_user_asm_q1, ___get_user_asm_q_exit
+	.long	___put_user_asm_b1, ___put_user_asm_b_exit
+	.long	___put_user_asm_w1, ___put_user_asm_w_exit
+	.long	___put_user_asm_l1, ___put_user_asm_l_exit
+	.long	___put_user_asm_q1, ___put_user_asm_q_exit
+
+	.global asm_uaccess_end		/* Just a marker */
+asm_uaccess_end:
+
+
+
+
+/*
+ * --- .text.init Section
+ */
+
+	.section	.text.init, "ax"
+
+/*
+ * void trap_init (void)
+ *
+ */
+	.global	trap_init
+trap_init:
+	addi	SP, -24, SP			/* Room to save r28/r29/r30 */
+	st.q	SP, 0, r28
+	st.q	SP, 8, r29
+	st.q	SP, 16, r30
+
+	/* Set VBR and RESVEC */
+	movi	LVBR_block, r19
+	andi	r19, -4, r19			/* reset MMUOFF + reserved */
+	/* For RESVEC exceptions we force the MMU off, which means we need the
+	   physical address. */
+	movi	LRESVEC_block-CONFIG_CACHED_MEMORY_OFFSET, r20
+	andi	r20, -4, r20			/* reset reserved */
+	ori	r20, 1, r20			/* set MMUOFF */
+	putcon	r19, VBR
+	putcon	r20, RESVEC
+
+	/* Sanity check */
+	movi	LVBR_block_end, r21
+	andi	r21, -4, r21
+	movi	BLOCK_SIZE, r29			/* r29 = expected size */
+	or	r19, ZERO, r30
+	add	r19, r29, r19
+
+	/*
+	 * Ugly, but better loop forever now than crash afterwards.
+	 * We should print a message, but if we touch LVBR or
+	 * LRESVEC blocks we should not be surprised if we get stuck
+	 * in trap_init().
+	 */
+	pta	trap_init_loop, tr1
+	gettr	tr1, r28			/* r28 = trap_init_loop */
+	sub	r21, r30, r30			/* r30 = actual size */
+
+	/*
+	 * VBR/RESVEC handlers overlap by being bigger than
+	 * allowed. Very bad. Just loop forever.
+	 * (r28) panic/loop address
+	 * (r29) expected size
+	 * (r30) actual size
+	 */
+trap_init_loop:
+	bne	r19, r21, tr1
+
+	/* Now that exception vectors are set up reset SR.BL */
+	getcon 	SR, r22
+	movi	SR_UNBLOCK_EXC, r23
+	and	r22, r23, r22
+	putcon	r22, SR
+
+	addi	SP, 24, SP
+	ptabs	LINK, tr0
+	blink	tr0, ZERO
+
diff --git a/arch/sh64/kernel/fpu.c b/arch/sh64/kernel/fpu.c
new file mode 100644
index 0000000..8ad4ed6
--- /dev/null
+++ b/arch/sh64/kernel/fpu.c
@@ -0,0 +1,170 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * arch/sh64/kernel/fpu.c
+ *
+ * Copyright (C) 2001  Manuela Cirronis, Paolo Alberelli
+ * Copyright (C) 2002  STMicroelectronics Limited
+ *   Author : Stuart Menefy
+ *
+ * Started from SH4 version:
+ *   Copyright (C) 1999, 2000  Kaz Kojima & Niibe Yutaka
+ *
+ */
+
+#include <linux/sched.h>
+#include <linux/signal.h>
+#include <asm/processor.h>
+#include <asm/user.h>
+#include <asm/io.h>
+
+/*
+ * Initially load the FPU with signalling NANS.  This bit pattern
+ * has the property that no matter whether considered as single or as
+ * double precision, it still represents a signalling NAN.
+ */
+#define sNAN64		0xFFFFFFFFFFFFFFFFULL
+#define sNAN32		0xFFFFFFFFUL
+
+static union sh_fpu_union init_fpuregs = {
+	.hard = {
+	  .fp_regs = { [0 ... 63] = sNAN32 },
+	  .fpscr = FPSCR_INIT
+	}
+};
+
+inline void fpsave(struct sh_fpu_hard_struct *fpregs)
+{
+	asm volatile("fst.p     %0, (0*8), fp0\n\t"
+		     "fst.p     %0, (1*8), fp2\n\t"
+		     "fst.p     %0, (2*8), fp4\n\t"
+		     "fst.p     %0, (3*8), fp6\n\t"
+		     "fst.p     %0, (4*8), fp8\n\t"
+		     "fst.p     %0, (5*8), fp10\n\t"
+		     "fst.p     %0, (6*8), fp12\n\t"
+		     "fst.p     %0, (7*8), fp14\n\t"
+		     "fst.p     %0, (8*8), fp16\n\t"
+		     "fst.p     %0, (9*8), fp18\n\t"
+		     "fst.p     %0, (10*8), fp20\n\t"
+		     "fst.p     %0, (11*8), fp22\n\t"
+		     "fst.p     %0, (12*8), fp24\n\t"
+		     "fst.p     %0, (13*8), fp26\n\t"
+		     "fst.p     %0, (14*8), fp28\n\t"
+		     "fst.p     %0, (15*8), fp30\n\t"
+		     "fst.p     %0, (16*8), fp32\n\t"
+		     "fst.p     %0, (17*8), fp34\n\t"
+		     "fst.p     %0, (18*8), fp36\n\t"
+		     "fst.p     %0, (19*8), fp38\n\t"
+		     "fst.p     %0, (20*8), fp40\n\t"
+		     "fst.p     %0, (21*8), fp42\n\t"
+		     "fst.p     %0, (22*8), fp44\n\t"
+		     "fst.p     %0, (23*8), fp46\n\t"
+		     "fst.p     %0, (24*8), fp48\n\t"
+		     "fst.p     %0, (25*8), fp50\n\t"
+		     "fst.p     %0, (26*8), fp52\n\t"
+		     "fst.p     %0, (27*8), fp54\n\t"
+		     "fst.p     %0, (28*8), fp56\n\t"
+		     "fst.p     %0, (29*8), fp58\n\t"
+		     "fst.p     %0, (30*8), fp60\n\t"
+		     "fst.p     %0, (31*8), fp62\n\t"
+
+		     "fgetscr   fr63\n\t"
+		     "fst.s     %0, (32*8), fr63\n\t"
+		: /* no output */
+		: "r" (fpregs)
+		: "memory");
+}
+
+
+static inline void
+fpload(struct sh_fpu_hard_struct *fpregs)
+{
+	asm volatile("fld.p     %0, (0*8), fp0\n\t"
+		     "fld.p     %0, (1*8), fp2\n\t"
+		     "fld.p     %0, (2*8), fp4\n\t"
+		     "fld.p     %0, (3*8), fp6\n\t"
+		     "fld.p     %0, (4*8), fp8\n\t"
+		     "fld.p     %0, (5*8), fp10\n\t"
+		     "fld.p     %0, (6*8), fp12\n\t"
+		     "fld.p     %0, (7*8), fp14\n\t"
+		     "fld.p     %0, (8*8), fp16\n\t"
+		     "fld.p     %0, (9*8), fp18\n\t"
+		     "fld.p     %0, (10*8), fp20\n\t"
+		     "fld.p     %0, (11*8), fp22\n\t"
+		     "fld.p     %0, (12*8), fp24\n\t"
+		     "fld.p     %0, (13*8), fp26\n\t"
+		     "fld.p     %0, (14*8), fp28\n\t"
+		     "fld.p     %0, (15*8), fp30\n\t"
+		     "fld.p     %0, (16*8), fp32\n\t"
+		     "fld.p     %0, (17*8), fp34\n\t"
+		     "fld.p     %0, (18*8), fp36\n\t"
+		     "fld.p     %0, (19*8), fp38\n\t"
+		     "fld.p     %0, (20*8), fp40\n\t"
+		     "fld.p     %0, (21*8), fp42\n\t"
+		     "fld.p     %0, (22*8), fp44\n\t"
+		     "fld.p     %0, (23*8), fp46\n\t"
+		     "fld.p     %0, (24*8), fp48\n\t"
+		     "fld.p     %0, (25*8), fp50\n\t"
+		     "fld.p     %0, (26*8), fp52\n\t"
+		     "fld.p     %0, (27*8), fp54\n\t"
+		     "fld.p     %0, (28*8), fp56\n\t"
+		     "fld.p     %0, (29*8), fp58\n\t"
+		     "fld.p     %0, (30*8), fp60\n\t"
+
+		     "fld.s     %0, (32*8), fr63\n\t"
+		     "fputscr   fr63\n\t"
+
+		     "fld.p     %0, (31*8), fp62\n\t"
+		: /* no output */
+		: "r" (fpregs) );
+}
+
+void fpinit(struct sh_fpu_hard_struct *fpregs)
+{
+	*fpregs = init_fpuregs.hard;
+}
+
+asmlinkage void
+do_fpu_error(unsigned long ex, struct pt_regs *regs)
+{
+	struct task_struct *tsk = current;
+
+	regs->pc += 4;
+
+	tsk->thread.trap_no = 11;
+	tsk->thread.error_code = 0;
+	force_sig(SIGFPE, tsk);
+}
+
+
+asmlinkage void
+do_fpu_state_restore(unsigned long ex, struct pt_regs *regs)
+{
+	void die(const char *str, struct pt_regs *regs, long err);
+
+	if (! user_mode(regs))
+		die("FPU used in kernel", regs, ex);
+
+	regs->sr &= ~SR_FD;
+
+	if (last_task_used_math == current)
+		return;
+
+	grab_fpu();
+	if (last_task_used_math != NULL) {
+		/* Other processes fpu state, save away */
+		fpsave(&last_task_used_math->thread.fpu.hard);
+        }
+        last_task_used_math = current;
+        if (used_math()) {
+                fpload(&current->thread.fpu.hard);
+        } else {
+		/* First time FPU user.  */
+		fpload(&init_fpuregs.hard);
+                set_used_math();
+        }
+	release_fpu();
+}
+
diff --git a/arch/sh64/kernel/head.S b/arch/sh64/kernel/head.S
new file mode 100644
index 0000000..cc0b628
--- /dev/null
+++ b/arch/sh64/kernel/head.S
@@ -0,0 +1,373 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * arch/sh64/kernel/head.S
+ *
+ * Copyright (C) 2000, 2001  Paolo Alberelli
+ * Copyright (C) 2003, 2004  Paul Mundt
+ *
+ *
+ * benedict.gaster@superh.com:	 2nd May 2002
+ *    Moved definition of empty_zero_page to its own section allowing
+ *    it to be placed at an absolute address known at load time.
+ *
+ * lethal@linux-sh.org:          9th May 2003
+ *    Kill off GLOBAL_NAME() usage.
+ *
+ * lethal@linux-sh.org:          8th May 2004
+ *    Add early SCIF console DTLB mapping.
+ */
+
+#include <linux/config.h>
+
+#include <asm/page.h>
+#include <asm/mmu_context.h>
+#include <asm/cache.h>
+#include <asm/tlb.h>
+#include <asm/processor.h>
+#include <asm/registers.h>
+#include <asm/thread_info.h>
+
+/*
+ * MMU defines: TLB boundaries.
+ */
+
+#define MMUIR_FIRST	ITLB_FIXED
+#define MMUIR_END	ITLB_LAST_VAR_UNRESTRICTED+TLB_STEP
+#define MMUIR_STEP	TLB_STEP
+
+#define MMUDR_FIRST	DTLB_FIXED
+#define MMUDR_END	DTLB_LAST_VAR_UNRESTRICTED+TLB_STEP
+#define MMUDR_STEP	TLB_STEP
+
+/* Safety check : CONFIG_CACHED_MEMORY_OFFSET has to be a multiple of 512Mb */
+#if (CONFIG_CACHED_MEMORY_OFFSET & ((1UL<<29)-1))
+#error "CONFIG_CACHED_MEMORY_OFFSET must be a multiple of 512Mb"
+#endif
+
+/*
+ * MMU defines: Fixed TLBs.
+ */
+/* Deal safely with the case where the base of RAM is not 512Mb aligned */
+
+#define ALIGN_512M_MASK (0xffffffffe0000000)
+#define ALIGNED_EFFECTIVE ((CONFIG_CACHED_MEMORY_OFFSET + CONFIG_MEMORY_START) & ALIGN_512M_MASK)
+#define ALIGNED_PHYSICAL (CONFIG_MEMORY_START & ALIGN_512M_MASK)
+
+#define MMUIR_TEXT_H	(0x0000000000000003 | ALIGNED_EFFECTIVE)
+			/* Enabled, Shared, ASID 0, Eff. Add. 0xA0000000 */
+
+#define MMUIR_TEXT_L	(0x000000000000009a | ALIGNED_PHYSICAL)
+			/* 512 Mb, Cacheable, Write-back, execute, Not User, Ph. Add. */
+
+#define MMUDR_CACHED_H	0x0000000000000003 | ALIGNED_EFFECTIVE
+			/* Enabled, Shared, ASID 0, Eff. Add. 0xA0000000 */
+#define MMUDR_CACHED_L	0x000000000000015a | ALIGNED_PHYSICAL
+			/* 512 Mb, Cacheable, Write-back, read/write, Not User, Ph. Add. */
+
+#ifdef CONFIG_ICACHE_DISABLED
+#define	ICCR0_INIT_VAL	ICCR0_OFF			/* ICACHE off */
+#else
+#define	ICCR0_INIT_VAL	ICCR0_ON | ICCR0_ICI		/* ICE + ICI */
+#endif
+#define	ICCR1_INIT_VAL	ICCR1_NOLOCK			/* No locking */
+
+#if defined (CONFIG_DCACHE_DISABLED)
+#define	OCCR0_INIT_VAL	OCCR0_OFF			   /* D-cache: off  */
+#elif defined (CONFIG_DCACHE_WRITE_THROUGH)
+#define	OCCR0_INIT_VAL	OCCR0_ON | OCCR0_OCI | OCCR0_WT	   /* D-cache: on,   */
+							   /* WT, invalidate */
+#elif defined (CONFIG_DCACHE_WRITE_BACK)
+#define	OCCR0_INIT_VAL	OCCR0_ON | OCCR0_OCI | OCCR0_WB	   /* D-cache: on,   */
+							   /* WB, invalidate */
+#else
+#error preprocessor flag CONFIG_DCACHE_... not recognized!
+#endif
+
+#define	OCCR1_INIT_VAL	OCCR1_NOLOCK			   /* No locking     */
+
+	.section	.empty_zero_page, "aw"
+	.global empty_zero_page
+
+empty_zero_page:
+	.long	1		/* MOUNT_ROOT_RDONLY */
+	.long	0		/* RAMDISK_FLAGS */
+	.long	0x0200		/* ORIG_ROOT_DEV */
+	.long	1		/* LOADER_TYPE */
+	.long	0x00800000	/* INITRD_START */
+	.long	0x00800000	/* INITRD_SIZE */
+	.long	0
+
+	.text
+	.balign 4096,0,4096
+
+	.section	.data, "aw"
+	.balign	PAGE_SIZE
+
+	.section	.data, "aw"
+	.balign	PAGE_SIZE
+
+	.global swapper_pg_dir
+swapper_pg_dir:
+	.space PAGE_SIZE, 0
+
+	.global empty_bad_page
+empty_bad_page:
+	.space PAGE_SIZE, 0
+
+	.global empty_bad_pte_table
+empty_bad_pte_table:
+	.space PAGE_SIZE, 0
+
+	.global	fpu_in_use
+fpu_in_use:	.quad	0
+
+
+	.section	.text, "ax"
+	.balign L1_CACHE_BYTES
+/*
+ * Condition at the entry of __stext:
+ * . Reset state:
+ *   . SR.FD    = 1		(FPU disabled)
+ *   . SR.BL    = 1		(Exceptions disabled)
+ *   . SR.MD    = 1		(Privileged Mode)
+ *   . SR.MMU   = 0		(MMU Disabled)
+ *   . SR.CD    = 0		(CTC User Visible)
+ *   . SR.IMASK = Undefined	(Interrupt Mask)
+ *
+ * Operations supposed to be performed by __stext:
+ * . prevent speculative fetch onto device memory while MMU is off
+ * . reflect as much as possible SH5 ABI (r15, r26, r27, r18)
+ * . first, save CPU state and set it to something harmless
+ * . any CPU detection and/or endianness settings (?)
+ * . initialize EMI/LMI (but not TMU/RTC/INTC/SCIF): TBD
+ * . set initial TLB entries for cached and uncached regions
+ *   (no fine granularity paging)
+ * . set initial cache state
+ * . enable MMU and caches
+ * . set CPU to a consistent state
+ *   . registers (including stack pointer and current/KCR0)
+ *   . NOT expecting to set Exception handling nor VBR/RESVEC/DCR
+ *     at this stage. This is all to later Linux initialization steps.
+ *   . initialize FPU
+ * . clear BSS
+ * . jump into start_kernel()
+ * . be prepared to hopeless start_kernel() returns.
+ *
+ */
+	.global _stext
+_stext:
+	/*
+	 * Prevent speculative fetch on device memory due to
+	 * uninitialized target registers.
+	 */
+	ptabs/u	ZERO, tr0
+	ptabs/u	ZERO, tr1
+	ptabs/u	ZERO, tr2
+	ptabs/u	ZERO, tr3
+	ptabs/u	ZERO, tr4
+	ptabs/u	ZERO, tr5
+	ptabs/u	ZERO, tr6
+	ptabs/u	ZERO, tr7
+	synci
+
+	/*
+	 * Read/Set CPU state. After this block:
+	 * r29 = Initial SR
+	 */
+	getcon	SR, r29
+	movi	SR_HARMLESS, r20
+	putcon	r20, SR
+
+	/*
+	 * Initialize EMI/LMI. To Be Done.
+	 */
+
+	/*
+	 * CPU detection and/or endianness settings (?). To Be Done.
+	 * Pure PIC code here, please ! Just save state into r30.
+         * After this block:
+	 * r30 = CPU type/Platform Endianness
+	 */
+
+	/*
+	 * Set initial TLB entries for cached and uncached regions.
+	 * Note: PTA/BLINK is PIC code, PTABS/BLINK isn't !
+	 */
+	/* Clear ITLBs */
+	pta	clear_ITLB, tr1
+	movi	MMUIR_FIRST, r21
+	movi	MMUIR_END, r22
+clear_ITLB:
+	putcfg	r21, 0, ZERO		/* Clear MMUIR[n].PTEH.V */
+	addi	r21, MMUIR_STEP, r21
+        bne	r21, r22, tr1
+
+	/* Clear DTLBs */
+	pta	clear_DTLB, tr1
+	movi	MMUDR_FIRST, r21
+	movi	MMUDR_END, r22
+clear_DTLB:
+	putcfg	r21, 0, ZERO		/* Clear MMUDR[n].PTEH.V */
+	addi	r21, MMUDR_STEP, r21
+        bne	r21, r22, tr1
+
+	/* Map one big (512Mb) page for ITLB */
+	movi	MMUIR_FIRST, r21
+	movi	MMUIR_TEXT_L, r22	/* PTEL first */
+	add.l	r22, r63, r22		/* Sign extend */
+	putcfg	r21, 1, r22		/* Set MMUIR[0].PTEL */
+	movi	MMUIR_TEXT_H, r22	/* PTEH last */
+	add.l	r22, r63, r22		/* Sign extend */
+	putcfg	r21, 0, r22		/* Set MMUIR[0].PTEH */
+
+	/* Map one big CACHED (512Mb) page for DTLB */
+	movi	MMUDR_FIRST, r21
+	movi	MMUDR_CACHED_L, r22	/* PTEL first */
+	add.l	r22, r63, r22		/* Sign extend */
+	putcfg	r21, 1, r22		/* Set MMUDR[0].PTEL */
+	movi	MMUDR_CACHED_H, r22	/* PTEH last */
+	add.l	r22, r63, r22		/* Sign extend */
+	putcfg	r21, 0, r22		/* Set MMUDR[0].PTEH */
+
+#ifdef CONFIG_EARLY_PRINTK
+	/*
+	 * Setup a DTLB translation for SCIF phys.
+	 */
+	addi    r21, MMUDR_STEP, r21
+	movi    0x0a03, r22	/* SCIF phys */
+	shori   0x0148, r22
+	putcfg  r21, 1, r22	/* PTEL first */
+	movi    0xfa03, r22	/* 0xfa030000, fixed SCIF virt */
+	shori   0x0003, r22
+	putcfg  r21, 0, r22	/* PTEH last */
+#endif
+
+	/*
+	 * Set cache behaviours.
+	 */
+	/* ICache */
+	movi	ICCR_BASE, r21
+	movi	ICCR0_INIT_VAL, r22
+	movi	ICCR1_INIT_VAL, r23
+	putcfg	r21, ICCR_REG0, r22
+	putcfg	r21, ICCR_REG1, r23
+
+	/* OCache */
+	movi	OCCR_BASE, r21
+	movi	OCCR0_INIT_VAL, r22
+	movi	OCCR1_INIT_VAL, r23
+	putcfg	r21, OCCR_REG0, r22
+	putcfg	r21, OCCR_REG1, r23
+
+
+	/*
+	 * Enable Caches and MMU. Do the first non-PIC jump.
+         * Now head.S global variables, constants and externs
+	 * can be used.
+	 */
+	getcon	SR, r21
+	movi	SR_ENABLE_MMU, r22
+	or	r21, r22, r21
+	putcon	r21, SSR
+	movi	hyperspace, r22
+	ori	r22, 1, r22	    /* Make it SHmedia, not required but..*/
+	putcon	r22, SPC
+	synco
+	rte			    /* And now go into the hyperspace ... */
+hyperspace:			    /* ... that's the next instruction !  */
+
+	/*
+	 * Set CPU to a consistent state.
+	 * r31 = FPU support flag
+	 * tr0/tr7 in use. Others give a chance to loop somewhere safe
+	 */
+	movi	start_kernel, r32
+	ori	r32, 1, r32
+
+	ptabs	r32, tr0		    /* r32 = _start_kernel address        */
+	pta/u	hopeless, tr1
+	pta/u	hopeless, tr2
+	pta/u	hopeless, tr3
+	pta/u	hopeless, tr4
+	pta/u	hopeless, tr5
+	pta/u	hopeless, tr6
+	pta/u	hopeless, tr7
+	gettr	tr1, r28			/* r28 = hopeless address */
+
+	/* Set initial stack pointer */
+	movi	init_thread_union, SP
+	putcon	SP, KCR0		/* Set current to init_task */
+	movi	THREAD_SIZE, r22	/* Point to the end */
+	add	SP, r22, SP
+
+	/*
+	 * Initialize FPU.
+	 * Keep FPU flag in r31. After this block:
+	 * r31 = FPU flag
+	 */
+	movi fpu_in_use, r31	/* Temporary */
+
+#ifdef CONFIG_SH_FPU
+	getcon	SR, r21
+	movi	SR_ENABLE_FPU, r22
+	and	r21, r22, r22
+	putcon	r22, SR			/* Try to enable */
+	getcon	SR, r22
+	xor	r21, r22, r21
+	shlri	r21, 15, r21		/* Supposedly 0/1 */
+	st.q	r31, 0 , r21		/* Set fpu_in_use */
+#else
+	movi	0, r21
+	st.q	r31, 0 , r21		/* Set fpu_in_use */
+#endif
+	or	r21, ZERO, r31		/* Set FPU flag at last */
+
+#ifndef CONFIG_SH_NO_BSS_INIT
+/* Don't clear BSS if running on slow platforms such as an RTL simulation,
+   remote memory via SHdebug link, etc.  For these the memory can be guaranteed
+   to be all zero on boot anyway. */
+	/*
+	 * Clear bss
+	 */
+	pta	clear_quad, tr1
+	movi	__bss_start, r22
+	movi	_end, r23
+clear_quad:
+	st.q	r22, 0, ZERO
+	addi	r22, 8, r22
+	bne	r22, r23, tr1		/* Both quad aligned, see vmlinux.lds.S */
+#endif
+	pta/u	hopeless, tr1
+
+	/* Say bye to head.S but be prepared to wrongly get back ... */
+	blink	tr0, LINK
+
+	/* If we ever get back here through LINK/tr1-tr7 */
+	pta/u	hopeless, tr7
+
+hopeless:
+	/*
+	 * Something's badly wrong here. Loop endlessly,
+         * there's nothing more we can do about it.
+	 *
+	 * Note on hopeless: it can be jumped into invariably
+	 * before or after jumping into hyperspace. The only
+	 * requirement is to be PIC called (PTA) before and
+	 * any way (PTA/PTABS) after. According to Virtual
+	 * to Physical mapping a simulator/emulator can easily
+	 * tell where we came here from just looking at hopeless
+	 * (PC) address.
+	 *
+	 * For debugging purposes:
+	 * (r28) hopeless/loop address
+	 * (r29) Original SR
+	 * (r30) CPU type/Platform endianness
+	 * (r31) FPU Support
+	 * (r32) _start_kernel address
+	 */
+	blink	tr7, ZERO
+
+
diff --git a/arch/sh64/kernel/init_task.c b/arch/sh64/kernel/init_task.c
new file mode 100644
index 0000000..de2d07d
--- /dev/null
+++ b/arch/sh64/kernel/init_task.c
@@ -0,0 +1,46 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * arch/sh64/kernel/init_task.c
+ *
+ * Copyright (C) 2000, 2001  Paolo Alberelli
+ * Copyright (C) 2003  Paul Mundt
+ *
+ */
+#include <linux/rwsem.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/init_task.h>
+#include <linux/mqueue.h>
+
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+
+static struct fs_struct init_fs = INIT_FS;
+static struct files_struct init_files = INIT_FILES;
+static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
+static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
+struct mm_struct init_mm = INIT_MM(init_mm);
+
+struct pt_regs fake_swapper_regs;
+
+/*
+ * Initial thread structure.
+ *
+ * We need to make sure that this is THREAD_SIZE-byte aligned due
+ * to the way process stacks are handled. This is done by having a
+ * special "init_task" linker map entry..
+ */
+union thread_union init_thread_union
+	__attribute__((__section__(".data.init_task"))) =
+		{ INIT_THREAD_INFO(init_task) };
+
+/*
+ * Initial task structure.
+ *
+ * All other task structs will be allocated on slabs in fork.c
+ */
+struct task_struct init_task = INIT_TASK(init_task);
+
diff --git a/arch/sh64/kernel/irq.c b/arch/sh64/kernel/irq.c
new file mode 100644
index 0000000..9fc2b71
--- /dev/null
+++ b/arch/sh64/kernel/irq.c
@@ -0,0 +1,116 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * arch/sh64/kernel/irq.c
+ *
+ * Copyright (C) 2000, 2001  Paolo Alberelli
+ * Copyright (C) 2003  Paul Mundt
+ *
+ */
+
+/*
+ * IRQs are in fact implemented a bit like signal handlers for the kernel.
+ * Naturally it's not a 1:1 relation, but there are similarities.
+ */
+
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/kernel_stat.h>
+#include <linux/signal.h>
+#include <linux/rwsem.h>
+#include <linux/sched.h>
+#include <linux/ioport.h>
+#include <linux/interrupt.h>
+#include <linux/timex.h>
+#include <linux/slab.h>
+#include <linux/random.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/init.h>
+#include <linux/seq_file.h>
+#include <linux/bitops.h>
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/pgalloc.h>
+#include <asm/delay.h>
+#include <asm/irq.h>
+#include <linux/irq.h>
+
+void ack_bad_irq(unsigned int irq)
+{
+	printk("unexpected IRQ trap at irq %02x\n", irq);
+}
+
+#if defined(CONFIG_PROC_FS)
+int show_interrupts(struct seq_file *p, void *v)
+{
+	int i = *(loff_t *) v, j;
+	struct irqaction * action;
+	unsigned long flags;
+
+	if (i == 0) {
+		seq_puts(p, "           ");
+		for (j=0; j<NR_CPUS; j++)
+			if (cpu_online(j))
+				seq_printf(p, "CPU%d       ",j);
+		seq_putc(p, '\n');
+	}
+
+	if (i < NR_IRQS) {
+		spin_lock_irqsave(&irq_desc[i].lock, flags);
+		action = irq_desc[i].action;
+		if (!action)
+			goto unlock;
+		seq_printf(p, "%3d: ",i);
+		seq_printf(p, "%10u ", kstat_irqs(i));
+		seq_printf(p, " %14s", irq_desc[i].handler->typename);
+		seq_printf(p, "  %s", action->name);
+
+		for (action=action->next; action; action = action->next)
+			seq_printf(p, ", %s", action->name);
+		seq_putc(p, '\n');
+unlock:
+		spin_unlock_irqrestore(&irq_desc[i].lock, flags);
+	}
+	return 0;
+}
+#endif
+
+/*
+ * do_NMI handles all Non-Maskable Interrupts.
+ */
+asmlinkage void do_NMI(unsigned long vector_num, struct pt_regs * regs)
+{
+	if (regs->sr & 0x40000000)
+		printk("unexpected NMI trap in system mode\n");
+	else
+		printk("unexpected NMI trap in user mode\n");
+
+	/* No statistics */
+}
+
+/*
+ * do_IRQ handles all normal device IRQ's.
+ */
+asmlinkage int do_IRQ(unsigned long vector_num, struct pt_regs * regs)
+{
+	int irq;
+
+	irq_enter();
+
+	irq = irq_demux(vector_num);
+
+	if (irq >= 0) {
+		__do_IRQ(irq, regs);
+	} else {
+		printk("unexpected IRQ trap at vector %03lx\n", vector_num);
+	}
+
+	irq_exit();
+
+	return 1;
+}
+
diff --git a/arch/sh64/kernel/irq_intc.c b/arch/sh64/kernel/irq_intc.c
new file mode 100644
index 0000000..43f88f3
--- /dev/null
+++ b/arch/sh64/kernel/irq_intc.c
@@ -0,0 +1,272 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * arch/sh64/kernel/irq_intc.c
+ *
+ * Copyright (C) 2000, 2001  Paolo Alberelli
+ * Copyright (C) 2003  Paul Mundt
+ *
+ * Interrupt Controller support for SH5 INTC.
+ * Per-interrupt selective. IRLM=0 (Fixed priority) is not
+ * supported being useless without a cascaded interrupt
+ * controller.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/stddef.h>
+#include <linux/bitops.h>       /* this includes also <asm/registers.h */
+                                /* which is required to remap register */
+                                /* names used into __asm__ blocks...   */
+
+#include <asm/hardware.h>
+#include <asm/platform.h>
+#include <asm/page.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+
+/*
+ * Maybe the generic Peripheral block could move to a more
+ * generic include file. INTC Block will be defined here
+ * and only here to make INTC self-contained in a single
+ * file.
+ */
+#define	INTC_BLOCK_OFFSET	0x01000000
+
+/* Base */
+#define INTC_BASE		PHYS_PERIPHERAL_BLOCK + \
+				INTC_BLOCK_OFFSET
+
+/* Address */
+#define INTC_ICR_SET		(intc_virt + 0x0)
+#define INTC_ICR_CLEAR		(intc_virt + 0x8)
+#define INTC_INTPRI_0		(intc_virt + 0x10)
+#define INTC_INTSRC_0		(intc_virt + 0x50)
+#define INTC_INTSRC_1		(intc_virt + 0x58)
+#define INTC_INTREQ_0		(intc_virt + 0x60)
+#define INTC_INTREQ_1		(intc_virt + 0x68)
+#define INTC_INTENB_0		(intc_virt + 0x70)
+#define INTC_INTENB_1		(intc_virt + 0x78)
+#define INTC_INTDSB_0		(intc_virt + 0x80)
+#define INTC_INTDSB_1		(intc_virt + 0x88)
+
+#define INTC_ICR_IRLM		0x1
+#define	INTC_INTPRI_PREGS	8		/* 8 Priority Registers */
+#define	INTC_INTPRI_PPREG	8		/* 8 Priorities per Register */
+
+
+/*
+ * Mapper between the vector ordinal and the IRQ number
+ * passed to kernel/device drivers.
+ */
+int intc_evt_to_irq[(0xE20/0x20)+1] = {
+	-1, -1, -1, -1, -1, -1, -1, -1,	/* 0x000 - 0x0E0 */
+	-1, -1, -1, -1, -1, -1, -1, -1,	/* 0x100 - 0x1E0 */
+	 0,  0,  0,  0,  0,  1,  0,  0,	/* 0x200 - 0x2E0 */
+	 2,  0,  0,  3,  0,  0,  0, -1,	/* 0x300 - 0x3E0 */
+	32, 33, 34, 35, 36, 37, 38, -1,	/* 0x400 - 0x4E0 */
+	-1, -1, -1, 63, -1, -1, -1, -1,	/* 0x500 - 0x5E0 */
+	-1, -1, 18, 19, 20, 21, 22, -1,	/* 0x600 - 0x6E0 */
+	39, 40, 41, 42, -1, -1, -1, -1,	/* 0x700 - 0x7E0 */
+	 4,  5,  6,  7, -1, -1, -1, -1,	/* 0x800 - 0x8E0 */
+	-1, -1, -1, -1, -1, -1, -1, -1,	/* 0x900 - 0x9E0 */
+	12, 13, 14, 15, 16, 17, -1, -1,	/* 0xA00 - 0xAE0 */
+	-1, -1, -1, -1, -1, -1, -1, -1,	/* 0xB00 - 0xBE0 */
+	-1, -1, -1, -1, -1, -1, -1, -1,	/* 0xC00 - 0xCE0 */
+	-1, -1, -1, -1, -1, -1, -1, -1,	/* 0xD00 - 0xDE0 */
+	-1, -1				/* 0xE00 - 0xE20 */
+};
+
+/*
+ * Opposite mapper.
+ */
+static int IRQ_to_vectorN[NR_INTC_IRQS] = {
+	0x12, 0x15, 0x18, 0x1B, 0x40, 0x41, 0x42, 0x43, /*  0- 7 */
+	  -1,   -1,   -1,   -1, 0x50, 0x51, 0x52, 0x53,	/*  8-15 */
+	0x54, 0x55, 0x32, 0x33, 0x34, 0x35, 0x36,   -1, /* 16-23 */
+	  -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1, /* 24-31 */
+	0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x38,	/* 32-39 */
+        0x39, 0x3A, 0x3B,   -1,   -1,   -1,   -1,   -1, /* 40-47 */
+	  -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1, /* 48-55 */
+	  -1,   -1,   -1,   -1,   -1,   -1,   -1, 0x2B, /* 56-63 */
+
+};
+
+static unsigned long intc_virt;
+
+static unsigned int startup_intc_irq(unsigned int irq);
+static void shutdown_intc_irq(unsigned int irq);
+static void enable_intc_irq(unsigned int irq);
+static void disable_intc_irq(unsigned int irq);
+static void mask_and_ack_intc(unsigned int);
+static void end_intc_irq(unsigned int irq);
+
+static struct hw_interrupt_type intc_irq_type = {
+	"INTC",
+	startup_intc_irq,
+	shutdown_intc_irq,
+	enable_intc_irq,
+	disable_intc_irq,
+	mask_and_ack_intc,
+	end_intc_irq
+};
+
+static int irlm;		/* IRL mode */
+
+static unsigned int startup_intc_irq(unsigned int irq)
+{
+	enable_intc_irq(irq);
+	return 0; /* never anything pending */
+}
+
+static void shutdown_intc_irq(unsigned int irq)
+{
+	disable_intc_irq(irq);
+}
+
+static void enable_intc_irq(unsigned int irq)
+{
+	unsigned long reg;
+	unsigned long bitmask;
+
+	if ((irq <= IRQ_IRL3) && (irlm == NO_PRIORITY))
+		printk("Trying to use straight IRL0-3 with an encoding platform.\n");
+
+	if (irq < 32) {
+		reg = INTC_INTENB_0;
+		bitmask = 1 << irq;
+	} else {
+		reg = INTC_INTENB_1;
+		bitmask = 1 << (irq - 32);
+	}
+
+	ctrl_outl(bitmask, reg);
+}
+
+static void disable_intc_irq(unsigned int irq)
+{
+	unsigned long reg;
+	unsigned long bitmask;
+
+	if (irq < 32) {
+		reg = INTC_INTDSB_0;
+		bitmask = 1 << irq;
+	} else {
+		reg = INTC_INTDSB_1;
+		bitmask = 1 << (irq - 32);
+	}
+
+	ctrl_outl(bitmask, reg);
+}
+
+static void mask_and_ack_intc(unsigned int irq)
+{
+	disable_intc_irq(irq);
+}
+
+static void end_intc_irq(unsigned int irq)
+{
+	enable_intc_irq(irq);
+}
+
+/* For future use, if we ever support IRLM=0) */
+void make_intc_irq(unsigned int irq)
+{
+	disable_irq_nosync(irq);
+	irq_desc[irq].handler = &intc_irq_type;
+	disable_intc_irq(irq);
+}
+
+#if defined(CONFIG_PROC_FS) && defined(CONFIG_SYSCTL)
+int intc_irq_describe(char* p, int irq)
+{
+	if (irq < NR_INTC_IRQS)
+		return sprintf(p, "(0x%3x)", IRQ_to_vectorN[irq]*0x20);
+	else
+		return 0;
+}
+#endif
+
+void __init init_IRQ(void)
+{
+        unsigned long long __dummy0, __dummy1=~0x00000000100000f0;
+	unsigned long reg;
+	unsigned long data;
+	int i;
+
+	intc_virt = onchip_remap(INTC_BASE, 1024, "INTC");
+	if (!intc_virt) {
+		panic("Unable to remap INTC\n");
+	}
+
+
+	/* Set default: per-line enable/disable, priority driven ack/eoi */
+	for (i = 0; i < NR_INTC_IRQS; i++) {
+		if (platform_int_priority[i] != NO_PRIORITY) {
+			irq_desc[i].handler = &intc_irq_type;
+		}
+	}
+
+
+	/* Disable all interrupts and set all priorities to 0 to avoid trouble */
+	ctrl_outl(-1, INTC_INTDSB_0);
+	ctrl_outl(-1, INTC_INTDSB_1);
+
+	for (reg = INTC_INTPRI_0, i = 0; i < INTC_INTPRI_PREGS; i++, reg += 8)
+		ctrl_outl( NO_PRIORITY, reg);
+
+
+	/* Set IRLM */
+	/* If all the priorities are set to 'no priority', then
+	 * assume we are using encoded mode.
+	 */
+	irlm = platform_int_priority[IRQ_IRL0] + platform_int_priority[IRQ_IRL1] + \
+		platform_int_priority[IRQ_IRL2] + platform_int_priority[IRQ_IRL3];
+
+	if (irlm == NO_PRIORITY) {
+		/* IRLM = 0 */
+		reg = INTC_ICR_CLEAR;
+		i = IRQ_INTA;
+		printk("Trying to use encoded IRL0-3. IRLs unsupported.\n");
+	} else {
+		/* IRLM = 1 */
+		reg = INTC_ICR_SET;
+		i = IRQ_IRL0;
+	}
+	ctrl_outl(INTC_ICR_IRLM, reg);
+
+	/* Set interrupt priorities according to platform description */
+	for (data = 0, reg = INTC_INTPRI_0; i < NR_INTC_IRQS; i++) {
+		data |= platform_int_priority[i] << ((i % INTC_INTPRI_PPREG) * 4);
+		if ((i % INTC_INTPRI_PPREG) == (INTC_INTPRI_PPREG - 1)) {
+			/* Upon the 7th, set Priority Register */
+			ctrl_outl(data, reg);
+			data = 0;
+			reg += 8;
+		}
+	}
+
+#ifdef CONFIG_SH_CAYMAN
+	{
+		extern void init_cayman_irq(void);
+
+		init_cayman_irq();
+	}
+#endif
+
+	/*
+	 * And now let interrupts come in.
+	 * sti() is not enough, we need to
+	 * lower priority, too.
+	 */
+        __asm__ __volatile__("getcon    " __SR ", %0\n\t"
+                             "and       %0, %1, %0\n\t"
+                             "putcon    %0, " __SR "\n\t"
+                             : "=&r" (__dummy0)
+                             : "r" (__dummy1));
+}
diff --git a/arch/sh64/kernel/led.c b/arch/sh64/kernel/led.c
new file mode 100644
index 0000000..cf993c4
--- /dev/null
+++ b/arch/sh64/kernel/led.c
@@ -0,0 +1,41 @@
+/*
+ * arch/sh64/kernel/led.c
+ *
+ * Copyright (C) 2002 Stuart Menefy <stuart.menefy@st.com>
+ *
+ * May be copied or modified under the terms of the GNU General Public
+ * License.  See linux/COPYING for more information.
+ *
+ * Flash the LEDs
+ */
+#include <linux/config.h>
+#include <linux/stddef.h>
+#include <linux/sched.h>
+
+void mach_led(int pos, int val);
+
+/* acts like an actual heart beat -- ie thump-thump-pause... */
+void heartbeat(void)
+{
+	static unsigned int cnt = 0, period = 0, dist = 0;
+
+	if (cnt == 0 || cnt == dist) {
+		mach_led(-1, 1);
+	} else if (cnt == 7 || cnt == dist + 7) {
+		mach_led(-1, 0);
+	}
+
+	if (++cnt > period) {
+		cnt = 0;
+
+		/*
+		 * The hyperbolic function below modifies the heartbeat period
+		 * length in dependency of the current (5min) load. It goes
+		 * through the points f(0)=126, f(1)=86, f(5)=51, f(inf)->30.
+		 */
+		period = ((672 << FSHIFT) / (5 * avenrun[0] +
+					    (7 << FSHIFT))) + 30;
+		dist = period / 4;
+	}
+}
+
diff --git a/arch/sh64/kernel/module.c b/arch/sh64/kernel/module.c
new file mode 100644
index 0000000..2598f6b
--- /dev/null
+++ b/arch/sh64/kernel/module.c
@@ -0,0 +1,161 @@
+/*  Kernel module help for sh64.
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+    Copyright 2004 SuperH (UK) Ltd
+    Author: Richard Curnow
+
+    Based on the sh version, and on code from the sh64-specific parts of
+    modutils, originally written by Richard Curnow and Ben Gaster.
+
+*/
+#include <linux/moduleloader.h>
+#include <linux/elf.h>
+#include <linux/vmalloc.h>
+#include <linux/fs.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(fmt...)
+#endif
+
+void *module_alloc(unsigned long size)
+{
+	if (size == 0)
+		return NULL;
+	return vmalloc(size);
+}
+
+
+/* Free memory returned from module_alloc */
+void module_free(struct module *mod, void *module_region)
+{
+	vfree(module_region);
+	/* FIXME: If module_region == mod->init_region, trim exception
+           table entries. */
+}
+
+/* We don't need anything special. */
+int module_frob_arch_sections(Elf_Ehdr *hdr,
+			      Elf_Shdr *sechdrs,
+			      char *secstrings,
+			      struct module *mod)
+{
+	return 0;
+}
+
+int apply_relocate_add(Elf32_Shdr *sechdrs,
+		   const char *strtab,
+		   unsigned int symindex,
+		   unsigned int relsec,
+		   struct module *me)
+{
+	unsigned int i;
+	Elf32_Rela *rel = (void *)sechdrs[relsec].sh_addr;
+	Elf32_Sym *sym;
+	Elf32_Addr relocation;
+	uint32_t *location;
+	int align;
+	int is_shmedia;
+
+	DEBUGP("Applying relocate section %u to %u\n", relsec,
+	       sechdrs[relsec].sh_info);
+	for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
+		/* This is where to make the change */
+		location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
+			+ rel[i].r_offset;
+		/* This is the symbol it is referring to.  Note that all
+		   undefined symbols have been resolved.  */
+		sym = (Elf32_Sym *)sechdrs[symindex].sh_addr
+			+ ELF32_R_SYM(rel[i].r_info);
+		relocation = sym->st_value + rel[i].r_addend;
+		align = (int)location & 3;
+
+		/* For text addresses, bit2 of the st_other field indicates
+		 * whether the symbol is SHmedia (1) or SHcompact (0).  If
+		 * SHmedia, the LSB of the symbol needs to be asserted
+		 * for the CPU to be in SHmedia mode when it starts executing
+		 * the branch target. */
+		is_shmedia = (sym->st_other & 4) ? 1 : 0;
+		if (is_shmedia) {
+			relocation |= 1;
+		}
+
+		switch (ELF32_R_TYPE(rel[i].r_info)) {
+		case R_SH_DIR32:
+			DEBUGP("R_SH_DIR32 @%08lx = %08lx\n", (unsigned long) location, (unsigned long) relocation);
+			*location += relocation;
+			break;
+		case R_SH_REL32:
+			DEBUGP("R_SH_REL32 @%08lx = %08lx\n", (unsigned long) location, (unsigned long) relocation);
+			relocation -= (Elf32_Addr) location;
+			*location += relocation;
+			break;
+		case R_SH_IMM_LOW16:
+			DEBUGP("R_SH_IMM_LOW16 @%08lx = %08lx\n", (unsigned long) location, (unsigned long) relocation);
+			*location = (*location & ~0x3fffc00) |
+				((relocation & 0xffff) << 10);
+			break;
+		case R_SH_IMM_MEDLOW16:
+			DEBUGP("R_SH_IMM_MEDLOW16 @%08lx = %08lx\n", (unsigned long) location, (unsigned long) relocation);
+			*location = (*location & ~0x3fffc00) |
+				(((relocation >> 16) & 0xffff) << 10);
+			break;
+		case R_SH_IMM_LOW16_PCREL:
+			DEBUGP("R_SH_IMM_LOW16_PCREL @%08lx = %08lx\n", (unsigned long) location, (unsigned long) relocation);
+			relocation -= (Elf32_Addr) location;
+			*location = (*location & ~0x3fffc00) |
+				((relocation & 0xffff) << 10);
+			break;
+		case R_SH_IMM_MEDLOW16_PCREL:
+			DEBUGP("R_SH_IMM_MEDLOW16_PCREL @%08lx = %08lx\n", (unsigned long) location, (unsigned long) relocation);
+			relocation -= (Elf32_Addr) location;
+			*location = (*location & ~0x3fffc00) |
+				(((relocation >> 16) & 0xffff) << 10);
+			break;
+		default:
+			printk(KERN_ERR "module %s: Unknown relocation: %u\n",
+			       me->name, ELF32_R_TYPE(rel[i].r_info));
+			return -ENOEXEC;
+		}
+	}
+	return 0;
+}
+
+int apply_relocate(Elf32_Shdr *sechdrs,
+		       const char *strtab,
+		       unsigned int symindex,
+		       unsigned int relsec,
+		       struct module *me)
+{
+	printk(KERN_ERR "module %s: REL RELOCATION unsupported\n",
+	       me->name);
+	return -ENOEXEC;
+}
+
+int module_finalize(const Elf_Ehdr *hdr,
+		    const Elf_Shdr *sechdrs,
+		    struct module *me)
+{
+	return 0;
+}
+
+void module_arch_cleanup(struct module *mod)
+{
+}
+
diff --git a/arch/sh64/kernel/pci-dma.c b/arch/sh64/kernel/pci-dma.c
new file mode 100644
index 0000000..a36c3d7
--- /dev/null
+++ b/arch/sh64/kernel/pci-dma.c
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2001 David J. Mckay (david.mckay@st.com)
+ * Copyright (C) 2003 Paul Mundt (lethal@linux-sh.org)
+ *
+ * May be copied or modified under the terms of the GNU General Public
+ * License.  See linux/COPYING for more information.
+ *
+ * Dynamic DMA mapping support.
+ */
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/pci.h>
+#include <asm/io.h>
+
+void *consistent_alloc(struct pci_dev *hwdev, size_t size,
+			   dma_addr_t *dma_handle)
+{
+	void *ret;
+	int gfp = GFP_ATOMIC;
+        void *vp;
+
+	if (hwdev == NULL || hwdev->dma_mask != 0xffffffff)
+		gfp |= GFP_DMA;
+
+	ret = (void *)__get_free_pages(gfp, get_order(size));
+
+	/* now call our friend ioremap_nocache to give us an uncached area */
+        vp = ioremap_nocache(virt_to_phys(ret), size);
+
+	if (vp != NULL) {
+		memset(vp, 0, size);
+		*dma_handle = virt_to_bus(ret);
+		dma_cache_wback_inv((unsigned long)ret, size);
+	}
+
+	return vp;
+}
+
+void consistent_free(struct pci_dev *hwdev, size_t size,
+			 void *vaddr, dma_addr_t dma_handle)
+{
+	void *alloc;
+
+	alloc = bus_to_virt((unsigned long)dma_handle);
+	free_pages((unsigned long)alloc, get_order(size));
+
+	iounmap(vaddr);
+}
+
diff --git a/arch/sh64/kernel/pci_sh5.c b/arch/sh64/kernel/pci_sh5.c
new file mode 100644
index 0000000..6197879
--- /dev/null
+++ b/arch/sh64/kernel/pci_sh5.c
@@ -0,0 +1,541 @@
+/*
+ * Copyright (C) 2001 David J. Mckay (david.mckay@st.com)
+ * Copyright (C) 2003, 2004 Paul Mundt
+ * Copyright (C) 2004 Richard Curnow
+ *
+ * May be copied or modified under the terms of the GNU General Public
+ * License.  See linux/COPYING for more information.
+ *
+ * Support functions for the SH5 PCI hardware.
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/rwsem.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/types.h>
+#include <asm/pci.h>
+#include <linux/irq.h>
+
+#include <asm/io.h>
+#include <asm/hardware.h>
+#include "pci_sh5.h"
+
+static unsigned long pcicr_virt;
+unsigned long pciio_virt;
+
+static void __init pci_fixup_ide_bases(struct pci_dev *d)
+{
+	int i;
+
+	/*
+	 * PCI IDE controllers use non-standard I/O port decoding, respect it.
+	 */
+	if ((d->class >> 8) != PCI_CLASS_STORAGE_IDE)
+		return;
+	printk("PCI: IDE base address fixup for %s\n", pci_name(d));
+	for(i=0; i<4; i++) {
+		struct resource *r = &d->resource[i];
+		if ((r->start & ~0x80) == 0x374) {
+			r->start |= 2;
+			r->end = r->start;
+		}
+	}
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pci_fixup_ide_bases);
+
+char * __init pcibios_setup(char *str)
+{
+	return str;
+}
+
+/* Rounds a number UP to the nearest power of two. Used for
+ * sizing the PCI window.
+ */
+static u32 __init r2p2(u32 num)
+{
+	int i = 31;
+	u32 tmp = num;
+
+	if (num == 0)
+		return 0;
+
+	do {
+		if (tmp & (1 << 31))
+			break;
+		i--;
+		tmp <<= 1;
+	} while (i >= 0);
+
+	tmp = 1 << i;
+	/* If the original number isn't a power of 2, round it up */
+	if (tmp != num)
+		tmp <<= 1;
+
+	return tmp;
+}
+
+extern unsigned long long memory_start, memory_end;
+
+int __init sh5pci_init(unsigned memStart, unsigned memSize)
+{
+	u32 lsr0;
+	u32 uval;
+
+	pcicr_virt = onchip_remap(SH5PCI_ICR_BASE, 1024, "PCICR");
+	if (!pcicr_virt) {
+		panic("Unable to remap PCICR\n");
+	}
+
+	pciio_virt = onchip_remap(SH5PCI_IO_BASE, 0x10000, "PCIIO");
+	if (!pciio_virt) {
+		panic("Unable to remap PCIIO\n");
+	}
+
+	pr_debug("Register base addres is 0x%08lx\n", pcicr_virt);
+
+	/* Clear snoop registers */
+        SH5PCI_WRITE(CSCR0, 0);
+        SH5PCI_WRITE(CSCR1, 0);
+
+	pr_debug("Wrote to reg\n");
+
+        /* Switch off interrupts */
+        SH5PCI_WRITE(INTM,  0);
+        SH5PCI_WRITE(AINTM, 0);
+        SH5PCI_WRITE(PINTM, 0);
+
+        /* Set bus active, take it out of reset */
+        uval = SH5PCI_READ(CR);
+
+	/* Set command Register */
+        SH5PCI_WRITE(CR, uval | CR_LOCK_MASK | CR_CFINT| CR_FTO | CR_PFE | CR_PFCS | CR_BMAM);
+
+	uval=SH5PCI_READ(CR);
+        pr_debug("CR is actually 0x%08x\n",uval);
+
+        /* Allow it to be a master */
+	/* NB - WE DISABLE I/O ACCESS to stop overlap */
+        /* set WAIT bit to enable stepping, an attempt to improve stability */
+	SH5PCI_WRITE_SHORT(CSR_CMD,
+			    PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER | PCI_COMMAND_WAIT);
+
+        /*
+        ** Set translation mapping memory in order to convert the address
+        ** used for the main bus, to the PCI internal address.
+        */
+        SH5PCI_WRITE(MBR,0x40000000);
+
+        /* Always set the max size 512M */
+        SH5PCI_WRITE(MBMR, PCISH5_MEM_SIZCONV(512*1024*1024));
+
+        /*
+        ** I/O addresses are mapped at internal PCI specific address
+        ** as is described into the configuration bridge table.
+        ** These are changed to 0, to allow cards that have legacy
+        ** io such as vga to function correctly. We set the SH5 IOBAR to
+        ** 256K, which is a bit big as we can only have 64K of address space
+        */
+
+        SH5PCI_WRITE(IOBR,0x0);
+
+	pr_debug("PCI:Writing 0x%08x to IOBR\n",0);
+
+        /* Set up a 256K window. Totally pointless waste  of address space */
+        SH5PCI_WRITE(IOBMR,0);
+	pr_debug("PCI:Writing 0x%08x to IOBMR\n",0);
+
+	/* The SH5 has a HUGE 256K I/O region, which breaks the PCI spec. Ideally,
+         * we would want to map the I/O region somewhere, but it is so big this is not
+         * that easy!
+         */
+	SH5PCI_WRITE(CSR_IBAR0,~0);
+	/* Set memory size value */
+        memSize = memory_end - memory_start;
+
+        /* Now we set up the mbars so the PCI bus can see the memory of the machine */
+        if (memSize < (1024 * 1024)) {
+                printk(KERN_ERR "PCISH5: Ridiculous memory size of 0x%x?\n", memSize);
+                return -EINVAL;
+        }
+
+        /* Set LSR 0 */
+        lsr0 = (memSize > (512 * 1024 * 1024)) ? 0x1ff00001 : ((r2p2(memSize) - 0x100000) | 0x1);
+        SH5PCI_WRITE(LSR0, lsr0);
+
+	pr_debug("PCI:Writing 0x%08x to LSR0\n",lsr0);
+
+        /* Set MBAR 0 */
+        SH5PCI_WRITE(CSR_MBAR0, memory_start);
+        SH5PCI_WRITE(LAR0, memory_start);
+
+        SH5PCI_WRITE(CSR_MBAR1,0);
+        SH5PCI_WRITE(LAR1,0);
+        SH5PCI_WRITE(LSR1,0);
+
+	pr_debug("PCI:Writing 0x%08llx to CSR_MBAR0\n",memory_start);
+	pr_debug("PCI:Writing 0x%08llx to LAR0\n",memory_start);
+
+        /* Enable the PCI interrupts on the device */
+        SH5PCI_WRITE(INTM,  ~0);
+        SH5PCI_WRITE(AINTM, ~0);
+        SH5PCI_WRITE(PINTM, ~0);
+
+	pr_debug("Switching on all error interrupts\n");
+
+        return(0);
+}
+
+static int sh5pci_read(struct pci_bus *bus, unsigned int devfn, int where,
+			int size, u32 *val)
+{
+	SH5PCI_WRITE(PAR, CONFIG_CMD(bus, devfn, where));
+
+	switch (size) {
+		case 1:
+			*val = (u8)SH5PCI_READ_BYTE(PDR + (where & 3));
+			break;
+		case 2:
+			*val = (u16)SH5PCI_READ_SHORT(PDR + (where & 2));
+			break;
+		case 4:
+			*val = SH5PCI_READ(PDR);
+			break;
+	}
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static int sh5pci_write(struct pci_bus *bus, unsigned int devfn, int where,
+			 int size, u32 val)
+{
+	SH5PCI_WRITE(PAR, CONFIG_CMD(bus, devfn, where));
+
+	switch (size) {
+		case 1:
+			SH5PCI_WRITE_BYTE(PDR + (where & 3), (u8)val);
+			break;
+		case 2:
+			SH5PCI_WRITE_SHORT(PDR + (where & 2), (u16)val);
+			break;
+		case 4:
+			SH5PCI_WRITE(PDR, val);
+			break;
+	}
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static struct pci_ops pci_config_ops = {
+	.read =		sh5pci_read,
+	.write =	sh5pci_write,
+};
+
+/* Everything hangs off this */
+static struct pci_bus *pci_root_bus;
+
+
+static u8 __init no_swizzle(struct pci_dev *dev, u8 * pin)
+{
+	pr_debug("swizzle for dev %d on bus %d slot %d pin is %d\n",
+	         dev->devfn,dev->bus->number, PCI_SLOT(dev->devfn),*pin);
+	return PCI_SLOT(dev->devfn);
+}
+
+static inline u8 bridge_swizzle(u8 pin, u8 slot)
+{
+	return (((pin-1) + slot) % 4) + 1;
+}
+
+u8 __init common_swizzle(struct pci_dev *dev, u8 *pinp)
+{
+	if (dev->bus->number != 0) {
+		u8 pin = *pinp;
+		do {
+			pin = bridge_swizzle(pin, PCI_SLOT(dev->devfn));
+			/* Move up the chain of bridges. */
+			dev = dev->bus->self;
+		} while (dev->bus->self);
+		*pinp = pin;
+
+		/* The slot is the slot of the last bridge. */
+	}
+
+	return PCI_SLOT(dev->devfn);
+}
+
+/* This needs to be shunted out of here into the board specific bit */
+
+static int __init map_cayman_irq(struct pci_dev *dev, u8 slot, u8 pin)
+{
+	int result = -1;
+
+	/* The complication here is that the PCI IRQ lines from the Cayman's 2
+	   5V slots get into the CPU via a different path from the IRQ lines
+	   from the 3 3.3V slots.  Thus, we have to detect whether the card's
+	   interrupts go via the 5V or 3.3V path, i.e. the 'bridge swizzling'
+	   at the point where we cross from 5V to 3.3V is not the normal case.
+
+	   The added complication is that we don't know that the 5V slots are
+	   always bus 2, because a card containing a PCI-PCI bridge may be
+	   plugged into a 3.3V slot, and this changes the bus numbering.
+
+	   Also, the Cayman has an intermediate PCI bus that goes a custom
+	   expansion board header (and to the secondary bridge).  This bus has
+	   never been used in practice.
+
+	   The 1ary onboard PCI-PCI bridge is device 3 on bus 0
+	   The 2ary onboard PCI-PCI bridge is device 0 on the 2ary bus of the 1ary bridge.
+	   */
+
+	struct slot_pin {
+		int slot;
+		int pin;
+	} path[4];
+	int i=0;
+
+	while (dev->bus->number > 0) {
+
+		slot = path[i].slot = PCI_SLOT(dev->devfn);
+		pin = path[i].pin = bridge_swizzle(pin, slot);
+		dev = dev->bus->self;
+		i++;
+		if (i > 3) panic("PCI path to root bus too long!\n");
+	}
+
+	slot = PCI_SLOT(dev->devfn);
+	/* This is the slot on bus 0 through which the device is eventually
+	   reachable. */
+
+	/* Now work back up. */
+	if ((slot < 3) || (i == 0)) {
+		/* Bus 0 (incl. PCI-PCI bridge itself) : perform the final
+		   swizzle now. */
+		result = IRQ_INTA + bridge_swizzle(pin, slot) - 1;
+	} else {
+		i--;
+		slot = path[i].slot;
+		pin  = path[i].pin;
+		if (slot > 0) {
+			panic("PCI expansion bus device found - not handled!\n");
+		} else {
+			if (i > 0) {
+				/* 5V slots */
+				i--;
+				slot = path[i].slot;
+				pin  = path[i].pin;
+				/* 'pin' was swizzled earlier wrt slot, don't do it again. */
+				result = IRQ_P2INTA + (pin - 1);
+			} else {
+				/* IRQ for 2ary PCI-PCI bridge : unused */
+				result = -1;
+			}
+		}
+	}
+
+	return result;
+}
+
+irqreturn_t pcish5_err_irq(int irq, void *dev_id, struct pt_regs *regs)
+{
+	unsigned pci_int, pci_air, pci_cir, pci_aint;
+
+	pci_int = SH5PCI_READ(INT);
+	pci_cir = SH5PCI_READ(CIR);
+	pci_air = SH5PCI_READ(AIR);
+
+	if (pci_int) {
+		printk("PCI INTERRUPT (at %08llx)!\n", regs->pc);
+		printk("PCI INT -> 0x%x\n", pci_int & 0xffff);
+		printk("PCI AIR -> 0x%x\n", pci_air);
+		printk("PCI CIR -> 0x%x\n", pci_cir);
+		SH5PCI_WRITE(INT, ~0);
+	}
+
+	pci_aint = SH5PCI_READ(AINT);
+	if (pci_aint) {
+		printk("PCI ARB INTERRUPT!\n");
+		printk("PCI AINT -> 0x%x\n", pci_aint);
+		printk("PCI AIR -> 0x%x\n", pci_air);
+		printk("PCI CIR -> 0x%x\n", pci_cir);
+		SH5PCI_WRITE(AINT, ~0);
+	}
+
+	return IRQ_HANDLED;
+}
+
+irqreturn_t pcish5_serr_irq(int irq, void *dev_id, struct pt_regs *regs)
+{
+	printk("SERR IRQ\n");
+
+	return IRQ_NONE;
+}
+
+#define ROUND_UP(x, a)		(((x) + (a) - 1) & ~((a) - 1))
+
+static void __init
+pcibios_size_bridge(struct pci_bus *bus, struct resource *ior,
+		    struct resource *memr)
+{
+	struct resource io_res, mem_res;
+	struct pci_dev *dev;
+	struct pci_dev *bridge = bus->self;
+	struct list_head *ln;
+
+	if (!bridge)
+		return;	/* host bridge, nothing to do */
+
+	/* set reasonable default locations for pcibios_align_resource */
+	io_res.start = PCIBIOS_MIN_IO;
+	mem_res.start = PCIBIOS_MIN_MEM;
+
+	io_res.end = io_res.start;
+	mem_res.end = mem_res.start;
+
+	/* Collect information about how our direct children are layed out. */
+	for (ln=bus->devices.next; ln != &bus->devices; ln=ln->next) {
+		int i;
+		dev = pci_dev_b(ln);
+
+		/* Skip bridges for now */
+		if (dev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
+			continue;
+
+		for (i = 0; i < PCI_NUM_RESOURCES; i++) {
+			struct resource res;
+			unsigned long size;
+
+			memcpy(&res, &dev->resource[i], sizeof(res));
+			size = res.end - res.start + 1;
+
+			if (res.flags & IORESOURCE_IO) {
+				res.start = io_res.end;
+				pcibios_align_resource(dev, &res, size, 0);
+				io_res.end = res.start + size;
+			} else if (res.flags & IORESOURCE_MEM) {
+				res.start = mem_res.end;
+				pcibios_align_resource(dev, &res, size, 0);
+				mem_res.end = res.start + size;
+			}
+		}
+	}
+
+	/* And for all of the subordinate busses. */
+	for (ln=bus->children.next; ln != &bus->children; ln=ln->next)
+		pcibios_size_bridge(pci_bus_b(ln), &io_res, &mem_res);
+
+	/* turn the ending locations into sizes (subtract start) */
+	io_res.end -= io_res.start;
+	mem_res.end -= mem_res.start;
+
+	/* Align the sizes up by bridge rules */
+	io_res.end = ROUND_UP(io_res.end, 4*1024) - 1;
+	mem_res.end = ROUND_UP(mem_res.end, 1*1024*1024) - 1;
+
+	/* Adjust the bridge's allocation requirements */
+	bridge->resource[0].end = bridge->resource[0].start + io_res.end;
+	bridge->resource[1].end = bridge->resource[1].start + mem_res.end;
+
+	bridge->resource[PCI_BRIDGE_RESOURCES].end =
+	    bridge->resource[PCI_BRIDGE_RESOURCES].start + io_res.end;
+	bridge->resource[PCI_BRIDGE_RESOURCES+1].end =
+	    bridge->resource[PCI_BRIDGE_RESOURCES+1].start + mem_res.end;
+
+	/* adjust parent's resource requirements */
+	if (ior) {
+		ior->end = ROUND_UP(ior->end, 4*1024);
+		ior->end += io_res.end;
+	}
+
+	if (memr) {
+		memr->end = ROUND_UP(memr->end, 1*1024*1024);
+		memr->end += mem_res.end;
+	}
+}
+
+#undef ROUND_UP
+
+static void __init pcibios_size_bridges(void)
+{
+	struct resource io_res, mem_res;
+
+	memset(&io_res, 0, sizeof(io_res));
+	memset(&mem_res, 0, sizeof(mem_res));
+
+	pcibios_size_bridge(pci_root_bus, &io_res, &mem_res);
+}
+
+static int __init pcibios_init(void)
+{
+        if (request_irq(IRQ_ERR, pcish5_err_irq,
+                        SA_INTERRUPT, "PCI Error",NULL) < 0) {
+                printk(KERN_ERR "PCISH5: Cannot hook PCI_PERR interrupt\n");
+                return -EINVAL;
+        }
+
+        if (request_irq(IRQ_SERR, pcish5_serr_irq,
+                        SA_INTERRUPT, "PCI SERR interrupt", NULL) < 0) {
+                printk(KERN_ERR "PCISH5: Cannot hook PCI_SERR interrupt\n");
+                return -EINVAL;
+        }
+
+	/* The pci subsytem needs to know where memory is and how much
+	 * of it there is. I've simply made these globals. A better mechanism
+	 * is probably needed.
+	 */
+	sh5pci_init(__pa(memory_start),
+		     __pa(memory_end) - __pa(memory_start));
+
+	pci_root_bus = pci_scan_bus(0, &pci_config_ops, NULL);
+	pcibios_size_bridges();
+	pci_assign_unassigned_resources();
+	pci_fixup_irqs(no_swizzle, map_cayman_irq);
+
+	return 0;
+}
+
+subsys_initcall(pcibios_init);
+
+void __init pcibios_fixup_bus(struct pci_bus *bus)
+{
+	struct pci_dev *dev = bus->self;
+	int i;
+
+#if 1
+	if(dev) {
+		for(i=0; i<3; i++) {
+			bus->resource[i] =
+				&dev->resource[PCI_BRIDGE_RESOURCES+i];
+			bus->resource[i]->name = bus->name;
+		}
+		bus->resource[0]->flags |= IORESOURCE_IO;
+		bus->resource[1]->flags |= IORESOURCE_MEM;
+
+		/* For now, propagate host limits to the bus;
+		 * we'll adjust them later. */
+
+#if 1
+		bus->resource[0]->end = 64*1024 - 1 ;
+		bus->resource[1]->end = PCIBIOS_MIN_MEM+(256*1024*1024)-1;
+		bus->resource[0]->start = PCIBIOS_MIN_IO;
+		bus->resource[1]->start = PCIBIOS_MIN_MEM;
+#else
+		bus->resource[0]->end = 0
+		bus->resource[1]->end = 0
+		bus->resource[0]->start =0
+		  bus->resource[1]->start = 0;
+#endif
+		/* Turn off downstream PF memory address range by default */
+		bus->resource[2]->start = 1024*1024;
+		bus->resource[2]->end = bus->resource[2]->start - 1;
+	}
+#endif
+
+}
+
diff --git a/arch/sh64/kernel/pci_sh5.h b/arch/sh64/kernel/pci_sh5.h
new file mode 100644
index 0000000..8f21f5d
--- /dev/null
+++ b/arch/sh64/kernel/pci_sh5.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright (C) 2001 David J. Mckay (david.mckay@st.com)
+ *
+ * May be copied or modified under the terms of the GNU General Public
+ * License.  See linux/COPYING for more information.
+ *
+ * Defintions for the SH5 PCI hardware.
+ */
+
+/* Product ID */
+#define PCISH5_PID		0x350d
+
+/* vendor ID */
+#define PCISH5_VID		0x1054
+
+/* Configuration types */
+#define ST_TYPE0                0x00    /* Configuration cycle type 0 */
+#define ST_TYPE1                0x01    /* Configuration cycle type 1 */
+
+/* VCR data */
+#define PCISH5_VCR_STATUS      0x00
+#define PCISH5_VCR_VERSION     0x08
+
+/*
+** ICR register offsets and bits
+*/
+#define PCISH5_ICR_CR          0x100   /* PCI control register values */
+#define CR_PBAM                 (1<<12)
+#define CR_PFCS                 (1<<11)
+#define CR_FTO                  (1<<10)
+#define CR_PFE                  (1<<9)
+#define CR_TBS                  (1<<8)
+#define CR_SPUE                 (1<<7)
+#define CR_BMAM                 (1<<6)
+#define CR_HOST                 (1<<5)
+#define CR_CLKEN                (1<<4)
+#define CR_SOCS                 (1<<3)
+#define CR_IOCS                 (1<<2)
+#define CR_RSTCTL               (1<<1)
+#define CR_CFINT                (1<<0)
+#define CR_LOCK_MASK            0xa5000000
+
+#define PCISH5_ICR_INT         0x114   /* Interrupt registert values     */
+#define INT_MADIM               (1<<2)
+
+#define PCISH5_ICR_LSR0        0X104   /* Local space register values    */
+#define PCISH5_ICR_LSR1        0X108   /* Local space register values    */
+#define PCISH5_ICR_LAR0        0x10c   /* Local address register values  */
+#define PCISH5_ICR_LAR1        0x110   /* Local address register values  */
+#define PCISH5_ICR_INTM        0x118   /* Interrupt mask register values                         */
+#define PCISH5_ICR_AIR         0x11c   /* Interrupt error address information register values    */
+#define PCISH5_ICR_CIR         0x120   /* Interrupt error command information register values    */
+#define PCISH5_ICR_AINT        0x130   /* Interrupt error arbiter interrupt register values      */
+#define PCISH5_ICR_AINTM       0x134   /* Interrupt error arbiter interrupt mask register values */
+#define PCISH5_ICR_BMIR        0x138   /* Interrupt error info register of bus master values     */
+#define PCISH5_ICR_PAR         0x1c0   /* Pio address register values                            */
+#define PCISH5_ICR_MBR         0x1c4   /* Memory space bank register values                      */
+#define PCISH5_ICR_IOBR        0x1c8   /* I/O space bank register values                         */
+#define PCISH5_ICR_PINT        0x1cc   /* power management interrupt register values             */
+#define PCISH5_ICR_PINTM       0x1d0   /* power management interrupt mask register values        */
+#define PCISH5_ICR_MBMR        0x1d8   /* memory space bank mask register values                 */
+#define PCISH5_ICR_IOBMR       0x1dc   /* I/O space bank mask register values                    */
+#define PCISH5_ICR_CSCR0       0x210   /* PCI cache snoop control register 0                     */
+#define PCISH5_ICR_CSCR1       0x214   /* PCI cache snoop control register 1                     */
+#define PCISH5_ICR_PDR         0x220   /* Pio data register values                               */
+
+/* These are configs space registers */
+#define PCISH5_ICR_CSR_VID     0x000	/* Vendor id                           */
+#define PCISH5_ICR_CSR_DID     0x002   /* Device id                           */
+#define PCISH5_ICR_CSR_CMD     0x004   /* Command register                    */
+#define PCISH5_ICR_CSR_STATUS  0x006   /* Stautus                             */
+#define PCISH5_ICR_CSR_IBAR0   0x010   /* I/O base address register           */
+#define PCISH5_ICR_CSR_MBAR0   0x014   /* First  Memory base address register */
+#define PCISH5_ICR_CSR_MBAR1   0x018   /* Second Memory base address register */
+
+
+
+/* Base address of registers */
+#define SH5PCI_ICR_BASE (PHYS_PCI_BLOCK + 0x00040000)
+#define SH5PCI_IO_BASE  (PHYS_PCI_BLOCK + 0x00800000)
+/* #define SH5PCI_VCR_BASE (P2SEG_PCICB_BLOCK + P2SEG)    */
+
+/* Register selection macro */
+#define PCISH5_ICR_REG(x)                ( pcicr_virt + (PCISH5_ICR_##x))
+/* #define PCISH5_VCR_REG(x)                ( SH5PCI_VCR_BASE (PCISH5_VCR_##x)) */
+
+/* Write I/O functions */
+#define SH5PCI_WRITE(reg,val)        ctrl_outl((u32)(val),PCISH5_ICR_REG(reg))
+#define SH5PCI_WRITE_SHORT(reg,val)  ctrl_outw((u16)(val),PCISH5_ICR_REG(reg))
+#define SH5PCI_WRITE_BYTE(reg,val)   ctrl_outb((u8)(val),PCISH5_ICR_REG(reg))
+
+/* Read I/O functions */
+#define SH5PCI_READ(reg)             ctrl_inl(PCISH5_ICR_REG(reg))
+#define SH5PCI_READ_SHORT(reg)       ctrl_inw(PCISH5_ICR_REG(reg))
+#define SH5PCI_READ_BYTE(reg)        ctrl_inb(PCISH5_ICR_REG(reg))
+
+/* Set PCI config bits */
+#define SET_CONFIG_BITS(bus,devfn,where)  ((((bus) << 16) | ((devfn) << 8) | ((where) & ~3)) | 0x80000000)
+
+/* Set PCI command register */
+#define CONFIG_CMD(bus, devfn, where)            SET_CONFIG_BITS(bus->number,devfn,where)
+
+/* Size converters */
+#define PCISH5_MEM_SIZCONV(x)		  (((x / 0x40000) - 1) << 18)
+#define PCISH5_IO_SIZCONV(x)		  (((x / 0x40000) - 1) << 18)
+
+
diff --git a/arch/sh64/kernel/pcibios.c b/arch/sh64/kernel/pcibios.c
new file mode 100644
index 0000000..50c61dc
--- /dev/null
+++ b/arch/sh64/kernel/pcibios.c
@@ -0,0 +1,168 @@
+/*
+ * $Id: pcibios.c,v 1.1 2001/08/24 12:38:19 dwmw2 Exp $
+ *
+ * arch/sh/kernel/pcibios.c
+ *
+ * Copyright (C) 2002 STMicroelectronics Limited
+ *   Author : David J. McKay
+ *
+ * Copyright (C) 2004 Richard Curnow, SuperH UK Limited
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ * This is GPL'd.
+ *
+ * Provided here are generic versions of:
+ *	pcibios_update_resource()
+ *	pcibios_align_resource()
+ *	pcibios_enable_device()
+ *	pcibios_set_master()
+ *	pcibios_update_irq()
+ *
+ * These functions are collected here to reduce duplication of common
+ * code amongst the many platform-specific PCI support code files.
+ *
+ * Platform-specific files are expected to provide:
+ *	pcibios_fixup_bus()
+ *	pcibios_init()
+ *	pcibios_setup()
+ *	pcibios_fixup_pbus_ranges()
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+
+void
+pcibios_update_resource(struct pci_dev *dev, struct resource *root,
+			struct resource *res, int resource)
+{
+	u32 new, check;
+	int reg;
+
+	new = res->start | (res->flags & PCI_REGION_FLAG_MASK);
+	if (resource < 6) {
+		reg = PCI_BASE_ADDRESS_0 + 4*resource;
+	} else if (resource == PCI_ROM_RESOURCE) {
+		res->flags |= IORESOURCE_ROM_ENABLE;
+		new |= PCI_ROM_ADDRESS_ENABLE;
+		reg = dev->rom_base_reg;
+	} else {
+		/* Somebody might have asked allocation of a non-standard resource */
+		return;
+	}
+
+	pci_write_config_dword(dev, reg, new);
+	pci_read_config_dword(dev, reg, &check);
+	if ((new ^ check) & ((new & PCI_BASE_ADDRESS_SPACE_IO) ? PCI_BASE_ADDRESS_IO_MASK : PCI_BASE_ADDRESS_MEM_MASK)) {
+		printk(KERN_ERR "PCI: Error while updating region "
+		       "%s/%d (%08x != %08x)\n", pci_name(dev), resource,
+		       new, check);
+	}
+}
+
+/*
+ * We need to avoid collisions with `mirrored' VGA ports
+ * and other strange ISA hardware, so we always want the
+ * addresses to be allocated in the 0x000-0x0ff region
+ * modulo 0x400.
+ */
+void pcibios_align_resource(void *data, struct resource *res,
+			    unsigned long size, unsigned long align)
+{
+	if (res->flags & IORESOURCE_IO) {
+		unsigned long start = res->start;
+
+		if (start & 0x300) {
+			start = (start + 0x3ff) & ~0x3ff;
+			res->start = start;
+		}
+	}
+}
+
+static void pcibios_enable_bridge(struct pci_dev *dev)
+{
+	struct pci_bus *bus = dev->subordinate;
+	u16 cmd, old_cmd;
+
+	pci_read_config_word(dev, PCI_COMMAND, &cmd);
+	old_cmd = cmd;
+
+	if (bus->resource[0]->flags & IORESOURCE_IO) {
+		cmd |= PCI_COMMAND_IO;
+	}
+	if ((bus->resource[1]->flags & IORESOURCE_MEM) ||
+	    (bus->resource[2]->flags & IORESOURCE_PREFETCH)) {
+		cmd |= PCI_COMMAND_MEMORY;
+	}
+
+	if (cmd != old_cmd) {
+		pci_write_config_word(dev, PCI_COMMAND, cmd);
+	}
+
+	printk("PCI bridge %s, command register -> %04x\n",
+		pci_name(dev), cmd);
+
+}
+
+
+
+int pcibios_enable_device(struct pci_dev *dev, int mask)
+{
+	u16 cmd, old_cmd;
+	int idx;
+	struct resource *r;
+
+	if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI) {
+		pcibios_enable_bridge(dev);
+	}
+
+	pci_read_config_word(dev, PCI_COMMAND, &cmd);
+	old_cmd = cmd;
+	for(idx=0; idx<6; idx++) {
+		if (!(mask & (1 << idx)))
+			continue;
+		r = &dev->resource[idx];
+		if (!r->start && r->end) {
+			printk(KERN_ERR "PCI: Device %s not available because of resource collisions\n", pci_name(dev));
+			return -EINVAL;
+		}
+		if (r->flags & IORESOURCE_IO)
+			cmd |= PCI_COMMAND_IO;
+		if (r->flags & IORESOURCE_MEM)
+			cmd |= PCI_COMMAND_MEMORY;
+	}
+	if (dev->resource[PCI_ROM_RESOURCE].start)
+		cmd |= PCI_COMMAND_MEMORY;
+	if (cmd != old_cmd) {
+		printk(KERN_INFO "PCI: Enabling device %s (%04x -> %04x)\n", pci_name(dev), old_cmd, cmd);
+		pci_write_config_word(dev, PCI_COMMAND, cmd);
+	}
+	return 0;
+}
+
+/*
+ *  If we set up a device for bus mastering, we need to check and set
+ *  the latency timer as it may not be properly set.
+ */
+unsigned int pcibios_max_latency = 255;
+
+void pcibios_set_master(struct pci_dev *dev)
+{
+	u8 lat;
+	pci_read_config_byte(dev, PCI_LATENCY_TIMER, &lat);
+	if (lat < 16)
+		lat = (64 <= pcibios_max_latency) ? 64 : pcibios_max_latency;
+	else if (lat > pcibios_max_latency)
+		lat = pcibios_max_latency;
+	else
+		return;
+	printk(KERN_INFO "PCI: Setting latency timer of device %s to %d\n", pci_name(dev), lat);
+	pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat);
+}
+
+void __init pcibios_update_irq(struct pci_dev *dev, int irq)
+{
+	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
+}
diff --git a/arch/sh64/kernel/process.c b/arch/sh64/kernel/process.c
new file mode 100644
index 0000000..efde41c
--- /dev/null
+++ b/arch/sh64/kernel/process.c
@@ -0,0 +1,962 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * arch/sh64/kernel/process.c
+ *
+ * Copyright (C) 2000, 2001  Paolo Alberelli
+ * Copyright (C) 2003  Paul Mundt
+ * Copyright (C) 2003, 2004 Richard Curnow
+ *
+ * Started from SH3/4 version:
+ *   Copyright (C) 1999, 2000  Niibe Yutaka & Kaz Kojima
+ *
+ *   In turn started from i386 version:
+ *     Copyright (C) 1995  Linus Torvalds
+ *
+ */
+
+/*
+ * This file handles the architecture-dependent parts of process handling..
+ */
+
+/* Temporary flags/tests. All to be removed/undefined. BEGIN */
+#define IDLE_TRACE
+#define VM_SHOW_TABLES
+#define VM_TEST_FAULT
+#define VM_TEST_RTLBMISS
+#define VM_TEST_WTLBMISS
+
+#undef VM_SHOW_TABLES
+#undef IDLE_TRACE
+/* Temporary flags/tests. All to be removed/undefined. END */
+
+#define __KERNEL_SYSCALLS__
+#include <stdarg.h>
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/rwsem.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/ptrace.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/user.h>
+#include <linux/a.out.h>
+#include <linux/interrupt.h>
+#include <linux/unistd.h>
+#include <linux/delay.h>
+#include <linux/reboot.h>
+#include <linux/init.h>
+
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/processor.h>		/* includes also <asm/registers.h> */
+#include <asm/mmu_context.h>
+#include <asm/elf.h>
+#include <asm/page.h>
+
+#include <linux/irq.h>
+
+struct task_struct *last_task_used_math = NULL;
+
+#ifdef IDLE_TRACE
+#ifdef VM_SHOW_TABLES
+/* For testing */
+static void print_PTE(long base)
+{
+	int i, skip=0;
+	long long x, y, *p = (long long *) base;
+
+	for (i=0; i< 512; i++, p++){
+		if (*p == 0) {
+			if (!skip) {
+				skip++;
+				printk("(0s) ");
+			}
+		} else {
+			skip=0;
+			x = (*p) >> 32;
+			y = (*p) & 0xffffffff;
+			printk("%08Lx%08Lx ", x, y);
+			if (!((i+1)&0x3)) printk("\n");
+		}
+	}
+}
+
+/* For testing */
+static void print_DIR(long base)
+{
+	int i, skip=0;
+	long *p = (long *) base;
+
+	for (i=0; i< 512; i++, p++){
+		if (*p == 0) {
+			if (!skip) {
+				skip++;
+				printk("(0s) ");
+			}
+		} else {
+			skip=0;
+			printk("%08lx ", *p);
+			if (!((i+1)&0x7)) printk("\n");
+		}
+	}
+}
+
+/* For testing */
+static void print_vmalloc_first_tables(void)
+{
+
+#define PRESENT	0x800	/* Bit 11 */
+
+	/*
+	 * Do it really dirty by looking at raw addresses,
+         * raw offsets, no types. If we used pgtable/pgalloc
+	 * macros/definitions we could hide potential bugs.
+	 *
+	 * Note that pointers are 32-bit for CDC.
+	 */
+	long pgdt, pmdt, ptet;
+
+	pgdt = (long) &swapper_pg_dir;
+	printk("-->PGD (0x%08lx):\n", pgdt);
+	print_DIR(pgdt);
+	printk("\n");
+
+	/* VMALLOC pool is mapped at 0xc0000000, second (pointer) entry in PGD */
+	pgdt += 4;
+	pmdt = (long) (* (long *) pgdt);
+	if (!(pmdt & PRESENT)) {
+		printk("No PMD\n");
+		return;
+	} else pmdt &= 0xfffff000;
+
+	printk("-->PMD (0x%08lx):\n", pmdt);
+	print_DIR(pmdt);
+	printk("\n");
+
+	/* Get the pmdt displacement for 0xc0000000 */
+	pmdt += 2048;
+
+	/* just look at first two address ranges ... */
+        /* ... 0xc0000000 ... */
+	ptet = (long) (* (long *) pmdt);
+	if (!(ptet & PRESENT)) {
+		printk("No PTE0\n");
+		return;
+	} else ptet &= 0xfffff000;
+
+	printk("-->PTE0 (0x%08lx):\n", ptet);
+	print_PTE(ptet);
+	printk("\n");
+
+        /* ... 0xc0001000 ... */
+	ptet += 4;
+	if (!(ptet & PRESENT)) {
+		printk("No PTE1\n");
+		return;
+	} else ptet &= 0xfffff000;
+	printk("-->PTE1 (0x%08lx):\n", ptet);
+	print_PTE(ptet);
+	printk("\n");
+}
+#else
+#define print_vmalloc_first_tables()
+#endif	/* VM_SHOW_TABLES */
+
+static void test_VM(void)
+{
+	void *a, *b, *c;
+
+#ifdef VM_SHOW_TABLES
+	printk("Initial PGD/PMD/PTE\n");
+#endif
+        print_vmalloc_first_tables();
+
+	printk("Allocating 2 bytes\n");
+	a = vmalloc(2);
+        print_vmalloc_first_tables();
+
+	printk("Allocating 4100 bytes\n");
+	b = vmalloc(4100);
+        print_vmalloc_first_tables();
+
+	printk("Allocating 20234 bytes\n");
+	c = vmalloc(20234);
+        print_vmalloc_first_tables();
+
+#ifdef VM_TEST_FAULT
+	/* Here you may want to fault ! */
+
+#ifdef VM_TEST_RTLBMISS
+	printk("Ready to fault upon read.\n");
+	if (* (char *) a) {
+		printk("RTLBMISSed on area a !\n");
+	}
+	printk("RTLBMISSed on area a !\n");
+#endif
+
+#ifdef VM_TEST_WTLBMISS
+	printk("Ready to fault upon write.\n");
+	*((char *) b) = 'L';
+	printk("WTLBMISSed on area b !\n");
+#endif
+
+#endif	/* VM_TEST_FAULT */
+
+	printk("Deallocating the 4100 byte chunk\n");
+	vfree(b);
+        print_vmalloc_first_tables();
+
+	printk("Deallocating the 2 byte chunk\n");
+	vfree(a);
+        print_vmalloc_first_tables();
+
+	printk("Deallocating the last chunk\n");
+	vfree(c);
+        print_vmalloc_first_tables();
+}
+
+extern unsigned long volatile jiffies;
+int once = 0;
+unsigned long old_jiffies;
+int pid = -1, pgid = -1;
+
+void idle_trace(void)
+{
+
+	_syscall0(int, getpid)
+	_syscall1(int, getpgid, int, pid)
+
+	if (!once) {
+        	/* VM allocation/deallocation simple test */
+		test_VM();
+		pid = getpid();
+
+        	printk("Got all through to Idle !!\n");
+        	printk("I'm now going to loop forever ...\n");
+        	printk("Any ! below is a timer tick.\n");
+		printk("Any . below is a getpgid system call from pid = %d.\n", pid);
+
+
+        	old_jiffies = jiffies;
+		once++;
+	}
+
+	if (old_jiffies != jiffies) {
+		old_jiffies = jiffies - old_jiffies;
+		switch (old_jiffies) {
+		case 1:
+			printk("!");
+			break;
+		case 2:
+			printk("!!");
+			break;
+		case 3:
+			printk("!!!");
+			break;
+		case 4:
+			printk("!!!!");
+			break;
+		default:
+			printk("(%d!)", (int) old_jiffies);
+		}
+		old_jiffies = jiffies;
+	}
+	pgid = getpgid(pid);
+	printk(".");
+}
+#else
+#define idle_trace()	do { } while (0)
+#endif	/* IDLE_TRACE */
+
+static int hlt_counter = 1;
+
+#define HARD_IDLE_TIMEOUT (HZ / 3)
+
+void disable_hlt(void)
+{
+	hlt_counter++;
+}
+
+void enable_hlt(void)
+{
+	hlt_counter--;
+}
+
+static int __init nohlt_setup(char *__unused)
+{
+	hlt_counter = 1;
+	return 1;
+}
+
+static int __init hlt_setup(char *__unused)
+{
+	hlt_counter = 0;
+	return 1;
+}
+
+__setup("nohlt", nohlt_setup);
+__setup("hlt", hlt_setup);
+
+static inline void hlt(void)
+{
+	if (hlt_counter)
+		return;
+
+	__asm__ __volatile__ ("sleep" : : : "memory");
+}
+
+/*
+ * The idle loop on a uniprocessor SH..
+ */
+void default_idle(void)
+{
+	/* endless idle loop with no priority at all */
+	while (1) {
+		if (hlt_counter) {
+			while (1)
+				if (need_resched())
+					break;
+		} else {
+			local_irq_disable();
+			while (!need_resched()) {
+				local_irq_enable();
+				idle_trace();
+				hlt();
+				local_irq_disable();
+			}
+			local_irq_enable();
+		}
+		schedule();
+	}
+}
+
+void cpu_idle(void)
+{
+	default_idle();
+}
+
+void machine_restart(char * __unused)
+{
+	extern void phys_stext(void);
+
+	phys_stext();
+}
+
+void machine_halt(void)
+{
+	for (;;);
+}
+
+void machine_power_off(void)
+{
+	extern void enter_deep_standby(void);
+
+	enter_deep_standby();
+}
+
+void show_regs(struct pt_regs * regs)
+{
+	unsigned long long ah, al, bh, bl, ch, cl;
+
+	printk("\n");
+
+	ah = (regs->pc) >> 32;
+	al = (regs->pc) & 0xffffffff;
+	bh = (regs->regs[18]) >> 32;
+	bl = (regs->regs[18]) & 0xffffffff;
+	ch = (regs->regs[15]) >> 32;
+	cl = (regs->regs[15]) & 0xffffffff;
+	printk("PC  : %08Lx%08Lx LINK: %08Lx%08Lx SP  : %08Lx%08Lx\n",
+	       ah, al, bh, bl, ch, cl);
+
+	ah = (regs->sr) >> 32;
+	al = (regs->sr) & 0xffffffff;
+        asm volatile ("getcon   " __TEA ", %0" : "=r" (bh));
+        asm volatile ("getcon   " __TEA ", %0" : "=r" (bl));
+	bh = (bh) >> 32;
+	bl = (bl) & 0xffffffff;
+        asm volatile ("getcon   " __KCR0 ", %0" : "=r" (ch));
+        asm volatile ("getcon   " __KCR0 ", %0" : "=r" (cl));
+	ch = (ch) >> 32;
+	cl = (cl) & 0xffffffff;
+	printk("SR  : %08Lx%08Lx TEA : %08Lx%08Lx KCR0: %08Lx%08Lx\n",
+	       ah, al, bh, bl, ch, cl);
+
+	ah = (regs->regs[0]) >> 32;
+	al = (regs->regs[0]) & 0xffffffff;
+	bh = (regs->regs[1]) >> 32;
+	bl = (regs->regs[1]) & 0xffffffff;
+	ch = (regs->regs[2]) >> 32;
+	cl = (regs->regs[2]) & 0xffffffff;
+	printk("R0  : %08Lx%08Lx R1  : %08Lx%08Lx R2  : %08Lx%08Lx\n",
+	       ah, al, bh, bl, ch, cl);
+
+	ah = (regs->regs[3]) >> 32;
+	al = (regs->regs[3]) & 0xffffffff;
+	bh = (regs->regs[4]) >> 32;
+	bl = (regs->regs[4]) & 0xffffffff;
+	ch = (regs->regs[5]) >> 32;
+	cl = (regs->regs[5]) & 0xffffffff;
+	printk("R3  : %08Lx%08Lx R4  : %08Lx%08Lx R5  : %08Lx%08Lx\n",
+	       ah, al, bh, bl, ch, cl);
+
+	ah = (regs->regs[6]) >> 32;
+	al = (regs->regs[6]) & 0xffffffff;
+	bh = (regs->regs[7]) >> 32;
+	bl = (regs->regs[7]) & 0xffffffff;
+	ch = (regs->regs[8]) >> 32;
+	cl = (regs->regs[8]) & 0xffffffff;
+	printk("R6  : %08Lx%08Lx R7  : %08Lx%08Lx R8  : %08Lx%08Lx\n",
+	       ah, al, bh, bl, ch, cl);
+
+	ah = (regs->regs[9]) >> 32;
+	al = (regs->regs[9]) & 0xffffffff;
+	bh = (regs->regs[10]) >> 32;
+	bl = (regs->regs[10]) & 0xffffffff;
+	ch = (regs->regs[11]) >> 32;
+	cl = (regs->regs[11]) & 0xffffffff;
+	printk("R9  : %08Lx%08Lx R10 : %08Lx%08Lx R11 : %08Lx%08Lx\n",
+	       ah, al, bh, bl, ch, cl);
+
+	ah = (regs->regs[12]) >> 32;
+	al = (regs->regs[12]) & 0xffffffff;
+	bh = (regs->regs[13]) >> 32;
+	bl = (regs->regs[13]) & 0xffffffff;
+	ch = (regs->regs[14]) >> 32;
+	cl = (regs->regs[14]) & 0xffffffff;
+	printk("R12 : %08Lx%08Lx R13 : %08Lx%08Lx R14 : %08Lx%08Lx\n",
+	       ah, al, bh, bl, ch, cl);
+
+	ah = (regs->regs[16]) >> 32;
+	al = (regs->regs[16]) & 0xffffffff;
+	bh = (regs->regs[17]) >> 32;
+	bl = (regs->regs[17]) & 0xffffffff;
+	ch = (regs->regs[19]) >> 32;
+	cl = (regs->regs[19]) & 0xffffffff;
+	printk("R16 : %08Lx%08Lx R17 : %08Lx%08Lx R19 : %08Lx%08Lx\n",
+	       ah, al, bh, bl, ch, cl);
+
+	ah = (regs->regs[20]) >> 32;
+	al = (regs->regs[20]) & 0xffffffff;
+	bh = (regs->regs[21]) >> 32;
+	bl = (regs->regs[21]) & 0xffffffff;
+	ch = (regs->regs[22]) >> 32;
+	cl = (regs->regs[22]) & 0xffffffff;
+	printk("R20 : %08Lx%08Lx R21 : %08Lx%08Lx R22 : %08Lx%08Lx\n",
+	       ah, al, bh, bl, ch, cl);
+
+	ah = (regs->regs[23]) >> 32;
+	al = (regs->regs[23]) & 0xffffffff;
+	bh = (regs->regs[24]) >> 32;
+	bl = (regs->regs[24]) & 0xffffffff;
+	ch = (regs->regs[25]) >> 32;
+	cl = (regs->regs[25]) & 0xffffffff;
+	printk("R23 : %08Lx%08Lx R24 : %08Lx%08Lx R25 : %08Lx%08Lx\n",
+	       ah, al, bh, bl, ch, cl);
+
+	ah = (regs->regs[26]) >> 32;
+	al = (regs->regs[26]) & 0xffffffff;
+	bh = (regs->regs[27]) >> 32;
+	bl = (regs->regs[27]) & 0xffffffff;
+	ch = (regs->regs[28]) >> 32;
+	cl = (regs->regs[28]) & 0xffffffff;
+	printk("R26 : %08Lx%08Lx R27 : %08Lx%08Lx R28 : %08Lx%08Lx\n",
+	       ah, al, bh, bl, ch, cl);
+
+	ah = (regs->regs[29]) >> 32;
+	al = (regs->regs[29]) & 0xffffffff;
+	bh = (regs->regs[30]) >> 32;
+	bl = (regs->regs[30]) & 0xffffffff;
+	ch = (regs->regs[31]) >> 32;
+	cl = (regs->regs[31]) & 0xffffffff;
+	printk("R29 : %08Lx%08Lx R30 : %08Lx%08Lx R31 : %08Lx%08Lx\n",
+	       ah, al, bh, bl, ch, cl);
+
+	ah = (regs->regs[32]) >> 32;
+	al = (regs->regs[32]) & 0xffffffff;
+	bh = (regs->regs[33]) >> 32;
+	bl = (regs->regs[33]) & 0xffffffff;
+	ch = (regs->regs[34]) >> 32;
+	cl = (regs->regs[34]) & 0xffffffff;
+	printk("R32 : %08Lx%08Lx R33 : %08Lx%08Lx R34 : %08Lx%08Lx\n",
+	       ah, al, bh, bl, ch, cl);
+
+	ah = (regs->regs[35]) >> 32;
+	al = (regs->regs[35]) & 0xffffffff;
+	bh = (regs->regs[36]) >> 32;
+	bl = (regs->regs[36]) & 0xffffffff;
+	ch = (regs->regs[37]) >> 32;
+	cl = (regs->regs[37]) & 0xffffffff;
+	printk("R35 : %08Lx%08Lx R36 : %08Lx%08Lx R37 : %08Lx%08Lx\n",
+	       ah, al, bh, bl, ch, cl);
+
+	ah = (regs->regs[38]) >> 32;
+	al = (regs->regs[38]) & 0xffffffff;
+	bh = (regs->regs[39]) >> 32;
+	bl = (regs->regs[39]) & 0xffffffff;
+	ch = (regs->regs[40]) >> 32;
+	cl = (regs->regs[40]) & 0xffffffff;
+	printk("R38 : %08Lx%08Lx R39 : %08Lx%08Lx R40 : %08Lx%08Lx\n",
+	       ah, al, bh, bl, ch, cl);
+
+	ah = (regs->regs[41]) >> 32;
+	al = (regs->regs[41]) & 0xffffffff;
+	bh = (regs->regs[42]) >> 32;
+	bl = (regs->regs[42]) & 0xffffffff;
+	ch = (regs->regs[43]) >> 32;
+	cl = (regs->regs[43]) & 0xffffffff;
+	printk("R41 : %08Lx%08Lx R42 : %08Lx%08Lx R43 : %08Lx%08Lx\n",
+	       ah, al, bh, bl, ch, cl);
+
+	ah = (regs->regs[44]) >> 32;
+	al = (regs->regs[44]) & 0xffffffff;
+	bh = (regs->regs[45]) >> 32;
+	bl = (regs->regs[45]) & 0xffffffff;
+	ch = (regs->regs[46]) >> 32;
+	cl = (regs->regs[46]) & 0xffffffff;
+	printk("R44 : %08Lx%08Lx R45 : %08Lx%08Lx R46 : %08Lx%08Lx\n",
+	       ah, al, bh, bl, ch, cl);
+
+	ah = (regs->regs[47]) >> 32;
+	al = (regs->regs[47]) & 0xffffffff;
+	bh = (regs->regs[48]) >> 32;
+	bl = (regs->regs[48]) & 0xffffffff;
+	ch = (regs->regs[49]) >> 32;
+	cl = (regs->regs[49]) & 0xffffffff;
+	printk("R47 : %08Lx%08Lx R48 : %08Lx%08Lx R49 : %08Lx%08Lx\n",
+	       ah, al, bh, bl, ch, cl);
+
+	ah = (regs->regs[50]) >> 32;
+	al = (regs->regs[50]) & 0xffffffff;
+	bh = (regs->regs[51]) >> 32;
+	bl = (regs->regs[51]) & 0xffffffff;
+	ch = (regs->regs[52]) >> 32;
+	cl = (regs->regs[52]) & 0xffffffff;
+	printk("R50 : %08Lx%08Lx R51 : %08Lx%08Lx R52 : %08Lx%08Lx\n",
+	       ah, al, bh, bl, ch, cl);
+
+	ah = (regs->regs[53]) >> 32;
+	al = (regs->regs[53]) & 0xffffffff;
+	bh = (regs->regs[54]) >> 32;
+	bl = (regs->regs[54]) & 0xffffffff;
+	ch = (regs->regs[55]) >> 32;
+	cl = (regs->regs[55]) & 0xffffffff;
+	printk("R53 : %08Lx%08Lx R54 : %08Lx%08Lx R55 : %08Lx%08Lx\n",
+	       ah, al, bh, bl, ch, cl);
+
+	ah = (regs->regs[56]) >> 32;
+	al = (regs->regs[56]) & 0xffffffff;
+	bh = (regs->regs[57]) >> 32;
+	bl = (regs->regs[57]) & 0xffffffff;
+	ch = (regs->regs[58]) >> 32;
+	cl = (regs->regs[58]) & 0xffffffff;
+	printk("R56 : %08Lx%08Lx R57 : %08Lx%08Lx R58 : %08Lx%08Lx\n",
+	       ah, al, bh, bl, ch, cl);
+
+	ah = (regs->regs[59]) >> 32;
+	al = (regs->regs[59]) & 0xffffffff;
+	bh = (regs->regs[60]) >> 32;
+	bl = (regs->regs[60]) & 0xffffffff;
+	ch = (regs->regs[61]) >> 32;
+	cl = (regs->regs[61]) & 0xffffffff;
+	printk("R59 : %08Lx%08Lx R60 : %08Lx%08Lx R61 : %08Lx%08Lx\n",
+	       ah, al, bh, bl, ch, cl);
+
+	ah = (regs->regs[62]) >> 32;
+	al = (regs->regs[62]) & 0xffffffff;
+	bh = (regs->tregs[0]) >> 32;
+	bl = (regs->tregs[0]) & 0xffffffff;
+	ch = (regs->tregs[1]) >> 32;
+	cl = (regs->tregs[1]) & 0xffffffff;
+	printk("R62 : %08Lx%08Lx T0  : %08Lx%08Lx T1  : %08Lx%08Lx\n",
+	       ah, al, bh, bl, ch, cl);
+
+	ah = (regs->tregs[2]) >> 32;
+	al = (regs->tregs[2]) & 0xffffffff;
+	bh = (regs->tregs[3]) >> 32;
+	bl = (regs->tregs[3]) & 0xffffffff;
+	ch = (regs->tregs[4]) >> 32;
+	cl = (regs->tregs[4]) & 0xffffffff;
+	printk("T2  : %08Lx%08Lx T3  : %08Lx%08Lx T4  : %08Lx%08Lx\n",
+	       ah, al, bh, bl, ch, cl);
+
+	ah = (regs->tregs[5]) >> 32;
+	al = (regs->tregs[5]) & 0xffffffff;
+	bh = (regs->tregs[6]) >> 32;
+	bl = (regs->tregs[6]) & 0xffffffff;
+	ch = (regs->tregs[7]) >> 32;
+	cl = (regs->tregs[7]) & 0xffffffff;
+	printk("T5  : %08Lx%08Lx T6  : %08Lx%08Lx T7  : %08Lx%08Lx\n",
+	       ah, al, bh, bl, ch, cl);
+
+	/*
+	 * If we're in kernel mode, dump the stack too..
+	 */
+	if (!user_mode(regs)) {
+		void show_stack(struct task_struct *tsk, unsigned long *sp);
+		unsigned long sp = regs->regs[15] & 0xffffffff;
+		struct task_struct *tsk = get_current();
+
+		tsk->thread.kregs = regs;
+
+		show_stack(tsk, (unsigned long *)sp);
+	}
+}
+
+struct task_struct * alloc_task_struct(void)
+{
+	/* Get task descriptor pages */
+	return (struct task_struct *)
+		__get_free_pages(GFP_KERNEL, get_order(THREAD_SIZE));
+}
+
+void free_task_struct(struct task_struct *p)
+{
+	free_pages((unsigned long) p, get_order(THREAD_SIZE));
+}
+
+/*
+ * Create a kernel thread
+ */
+
+/*
+ * This is the mechanism for creating a new kernel thread.
+ *
+ * NOTE! Only a kernel-only process(ie the swapper or direct descendants
+ * who haven't done an "execve()") should use this: it will work within
+ * a system call from a "real" process, but the process memory space will
+ * not be free'd until both the parent and the child have exited.
+ */
+int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
+{
+	/* A bit less processor dependent than older sh ... */
+	unsigned int reply;
+
+static __inline__ _syscall2(int,clone,unsigned long,flags,unsigned long,newsp)
+static __inline__ _syscall1(int,exit,int,ret)
+
+	reply = clone(flags | CLONE_VM, 0);
+	if (!reply) {
+		/* Child */
+		reply = exit(fn(arg));
+	}
+
+	return reply;
+}
+
+/*
+ * Free current thread data structures etc..
+ */
+void exit_thread(void)
+{
+	/* See arch/sparc/kernel/process.c for the precedent for doing this -- RPC.
+
+	   The SH-5 FPU save/restore approach relies on last_task_used_math
+	   pointing to a live task_struct.  When another task tries to use the
+	   FPU for the 1st time, the FPUDIS trap handling (see
+	   arch/sh64/kernel/fpu.c) will save the existing FPU state to the
+	   FP regs field within last_task_used_math before re-loading the new
+	   task's FPU state (or initialising it if the FPU has been used
+	   before).  So if last_task_used_math is stale, and its page has already been
+	   re-allocated for another use, the consequences are rather grim. Unless we
+	   null it here, there is no other path through which it would get safely
+	   nulled. */
+
+#ifdef CONFIG_SH_FPU
+	if (last_task_used_math == current) {
+		last_task_used_math = NULL;
+	}
+#endif
+}
+
+void flush_thread(void)
+{
+
+	/* Called by fs/exec.c (flush_old_exec) to remove traces of a
+	 * previously running executable. */
+#ifdef CONFIG_SH_FPU
+	if (last_task_used_math == current) {
+		last_task_used_math = NULL;
+	}
+	/* Force FPU state to be reinitialised after exec */
+	clear_used_math();
+#endif
+
+	/* if we are a kernel thread, about to change to user thread,
+         * update kreg
+         */
+	if(current->thread.kregs==&fake_swapper_regs) {
+          current->thread.kregs =
+             ((struct pt_regs *)(THREAD_SIZE + (unsigned long) current) - 1);
+	  current->thread.uregs = current->thread.kregs;
+	}
+}
+
+void release_thread(struct task_struct *dead_task)
+{
+	/* do nothing */
+}
+
+/* Fill in the fpu structure for a core dump.. */
+int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpu)
+{
+#ifdef CONFIG_SH_FPU
+	int fpvalid;
+	struct task_struct *tsk = current;
+
+	fpvalid = !!tsk_used_math(tsk);
+	if (fpvalid) {
+		if (current == last_task_used_math) {
+			grab_fpu();
+			fpsave(&tsk->thread.fpu.hard);
+			release_fpu();
+			last_task_used_math = 0;
+			regs->sr |= SR_FD;
+		}
+
+		memcpy(fpu, &tsk->thread.fpu.hard, sizeof(*fpu));
+	}
+
+	return fpvalid;
+#else
+	return 0; /* Task didn't use the fpu at all. */
+#endif
+}
+
+asmlinkage void ret_from_fork(void);
+
+int copy_thread(int nr, unsigned long clone_flags, unsigned long usp,
+		unsigned long unused,
+		struct task_struct *p, struct pt_regs *regs)
+{
+	struct pt_regs *childregs;
+	unsigned long long se;			/* Sign extension */
+
+#ifdef CONFIG_SH_FPU
+	if(last_task_used_math == current) {
+		grab_fpu();
+		fpsave(&current->thread.fpu.hard);
+		release_fpu();
+		last_task_used_math = NULL;
+		regs->sr |= SR_FD;
+	}
+#endif
+	/* Copy from sh version */
+	childregs = ((struct pt_regs *)(THREAD_SIZE + (unsigned long) p->thread_info )) - 1;
+
+	*childregs = *regs;
+
+	if (user_mode(regs)) {
+		childregs->regs[15] = usp;
+		p->thread.uregs = childregs;
+	} else {
+		childregs->regs[15] = (unsigned long)p->thread_info + THREAD_SIZE;
+	}
+
+	childregs->regs[9] = 0; /* Set return value for child */
+	childregs->sr |= SR_FD; /* Invalidate FPU flag */
+
+	p->thread.sp = (unsigned long) childregs;
+	p->thread.pc = (unsigned long) ret_from_fork;
+
+	/*
+	 * Sign extend the edited stack.
+         * Note that thread.pc and thread.pc will stay
+	 * 32-bit wide and context switch must take care
+	 * of NEFF sign extension.
+	 */
+
+	se = childregs->regs[15];
+	se = (se & NEFF_SIGN) ? (se | NEFF_MASK) : se;
+	childregs->regs[15] = se;
+
+	return 0;
+}
+
+/*
+ * fill in the user structure for a core dump..
+ */
+void dump_thread(struct pt_regs * regs, struct user * dump)
+{
+	dump->magic = CMAGIC;
+	dump->start_code = current->mm->start_code;
+	dump->start_data  = current->mm->start_data;
+	dump->start_stack = regs->regs[15] & ~(PAGE_SIZE - 1);
+	dump->u_tsize = (current->mm->end_code - dump->start_code) >> PAGE_SHIFT;
+	dump->u_dsize = (current->mm->brk + (PAGE_SIZE-1) - dump->start_data) >> PAGE_SHIFT;
+	dump->u_ssize = (current->mm->start_stack - dump->start_stack +
+			 PAGE_SIZE - 1) >> PAGE_SHIFT;
+	/* Debug registers will come here. */
+
+	dump->regs = *regs;
+
+	dump->u_fpvalid = dump_fpu(regs, &dump->fpu);
+}
+
+asmlinkage int sys_fork(unsigned long r2, unsigned long r3,
+			unsigned long r4, unsigned long r5,
+			unsigned long r6, unsigned long r7,
+			struct pt_regs *pregs)
+{
+	return do_fork(SIGCHLD, pregs->regs[15], pregs, 0, 0, 0);
+}
+
+asmlinkage int sys_clone(unsigned long clone_flags, unsigned long newsp,
+			 unsigned long r4, unsigned long r5,
+			 unsigned long r6, unsigned long r7,
+			 struct pt_regs *pregs)
+{
+	if (!newsp)
+		newsp = pregs->regs[15];
+	return do_fork(clone_flags, newsp, pregs, 0, 0, 0);
+}
+
+/*
+ * This is trivial, and on the face of it looks like it
+ * could equally well be done in user mode.
+ *
+ * Not so, for quite unobvious reasons - register pressure.
+ * In user mode vfork() cannot have a stack frame, and if
+ * done by calling the "clone()" system call directly, you
+ * do not have enough call-clobbered registers to hold all
+ * the information you need.
+ */
+asmlinkage int sys_vfork(unsigned long r2, unsigned long r3,
+			 unsigned long r4, unsigned long r5,
+			 unsigned long r6, unsigned long r7,
+			 struct pt_regs *pregs)
+{
+	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, pregs->regs[15], pregs, 0, 0, 0);
+}
+
+/*
+ * sys_execve() executes a new program.
+ */
+asmlinkage int sys_execve(char *ufilename, char **uargv,
+			  char **uenvp, unsigned long r5,
+			  unsigned long r6, unsigned long r7,
+			  struct pt_regs *pregs)
+{
+	int error;
+	char *filename;
+
+	lock_kernel();
+	filename = getname((char __user *)ufilename);
+	error = PTR_ERR(filename);
+	if (IS_ERR(filename))
+		goto out;
+
+	error = do_execve(filename,
+			  (char __user * __user *)uargv,
+			  (char __user * __user *)uenvp,
+			  pregs);
+	if (error == 0) {
+		task_lock(current);
+		current->ptrace &= ~PT_DTRACE;
+		task_unlock(current);
+	}
+	putname(filename);
+out:
+	unlock_kernel();
+	return error;
+}
+
+/*
+ * These bracket the sleeping functions..
+ */
+extern void interruptible_sleep_on(wait_queue_head_t *q);
+
+#define mid_sched	((unsigned long) interruptible_sleep_on)
+
+static int in_sh64_switch_to(unsigned long pc)
+{
+	extern char __sh64_switch_to_end;
+	/* For a sleeping task, the PC is somewhere in the middle of the function,
+	   so we don't have to worry about masking the LSB off */
+	return (pc >= (unsigned long) sh64_switch_to) &&
+	       (pc < (unsigned long) &__sh64_switch_to_end);
+}
+
+unsigned long get_wchan(struct task_struct *p)
+{
+	unsigned long schedule_fp;
+	unsigned long sh64_switch_to_fp;
+	unsigned long schedule_caller_pc;
+	unsigned long pc;
+
+	if (!p || p == current || p->state == TASK_RUNNING)
+		return 0;
+
+	/*
+	 * The same comment as on the Alpha applies here, too ...
+	 */
+	pc = thread_saved_pc(p);
+
+#ifdef CONFIG_FRAME_POINTER
+	if (in_sh64_switch_to(pc)) {
+		sh64_switch_to_fp = (long) p->thread.sp;
+		/* r14 is saved at offset 4 in the sh64_switch_to frame */
+		schedule_fp = *(unsigned long *) (long)(sh64_switch_to_fp + 4);
+
+		/* and the caller of 'schedule' is (currently!) saved at offset 24
+		   in the frame of schedule (from disasm) */
+		schedule_caller_pc = *(unsigned long *) (long)(schedule_fp + 24);
+		return schedule_caller_pc;
+	}
+#endif
+	return pc;
+}
+
+/* Provide a /proc/asids file that lists out the
+   ASIDs currently associated with the processes.  (If the DM.PC register is
+   examined through the debug link, this shows ASID + PC.  To make use of this,
+   the PID->ASID relationship needs to be known.  This is primarily for
+   debugging.)
+   */
+
+#if defined(CONFIG_SH64_PROC_ASIDS)
+#include <linux/init.h>
+#include <linux/proc_fs.h>
+
+static int
+asids_proc_info(char *buf, char **start, off_t fpos, int length, int *eof, void *data)
+{
+	int len=0;
+	struct task_struct *p;
+	read_lock(&tasklist_lock);
+	for_each_process(p) {
+		int pid = p->pid;
+		struct mm_struct *mm;
+		if (!pid) continue;
+		mm = p->mm;
+		if (mm) {
+			unsigned long asid, context;
+			context = mm->context;
+			asid = (context & 0xff);
+			len += sprintf(buf+len, "%5d : %02lx\n", pid, asid);
+		} else {
+			len += sprintf(buf+len, "%5d : (none)\n", pid);
+		}
+	}
+	read_unlock(&tasklist_lock);
+	*eof = 1;
+	return len;
+}
+
+static int __init register_proc_asids(void)
+{
+  create_proc_read_entry("asids", 0, NULL, asids_proc_info, NULL);
+  return 0;
+}
+
+__initcall(register_proc_asids);
+#endif
+
diff --git a/arch/sh64/kernel/ptrace.c b/arch/sh64/kernel/ptrace.c
new file mode 100644
index 0000000..800288c
--- /dev/null
+++ b/arch/sh64/kernel/ptrace.c
@@ -0,0 +1,376 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * arch/sh64/kernel/ptrace.c
+ *
+ * Copyright (C) 2000, 2001  Paolo Alberelli
+ * Copyright (C) 2003  Paul Mundt
+ *
+ * Started from SH3/4 version:
+ *   SuperH version:   Copyright (C) 1999, 2000  Kaz Kojima & Niibe Yutaka
+ *
+ *   Original x86 implementation:
+ *	By Ross Biro 1/23/92
+ *	edited by Linus Torvalds
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/rwsem.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/errno.h>
+#include <linux/ptrace.h>
+#include <linux/user.h>
+
+#include <asm/io.h>
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/processor.h>
+#include <asm/mmu_context.h>
+
+/* This mask defines the bits of the SR which the user is not allowed to
+   change, which are everything except S, Q, M, PR, SZ, FR. */
+#define SR_MASK      (0xffff8cfd)
+
+/*
+ * does not yet catch signals sent when the child dies.
+ * in exit.c or in signal.c.
+ */
+
+/*
+ * This routine will get a word from the user area in the process kernel stack.
+ */
+static inline int get_stack_long(struct task_struct *task, int offset)
+{
+	unsigned char *stack;
+
+	stack = (unsigned char *)(task->thread.uregs);
+	stack += offset;
+	return (*((int *)stack));
+}
+
+static inline unsigned long
+get_fpu_long(struct task_struct *task, unsigned long addr)
+{
+	unsigned long tmp;
+	struct pt_regs *regs;
+	regs = (struct pt_regs*)((unsigned char *)task + THREAD_SIZE) - 1;
+
+	if (!tsk_used_math(task)) {
+		if (addr == offsetof(struct user_fpu_struct, fpscr)) {
+			tmp = FPSCR_INIT;
+		} else {
+			tmp = 0xffffffffUL; /* matches initial value in fpu.c */
+		}
+		return tmp;
+	}
+
+	if (last_task_used_math == task) {
+		grab_fpu();
+		fpsave(&task->thread.fpu.hard);
+		release_fpu();
+		last_task_used_math = 0;
+		regs->sr |= SR_FD;
+	}
+
+	tmp = ((long *)&task->thread.fpu)[addr / sizeof(unsigned long)];
+	return tmp;
+}
+
+/*
+ * This routine will put a word into the user area in the process kernel stack.
+ */
+static inline int put_stack_long(struct task_struct *task, int offset,
+				 unsigned long data)
+{
+	unsigned char *stack;
+
+	stack = (unsigned char *)(task->thread.uregs);
+	stack += offset;
+	*(unsigned long *) stack = data;
+	return 0;
+}
+
+static inline int
+put_fpu_long(struct task_struct *task, unsigned long addr, unsigned long data)
+{
+	struct pt_regs *regs;
+
+	regs = (struct pt_regs*)((unsigned char *)task + THREAD_SIZE) - 1;
+
+	if (!tsk_used_math(task)) {
+		fpinit(&task->thread.fpu.hard);
+		set_stopped_child_used_math(task);
+	} else if (last_task_used_math == task) {
+		grab_fpu();
+		fpsave(&task->thread.fpu.hard);
+		release_fpu();
+		last_task_used_math = 0;
+		regs->sr |= SR_FD;
+	}
+
+	((long *)&task->thread.fpu)[addr / sizeof(unsigned long)] = data;
+	return 0;
+}
+
+asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
+{
+	struct task_struct *child;
+	extern void poke_real_address_q(unsigned long long addr, unsigned long long data);
+#define WPC_DBRMODE 0x0d104008
+	static int first_call = 1;
+	int ret;
+
+	lock_kernel();
+
+	if (first_call) {
+		/* Set WPC.DBRMODE to 0.  This makes all debug events get
+		 * delivered through RESVEC, i.e. into the handlers in entry.S.
+		 * (If the kernel was downloaded using a remote gdb, WPC.DBRMODE
+		 * would normally be left set to 1, which makes debug events get
+		 * delivered through DBRVEC, i.e. into the remote gdb's
+		 * handlers.  This prevents ptrace getting them, and confuses
+		 * the remote gdb.) */
+		printk("DBRMODE set to 0 to permit native debugging\n");
+		poke_real_address_q(WPC_DBRMODE, 0);
+		first_call = 0;
+	}
+
+	ret = -EPERM;
+	if (request == PTRACE_TRACEME) {
+		/* are we already being traced? */
+		if (current->ptrace & PT_PTRACED)
+			goto out;
+		/* set the ptrace bit in the process flags. */
+		current->ptrace |= PT_PTRACED;
+		ret = 0;
+		goto out;
+	}
+	ret = -ESRCH;
+	read_lock(&tasklist_lock);
+	child = find_task_by_pid(pid);
+	if (child)
+		get_task_struct(child);
+	read_unlock(&tasklist_lock);
+	if (!child)
+		goto out;
+
+	ret = -EPERM;
+	if (pid == 1)		/* you may not mess with init */
+		goto out_tsk;
+
+	if (request == PTRACE_ATTACH) {
+		ret = ptrace_attach(child);
+			goto out_tsk;
+		}
+
+	ret = ptrace_check_attach(child, request == PTRACE_KILL);
+	if (ret < 0)
+		goto out_tsk;
+
+	switch (request) {
+	/* when I and D space are separate, these will need to be fixed. */
+	case PTRACE_PEEKTEXT: /* read word at location addr. */
+	case PTRACE_PEEKDATA: {
+		unsigned long tmp;
+		int copied;
+
+		copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
+		ret = -EIO;
+		if (copied != sizeof(tmp))
+			break;
+		ret = put_user(tmp,(unsigned long *) data);
+		break;
+	}
+
+	/* read the word at location addr in the USER area. */
+	case PTRACE_PEEKUSR: {
+		unsigned long tmp;
+
+		ret = -EIO;
+		if ((addr & 3) || addr < 0)
+			break;
+
+		if (addr < sizeof(struct pt_regs))
+			tmp = get_stack_long(child, addr);
+		else if ((addr >= offsetof(struct user, fpu)) &&
+			 (addr <  offsetof(struct user, u_fpvalid))) {
+			tmp = get_fpu_long(child, addr - offsetof(struct user, fpu));
+		} else if (addr == offsetof(struct user, u_fpvalid)) {
+			tmp = !!tsk_used_math(child);
+		} else {
+			break;
+		}
+		ret = put_user(tmp, (unsigned long *)data);
+		break;
+	}
+
+	/* when I and D space are separate, this will have to be fixed. */
+	case PTRACE_POKETEXT: /* write the word at location addr. */
+	case PTRACE_POKEDATA:
+		ret = 0;
+		if (access_process_vm(child, addr, &data, sizeof(data), 1) == sizeof(data))
+			break;
+		ret = -EIO;
+		break;
+
+	case PTRACE_POKEUSR:
+                /* write the word at location addr in the USER area. We must
+                   disallow any changes to certain SR bits or u_fpvalid, since
+                   this could crash the kernel or result in a security
+                   loophole. */
+		ret = -EIO;
+		if ((addr & 3) || addr < 0)
+			break;
+
+		if (addr < sizeof(struct pt_regs)) {
+			/* Ignore change of top 32 bits of SR */
+			if (addr == offsetof (struct pt_regs, sr)+4)
+			{
+				ret = 0;
+				break;
+			}
+			/* If lower 32 bits of SR, ignore non-user bits */
+			if (addr == offsetof (struct pt_regs, sr))
+			{
+				long cursr = get_stack_long(child, addr);
+				data &= ~(SR_MASK);
+				data |= (cursr & SR_MASK);
+			}
+			ret = put_stack_long(child, addr, data);
+		}
+		else if ((addr >= offsetof(struct user, fpu)) &&
+			 (addr <  offsetof(struct user, u_fpvalid))) {
+			ret = put_fpu_long(child, addr - offsetof(struct user, fpu), data);
+		}
+		break;
+
+	case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
+	case PTRACE_CONT: { /* restart after signal. */
+		ret = -EIO;
+		if ((unsigned long) data > _NSIG)
+			break;
+		if (request == PTRACE_SYSCALL)
+			set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+		else
+			clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+		child->exit_code = data;
+		wake_up_process(child);
+		ret = 0;
+		break;
+	}
+
+/*
+ * make the child exit.  Best I can do is send it a sigkill.
+ * perhaps it should be put in the status that it wants to
+ * exit.
+ */
+	case PTRACE_KILL: {
+		ret = 0;
+		if (child->exit_state == EXIT_ZOMBIE)	/* already dead */
+			break;
+		child->exit_code = SIGKILL;
+		wake_up_process(child);
+		break;
+	}
+
+	case PTRACE_SINGLESTEP: {  /* set the trap flag. */
+		struct pt_regs *regs;
+
+		ret = -EIO;
+		if ((unsigned long) data > _NSIG)
+			break;
+		clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+		if ((child->ptrace & PT_DTRACE) == 0) {
+			/* Spurious delayed TF traps may occur */
+			child->ptrace |= PT_DTRACE;
+		}
+
+		regs = child->thread.uregs;
+
+		regs->sr |= SR_SSTEP;	/* auto-resetting upon exception */
+
+		child->exit_code = data;
+		/* give it a chance to run. */
+		wake_up_process(child);
+		ret = 0;
+		break;
+	}
+
+	case PTRACE_DETACH: /* detach a process that was attached. */
+		ret = ptrace_detach(child, data);
+		break;
+
+	default:
+		ret = ptrace_request(child, request, addr, data);
+		break;
+	}
+out_tsk:
+	put_task_struct(child);
+out:
+	unlock_kernel();
+	return ret;
+}
+
+asmlinkage void syscall_trace(void)
+{
+	struct task_struct *tsk = current;
+
+	if (!test_thread_flag(TIF_SYSCALL_TRACE))
+		return;
+	if (!(tsk->ptrace & PT_PTRACED))
+		return;
+
+	ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
+				 ? 0x80 : 0));
+	/*
+	 * this isn't the same as continuing with a signal, but it will do
+	 * for normal use.  strace only continues with a signal if the
+	 * stopping signal is not SIGTRAP.  -brl
+	 */
+	if (tsk->exit_code) {
+		send_sig(tsk->exit_code, tsk, 1);
+		tsk->exit_code = 0;
+	}
+}
+
+/* Called with interrupts disabled */
+asmlinkage void do_single_step(unsigned long long vec, struct pt_regs *regs)
+{
+	/* This is called after a single step exception (DEBUGSS).
+	   There is no need to change the PC, as it is a post-execution
+	   exception, as entry.S does not do anything to the PC for DEBUGSS.
+	   We need to clear the Single Step setting in SR to avoid
+	   continually stepping. */
+	local_irq_enable();
+	regs->sr &= ~SR_SSTEP;
+	force_sig(SIGTRAP, current);
+}
+
+/* Called with interrupts disabled */
+asmlinkage void do_software_break_point(unsigned long long vec,
+					struct pt_regs *regs)
+{
+	/* We need to forward step the PC, to counteract the backstep done
+	   in signal.c. */
+	local_irq_enable();
+	force_sig(SIGTRAP, current);
+	regs->pc += 4;
+}
+
+/*
+ * Called by kernel/ptrace.c when detaching..
+ *
+ * Make sure single step bits etc are not set.
+ */
+void ptrace_disable(struct task_struct *child)
+{
+        /* nothing to do.. */
+}
diff --git a/arch/sh64/kernel/semaphore.c b/arch/sh64/kernel/semaphore.c
new file mode 100644
index 0000000..72c1653
--- /dev/null
+++ b/arch/sh64/kernel/semaphore.c
@@ -0,0 +1,140 @@
+/*
+ * Just taken from alpha implementation.
+ * This can't work well, perhaps.
+ */
+/*
+ *  Generic semaphore code. Buyer beware. Do your own
+ * specific changes in <asm/semaphore-helper.h>
+ */
+
+#include <linux/errno.h>
+#include <linux/rwsem.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/init.h>
+#include <asm/semaphore.h>
+#include <asm/semaphore-helper.h>
+
+spinlock_t semaphore_wake_lock;
+
+/*
+ * Semaphores are implemented using a two-way counter:
+ * The "count" variable is decremented for each process
+ * that tries to sleep, while the "waking" variable is
+ * incremented when the "up()" code goes to wake up waiting
+ * processes.
+ *
+ * Notably, the inline "up()" and "down()" functions can
+ * efficiently test if they need to do any extra work (up
+ * needs to do something only if count was negative before
+ * the increment operation.
+ *
+ * waking_non_zero() (from asm/semaphore.h) must execute
+ * atomically.
+ *
+ * When __up() is called, the count was negative before
+ * incrementing it, and we need to wake up somebody.
+ *
+ * This routine adds one to the count of processes that need to
+ * wake up and exit.  ALL waiting processes actually wake up but
+ * only the one that gets to the "waking" field first will gate
+ * through and acquire the semaphore.  The others will go back
+ * to sleep.
+ *
+ * Note that these functions are only called when there is
+ * contention on the lock, and as such all this is the
+ * "non-critical" part of the whole semaphore business. The
+ * critical part is the inline stuff in <asm/semaphore.h>
+ * where we want to avoid any extra jumps and calls.
+ */
+void __up(struct semaphore *sem)
+{
+	wake_one_more(sem);
+	wake_up(&sem->wait);
+}
+
+/*
+ * Perform the "down" function.  Return zero for semaphore acquired,
+ * return negative for signalled out of the function.
+ *
+ * If called from __down, the return is ignored and the wait loop is
+ * not interruptible.  This means that a task waiting on a semaphore
+ * using "down()" cannot be killed until someone does an "up()" on
+ * the semaphore.
+ *
+ * If called from __down_interruptible, the return value gets checked
+ * upon return.  If the return value is negative then the task continues
+ * with the negative value in the return register (it can be tested by
+ * the caller).
+ *
+ * Either form may be used in conjunction with "up()".
+ *
+ */
+
+#define DOWN_VAR				\
+	struct task_struct *tsk = current;	\
+	wait_queue_t wait;			\
+	init_waitqueue_entry(&wait, tsk);
+
+#define DOWN_HEAD(task_state)						\
+									\
+									\
+	tsk->state = (task_state);					\
+	add_wait_queue(&sem->wait, &wait);				\
+									\
+	/*								\
+	 * Ok, we're set up.  sem->count is known to be less than zero	\
+	 * so we must wait.						\
+	 *								\
+	 * We can let go the lock for purposes of waiting.		\
+	 * We re-acquire it after awaking so as to protect		\
+	 * all semaphore operations.					\
+	 *								\
+	 * If "up()" is called before we call waking_non_zero() then	\
+	 * we will catch it right away.  If it is called later then	\
+	 * we will have to go through a wakeup cycle to catch it.	\
+	 *								\
+	 * Multiple waiters contend for the semaphore lock to see	\
+	 * who gets to gate through and who has to wait some more.	\
+	 */								\
+	for (;;) {
+
+#define DOWN_TAIL(task_state)			\
+		tsk->state = (task_state);	\
+	}					\
+	tsk->state = TASK_RUNNING;		\
+	remove_wait_queue(&sem->wait, &wait);
+
+void __sched __down(struct semaphore * sem)
+{
+	DOWN_VAR
+	DOWN_HEAD(TASK_UNINTERRUPTIBLE)
+	if (waking_non_zero(sem))
+		break;
+	schedule();
+	DOWN_TAIL(TASK_UNINTERRUPTIBLE)
+}
+
+int __sched __down_interruptible(struct semaphore * sem)
+{
+	int ret = 0;
+	DOWN_VAR
+	DOWN_HEAD(TASK_INTERRUPTIBLE)
+
+	ret = waking_non_zero_interruptible(sem, tsk);
+	if (ret)
+	{
+		if (ret == 1)
+			/* ret != 0 only if we get interrupted -arca */
+			ret = 0;
+		break;
+	}
+	schedule();
+	DOWN_TAIL(TASK_INTERRUPTIBLE)
+	return ret;
+}
+
+int __down_trylock(struct semaphore * sem)
+{
+	return waking_non_zero_trylock(sem);
+}
diff --git a/arch/sh64/kernel/setup.c b/arch/sh64/kernel/setup.c
new file mode 100644
index 0000000..c7a7b816
--- /dev/null
+++ b/arch/sh64/kernel/setup.c
@@ -0,0 +1,385 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * arch/sh64/kernel/setup.c
+ *
+ * sh64 Arch Support
+ *
+ * This file handles the architecture-dependent parts of initialization
+ *
+ * Copyright (C) 2000, 2001  Paolo Alberelli
+ * Copyright (C) 2003, 2004  Paul Mundt
+ *
+ * benedict.gaster@superh.com:   2nd May 2002
+ *    Modified to use the empty_zero_page to pass command line arguments.
+ *
+ * benedict.gaster@superh.com:	 3rd May 2002
+ *    Added support for ramdisk, removing statically linked romfs at the same time.
+ *
+ * lethal@linux-sh.org:          15th May 2003
+ *    Added generic procfs cpuinfo reporting. Make boards just export their name.
+ *
+ * lethal@linux-sh.org:          25th May 2003
+ *    Added generic get_cpu_subtype() for subtype reporting from cpu_data->type.
+ *
+ */
+#include <linux/errno.h>
+#include <linux/rwsem.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/ptrace.h>
+#include <linux/slab.h>
+#include <linux/user.h>
+#include <linux/a.out.h>
+#include <linux/tty.h>
+#include <linux/ioport.h>
+#include <linux/delay.h>
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/seq_file.h>
+#include <linux/blkdev.h>
+#include <linux/bootmem.h>
+#include <linux/console.h>
+#include <linux/root_dev.h>
+#include <linux/cpu.h>
+#include <linux/initrd.h>
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/platform.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/sections.h>
+#include <asm/setup.h>
+#include <asm/smp.h>
+
+#ifdef CONFIG_VT
+#include <linux/console.h>
+#endif
+
+struct screen_info screen_info;
+
+#ifdef CONFIG_BLK_DEV_RAM
+extern int rd_doload;		/* 1 = load ramdisk, 0 = don't load */
+extern int rd_prompt;		/* 1 = prompt for ramdisk, 0 = don't prompt */
+extern int rd_image_start;	/* starting block # of image */
+#endif
+
+extern int root_mountflags;
+extern char *get_system_type(void);
+extern void platform_setup(void);
+extern void platform_monitor(void);
+extern void platform_reserve(void);
+extern int sh64_cache_init(void);
+extern int sh64_tlb_init(void);
+
+#define RAMDISK_IMAGE_START_MASK	0x07FF
+#define RAMDISK_PROMPT_FLAG		0x8000
+#define RAMDISK_LOAD_FLAG		0x4000
+
+static char command_line[COMMAND_LINE_SIZE] = { 0, };
+unsigned long long memory_start = CONFIG_MEMORY_START;
+unsigned long long memory_end = CONFIG_MEMORY_START + (CONFIG_MEMORY_SIZE_IN_MB * 1024 * 1024);
+
+struct sh_cpuinfo boot_cpu_data;
+
+static inline void parse_mem_cmdline (char ** cmdline_p)
+{
+        char c = ' ', *to = command_line, *from = COMMAND_LINE;
+	int len = 0;
+
+	/* Save unparsed command line copy for /proc/cmdline */
+	memcpy(saved_command_line, COMMAND_LINE, COMMAND_LINE_SIZE);
+	saved_command_line[COMMAND_LINE_SIZE-1] = '\0';
+
+	for (;;) {
+	  /*
+	   * "mem=XXX[kKmM]" defines a size of memory.
+	   */
+	        if (c == ' ' && !memcmp(from, "mem=", 4)) {
+		      if (to != command_line)
+			to--;
+		      {
+			unsigned long mem_size;
+
+			mem_size = memparse(from+4, &from);
+			memory_end = memory_start + mem_size;
+		      }
+		}
+		c = *(from++);
+		if (!c)
+		  break;
+		if (COMMAND_LINE_SIZE <= ++len)
+		  break;
+		*(to++) = c;
+	}
+	*to = '\0';
+
+	*cmdline_p = command_line;
+}
+
+static void __init sh64_cpu_type_detect(void)
+{
+	extern unsigned long long peek_real_address_q(unsigned long long addr);
+	unsigned long long cir;
+	/* Do peeks in real mode to avoid having to set up a mapping for the
+	   WPC registers.  On SH5-101 cut2, such a mapping would be exposed to
+	   an address translation erratum which would make it hard to set up
+	   correctly. */
+	cir = peek_real_address_q(0x0d000008);
+
+	if ((cir & 0xffff) == 0x5103) {
+		boot_cpu_data.type = CPU_SH5_103;
+	} else if (((cir >> 32) & 0xffff) == 0x51e2) {
+		/* CPU.VCR aliased at CIR address on SH5-101 */
+		boot_cpu_data.type = CPU_SH5_101;
+	} else {
+		boot_cpu_data.type = CPU_SH_NONE;
+	}
+}
+
+void __init setup_arch(char **cmdline_p)
+{
+	unsigned long bootmap_size, i;
+	unsigned long first_pfn, start_pfn, last_pfn, pages;
+
+#ifdef CONFIG_EARLY_PRINTK
+	extern void enable_early_printk(void);
+
+	/*
+	 * Setup Early SCIF console
+	 */
+	enable_early_printk();
+#endif
+
+	/*
+	 * Setup TLB mappings
+	 */
+	sh64_tlb_init();
+
+	/*
+	 * Caches are already initialized by the time we get here, so we just
+	 * fill in cpu_data info for the caches.
+	 */
+	sh64_cache_init();
+
+	platform_setup();
+	platform_monitor();
+
+	sh64_cpu_type_detect();
+
+	ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
+
+#ifdef CONFIG_BLK_DEV_RAM
+	rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
+	rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
+	rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
+#endif
+
+	if (!MOUNT_ROOT_RDONLY)
+		root_mountflags &= ~MS_RDONLY;
+	init_mm.start_code = (unsigned long) _text;
+	init_mm.end_code = (unsigned long) _etext;
+	init_mm.end_data = (unsigned long) _edata;
+	init_mm.brk = (unsigned long) _end;
+
+	code_resource.start = __pa(_text);
+	code_resource.end = __pa(_etext)-1;
+	data_resource.start = __pa(_etext);
+	data_resource.end = __pa(_edata)-1;
+
+	parse_mem_cmdline(cmdline_p);
+
+	/*
+	 * Find the lowest and highest page frame numbers we have available
+	 */
+	first_pfn = PFN_DOWN(memory_start);
+	last_pfn = PFN_DOWN(memory_end);
+	pages = last_pfn - first_pfn;
+
+	/*
+	 * Partially used pages are not usable - thus
+	 * we are rounding upwards:
+	 */
+	start_pfn = PFN_UP(__pa(_end));
+
+	/*
+	 * Find a proper area for the bootmem bitmap. After this
+	 * bootstrap step all allocations (until the page allocator
+	 * is intact) must be done via bootmem_alloc().
+	 */
+	bootmap_size = init_bootmem_node(NODE_DATA(0), start_pfn,
+					 first_pfn,
+					 last_pfn);
+        /*
+         * Round it up.
+         */
+        bootmap_size = PFN_PHYS(PFN_UP(bootmap_size));
+
+	/*
+	 * Register fully available RAM pages with the bootmem allocator.
+	 */
+	free_bootmem_node(NODE_DATA(0), PFN_PHYS(first_pfn), PFN_PHYS(pages));
+
+	/*
+	 * Reserve all kernel sections + bootmem bitmap + a guard page.
+	 */
+	reserve_bootmem_node(NODE_DATA(0), PFN_PHYS(first_pfn),
+		        (PFN_PHYS(start_pfn) + bootmap_size + PAGE_SIZE) - PFN_PHYS(first_pfn));
+
+	/*
+	 * Reserve platform dependent sections
+	 */
+	platform_reserve();
+
+#ifdef CONFIG_BLK_DEV_INITRD
+	if (LOADER_TYPE && INITRD_START) {
+		if (INITRD_START + INITRD_SIZE <= (PFN_PHYS(last_pfn))) {
+		        reserve_bootmem_node(NODE_DATA(0), INITRD_START + __MEMORY_START, INITRD_SIZE);
+
+			initrd_start =
+			  (long) INITRD_START ? INITRD_START + PAGE_OFFSET +  __MEMORY_START : 0;
+
+			initrd_end = initrd_start + INITRD_SIZE;
+		} else {
+			printk("initrd extends beyond end of memory "
+			    "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
+				    (long) INITRD_START + INITRD_SIZE,
+				    PFN_PHYS(last_pfn));
+			initrd_start = 0;
+		}
+	}
+#endif
+
+	/*
+	 * Claim all RAM, ROM, and I/O resources.
+	 */
+
+	/* Kernel RAM */
+	request_resource(&iomem_resource, &code_resource);
+	request_resource(&iomem_resource, &data_resource);
+
+	/* Other KRAM space */
+	for (i = 0; i < STANDARD_KRAM_RESOURCES - 2; i++)
+		request_resource(&iomem_resource,
+				 &platform_parms.kram_res_p[i]);
+
+	/* XRAM space */
+	for (i = 0; i < STANDARD_XRAM_RESOURCES; i++)
+		request_resource(&iomem_resource,
+				 &platform_parms.xram_res_p[i]);
+
+	/* ROM space */
+	for (i = 0; i < STANDARD_ROM_RESOURCES; i++)
+		request_resource(&iomem_resource,
+				 &platform_parms.rom_res_p[i]);
+
+	/* I/O space */
+	for (i = 0; i < STANDARD_IO_RESOURCES; i++)
+		request_resource(&ioport_resource,
+				 &platform_parms.io_res_p[i]);
+
+
+#ifdef CONFIG_VT
+#if defined(CONFIG_VGA_CONSOLE)
+	conswitchp = &vga_con;
+#elif defined(CONFIG_DUMMY_CONSOLE)
+	conswitchp = &dummy_con;
+#endif
+#endif
+
+	printk("Hardware FPU: %s\n", fpu_in_use ? "enabled" : "disabled");
+
+	paging_init();
+}
+
+void __xchg_called_with_bad_pointer(void)
+{
+	printk(KERN_EMERG "xchg() called with bad pointer !\n");
+}
+
+static struct cpu cpu[1];
+
+static int __init topology_init(void)
+{
+	return register_cpu(cpu, 0, NULL);
+}
+
+subsys_initcall(topology_init);
+
+/*
+ *	Get CPU information
+ */
+static const char *cpu_name[] = {
+	[CPU_SH5_101]	= "SH5-101",
+	[CPU_SH5_103]	= "SH5-103",
+	[CPU_SH_NONE]	= "Unknown",
+};
+
+const char *get_cpu_subtype(void)
+{
+	return cpu_name[boot_cpu_data.type];
+}
+
+#ifdef CONFIG_PROC_FS
+static int show_cpuinfo(struct seq_file *m,void *v)
+{
+	unsigned int cpu = smp_processor_id();
+
+	if (!cpu)
+		seq_printf(m, "machine\t\t: %s\n", get_system_type());
+
+	seq_printf(m, "processor\t: %d\n", cpu);
+	seq_printf(m, "cpu family\t: SH-5\n");
+	seq_printf(m, "cpu type\t: %s\n", get_cpu_subtype());
+
+	seq_printf(m, "icache size\t: %dK-bytes\n",
+		   (boot_cpu_data.icache.ways *
+		    boot_cpu_data.icache.sets *
+		    boot_cpu_data.icache.linesz) >> 10);
+	seq_printf(m, "dcache size\t: %dK-bytes\n",
+		   (boot_cpu_data.dcache.ways *
+		    boot_cpu_data.dcache.sets *
+		    boot_cpu_data.dcache.linesz) >> 10);
+	seq_printf(m, "itlb entries\t: %d\n", boot_cpu_data.itlb.entries);
+	seq_printf(m, "dtlb entries\t: %d\n", boot_cpu_data.dtlb.entries);
+
+#define PRINT_CLOCK(name, value) \
+	seq_printf(m, name " clock\t: %d.%02dMHz\n", \
+		     ((value) / 1000000), ((value) % 1000000)/10000)
+
+	PRINT_CLOCK("cpu", boot_cpu_data.cpu_clock);
+	PRINT_CLOCK("bus", boot_cpu_data.bus_clock);
+	PRINT_CLOCK("module", boot_cpu_data.module_clock);
+
+        seq_printf(m, "bogomips\t: %lu.%02lu\n\n",
+		     (loops_per_jiffy*HZ+2500)/500000,
+		     ((loops_per_jiffy*HZ+2500)/5000) % 100);
+
+	return 0;
+}
+
+static void *c_start(struct seq_file *m, loff_t *pos)
+{
+	return (void*)(*pos == 0);
+}
+static void *c_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	return NULL;
+}
+static void c_stop(struct seq_file *m, void *v)
+{
+}
+struct seq_operations cpuinfo_op = {
+	.start	= c_start,
+	.next	= c_next,
+	.stop	= c_stop,
+	.show	= show_cpuinfo,
+};
+#endif /* CONFIG_PROC_FS */
diff --git a/arch/sh64/kernel/sh_ksyms.c b/arch/sh64/kernel/sh_ksyms.c
new file mode 100644
index 0000000..0b5497d
--- /dev/null
+++ b/arch/sh64/kernel/sh_ksyms.c
@@ -0,0 +1,89 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * arch/sh64/kernel/sh_ksyms.c
+ *
+ * Copyright (C) 2000, 2001  Paolo Alberelli
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/rwsem.h>
+#include <linux/module.h>
+#include <linux/smp.h>
+#include <linux/user.h>
+#include <linux/elfcore.h>
+#include <linux/sched.h>
+#include <linux/in6.h>
+#include <linux/interrupt.h>
+#include <linux/smp_lock.h>
+#include <linux/tty.h>
+
+#include <asm/semaphore.h>
+#include <asm/processor.h>
+#include <asm/uaccess.h>
+#include <asm/checksum.h>
+#include <asm/io.h>
+#include <asm/delay.h>
+#include <asm/irq.h>
+
+extern void dump_thread(struct pt_regs *, struct user *);
+extern int dump_fpu(struct pt_regs *, elf_fpregset_t *);
+
+#if 0
+/* Not yet - there's no declaration of drive_info anywhere. */
+#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE)
+extern struct drive_info_struct drive_info;
+EXPORT_SYMBOL(drive_info);
+#endif
+#endif
+
+/* platform dependent support */
+EXPORT_SYMBOL(dump_thread);
+EXPORT_SYMBOL(dump_fpu);
+EXPORT_SYMBOL(iounmap);
+EXPORT_SYMBOL(enable_irq);
+EXPORT_SYMBOL(disable_irq);
+EXPORT_SYMBOL(kernel_thread);
+
+/* Networking helper routines. */
+EXPORT_SYMBOL(csum_partial_copy);
+
+EXPORT_SYMBOL(strpbrk);
+EXPORT_SYMBOL(strstr);
+
+#ifdef CONFIG_VT
+EXPORT_SYMBOL(screen_info);
+#endif
+
+EXPORT_SYMBOL(__down);
+EXPORT_SYMBOL(__down_trylock);
+EXPORT_SYMBOL(__up);
+EXPORT_SYMBOL(__put_user_asm_l);
+EXPORT_SYMBOL(__get_user_asm_l);
+EXPORT_SYMBOL(memcmp);
+EXPORT_SYMBOL(memcpy);
+EXPORT_SYMBOL(memset);
+EXPORT_SYMBOL(memscan);
+EXPORT_SYMBOL(strchr);
+EXPORT_SYMBOL(strlen);
+
+EXPORT_SYMBOL(flush_dcache_page);
+
+/* For ext3 */
+EXPORT_SYMBOL(sh64_page_clear);
+
+/* Ugh.  These come in from libgcc.a at link time. */
+
+extern void __sdivsi3(void);
+extern void __muldi3(void);
+extern void __udivsi3(void);
+extern char __div_table;
+EXPORT_SYMBOL(__sdivsi3);
+EXPORT_SYMBOL(__muldi3);
+EXPORT_SYMBOL(__udivsi3);
+EXPORT_SYMBOL(__div_table);
+
+
diff --git a/arch/sh64/kernel/signal.c b/arch/sh64/kernel/signal.c
new file mode 100644
index 0000000..45ad102
--- /dev/null
+++ b/arch/sh64/kernel/signal.c
@@ -0,0 +1,727 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * arch/sh64/kernel/signal.c
+ *
+ * Copyright (C) 2000, 2001  Paolo Alberelli
+ * Copyright (C) 2003  Paul Mundt
+ * Copyright (C) 2004  Richard Curnow
+ *
+ * Started from sh version.
+ *
+ */
+#include <linux/rwsem.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/errno.h>
+#include <linux/wait.h>
+#include <linux/personality.h>
+#include <linux/suspend.h>
+#include <linux/ptrace.h>
+#include <linux/unistd.h>
+#include <linux/stddef.h>
+#include <linux/personality.h>
+#include <asm/ucontext.h>
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+
+
+#define REG_RET 9
+#define REG_ARG1 2
+#define REG_ARG2 3
+#define REG_ARG3 4
+#define REG_SP 15
+#define REG_PR 18
+#define REF_REG_RET regs->regs[REG_RET]
+#define REF_REG_SP regs->regs[REG_SP]
+#define DEREF_REG_PR regs->regs[REG_PR]
+
+#define DEBUG_SIG 0
+
+#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
+
+asmlinkage int do_signal(struct pt_regs *regs, sigset_t *oldset);
+
+/*
+ * Atomically swap in the new signal mask, and wait for a signal.
+ */
+
+asmlinkage int
+sys_sigsuspend(old_sigset_t mask,
+	       unsigned long r3, unsigned long r4, unsigned long r5,
+	       unsigned long r6, unsigned long r7,
+	       struct pt_regs * regs)
+{
+	sigset_t saveset;
+
+	mask &= _BLOCKABLE;
+	spin_lock_irq(&current->sighand->siglock);
+	saveset = current->blocked;
+	siginitset(&current->blocked, mask);
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
+
+	REF_REG_RET = -EINTR;
+	while (1) {
+		current->state = TASK_INTERRUPTIBLE;
+		schedule();
+		regs->pc += 4;    /* because sys_sigreturn decrements the pc */
+		if (do_signal(regs, &saveset)) {
+			/* pc now points at signal handler. Need to decrement
+			   it because entry.S will increment it. */
+			regs->pc -= 4;
+			return -EINTR;
+		}
+	}
+}
+
+asmlinkage int
+sys_rt_sigsuspend(sigset_t *unewset, size_t sigsetsize,
+	          unsigned long r4, unsigned long r5, unsigned long r6,
+	          unsigned long r7,
+	          struct pt_regs * regs)
+{
+	sigset_t saveset, newset;
+
+	/* XXX: Don't preclude handling different sized sigset_t's.  */
+	if (sigsetsize != sizeof(sigset_t))
+		return -EINVAL;
+
+	if (copy_from_user(&newset, unewset, sizeof(newset)))
+		return -EFAULT;
+	sigdelsetmask(&newset, ~_BLOCKABLE);
+	spin_lock_irq(&current->sighand->siglock);
+	saveset = current->blocked;
+	current->blocked = newset;
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
+
+	REF_REG_RET = -EINTR;
+	while (1) {
+		current->state = TASK_INTERRUPTIBLE;
+		schedule();
+		regs->pc += 4;    /* because sys_sigreturn decrements the pc */
+		if (do_signal(regs, &saveset)) {
+			/* pc now points at signal handler. Need to decrement
+			   it because entry.S will increment it. */
+			regs->pc -= 4;
+			return -EINTR;
+		}
+	}
+}
+
+asmlinkage int
+sys_sigaction(int sig, const struct old_sigaction __user *act,
+	      struct old_sigaction __user *oact)
+{
+	struct k_sigaction new_ka, old_ka;
+	int ret;
+
+	if (act) {
+		old_sigset_t mask;
+		if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
+		    __get_user(new_ka.sa.sa_handler, &act->sa_handler) ||
+		    __get_user(new_ka.sa.sa_restorer, &act->sa_restorer))
+			return -EFAULT;
+		__get_user(new_ka.sa.sa_flags, &act->sa_flags);
+		__get_user(mask, &act->sa_mask);
+		siginitset(&new_ka.sa.sa_mask, mask);
+	}
+
+	ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
+
+	if (!ret && oact) {
+		if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
+		    __put_user(old_ka.sa.sa_handler, &oact->sa_handler) ||
+		    __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer))
+			return -EFAULT;
+		__put_user(old_ka.sa.sa_flags, &oact->sa_flags);
+		__put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask);
+	}
+
+	return ret;
+}
+
+asmlinkage int
+sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
+	        unsigned long r4, unsigned long r5, unsigned long r6,
+	        unsigned long r7,
+	        struct pt_regs * regs)
+{
+	return do_sigaltstack(uss, uoss, REF_REG_SP);
+}
+
+
+/*
+ * Do a signal return; undo the signal stack.
+ */
+
+struct sigframe
+{
+	struct sigcontext sc;
+	unsigned long extramask[_NSIG_WORDS-1];
+	long long retcode[2];
+};
+
+struct rt_sigframe
+{
+	struct siginfo __user *pinfo;
+	void *puc;
+	struct siginfo info;
+	struct ucontext uc;
+	long long retcode[2];
+};
+
+#ifdef CONFIG_SH_FPU
+static inline int
+restore_sigcontext_fpu(struct pt_regs *regs, struct sigcontext __user *sc)
+{
+	int err = 0;
+	int fpvalid;
+
+	err |= __get_user (fpvalid, &sc->sc_fpvalid);
+	conditional_used_math(fpvalid);
+	if (! fpvalid)
+		return err;
+
+	if (current == last_task_used_math) {
+		last_task_used_math = NULL;
+		regs->sr |= SR_FD;
+	}
+
+	err |= __copy_from_user(&current->thread.fpu.hard, &sc->sc_fpregs[0],
+				(sizeof(long long) * 32) + (sizeof(int) * 1));
+
+	return err;
+}
+
+static inline int
+setup_sigcontext_fpu(struct pt_regs *regs, struct sigcontext __user *sc)
+{
+	int err = 0;
+	int fpvalid;
+
+	fpvalid = !!used_math();
+	err |= __put_user(fpvalid, &sc->sc_fpvalid);
+	if (! fpvalid)
+		return err;
+
+	if (current == last_task_used_math) {
+		grab_fpu();
+		fpsave(&current->thread.fpu.hard);
+		release_fpu();
+		last_task_used_math = NULL;
+		regs->sr |= SR_FD;
+	}
+
+	err |= __copy_to_user(&sc->sc_fpregs[0], &current->thread.fpu.hard,
+			      (sizeof(long long) * 32) + (sizeof(int) * 1));
+	clear_used_math();
+
+	return err;
+}
+#else
+static inline int
+restore_sigcontext_fpu(struct pt_regs *regs, struct sigcontext __user *sc)
+{}
+static inline int
+setup_sigcontext_fpu(struct pt_regs *regs, struct sigcontext __user *sc)
+{}
+#endif
+
+static int
+restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, long long *r2_p)
+{
+	unsigned int err = 0;
+        unsigned long long current_sr, new_sr;
+#define SR_MASK 0xffff8cfd
+
+#define COPY(x)		err |= __get_user(regs->x, &sc->sc_##x)
+
+	COPY(regs[0]);	COPY(regs[1]);	COPY(regs[2]);	COPY(regs[3]);
+	COPY(regs[4]);	COPY(regs[5]);	COPY(regs[6]);	COPY(regs[7]);
+	COPY(regs[8]);	COPY(regs[9]);  COPY(regs[10]);	COPY(regs[11]);
+	COPY(regs[12]);	COPY(regs[13]);	COPY(regs[14]);	COPY(regs[15]);
+	COPY(regs[16]);	COPY(regs[17]);	COPY(regs[18]);	COPY(regs[19]);
+	COPY(regs[20]);	COPY(regs[21]);	COPY(regs[22]);	COPY(regs[23]);
+	COPY(regs[24]);	COPY(regs[25]);	COPY(regs[26]);	COPY(regs[27]);
+	COPY(regs[28]);	COPY(regs[29]);	COPY(regs[30]);	COPY(regs[31]);
+	COPY(regs[32]);	COPY(regs[33]);	COPY(regs[34]);	COPY(regs[35]);
+	COPY(regs[36]);	COPY(regs[37]);	COPY(regs[38]);	COPY(regs[39]);
+	COPY(regs[40]);	COPY(regs[41]);	COPY(regs[42]);	COPY(regs[43]);
+	COPY(regs[44]);	COPY(regs[45]);	COPY(regs[46]);	COPY(regs[47]);
+	COPY(regs[48]);	COPY(regs[49]);	COPY(regs[50]);	COPY(regs[51]);
+	COPY(regs[52]);	COPY(regs[53]);	COPY(regs[54]);	COPY(regs[55]);
+	COPY(regs[56]);	COPY(regs[57]);	COPY(regs[58]);	COPY(regs[59]);
+	COPY(regs[60]);	COPY(regs[61]);	COPY(regs[62]);
+	COPY(tregs[0]);	COPY(tregs[1]);	COPY(tregs[2]);	COPY(tregs[3]);
+	COPY(tregs[4]);	COPY(tregs[5]);	COPY(tregs[6]);	COPY(tregs[7]);
+
+        /* Prevent the signal handler manipulating SR in a way that can
+           crash the kernel. i.e. only allow S, Q, M, PR, SZ, FR to be
+           modified */
+        current_sr = regs->sr;
+        err |= __get_user(new_sr, &sc->sc_sr);
+        regs->sr &= SR_MASK;
+        regs->sr |= (new_sr & ~SR_MASK);
+
+	COPY(pc);
+
+#undef COPY
+
+	/* Must do this last in case it sets regs->sr.fd (i.e. after rest of sr
+	 * has been restored above.) */
+	err |= restore_sigcontext_fpu(regs, sc);
+
+	regs->syscall_nr = -1;		/* disable syscall checks */
+	err |= __get_user(*r2_p, &sc->sc_regs[REG_RET]);
+	return err;
+}
+
+asmlinkage int sys_sigreturn(unsigned long r2, unsigned long r3,
+				   unsigned long r4, unsigned long r5,
+				   unsigned long r6, unsigned long r7,
+				   struct pt_regs * regs)
+{
+	struct sigframe __user *frame = (struct sigframe __user *) (long) REF_REG_SP;
+	sigset_t set;
+	long long ret;
+
+	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
+		goto badframe;
+
+	if (__get_user(set.sig[0], &frame->sc.oldmask)
+	    || (_NSIG_WORDS > 1
+		&& __copy_from_user(&set.sig[1], &frame->extramask,
+				    sizeof(frame->extramask))))
+		goto badframe;
+
+	sigdelsetmask(&set, ~_BLOCKABLE);
+
+	spin_lock_irq(&current->sighand->siglock);
+	current->blocked = set;
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
+
+	if (restore_sigcontext(regs, &frame->sc, &ret))
+		goto badframe;
+	regs->pc -= 4;
+
+	return (int) ret;
+
+badframe:
+	force_sig(SIGSEGV, current);
+	return 0;
+}
+
+asmlinkage int sys_rt_sigreturn(unsigned long r2, unsigned long r3,
+				unsigned long r4, unsigned long r5,
+				unsigned long r6, unsigned long r7,
+				struct pt_regs * regs)
+{
+	struct rt_sigframe __user *frame = (struct rt_sigframe __user *) (long) REF_REG_SP;
+	sigset_t set;
+	stack_t __user st;
+	long long ret;
+
+	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
+		goto badframe;
+
+	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
+		goto badframe;
+
+	sigdelsetmask(&set, ~_BLOCKABLE);
+	spin_lock_irq(&current->sighand->siglock);
+	current->blocked = set;
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
+
+	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ret))
+		goto badframe;
+	regs->pc -= 4;
+
+	if (__copy_from_user(&st, &frame->uc.uc_stack, sizeof(st)))
+		goto badframe;
+	/* It is more difficult to avoid calling this function than to
+	   call it and ignore errors.  */
+	do_sigaltstack(&st, NULL, REF_REG_SP);
+
+	return (int) ret;
+
+badframe:
+	force_sig(SIGSEGV, current);
+	return 0;
+}
+
+/*
+ * Set up a signal frame.
+ */
+
+static int
+setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
+		 unsigned long mask)
+{
+	int err = 0;
+
+	/* Do this first, otherwise is this sets sr->fd, that value isn't preserved. */
+	err |= setup_sigcontext_fpu(regs, sc);
+
+#define COPY(x)		err |= __put_user(regs->x, &sc->sc_##x)
+
+	COPY(regs[0]);	COPY(regs[1]);	COPY(regs[2]);	COPY(regs[3]);
+	COPY(regs[4]);	COPY(regs[5]);	COPY(regs[6]);	COPY(regs[7]);
+	COPY(regs[8]);	COPY(regs[9]);	COPY(regs[10]);	COPY(regs[11]);
+	COPY(regs[12]);	COPY(regs[13]);	COPY(regs[14]);	COPY(regs[15]);
+	COPY(regs[16]);	COPY(regs[17]);	COPY(regs[18]);	COPY(regs[19]);
+	COPY(regs[20]);	COPY(regs[21]);	COPY(regs[22]);	COPY(regs[23]);
+	COPY(regs[24]);	COPY(regs[25]);	COPY(regs[26]);	COPY(regs[27]);
+	COPY(regs[28]);	COPY(regs[29]);	COPY(regs[30]);	COPY(regs[31]);
+	COPY(regs[32]);	COPY(regs[33]);	COPY(regs[34]);	COPY(regs[35]);
+	COPY(regs[36]);	COPY(regs[37]);	COPY(regs[38]);	COPY(regs[39]);
+	COPY(regs[40]);	COPY(regs[41]);	COPY(regs[42]);	COPY(regs[43]);
+	COPY(regs[44]);	COPY(regs[45]);	COPY(regs[46]);	COPY(regs[47]);
+	COPY(regs[48]);	COPY(regs[49]);	COPY(regs[50]);	COPY(regs[51]);
+	COPY(regs[52]);	COPY(regs[53]);	COPY(regs[54]);	COPY(regs[55]);
+	COPY(regs[56]);	COPY(regs[57]);	COPY(regs[58]);	COPY(regs[59]);
+	COPY(regs[60]);	COPY(regs[61]);	COPY(regs[62]);
+	COPY(tregs[0]);	COPY(tregs[1]);	COPY(tregs[2]);	COPY(tregs[3]);
+	COPY(tregs[4]);	COPY(tregs[5]);	COPY(tregs[6]);	COPY(tregs[7]);
+	COPY(sr);	COPY(pc);
+
+#undef COPY
+
+	err |= __put_user(mask, &sc->oldmask);
+
+	return err;
+}
+
+/*
+ * Determine which stack to use..
+ */
+static inline void __user *
+get_sigframe(struct k_sigaction *ka, unsigned long sp, size_t frame_size)
+{
+	if ((ka->sa.sa_flags & SA_ONSTACK) != 0 && ! on_sig_stack(sp))
+		sp = current->sas_ss_sp + current->sas_ss_size;
+
+	return (void __user *)((sp - frame_size) & -8ul);
+}
+
+void sa_default_restorer(void);		/* See comments below */
+void sa_default_rt_restorer(void);	/* See comments below */
+
+static void setup_frame(int sig, struct k_sigaction *ka,
+			sigset_t *set, struct pt_regs *regs)
+{
+	struct sigframe __user *frame;
+	int err = 0;
+	int signal;
+
+	frame = get_sigframe(ka, regs->regs[REG_SP], sizeof(*frame));
+
+	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
+		goto give_sigsegv;
+
+	signal = current_thread_info()->exec_domain
+		&& current_thread_info()->exec_domain->signal_invmap
+		&& sig < 32
+		? current_thread_info()->exec_domain->signal_invmap[sig]
+		: sig;
+
+	err |= setup_sigcontext(&frame->sc, regs, set->sig[0]);
+
+	/* Give up earlier as i386, in case */
+	if (err)
+		goto give_sigsegv;
+
+	if (_NSIG_WORDS > 1) {
+		err |= __copy_to_user(frame->extramask, &set->sig[1],
+				      sizeof(frame->extramask)); }
+
+	/* Give up earlier as i386, in case */
+	if (err)
+		goto give_sigsegv;
+
+	/* Set up to return from userspace.  If provided, use a stub
+	   already in userspace.  */
+	if (ka->sa.sa_flags & SA_RESTORER) {
+		DEREF_REG_PR = (unsigned long) ka->sa.sa_restorer | 0x1;
+
+		/*
+		 * On SH5 all edited pointers are subject to NEFF
+		 */
+		DEREF_REG_PR = (DEREF_REG_PR & NEFF_SIGN) ?
+        		 	(DEREF_REG_PR | NEFF_MASK) : DEREF_REG_PR;
+	} else {
+		/*
+		 * Different approach on SH5.
+	         * . Endianness independent asm code gets placed in entry.S .
+		 *   This is limited to four ASM instructions corresponding
+		 *   to two long longs in size.
+		 * . err checking is done on the else branch only
+		 * . flush_icache_range() is called upon __put_user() only
+		 * . all edited pointers are subject to NEFF
+		 * . being code, linker turns ShMedia bit on, always
+		 *   dereference index -1.
+		 */
+		DEREF_REG_PR = (unsigned long) frame->retcode | 0x01;
+		DEREF_REG_PR = (DEREF_REG_PR & NEFF_SIGN) ?
+        		 	(DEREF_REG_PR | NEFF_MASK) : DEREF_REG_PR;
+
+		if (__copy_to_user(frame->retcode,
+			(unsigned long long)sa_default_restorer & (~1), 16) != 0)
+			goto give_sigsegv;
+
+		/* Cohere the trampoline with the I-cache. */
+		flush_cache_sigtramp(DEREF_REG_PR-1, DEREF_REG_PR-1+16);
+	}
+
+	/*
+	 * Set up registers for signal handler.
+	 * All edited pointers are subject to NEFF.
+	 */
+	regs->regs[REG_SP] = (unsigned long) frame;
+	regs->regs[REG_SP] = (regs->regs[REG_SP] & NEFF_SIGN) ?
+        		 (regs->regs[REG_SP] | NEFF_MASK) : regs->regs[REG_SP];
+	regs->regs[REG_ARG1] = signal; /* Arg for signal handler */
+
+        /* FIXME:
+           The glibc profiling support for SH-5 needs to be passed a sigcontext
+           so it can retrieve the PC.  At some point during 2003 the glibc
+           support was changed to receive the sigcontext through the 2nd
+           argument, but there are still versions of libc.so in use that use
+           the 3rd argument.  Until libc.so is stabilised, pass the sigcontext
+           through both 2nd and 3rd arguments.
+        */
+
+	regs->regs[REG_ARG2] = (unsigned long long)(unsigned long)(signed long)&frame->sc;
+	regs->regs[REG_ARG3] = (unsigned long long)(unsigned long)(signed long)&frame->sc;
+
+	regs->pc = (unsigned long) ka->sa.sa_handler;
+	regs->pc = (regs->pc & NEFF_SIGN) ? (regs->pc | NEFF_MASK) : regs->pc;
+
+	set_fs(USER_DS);
+
+#if DEBUG_SIG
+	/* Broken %016Lx */
+	printk("SIG deliver (#%d,%s:%d): sp=%p pc=%08Lx%08Lx link=%08Lx%08Lx\n",
+		signal,
+		current->comm, current->pid, frame,
+		regs->pc >> 32, regs->pc & 0xffffffff,
+		DEREF_REG_PR >> 32, DEREF_REG_PR & 0xffffffff);
+#endif
+
+	return;
+
+give_sigsegv:
+	force_sigsegv(sig, current);
+}
+
+static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+			   sigset_t *set, struct pt_regs *regs)
+{
+	struct rt_sigframe __user *frame;
+	int err = 0;
+	int signal;
+
+	frame = get_sigframe(ka, regs->regs[REG_SP], sizeof(*frame));
+
+	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
+		goto give_sigsegv;
+
+	signal = current_thread_info()->exec_domain
+		&& current_thread_info()->exec_domain->signal_invmap
+		&& sig < 32
+		? current_thread_info()->exec_domain->signal_invmap[sig]
+		: sig;
+
+	err |= __put_user(&frame->info, &frame->pinfo);
+	err |= __put_user(&frame->uc, &frame->puc);
+	err |= copy_siginfo_to_user(&frame->info, info);
+
+	/* Give up earlier as i386, in case */
+	if (err)
+		goto give_sigsegv;
+
+	/* Create the ucontext.  */
+	err |= __put_user(0, &frame->uc.uc_flags);
+	err |= __put_user(0, &frame->uc.uc_link);
+	err |= __put_user((void *)current->sas_ss_sp,
+			  &frame->uc.uc_stack.ss_sp);
+	err |= __put_user(sas_ss_flags(regs->regs[REG_SP]),
+			  &frame->uc.uc_stack.ss_flags);
+	err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
+	err |= setup_sigcontext(&frame->uc.uc_mcontext,
+			        regs, set->sig[0]);
+	err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
+
+	/* Give up earlier as i386, in case */
+	if (err)
+		goto give_sigsegv;
+
+	/* Set up to return from userspace.  If provided, use a stub
+	   already in userspace.  */
+	if (ka->sa.sa_flags & SA_RESTORER) {
+		DEREF_REG_PR = (unsigned long) ka->sa.sa_restorer | 0x1;
+
+		/*
+		 * On SH5 all edited pointers are subject to NEFF
+		 */
+		DEREF_REG_PR = (DEREF_REG_PR & NEFF_SIGN) ?
+        		 	(DEREF_REG_PR | NEFF_MASK) : DEREF_REG_PR;
+	} else {
+		/*
+		 * Different approach on SH5.
+	         * . Endianness independent asm code gets placed in entry.S .
+		 *   This is limited to four ASM instructions corresponding
+		 *   to two long longs in size.
+		 * . err checking is done on the else branch only
+		 * . flush_icache_range() is called upon __put_user() only
+		 * . all edited pointers are subject to NEFF
+		 * . being code, linker turns ShMedia bit on, always
+		 *   dereference index -1.
+		 */
+
+		DEREF_REG_PR = (unsigned long) frame->retcode | 0x01;
+		DEREF_REG_PR = (DEREF_REG_PR & NEFF_SIGN) ?
+        		 	(DEREF_REG_PR | NEFF_MASK) : DEREF_REG_PR;
+
+		if (__copy_to_user(frame->retcode,
+			(unsigned long long)sa_default_rt_restorer & (~1), 16) != 0)
+			goto give_sigsegv;
+
+		flush_icache_range(DEREF_REG_PR-1, DEREF_REG_PR-1+15);
+	}
+
+	/*
+	 * Set up registers for signal handler.
+	 * All edited pointers are subject to NEFF.
+	 */
+	regs->regs[REG_SP] = (unsigned long) frame;
+	regs->regs[REG_SP] = (regs->regs[REG_SP] & NEFF_SIGN) ?
+        		 (regs->regs[REG_SP] | NEFF_MASK) : regs->regs[REG_SP];
+	regs->regs[REG_ARG1] = signal; /* Arg for signal handler */
+	regs->regs[REG_ARG2] = (unsigned long long)(unsigned long)(signed long)&frame->info;
+	regs->regs[REG_ARG3] = (unsigned long long)(unsigned long)(signed long)&frame->uc.uc_mcontext;
+	regs->pc = (unsigned long) ka->sa.sa_handler;
+	regs->pc = (regs->pc & NEFF_SIGN) ? (regs->pc | NEFF_MASK) : regs->pc;
+
+	set_fs(USER_DS);
+
+#if DEBUG_SIG
+	/* Broken %016Lx */
+	printk("SIG deliver (#%d,%s:%d): sp=%p pc=%08Lx%08Lx link=%08Lx%08Lx\n",
+		signal,
+		current->comm, current->pid, frame,
+		regs->pc >> 32, regs->pc & 0xffffffff,
+		DEREF_REG_PR >> 32, DEREF_REG_PR & 0xffffffff);
+#endif
+
+	return;
+
+give_sigsegv:
+	force_sigsegv(sig, current);
+}
+
+/*
+ * OK, we're invoking a handler
+ */
+
+static void
+handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
+		sigset_t *oldset, struct pt_regs * regs)
+{
+	/* Are we from a system call? */
+	if (regs->syscall_nr >= 0) {
+		/* If so, check system call restarting.. */
+		switch (regs->regs[REG_RET]) {
+			case -ERESTARTNOHAND:
+				regs->regs[REG_RET] = -EINTR;
+				break;
+
+			case -ERESTARTSYS:
+				if (!(ka->sa.sa_flags & SA_RESTART)) {
+					regs->regs[REG_RET] = -EINTR;
+					break;
+				}
+			/* fallthrough */
+			case -ERESTARTNOINTR:
+				/* Decode syscall # */
+				regs->regs[REG_RET] = regs->syscall_nr;
+				regs->pc -= 4;
+		}
+	}
+
+	/* Set up the stack frame */
+	if (ka->sa.sa_flags & SA_SIGINFO)
+		setup_rt_frame(sig, ka, info, oldset, regs);
+	else
+		setup_frame(sig, ka, oldset, regs);
+
+	if (!(ka->sa.sa_flags & SA_NODEFER)) {
+		spin_lock_irq(&current->sighand->siglock);
+		sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+		sigaddset(&current->blocked,sig);
+		recalc_sigpending();
+		spin_unlock_irq(&current->sighand->siglock);
+	}
+}
+
+/*
+ * Note that 'init' is a special process: it doesn't get signals it doesn't
+ * want to handle. Thus you cannot kill init even with a SIGKILL even by
+ * mistake.
+ *
+ * Note that we go through the signals twice: once to check the signals that
+ * the kernel can handle, and then we build all the user-level signal handling
+ * stack-frames in one go after that.
+ */
+int do_signal(struct pt_regs *regs, sigset_t *oldset)
+{
+	siginfo_t info;
+	int signr;
+	struct k_sigaction ka;
+
+	/*
+	 * We want the common case to go fast, which
+	 * is why we may in certain cases get here from
+	 * kernel mode. Just return without doing anything
+	 * if so.
+	 */
+	if (!user_mode(regs))
+		return 1;
+
+	if (try_to_freeze(0))
+		goto no_signal;
+
+	if (!oldset)
+		oldset = &current->blocked;
+
+	signr = get_signal_to_deliver(&info, &ka, regs, 0);
+
+	if (signr > 0) {
+		/* Whee!  Actually deliver the signal.  */
+		handle_signal(signr, &info, &ka, oldset, regs);
+		return 1;
+	}
+
+no_signal:
+	/* Did we come from a system call? */
+	if (regs->syscall_nr >= 0) {
+		/* Restart the system call - no handlers present */
+		if (regs->regs[REG_RET] == -ERESTARTNOHAND ||
+		    regs->regs[REG_RET] == -ERESTARTSYS ||
+		    regs->regs[REG_RET] == -ERESTARTNOINTR) {
+			/* Decode Syscall # */
+			regs->regs[REG_RET] = regs->syscall_nr;
+			regs->pc -= 4;
+		}
+	}
+	return 0;
+}
diff --git a/arch/sh64/kernel/switchto.S b/arch/sh64/kernel/switchto.S
new file mode 100644
index 0000000..45b2d90
--- /dev/null
+++ b/arch/sh64/kernel/switchto.S
@@ -0,0 +1,198 @@
+/*
+ * arch/sh64/kernel/switchto.S
+ *
+ * sh64 context switch
+ *
+ * Copyright (C) 2004  Richard Curnow
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+*/
+
+	.section .text..SHmedia32,"ax"
+	.little
+
+	.balign 32
+
+	.type sh64_switch_to,@function
+	.global sh64_switch_to
+	.global __sh64_switch_to_end
+sh64_switch_to:
+
+/* Incoming args
+   r2 - prev
+   r3 - &prev->thread
+   r4 - next
+   r5 - &next->thread
+
+   Outgoing results
+   r2 - last (=prev) : this just stays in r2 throughout
+
+   Want to create a full (struct pt_regs) on the stack to allow backtracing
+   functions to work.  However, we only need to populate the callee-save
+   register slots in this structure; since we're a function our ancestors must
+   have themselves preserved all caller saved state in the stack.  This saves
+   some wasted effort since we won't need to look at the values.
+
+   In particular, all caller-save registers are immediately available for
+   scratch use.
+
+*/
+
+#define FRAME_SIZE (76*8 + 8)
+
+	movi	FRAME_SIZE, r0
+	sub.l	r15, r0, r15
+	! Do normal-style register save to support backtrace
+
+	st.l	r15,   0, r18	! save link reg
+	st.l	r15,   4, r14	! save fp
+	add.l	r15, r63, r14	! setup frame pointer
+
+	! hopefully this looks normal to the backtrace now.
+
+	addi.l	r15,   8, r1    ! base of pt_regs
+	addi.l	r1,   24, r0    ! base of pt_regs.regs
+	addi.l	r0, (63*8), r8	! base of pt_regs.trregs
+
+	/* Note : to be fixed?
+	   struct pt_regs is really designed for holding the state on entry
+	   to an exception, i.e. pc,sr,regs etc.  However, for the context
+	   switch state, some of this is not required.  But the unwinder takes
+	   struct pt_regs * as an arg so we have to build this structure
+	   to allow unwinding switched tasks in show_state() */
+
+	st.q	r0, ( 9*8), r9
+	st.q	r0, (10*8), r10
+	st.q	r0, (11*8), r11
+	st.q	r0, (12*8), r12
+	st.q	r0, (13*8), r13
+	st.q	r0, (14*8), r14 ! for unwind, want to look as though we took a trap at
+	! the point where the process is left in suspended animation, i.e. current
+	! fp here, not the saved one.
+	st.q	r0, (16*8), r16
+
+	st.q	r0, (24*8), r24
+	st.q	r0, (25*8), r25
+	st.q	r0, (26*8), r26
+	st.q	r0, (27*8), r27
+	st.q	r0, (28*8), r28
+	st.q	r0, (29*8), r29
+	st.q	r0, (30*8), r30
+	st.q	r0, (31*8), r31
+	st.q	r0, (32*8), r32
+	st.q	r0, (33*8), r33
+	st.q	r0, (34*8), r34
+	st.q	r0, (35*8), r35
+
+	st.q	r0, (44*8), r44
+	st.q	r0, (45*8), r45
+	st.q	r0, (46*8), r46
+	st.q	r0, (47*8), r47
+	st.q	r0, (48*8), r48
+	st.q	r0, (49*8), r49
+	st.q	r0, (50*8), r50
+	st.q	r0, (51*8), r51
+	st.q	r0, (52*8), r52
+	st.q	r0, (53*8), r53
+	st.q	r0, (54*8), r54
+	st.q	r0, (55*8), r55
+	st.q	r0, (56*8), r56
+	st.q	r0, (57*8), r57
+	st.q	r0, (58*8), r58
+	st.q	r0, (59*8), r59
+
+	! do this early as pta->gettr has no pipeline forwarding (=> 5 cycle latency)
+	! Use a local label to avoid creating a symbol that will confuse the !
+	! backtrace
+	pta	.Lsave_pc, tr0
+
+	gettr	tr5, r45
+	gettr	tr6, r46
+	gettr	tr7, r47
+	st.q	r8, (5*8), r45
+	st.q	r8, (6*8), r46
+	st.q	r8, (7*8), r47
+
+	! Now switch context
+	gettr	tr0, r9
+	st.l	r3, 0, r15	! prev->thread.sp
+	st.l	r3, 8, r1	! prev->thread.kregs
+	st.l	r3, 4, r9	! prev->thread.pc
+	st.q	r1, 0, r9	! save prev->thread.pc into pt_regs->pc
+
+	! Load PC for next task (init value or save_pc later)
+	ld.l	r5, 4, r18	! next->thread.pc
+	! Switch stacks
+	ld.l	r5, 0, r15	! next->thread.sp
+	ptabs	r18, tr0
+
+	! Update current
+	ld.l	r4, 4, r9	! next->thread_info (2nd element of next task_struct)
+	putcon	r9, kcr0	! current = next->thread_info
+
+	! go to save_pc for a reschedule, or the initial thread.pc for a new process
+	blink	tr0, r63
+
+	! Restore (when we come back to a previously saved task)
+.Lsave_pc:
+	addi.l	r15, 32, r0	! r0 = next's regs
+	addi.l	r0, (63*8), r8	! r8 = next's tr_regs
+
+	ld.q	r8, (5*8), r45
+	ld.q	r8, (6*8), r46
+	ld.q	r8, (7*8), r47
+	ptabs	r45, tr5
+	ptabs	r46, tr6
+	ptabs	r47, tr7
+
+	ld.q	r0, ( 9*8), r9
+	ld.q	r0, (10*8), r10
+	ld.q	r0, (11*8), r11
+	ld.q	r0, (12*8), r12
+	ld.q	r0, (13*8), r13
+	ld.q	r0, (14*8), r14
+	ld.q	r0, (16*8), r16
+
+	ld.q	r0, (24*8), r24
+	ld.q	r0, (25*8), r25
+	ld.q	r0, (26*8), r26
+	ld.q	r0, (27*8), r27
+	ld.q	r0, (28*8), r28
+	ld.q	r0, (29*8), r29
+	ld.q	r0, (30*8), r30
+	ld.q	r0, (31*8), r31
+	ld.q	r0, (32*8), r32
+	ld.q	r0, (33*8), r33
+	ld.q	r0, (34*8), r34
+	ld.q	r0, (35*8), r35
+
+	ld.q	r0, (44*8), r44
+	ld.q	r0, (45*8), r45
+	ld.q	r0, (46*8), r46
+	ld.q	r0, (47*8), r47
+	ld.q	r0, (48*8), r48
+	ld.q	r0, (49*8), r49
+	ld.q	r0, (50*8), r50
+	ld.q	r0, (51*8), r51
+	ld.q	r0, (52*8), r52
+	ld.q	r0, (53*8), r53
+	ld.q	r0, (54*8), r54
+	ld.q	r0, (55*8), r55
+	ld.q	r0, (56*8), r56
+	ld.q	r0, (57*8), r57
+	ld.q	r0, (58*8), r58
+	ld.q	r0, (59*8), r59
+
+	! epilogue
+	ld.l	r15, 0, r18
+	ld.l	r15, 4, r14
+	ptabs	r18, tr0
+	movi	FRAME_SIZE, r0
+	add	r15, r0, r15
+	blink	tr0, r63
+__sh64_switch_to_end:
+.LFE1:
+	.size	sh64_switch_to,.LFE1-sh64_switch_to
+
diff --git a/arch/sh64/kernel/sys_sh64.c b/arch/sh64/kernel/sys_sh64.c
new file mode 100644
index 0000000..4546845
--- /dev/null
+++ b/arch/sh64/kernel/sys_sh64.c
@@ -0,0 +1,300 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * arch/sh64/kernel/sys_sh64.c
+ *
+ * Copyright (C) 2000, 2001  Paolo Alberelli
+ *
+ * This file contains various random system calls that
+ * have a non-standard calling sequence on the Linux/SH5
+ * platform.
+ *
+ * Mostly taken from i386 version.
+ *
+ */
+
+#include <linux/errno.h>
+#include <linux/rwsem.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/sem.h>
+#include <linux/msg.h>
+#include <linux/shm.h>
+#include <linux/stat.h>
+#include <linux/mman.h>
+#include <linux/file.h>
+#include <linux/utsname.h>
+#include <linux/syscalls.h>
+#include <asm/uaccess.h>
+#include <asm/ipc.h>
+#include <asm/ptrace.h>
+
+#define REG_3	3
+
+/*
+ * sys_pipe() is the normal C calling standard for creating
+ * a pipe. It's not the way Unix traditionally does this, though.
+ */
+#ifdef NEW_PIPE_IMPLEMENTATION
+asmlinkage int sys_pipe(unsigned long * fildes,
+			unsigned long   dummy_r3,
+			unsigned long   dummy_r4,
+			unsigned long   dummy_r5,
+			unsigned long   dummy_r6,
+			unsigned long   dummy_r7,
+			struct pt_regs * regs)	   /* r8 = pt_regs  forced by entry.S */
+{
+	int fd[2];
+	int ret;
+
+	ret = do_pipe(fd);
+	if (ret == 0)
+		/*
+		 ***********************************************************************
+		 *   To avoid the copy_to_user we prefer to break the ABIs convention, *
+		 *   packing the valid pair of file IDs into a single register (r3);   *
+		 *   while r2 is the return code as defined by the sh5-ABIs.	       *
+		 *   BE CAREFUL: pipe stub, into glibc, must be aware of this solution *
+		 ***********************************************************************
+
+#ifdef __LITTLE_ENDIAN__
+		regs->regs[REG_3] = (((unsigned long long) fd[1]) << 32) | ((unsigned long long) fd[0]);
+#else
+		regs->regs[REG_3] = (((unsigned long long) fd[0]) << 32) | ((unsigned long long) fd[1]);
+#endif
+
+		*/
+	       /* although not very clever this is endianess independent */
+		regs->regs[REG_3] = (unsigned long long) *((unsigned long long *) fd);
+
+	return ret;
+}
+
+#else
+asmlinkage int sys_pipe(unsigned long * fildes)
+{
+        int fd[2];
+        int error;
+
+        error = do_pipe(fd);
+        if (!error) {
+                if (copy_to_user(fildes, fd, 2*sizeof(int)))
+                        error = -EFAULT;
+        }
+        return error;
+}
+
+#endif
+
+/*
+ * To avoid cache alias, we map the shard page with same color.
+ */
+#define COLOUR_ALIGN(addr)	(((addr)+SHMLBA-1)&~(SHMLBA-1))
+
+unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
+	unsigned long len, unsigned long pgoff, unsigned long flags)
+{
+	struct vm_area_struct *vma;
+
+	if (flags & MAP_FIXED) {
+		/* We do not accept a shared mapping if it would violate
+		 * cache aliasing constraints.
+		 */
+		if ((flags & MAP_SHARED) && (addr & (SHMLBA - 1)))
+			return -EINVAL;
+		return addr;
+	}
+
+	if (len > TASK_SIZE)
+		return -ENOMEM;
+	if (!addr)
+		addr = TASK_UNMAPPED_BASE;
+
+	if (flags & MAP_PRIVATE)
+		addr = PAGE_ALIGN(addr);
+	else
+		addr = COLOUR_ALIGN(addr);
+
+	for (vma = find_vma(current->mm, addr); ; vma = vma->vm_next) {
+		/* At this point:  (!vma || addr < vma->vm_end). */
+		if (TASK_SIZE - len < addr)
+			return -ENOMEM;
+		if (!vma || addr + len <= vma->vm_start)
+			return addr;
+		addr = vma->vm_end;
+		if (!(flags & MAP_PRIVATE))
+			addr = COLOUR_ALIGN(addr);
+	}
+}
+
+/* common code for old and new mmaps */
+static inline long do_mmap2(
+	unsigned long addr, unsigned long len,
+	unsigned long prot, unsigned long flags,
+	unsigned long fd, unsigned long pgoff)
+{
+	int error = -EBADF;
+	struct file * file = NULL;
+
+	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
+	if (!(flags & MAP_ANONYMOUS)) {
+		file = fget(fd);
+		if (!file)
+			goto out;
+	}
+
+	down_write(&current->mm->mmap_sem);
+	error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
+	up_write(&current->mm->mmap_sem);
+
+	if (file)
+		fput(file);
+out:
+	return error;
+}
+
+asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
+	unsigned long prot, unsigned long flags,
+	unsigned long fd, unsigned long pgoff)
+{
+	return do_mmap2(addr, len, prot, flags, fd, pgoff);
+}
+
+asmlinkage int old_mmap(unsigned long addr, unsigned long len,
+	unsigned long prot, unsigned long flags,
+	int fd, unsigned long off)
+{
+	if (off & ~PAGE_MASK)
+		return -EINVAL;
+	return do_mmap2(addr, len, prot, flags, fd, off>>PAGE_SHIFT);
+}
+
+/*
+ * sys_ipc() is the de-multiplexer for the SysV IPC calls..
+ *
+ * This is really horribly ugly.
+ */
+asmlinkage int sys_ipc(uint call, int first, int second,
+		       int third, void __user *ptr, long fifth)
+{
+	int version, ret;
+
+	version = call >> 16; /* hack for backward compatibility */
+	call &= 0xffff;
+
+	if (call <= SEMCTL)
+		switch (call) {
+		case SEMOP:
+			return sys_semtimedop(first, (struct sembuf __user *)ptr,
+					      second, NULL);
+		case SEMTIMEDOP:
+			return sys_semtimedop(first, (struct sembuf __user *)ptr,
+					      second,
+					      (const struct timespec __user *)fifth);
+		case SEMGET:
+			return sys_semget (first, second, third);
+		case SEMCTL: {
+			union semun fourth;
+			if (!ptr)
+				return -EINVAL;
+			if (get_user(fourth.__pad, (void * __user *) ptr))
+				return -EFAULT;
+			return sys_semctl (first, second, third, fourth);
+			}
+		default:
+			return -EINVAL;
+		}
+
+	if (call <= MSGCTL)
+		switch (call) {
+		case MSGSND:
+			return sys_msgsnd (first, (struct msgbuf __user *) ptr,
+					  second, third);
+		case MSGRCV:
+			switch (version) {
+			case 0: {
+				struct ipc_kludge tmp;
+				if (!ptr)
+					return -EINVAL;
+
+				if (copy_from_user(&tmp,
+						   (struct ipc_kludge __user *) ptr,
+						   sizeof (tmp)))
+					return -EFAULT;
+				return sys_msgrcv (first, tmp.msgp, second,
+						   tmp.msgtyp, third);
+				}
+			default:
+				return sys_msgrcv (first,
+						   (struct msgbuf __user *) ptr,
+						   second, fifth, third);
+			}
+		case MSGGET:
+			return sys_msgget ((key_t) first, second);
+		case MSGCTL:
+			return sys_msgctl (first, second,
+					   (struct msqid_ds __user *) ptr);
+		default:
+			return -EINVAL;
+		}
+	if (call <= SHMCTL)
+		switch (call) {
+		case SHMAT:
+			switch (version) {
+			default: {
+				ulong raddr;
+				ret = do_shmat (first, (char __user *) ptr,
+						 second, &raddr);
+				if (ret)
+					return ret;
+				return put_user (raddr, (ulong __user *) third);
+			}
+			case 1:	/* iBCS2 emulator entry point */
+				if (!segment_eq(get_fs(), get_ds()))
+					return -EINVAL;
+				return do_shmat (first, (char __user *) ptr,
+						  second, (ulong *) third);
+			}
+		case SHMDT:
+			return sys_shmdt ((char __user *)ptr);
+		case SHMGET:
+			return sys_shmget (first, second, third);
+		case SHMCTL:
+			return sys_shmctl (first, second,
+					   (struct shmid_ds __user *) ptr);
+		default:
+			return -EINVAL;
+		}
+
+	return -EINVAL;
+}
+
+asmlinkage int sys_uname(struct old_utsname * name)
+{
+	int err;
+	if (!name)
+		return -EFAULT;
+	down_read(&uts_sem);
+	err=copy_to_user(name, &system_utsname, sizeof (*name));
+	up_read(&uts_sem);
+	return err?-EFAULT:0;
+}
+
+/* Copy from mips version */
+asmlinkage long sys_shmatcall(int shmid, char __user *shmaddr,
+		int shmflg)
+{
+	unsigned long raddr;
+	int err;
+
+	err = do_shmat(shmid, shmaddr, shmflg, &raddr);
+	if (err)
+		return err;
+
+	err = raddr;
+	return err;
+}
diff --git a/arch/sh64/kernel/syscalls.S b/arch/sh64/kernel/syscalls.S
new file mode 100644
index 0000000..8ed417d
--- /dev/null
+++ b/arch/sh64/kernel/syscalls.S
@@ -0,0 +1,345 @@
+/*
+ * arch/sh64/kernel/syscalls.S
+ *
+ * Copyright (C) 2000, 2001  Paolo Alberelli
+ * Copyright (C) 2004  Paul Mundt
+ * Copyright (C) 2003, 2004 Richard Curnow
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/sys.h>
+
+	.section .data, "aw"
+	.balign 32
+
+/*
+ * System calls jump table
+ */
+	.globl  sys_call_table
+sys_call_table:
+	.long sys_ni_syscall		/* 0  -  old "setup()" system call  */
+	.long sys_exit
+	.long sys_fork
+	.long sys_read
+	.long sys_write
+	.long sys_open			/* 5 */
+	.long sys_close
+	.long sys_waitpid
+	.long sys_creat
+	.long sys_link
+	.long sys_unlink		/* 10 */
+	.long sys_execve
+	.long sys_chdir
+	.long sys_time
+	.long sys_mknod
+	.long sys_chmod			/* 15 */
+	.long sys_lchown16
+	.long sys_ni_syscall	/* old break syscall holder */
+	.long sys_stat
+	.long sys_lseek
+	.long sys_getpid		/* 20 */
+	.long sys_mount
+	.long sys_oldumount
+	.long sys_setuid16
+	.long sys_getuid16
+	.long sys_stime			/* 25 */
+	.long sys_ptrace
+	.long sys_alarm
+	.long sys_fstat
+	.long sys_pause
+	.long sys_utime			/* 30 */
+	.long sys_ni_syscall	/* old stty syscall holder */
+	.long sys_ni_syscall	/* old gtty syscall holder */
+	.long sys_access
+	.long sys_nice
+	.long sys_ni_syscall		/* 35 */ /* old ftime syscall holder */
+	.long sys_sync
+	.long sys_kill
+	.long sys_rename
+	.long sys_mkdir
+	.long sys_rmdir			/* 40 */
+	.long sys_dup
+	.long sys_pipe
+	.long sys_times
+	.long sys_ni_syscall	/* old prof syscall holder */
+	.long sys_brk			/* 45 */
+	.long sys_setgid16
+	.long sys_getgid16
+	.long sys_signal
+	.long sys_geteuid16
+	.long sys_getegid16		/* 50 */
+	.long sys_acct
+	.long sys_umount		/* recycled never used phys( */
+	.long sys_ni_syscall	/* old lock syscall holder */
+	.long sys_ioctl
+	.long sys_fcntl			/* 55 */
+	.long sys_ni_syscall	/* old mpx syscall holder */
+	.long sys_setpgid
+	.long sys_ni_syscall	/* old ulimit syscall holder */
+	.long sys_ni_syscall	/* sys_olduname */
+	.long sys_umask			/* 60 */
+	.long sys_chroot
+	.long sys_ustat
+	.long sys_dup2
+	.long sys_getppid
+	.long sys_getpgrp		/* 65 */
+	.long sys_setsid
+	.long sys_sigaction
+	.long sys_sgetmask
+	.long sys_ssetmask
+	.long sys_setreuid16		/* 70 */
+	.long sys_setregid16
+	.long sys_sigsuspend
+	.long sys_sigpending
+	.long sys_sethostname
+	.long sys_setrlimit		/* 75 */
+	.long sys_old_getrlimit
+	.long sys_getrusage
+	.long sys_gettimeofday
+	.long sys_settimeofday
+	.long sys_getgroups16		/* 80 */
+	.long sys_setgroups16
+	.long sys_ni_syscall	/* sys_oldselect */
+	.long sys_symlink
+	.long sys_lstat
+	.long sys_readlink		/* 85 */
+	.long sys_uselib
+	.long sys_swapon
+	.long sys_reboot
+	.long old_readdir
+	.long old_mmap			/* 90 */
+	.long sys_munmap
+	.long sys_truncate
+	.long sys_ftruncate
+	.long sys_fchmod
+	.long sys_fchown16		/* 95 */
+	.long sys_getpriority
+	.long sys_setpriority
+	.long sys_ni_syscall	/* old profil syscall holder */
+	.long sys_statfs
+	.long sys_fstatfs		/* 100 */
+	.long sys_ni_syscall	/* ioperm */
+	.long sys_socketcall	/* Obsolete implementation of socket syscall */
+	.long sys_syslog
+	.long sys_setitimer
+	.long sys_getitimer		/* 105 */
+	.long sys_newstat
+	.long sys_newlstat
+	.long sys_newfstat
+	.long sys_uname
+	.long sys_ni_syscall		/* 110 */ /* iopl */
+	.long sys_vhangup
+	.long sys_ni_syscall	/* idle */
+	.long sys_ni_syscall	/* vm86old */
+	.long sys_wait4
+	.long sys_swapoff		/* 115 */
+	.long sys_sysinfo
+	.long sys_ipc		/* Obsolete ipc syscall implementation */
+	.long sys_fsync
+	.long sys_sigreturn
+	.long sys_clone			/* 120 */
+	.long sys_setdomainname
+	.long sys_newuname
+	.long sys_ni_syscall	/* sys_modify_ldt */
+	.long sys_adjtimex
+	.long sys_mprotect		/* 125 */
+	.long sys_sigprocmask
+	.long sys_ni_syscall		/* old "create_module" */
+	.long sys_init_module
+	.long sys_delete_module
+	.long sys_ni_syscall		/* 130: old "get_kernel_syms" */
+	.long sys_quotactl
+	.long sys_getpgid
+	.long sys_fchdir
+	.long sys_bdflush
+	.long sys_sysfs			/* 135 */
+	.long sys_personality
+	.long sys_ni_syscall	/* for afs_syscall */
+	.long sys_setfsuid16
+	.long sys_setfsgid16
+	.long sys_llseek		/* 140 */
+	.long sys_getdents
+	.long sys_select
+	.long sys_flock
+	.long sys_msync
+	.long sys_readv			/* 145 */
+	.long sys_writev
+	.long sys_getsid
+	.long sys_fdatasync
+	.long sys_sysctl
+	.long sys_mlock			/* 150 */
+	.long sys_munlock
+	.long sys_mlockall
+	.long sys_munlockall
+	.long sys_sched_setparam
+	.long sys_sched_getparam	/* 155 */
+	.long sys_sched_setscheduler
+	.long sys_sched_getscheduler
+	.long sys_sched_yield
+	.long sys_sched_get_priority_max
+	.long sys_sched_get_priority_min  /* 160 */
+	.long sys_sched_rr_get_interval
+	.long sys_nanosleep
+	.long sys_mremap
+	.long sys_setresuid16
+	.long sys_getresuid16		/* 165 */
+	.long sys_ni_syscall	/* vm86 */
+	.long sys_ni_syscall	/* old "query_module" */
+	.long sys_poll
+	.long sys_nfsservctl
+	.long sys_setresgid16		/* 170 */
+	.long sys_getresgid16
+	.long sys_prctl
+	.long sys_rt_sigreturn
+	.long sys_rt_sigaction
+	.long sys_rt_sigprocmask	/* 175 */
+	.long sys_rt_sigpending
+	.long sys_rt_sigtimedwait
+	.long sys_rt_sigqueueinfo
+	.long sys_rt_sigsuspend
+	.long sys_pread64		/* 180 */
+	.long sys_pwrite64
+	.long sys_chown16
+	.long sys_getcwd
+	.long sys_capget
+	.long sys_capset		/* 185 */
+	.long sys_sigaltstack
+	.long sys_sendfile
+	.long sys_ni_syscall	/* streams1 */
+	.long sys_ni_syscall	/* streams2 */
+	.long sys_vfork			/* 190 */
+	.long sys_getrlimit
+	.long sys_mmap2
+	.long sys_truncate64
+	.long sys_ftruncate64
+	.long sys_stat64		/* 195 */
+	.long sys_lstat64
+	.long sys_fstat64
+	.long sys_lchown
+	.long sys_getuid
+	.long sys_getgid		/* 200 */
+	.long sys_geteuid
+	.long sys_getegid
+	.long sys_setreuid
+	.long sys_setregid
+	.long sys_getgroups		/* 205 */
+	.long sys_setgroups
+	.long sys_fchown
+	.long sys_setresuid
+	.long sys_getresuid
+	.long sys_setresgid		/* 210 */
+	.long sys_getresgid
+	.long sys_chown
+	.long sys_setuid
+	.long sys_setgid
+	.long sys_setfsuid		/* 215 */
+	.long sys_setfsgid
+	.long sys_pivot_root
+	.long sys_mincore
+	.long sys_madvise
+	/* Broken-out socket family (maintain backwards compatibility in syscall
+	   numbering with 2.4) */
+	.long sys_socket		/* 220 */
+	.long sys_bind
+	.long sys_connect
+	.long sys_listen
+	.long sys_accept
+	.long sys_getsockname		/* 225 */
+	.long sys_getpeername
+	.long sys_socketpair
+	.long sys_send
+	.long sys_sendto
+	.long sys_recv			/* 230*/
+	.long sys_recvfrom
+	.long sys_shutdown
+	.long sys_setsockopt
+	.long sys_getsockopt
+	.long sys_sendmsg		/* 235 */
+	.long sys_recvmsg
+	/* Broken-out IPC family (maintain backwards compatibility in syscall
+	   numbering with 2.4) */
+	.long sys_semop
+	.long sys_semget
+	.long sys_semctl
+	.long sys_msgsnd		/* 240 */
+	.long sys_msgrcv
+	.long sys_msgget
+	.long sys_msgctl
+	.long sys_shmatcall
+	.long sys_shmdt			/* 245 */
+	.long sys_shmget
+	.long sys_shmctl
+	/* Rest of syscalls listed in 2.4 i386 unistd.h */
+	.long sys_getdents64
+	.long sys_fcntl64
+	.long sys_ni_syscall		/* 250 reserved for TUX */
+	.long sys_ni_syscall		/* Reserved for Security */
+	.long sys_gettid
+	.long sys_readahead
+	.long sys_setxattr
+	.long sys_lsetxattr		/* 255 */
+	.long sys_fsetxattr
+	.long sys_getxattr
+	.long sys_lgetxattr
+	.long sys_fgetxattr
+	.long sys_listxattr		/* 260 */
+	.long sys_llistxattr
+	.long sys_flistxattr
+	.long sys_removexattr
+	.long sys_lremovexattr
+	.long sys_fremovexattr  	/* 265 */
+	.long sys_tkill
+	.long sys_sendfile64
+	.long sys_futex
+	.long sys_sched_setaffinity
+	.long sys_sched_getaffinity	/* 270 */
+	.long sys_ni_syscall
+	.long sys_ni_syscall
+	.long sys_io_setup
+	.long sys_io_destroy
+	.long sys_io_getevents		/* 275 */
+	.long sys_io_submit
+	.long sys_io_cancel
+	.long sys_fadvise64
+	.long sys_ni_syscall
+	.long sys_exit_group		/* 280 */
+	/* Rest of new 2.6 syscalls */
+	.long sys_lookup_dcookie
+	.long sys_epoll_create
+	.long sys_epoll_ctl
+	.long sys_epoll_wait
+ 	.long sys_remap_file_pages	/* 285 */
+ 	.long sys_set_tid_address
+ 	.long sys_timer_create
+ 	.long sys_timer_settime
+ 	.long sys_timer_gettime
+ 	.long sys_timer_getoverrun	/* 290 */
+ 	.long sys_timer_delete
+ 	.long sys_clock_settime
+ 	.long sys_clock_gettime
+ 	.long sys_clock_getres
+ 	.long sys_clock_nanosleep	/* 295 */
+	.long sys_statfs64
+	.long sys_fstatfs64
+	.long sys_tgkill
+	.long sys_utimes
+ 	.long sys_fadvise64_64		/* 300 */
+	.long sys_ni_syscall	/* Reserved for vserver */
+	.long sys_ni_syscall	/* Reserved for mbind */
+	.long sys_ni_syscall	/* get_mempolicy */
+	.long sys_ni_syscall	/* set_mempolicy */
+	.long sys_mq_open		/* 305 */
+	.long sys_mq_unlink
+	.long sys_mq_timedsend
+	.long sys_mq_timedreceive
+	.long sys_mq_notify
+	.long sys_mq_getsetattr		/* 310 */
+	.long sys_ni_syscall	/* Reserved for kexec */
+	.long sys_waitid
+	.long sys_add_key
+	.long sys_request_key
+	.long sys_keyctl		/* 315 */
+
diff --git a/arch/sh64/kernel/time.c b/arch/sh64/kernel/time.c
new file mode 100644
index 0000000..6c84da3
--- /dev/null
+++ b/arch/sh64/kernel/time.c
@@ -0,0 +1,610 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * arch/sh64/kernel/time.c
+ *
+ * Copyright (C) 2000, 2001  Paolo Alberelli
+ * Copyright (C) 2003, 2004  Paul Mundt
+ * Copyright (C) 2003  Richard Curnow
+ *
+ *    Original TMU/RTC code taken from sh version.
+ *    Copyright (C) 1999  Tetsuya Okada & Niibe Yutaka
+ *      Some code taken from i386 version.
+ *      Copyright (C) 1991, 1992, 1995  Linus Torvalds
+ */
+
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/rwsem.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/time.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/profile.h>
+#include <linux/smp.h>
+
+#include <asm/registers.h>	 /* required by inline __asm__ stmt. */
+
+#include <asm/processor.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/delay.h>
+
+#include <linux/timex.h>
+#include <linux/irq.h>
+#include <asm/hardware.h>
+
+#define TMU_TOCR_INIT	0x00
+#define TMU0_TCR_INIT	0x0020
+#define TMU_TSTR_INIT	1
+#define TMU_TSTR_OFF	0
+
+/* RCR1 Bits */
+#define RCR1_CF		0x80	/* Carry Flag             */
+#define RCR1_CIE	0x10	/* Carry Interrupt Enable */
+#define RCR1_AIE	0x08	/* Alarm Interrupt Enable */
+#define RCR1_AF		0x01	/* Alarm Flag             */
+
+/* RCR2 Bits */
+#define RCR2_PEF	0x80	/* PEriodic interrupt Flag */
+#define RCR2_PESMASK	0x70	/* Periodic interrupt Set  */
+#define RCR2_RTCEN	0x08	/* ENable RTC              */
+#define RCR2_ADJ	0x04	/* ADJustment (30-second)  */
+#define RCR2_RESET	0x02	/* Reset bit               */
+#define RCR2_START	0x01	/* Start bit               */
+
+/* Clock, Power and Reset Controller */
+#define	CPRC_BLOCK_OFF	0x01010000
+#define CPRC_BASE	PHYS_PERIPHERAL_BLOCK + CPRC_BLOCK_OFF
+
+#define FRQCR		(cprc_base+0x0)
+#define WTCSR		(cprc_base+0x0018)
+#define STBCR		(cprc_base+0x0030)
+
+/* Time Management Unit */
+#define	TMU_BLOCK_OFF	0x01020000
+#define TMU_BASE	PHYS_PERIPHERAL_BLOCK + TMU_BLOCK_OFF
+#define TMU0_BASE	tmu_base + 0x8 + (0xc * 0x0)
+#define TMU1_BASE	tmu_base + 0x8 + (0xc * 0x1)
+#define TMU2_BASE	tmu_base + 0x8 + (0xc * 0x2)
+
+#define TMU_TOCR	tmu_base+0x0	/* Byte access */
+#define TMU_TSTR	tmu_base+0x4	/* Byte access */
+
+#define TMU0_TCOR	TMU0_BASE+0x0	/* Long access */
+#define TMU0_TCNT	TMU0_BASE+0x4	/* Long access */
+#define TMU0_TCR	TMU0_BASE+0x8	/* Word access */
+
+/* Real Time Clock */
+#define	RTC_BLOCK_OFF	0x01040000
+#define RTC_BASE	PHYS_PERIPHERAL_BLOCK + RTC_BLOCK_OFF
+
+#define R64CNT  	rtc_base+0x00
+#define RSECCNT 	rtc_base+0x04
+#define RMINCNT 	rtc_base+0x08
+#define RHRCNT  	rtc_base+0x0c
+#define RWKCNT  	rtc_base+0x10
+#define RDAYCNT 	rtc_base+0x14
+#define RMONCNT 	rtc_base+0x18
+#define RYRCNT  	rtc_base+0x1c	/* 16bit */
+#define RSECAR  	rtc_base+0x20
+#define RMINAR  	rtc_base+0x24
+#define RHRAR   	rtc_base+0x28
+#define RWKAR   	rtc_base+0x2c
+#define RDAYAR  	rtc_base+0x30
+#define RMONAR  	rtc_base+0x34
+#define RCR1    	rtc_base+0x38
+#define RCR2    	rtc_base+0x3c
+
+#ifndef BCD_TO_BIN
+#define BCD_TO_BIN(val) ((val)=((val)&15) + ((val)>>4)*10)
+#endif
+
+#ifndef BIN_TO_BCD
+#define BIN_TO_BCD(val) ((val)=(((val)/10)<<4) + (val)%10)
+#endif
+
+#define TICK_SIZE (tick_nsec / 1000)
+
+extern unsigned long wall_jiffies;
+
+u64 jiffies_64 = INITIAL_JIFFIES;
+
+static unsigned long tmu_base, rtc_base;
+unsigned long cprc_base;
+
+/* Variables to allow interpolation of time of day to resolution better than a
+ * jiffy. */
+
+/* This is effectively protected by xtime_lock */
+static unsigned long ctc_last_interrupt;
+static unsigned long long usecs_per_jiffy = 1000000/HZ; /* Approximation */
+
+#define CTC_JIFFY_SCALE_SHIFT 40
+
+/* 2**CTC_JIFFY_SCALE_SHIFT / ctc_ticks_per_jiffy */
+static unsigned long long scaled_recip_ctc_ticks_per_jiffy;
+
+/* Estimate number of microseconds that have elapsed since the last timer tick,
+   by scaling the delta that has occured in the CTC register.
+
+   WARNING WARNING WARNING : This algorithm relies on the CTC decrementing at
+   the CPU clock rate.  If the CPU sleeps, the CTC stops counting.  Bear this
+   in mind if enabling SLEEP_WORKS in process.c.  In that case, this algorithm
+   probably needs to use TMU.TCNT0 instead.  This will work even if the CPU is
+   sleeping, though will be coarser.
+
+   FIXME : What if usecs_per_tick is moving around too much, e.g. if an adjtime
+   is running or if the freq or tick arguments of adjtimex are modified after
+   we have calibrated the scaling factor?  This will result in either a jump at
+   the end of a tick period, or a wrap backwards at the start of the next one,
+   if the application is reading the time of day often enough.  I think we
+   ought to do better than this.  For this reason, usecs_per_jiffy is left
+   separated out in the calculation below.  This allows some future hook into
+   the adjtime-related stuff in kernel/timer.c to remove this hazard.
+
+*/
+
+static unsigned long usecs_since_tick(void)
+{
+	unsigned long long current_ctc;
+	long ctc_ticks_since_interrupt;
+	unsigned long long ull_ctc_ticks_since_interrupt;
+	unsigned long result;
+
+	unsigned long long mul1_out;
+	unsigned long long mul1_out_high;
+	unsigned long long mul2_out_low, mul2_out_high;
+
+	/* Read CTC register */
+	asm ("getcon cr62, %0" : "=r" (current_ctc));
+	/* Note, the CTC counts down on each CPU clock, not up.
+	   Note(2), use long type to get correct wraparound arithmetic when
+	   the counter crosses zero. */
+	ctc_ticks_since_interrupt = (long) ctc_last_interrupt - (long) current_ctc;
+	ull_ctc_ticks_since_interrupt = (unsigned long long) ctc_ticks_since_interrupt;
+
+	/* Inline assembly to do 32x32x32->64 multiplier */
+	asm volatile ("mulu.l %1, %2, %0" :
+	     "=r" (mul1_out) :
+	     "r" (ull_ctc_ticks_since_interrupt), "r" (usecs_per_jiffy));
+
+	mul1_out_high = mul1_out >> 32;
+
+	asm volatile ("mulu.l %1, %2, %0" :
+	     "=r" (mul2_out_low) :
+	     "r" (mul1_out), "r" (scaled_recip_ctc_ticks_per_jiffy));
+
+#if 1
+	asm volatile ("mulu.l %1, %2, %0" :
+	     "=r" (mul2_out_high) :
+	     "r" (mul1_out_high), "r" (scaled_recip_ctc_ticks_per_jiffy));
+#endif
+
+	result = (unsigned long) (((mul2_out_high << 32) + mul2_out_low) >> CTC_JIFFY_SCALE_SHIFT);
+
+	return result;
+}
+
+void do_gettimeofday(struct timeval *tv)
+{
+	unsigned long flags;
+	unsigned long seq;
+	unsigned long usec, sec;
+
+	do {
+		seq = read_seqbegin_irqsave(&xtime_lock, flags);
+		usec = usecs_since_tick();
+		{
+			unsigned long lost = jiffies - wall_jiffies;
+
+			if (lost)
+				usec += lost * (1000000 / HZ);
+		}
+
+		sec = xtime.tv_sec;
+		usec += xtime.tv_nsec / 1000;
+	} while (read_seqretry_irqrestore(&xtime_lock, seq, flags));
+
+	while (usec >= 1000000) {
+		usec -= 1000000;
+		sec++;
+	}
+
+	tv->tv_sec = sec;
+	tv->tv_usec = usec;
+}
+
+int do_settimeofday(struct timespec *tv)
+{
+	time_t wtm_sec, sec = tv->tv_sec;
+	long wtm_nsec, nsec = tv->tv_nsec;
+
+	if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
+		return -EINVAL;
+
+	write_seqlock_irq(&xtime_lock);
+	/*
+	 * This is revolting. We need to set "xtime" correctly. However, the
+	 * value in this location is the value at the most recent update of
+	 * wall time.  Discover what correction gettimeofday() would have
+	 * made, and then undo it!
+	 */
+	nsec -= 1000 * (usecs_since_tick() +
+				(jiffies - wall_jiffies) * (1000000 / HZ));
+
+	wtm_sec  = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
+	wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
+
+	set_normalized_timespec(&xtime, sec, nsec);
+	set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
+
+	time_adjust = 0;		/* stop active adjtime() */
+	time_status |= STA_UNSYNC;
+	time_maxerror = NTP_PHASE_LIMIT;
+	time_esterror = NTP_PHASE_LIMIT;
+	write_sequnlock_irq(&xtime_lock);
+	clock_was_set();
+
+	return 0;
+}
+
+static int set_rtc_time(unsigned long nowtime)
+{
+	int retval = 0;
+	int real_seconds, real_minutes, cmos_minutes;
+
+	ctrl_outb(RCR2_RESET, RCR2);  /* Reset pre-scaler & stop RTC */
+
+	cmos_minutes = ctrl_inb(RMINCNT);
+	BCD_TO_BIN(cmos_minutes);
+
+	/*
+	 * since we're only adjusting minutes and seconds,
+	 * don't interfere with hour overflow. This avoids
+	 * messing with unknown time zones but requires your
+	 * RTC not to be off by more than 15 minutes
+	 */
+	real_seconds = nowtime % 60;
+	real_minutes = nowtime / 60;
+	if (((abs(real_minutes - cmos_minutes) + 15)/30) & 1)
+		real_minutes += 30;	/* correct for half hour time zone */
+	real_minutes %= 60;
+
+	if (abs(real_minutes - cmos_minutes) < 30) {
+		BIN_TO_BCD(real_seconds);
+		BIN_TO_BCD(real_minutes);
+		ctrl_outb(real_seconds, RSECCNT);
+		ctrl_outb(real_minutes, RMINCNT);
+	} else {
+		printk(KERN_WARNING
+		       "set_rtc_time: can't update from %d to %d\n",
+		       cmos_minutes, real_minutes);
+		retval = -1;
+	}
+
+	ctrl_outb(RCR2_RTCEN|RCR2_START, RCR2);  /* Start RTC */
+
+	return retval;
+}
+
+/* last time the RTC clock got updated */
+static long last_rtc_update = 0;
+
+/*
+ * timer_interrupt() needs to keep up the real-time clock,
+ * as well as call the "do_timer()" routine every clocktick
+ */
+static inline void do_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+	unsigned long long current_ctc;
+	asm ("getcon cr62, %0" : "=r" (current_ctc));
+	ctc_last_interrupt = (unsigned long) current_ctc;
+
+	do_timer(regs);
+#ifndef CONFIG_SMP
+	update_process_times(user_mode(regs));
+#endif
+	profile_tick(CPU_PROFILING, regs);
+
+#ifdef CONFIG_HEARTBEAT
+	{
+		extern void heartbeat(void);
+
+		heartbeat();
+	}
+#endif
+
+	/*
+	 * If we have an externally synchronized Linux clock, then update
+	 * RTC clock accordingly every ~11 minutes. Set_rtc_mmss() has to be
+	 * called as close as possible to 500 ms before the new second starts.
+	 */
+	if ((time_status & STA_UNSYNC) == 0 &&
+	    xtime.tv_sec > last_rtc_update + 660 &&
+	    (xtime.tv_nsec / 1000) >= 500000 - ((unsigned) TICK_SIZE) / 2 &&
+	    (xtime.tv_nsec / 1000) <= 500000 + ((unsigned) TICK_SIZE) / 2) {
+		if (set_rtc_time(xtime.tv_sec) == 0)
+			last_rtc_update = xtime.tv_sec;
+		else
+			last_rtc_update = xtime.tv_sec - 600; /* do it again in 60 s */
+	}
+}
+
+/*
+ * This is the same as the above, except we _also_ save the current
+ * Time Stamp Counter value at the time of the timer interrupt, so that
+ * we later on can estimate the time of day more exactly.
+ */
+static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+	unsigned long timer_status;
+
+	/* Clear UNF bit */
+	timer_status = ctrl_inw(TMU0_TCR);
+	timer_status &= ~0x100;
+	ctrl_outw(timer_status, TMU0_TCR);
+
+	/*
+	 * Here we are in the timer irq handler. We just have irqs locally
+	 * disabled but we don't know if the timer_bh is running on the other
+	 * CPU. We need to avoid to SMP race with it. NOTE: we don' t need
+	 * the irq version of write_lock because as just said we have irq
+	 * locally disabled. -arca
+	 */
+	write_lock(&xtime_lock);
+	do_timer_interrupt(irq, NULL, regs);
+	write_unlock(&xtime_lock);
+
+	return IRQ_HANDLED;
+}
+
+static unsigned long get_rtc_time(void)
+{
+	unsigned int sec, min, hr, wk, day, mon, yr, yr100;
+
+ again:
+	do {
+		ctrl_outb(0, RCR1);  /* Clear CF-bit */
+		sec = ctrl_inb(RSECCNT);
+		min = ctrl_inb(RMINCNT);
+		hr  = ctrl_inb(RHRCNT);
+		wk  = ctrl_inb(RWKCNT);
+		day = ctrl_inb(RDAYCNT);
+		mon = ctrl_inb(RMONCNT);
+		yr  = ctrl_inw(RYRCNT);
+		yr100 = (yr >> 8);
+		yr &= 0xff;
+	} while ((ctrl_inb(RCR1) & RCR1_CF) != 0);
+
+	BCD_TO_BIN(yr100);
+	BCD_TO_BIN(yr);
+	BCD_TO_BIN(mon);
+	BCD_TO_BIN(day);
+	BCD_TO_BIN(hr);
+	BCD_TO_BIN(min);
+	BCD_TO_BIN(sec);
+
+	if (yr > 99 || mon < 1 || mon > 12 || day > 31 || day < 1 ||
+	    hr > 23 || min > 59 || sec > 59) {
+		printk(KERN_ERR
+		       "SH RTC: invalid value, resetting to 1 Jan 2000\n");
+		ctrl_outb(RCR2_RESET, RCR2);  /* Reset & Stop */
+		ctrl_outb(0, RSECCNT);
+		ctrl_outb(0, RMINCNT);
+		ctrl_outb(0, RHRCNT);
+		ctrl_outb(6, RWKCNT);
+		ctrl_outb(1, RDAYCNT);
+		ctrl_outb(1, RMONCNT);
+		ctrl_outw(0x2000, RYRCNT);
+		ctrl_outb(RCR2_RTCEN|RCR2_START, RCR2);  /* Start */
+		goto again;
+	}
+
+	return mktime(yr100 * 100 + yr, mon, day, hr, min, sec);
+}
+
+static __init unsigned int get_cpu_hz(void)
+{
+	unsigned int count;
+	unsigned long __dummy;
+	unsigned long ctc_val_init, ctc_val;
+
+	/*
+	** Regardless the toolchain, force the compiler to use the
+	** arbitrary register r3 as a clock tick counter.
+	** NOTE: r3 must be in accordance with rtc_interrupt()
+	*/
+	register unsigned long long  __rtc_irq_flag __asm__ ("r3");
+
+	local_irq_enable();
+	do {} while (ctrl_inb(R64CNT) != 0);
+	ctrl_outb(RCR1_CIE, RCR1); /* Enable carry interrupt */
+
+	/*
+	 * r3 is arbitrary. CDC does not support "=z".
+	 */
+	ctc_val_init = 0xffffffff;
+	ctc_val = ctc_val_init;
+
+	asm volatile("gettr	tr0, %1\n\t"
+		     "putcon	%0, " __CTC "\n\t"
+		     "and	%2, r63, %2\n\t"
+		     "pta	$+4, tr0\n\t"
+		     "beq/l	%2, r63, tr0\n\t"
+		     "ptabs	%1, tr0\n\t"
+		     "getcon	" __CTC ", %0\n\t"
+		: "=r"(ctc_val), "=r" (__dummy), "=r" (__rtc_irq_flag)
+		: "0" (0));
+	local_irq_disable();
+	/*
+	 * SH-3:
+	 * CPU clock = 4 stages * loop
+	 * tst    rm,rm      if id ex
+	 * bt/s   1b            if id ex
+	 * add    #1,rd            if id ex
+         *                            (if) pipe line stole
+	 * tst    rm,rm                  if id ex
+         * ....
+	 *
+	 *
+	 * SH-4:
+	 * CPU clock = 6 stages * loop
+	 * I don't know why.
+         * ....
+	 *
+	 * SH-5:
+	 * Use CTC register to count.  This approach returns the right value
+	 * even if the I-cache is disabled (e.g. whilst debugging.)
+	 *
+	 */
+
+	count = ctc_val_init - ctc_val; /* CTC counts down */
+
+#if defined (CONFIG_SH_SIMULATOR)
+	/*
+	 * Let's pretend we are a 5MHz SH-5 to avoid a too
+	 * little timer interval. Also to keep delay
+	 * calibration within a reasonable time.
+	 */
+	return 5000000;
+#else
+	/*
+	 * This really is count by the number of clock cycles
+         * by the ratio between a complete R64CNT
+         * wrap-around (128) and CUI interrupt being raised (64).
+	 */
+	return count*2;
+#endif
+}
+
+static irqreturn_t rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+	ctrl_outb(0, RCR1);	/* Disable Carry Interrupts */
+	regs->regs[3] = 1;	/* Using r3 */
+
+	return IRQ_HANDLED;
+}
+
+static struct irqaction irq0  = { timer_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "timer", NULL, NULL};
+static struct irqaction irq1  = { rtc_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "rtc", NULL, NULL};
+
+void __init time_init(void)
+{
+	unsigned int cpu_clock, master_clock, bus_clock, module_clock;
+	unsigned long interval;
+	unsigned long frqcr, ifc, pfc;
+	static int ifc_table[] = { 2, 4, 6, 8, 10, 12, 16, 24 };
+#define bfc_table ifc_table	/* Same */
+#define pfc_table ifc_table	/* Same */
+
+	tmu_base = onchip_remap(TMU_BASE, 1024, "TMU");
+	if (!tmu_base) {
+		panic("Unable to remap TMU\n");
+	}
+
+	rtc_base = onchip_remap(RTC_BASE, 1024, "RTC");
+	if (!rtc_base) {
+		panic("Unable to remap RTC\n");
+	}
+
+	cprc_base = onchip_remap(CPRC_BASE, 1024, "CPRC");
+	if (!cprc_base) {
+		panic("Unable to remap CPRC\n");
+	}
+
+	xtime.tv_sec = get_rtc_time();
+	xtime.tv_nsec = 0;
+
+	setup_irq(TIMER_IRQ, &irq0);
+	setup_irq(RTC_IRQ, &irq1);
+
+	/* Check how fast it is.. */
+	cpu_clock = get_cpu_hz();
+
+	/* Note careful order of operations to maintain reasonable precision and avoid overflow. */
+	scaled_recip_ctc_ticks_per_jiffy = ((1ULL << CTC_JIFFY_SCALE_SHIFT) / (unsigned long long)(cpu_clock / HZ));
+
+	disable_irq(RTC_IRQ);
+
+	printk("CPU clock: %d.%02dMHz\n",
+	       (cpu_clock / 1000000), (cpu_clock % 1000000)/10000);
+	{
+		unsigned short bfc;
+		frqcr = ctrl_inl(FRQCR);
+		ifc  = ifc_table[(frqcr>> 6) & 0x0007];
+		bfc  = bfc_table[(frqcr>> 3) & 0x0007];
+		pfc  = pfc_table[(frqcr>> 12) & 0x0007];
+		master_clock = cpu_clock * ifc;
+		bus_clock = master_clock/bfc;
+	}
+
+	printk("Bus clock: %d.%02dMHz\n",
+	       (bus_clock/1000000), (bus_clock % 1000000)/10000);
+	module_clock = master_clock/pfc;
+	printk("Module clock: %d.%02dMHz\n",
+	       (module_clock/1000000), (module_clock % 1000000)/10000);
+	interval = (module_clock/(HZ*4));
+
+	printk("Interval = %ld\n", interval);
+
+	current_cpu_data.cpu_clock    = cpu_clock;
+	current_cpu_data.master_clock = master_clock;
+	current_cpu_data.bus_clock    = bus_clock;
+	current_cpu_data.module_clock = module_clock;
+
+	/* Start TMU0 */
+	ctrl_outb(TMU_TSTR_OFF, TMU_TSTR);
+	ctrl_outb(TMU_TOCR_INIT, TMU_TOCR);
+	ctrl_outw(TMU0_TCR_INIT, TMU0_TCR);
+	ctrl_outl(interval, TMU0_TCOR);
+	ctrl_outl(interval, TMU0_TCNT);
+	ctrl_outb(TMU_TSTR_INIT, TMU_TSTR);
+}
+
+void enter_deep_standby(void)
+{
+	/* Disable watchdog timer */
+	ctrl_outl(0xa5000000, WTCSR);
+	/* Configure deep standby on sleep */
+	ctrl_outl(0x03, STBCR);
+
+#ifdef CONFIG_SH_ALPHANUMERIC
+	{
+		extern void mach_alphanum(int position, unsigned char value);
+		extern void mach_alphanum_brightness(int setting);
+		char halted[] = "Halted. ";
+		int i;
+		mach_alphanum_brightness(6); /* dimmest setting above off */
+		for (i=0; i<8; i++) {
+			mach_alphanum(i, halted[i]);
+		}
+		asm __volatile__ ("synco");
+	}
+#endif
+
+	asm __volatile__ ("sleep");
+	asm __volatile__ ("synci");
+	asm __volatile__ ("nop");
+	asm __volatile__ ("nop");
+	asm __volatile__ ("nop");
+	asm __volatile__ ("nop");
+	panic("Unexpected wakeup!\n");
+}
+
+/*
+ * Scheduler clock - returns current time in nanosec units.
+ */
+unsigned long long sched_clock(void)
+{
+	return (unsigned long long)jiffies * (1000000000 / HZ);
+}
+
diff --git a/arch/sh64/kernel/traps.c b/arch/sh64/kernel/traps.c
new file mode 100644
index 0000000..224b7f5
--- /dev/null
+++ b/arch/sh64/kernel/traps.c
@@ -0,0 +1,961 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * arch/sh64/kernel/traps.c
+ *
+ * Copyright (C) 2000, 2001  Paolo Alberelli
+ * Copyright (C) 2003, 2004  Paul Mundt
+ * Copyright (C) 2003, 2004  Richard Curnow
+ *
+ */
+
+/*
+ * 'Traps.c' handles hardware traps and faults after we have saved some
+ * state in 'entry.S'.
+ */
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/ptrace.h>
+#include <linux/timer.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/spinlock.h>
+#include <linux/kallsyms.h>
+#include <linux/interrupt.h>
+#include <linux/sysctl.h>
+#include <linux/module.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/atomic.h>
+#include <asm/processor.h>
+#include <asm/pgtable.h>
+
+#undef DEBUG_EXCEPTION
+#ifdef DEBUG_EXCEPTION
+/* implemented in ../lib/dbg.c */
+extern void show_excp_regs(char *fname, int trapnr, int signr,
+			   struct pt_regs *regs);
+#else
+#define show_excp_regs(a, b, c, d)
+#endif
+
+static void do_unhandled_exception(int trapnr, int signr, char *str, char *fn_name,
+		unsigned long error_code, struct pt_regs *regs, struct task_struct *tsk);
+
+#define DO_ERROR(trapnr, signr, str, name, tsk) \
+asmlinkage void do_##name(unsigned long error_code, struct pt_regs *regs) \
+{ \
+	do_unhandled_exception(trapnr, signr, str, __stringify(name), error_code, regs, current); \
+}
+
+spinlock_t die_lock;
+
+void die(const char * str, struct pt_regs * regs, long err)
+{
+	console_verbose();
+	spin_lock_irq(&die_lock);
+	printk("%s: %lx\n", str, (err & 0xffffff));
+	show_regs(regs);
+	spin_unlock_irq(&die_lock);
+	do_exit(SIGSEGV);
+}
+
+static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err)
+{
+	if (!user_mode(regs))
+		die(str, regs, err);
+}
+
+static void die_if_no_fixup(const char * str, struct pt_regs * regs, long err)
+{
+	if (!user_mode(regs)) {
+		const struct exception_table_entry *fixup;
+		fixup = search_exception_tables(regs->pc);
+		if (fixup) {
+			regs->pc = fixup->fixup;
+			return;
+		}
+		die(str, regs, err);
+	}
+}
+
+DO_ERROR(13, SIGILL,  "illegal slot instruction", illegal_slot_inst, current)
+DO_ERROR(87, SIGSEGV, "address error (exec)", address_error_exec, current)
+
+
+/* Implement misaligned load/store handling for kernel (and optionally for user
+   mode too).  Limitation : only SHmedia mode code is handled - there is no
+   handling at all for misaligned accesses occurring in SHcompact code yet. */
+
+static int misaligned_fixup(struct pt_regs *regs);
+
+asmlinkage void do_address_error_load(unsigned long error_code, struct pt_regs *regs)
+{
+	if (misaligned_fixup(regs) < 0) {
+		do_unhandled_exception(7, SIGSEGV, "address error(load)",
+				"do_address_error_load",
+				error_code, regs, current);
+	}
+	return;
+}
+
+asmlinkage void do_address_error_store(unsigned long error_code, struct pt_regs *regs)
+{
+	if (misaligned_fixup(regs) < 0) {
+		do_unhandled_exception(8, SIGSEGV, "address error(store)",
+				"do_address_error_store",
+				error_code, regs, current);
+	}
+	return;
+}
+
+#if defined(CONFIG_SH64_ID2815_WORKAROUND)
+
+#define OPCODE_INVALID      0
+#define OPCODE_USER_VALID   1
+#define OPCODE_PRIV_VALID   2
+
+/* getcon/putcon - requires checking which control register is referenced. */
+#define OPCODE_CTRL_REG     3
+
+/* Table of valid opcodes for SHmedia mode.
+   Form a 10-bit value by concatenating the major/minor opcodes i.e.
+   opcode[31:26,20:16].  The 6 MSBs of this value index into the following
+   array.  The 4 LSBs select the bit-pair in the entry (bits 1:0 correspond to
+   LSBs==4'b0000 etc). */
+static unsigned long shmedia_opcode_table[64] = {
+	0x55554044,0x54445055,0x15141514,0x14541414,0x00000000,0x10001000,0x01110055,0x04050015,
+	0x00000444,0xc0000000,0x44545515,0x40405555,0x55550015,0x10005555,0x55555505,0x04050000,
+	0x00000555,0x00000404,0x00040445,0x15151414,0x00000000,0x00000000,0x00000000,0x00000000,
+	0x00000055,0x40404444,0x00000404,0xc0009495,0x00000000,0x00000000,0x00000000,0x00000000,
+	0x55555555,0x55555555,0x55555555,0x55555555,0x55555555,0x55555555,0x55555555,0x55555555,
+	0x55555555,0x55555555,0x55555555,0x55555555,0x55555555,0x55555555,0x55555555,0x55555555,
+	0x80005050,0x04005055,0x55555555,0x55555555,0x55555555,0x55555555,0x55555555,0x55555555,
+	0x81055554,0x00000404,0x55555555,0x55555555,0x00000000,0x00000000,0x00000000,0x00000000
+};
+
+void do_reserved_inst(unsigned long error_code, struct pt_regs *regs)
+{
+	/* Workaround SH5-101 cut2 silicon defect #2815 :
+	   in some situations, inter-mode branches from SHcompact -> SHmedia
+	   which should take ITLBMISS or EXECPROT exceptions at the target
+	   falsely take RESINST at the target instead. */
+
+	unsigned long opcode = 0x6ff4fff0; /* guaranteed reserved opcode */
+	unsigned long pc, aligned_pc;
+	int get_user_error;
+	int trapnr = 12;
+	int signr = SIGILL;
+	char *exception_name = "reserved_instruction";
+
+	pc = regs->pc;
+	if ((pc & 3) == 1) {
+		/* SHmedia : check for defect.  This requires executable vmas
+		   to be readable too. */
+		aligned_pc = pc & ~3;
+		if (!access_ok(VERIFY_READ, aligned_pc, sizeof(unsigned long))) {
+			get_user_error = -EFAULT;
+		} else {
+			get_user_error = __get_user(opcode, (unsigned long *)aligned_pc);
+		}
+		if (get_user_error >= 0) {
+			unsigned long index, shift;
+			unsigned long major, minor, combined;
+			unsigned long reserved_field;
+			reserved_field = opcode & 0xf; /* These bits are currently reserved as zero in all valid opcodes */
+			major = (opcode >> 26) & 0x3f;
+			minor = (opcode >> 16) & 0xf;
+			combined = (major << 4) | minor;
+			index = major;
+			shift = minor << 1;
+			if (reserved_field == 0) {
+				int opcode_state = (shmedia_opcode_table[index] >> shift) & 0x3;
+				switch (opcode_state) {
+					case OPCODE_INVALID:
+						/* Trap. */
+						break;
+					case OPCODE_USER_VALID:
+						/* Restart the instruction : the branch to the instruction will now be from an RTE
+						   not from SHcompact so the silicon defect won't be triggered. */
+						return;
+					case OPCODE_PRIV_VALID:
+						if (!user_mode(regs)) {
+							/* Should only ever get here if a module has
+							   SHcompact code inside it.  If so, the same fix up is needed. */
+							return; /* same reason */
+						}
+						/* Otherwise, user mode trying to execute a privileged instruction -
+						   fall through to trap. */
+						break;
+					case OPCODE_CTRL_REG:
+						/* If in privileged mode, return as above. */
+						if (!user_mode(regs)) return;
+						/* In user mode ... */
+						if (combined == 0x9f) { /* GETCON */
+							unsigned long regno = (opcode >> 20) & 0x3f;
+							if (regno >= 62) {
+								return;
+							}
+							/* Otherwise, reserved or privileged control register, => trap */
+						} else if (combined == 0x1bf) { /* PUTCON */
+							unsigned long regno = (opcode >> 4) & 0x3f;
+							if (regno >= 62) {
+								return;
+							}
+							/* Otherwise, reserved or privileged control register, => trap */
+						} else {
+							/* Trap */
+						}
+						break;
+					default:
+						/* Fall through to trap. */
+						break;
+				}
+			}
+			/* fall through to normal resinst processing */
+		} else {
+			/* Error trying to read opcode.  This typically means a
+			   real fault, not a RESINST any more.  So change the
+			   codes. */
+			trapnr = 87;
+			exception_name = "address error (exec)";
+			signr = SIGSEGV;
+		}
+	}
+
+	do_unhandled_exception(trapnr, signr, exception_name, "do_reserved_inst", error_code, regs, current);
+}
+
+#else /* CONFIG_SH64_ID2815_WORKAROUND */
+
+/* If the workaround isn't needed, this is just a straightforward reserved
+   instruction */
+DO_ERROR(12, SIGILL,  "reserved instruction", reserved_inst, current)
+
+#endif /* CONFIG_SH64_ID2815_WORKAROUND */
+
+
+#include <asm/system.h>
+
+/* Called with interrupts disabled */
+asmlinkage void do_exception_error(unsigned long ex, struct pt_regs *regs)
+{
+	PLS();
+	show_excp_regs(__FUNCTION__, -1, -1, regs);
+	die_if_kernel("exception", regs, ex);
+}
+
+int do_unknown_trapa(unsigned long scId, struct pt_regs *regs)
+{
+	/* Syscall debug */
+        printk("System call ID error: [0x1#args:8 #syscall:16  0x%lx]\n", scId);
+
+	die_if_kernel("unknown trapa", regs, scId);
+
+	return -ENOSYS;
+}
+
+void show_stack(struct task_struct *tsk, unsigned long *sp)
+{
+#ifdef CONFIG_KALLSYMS
+	extern void sh64_unwind(struct pt_regs *regs);
+	struct pt_regs *regs;
+
+	regs = tsk ? tsk->thread.kregs : NULL;
+
+	sh64_unwind(regs);
+#else
+	printk(KERN_ERR "Can't backtrace on sh64 without CONFIG_KALLSYMS\n");
+#endif
+}
+
+void show_task(unsigned long *sp)
+{
+	show_stack(NULL, sp);
+}
+
+void dump_stack(void)
+{
+	show_task(NULL);
+}
+/* Needed by any user of WARN_ON in view of the defn in include/asm-sh/bug.h */
+EXPORT_SYMBOL(dump_stack);
+
+static void do_unhandled_exception(int trapnr, int signr, char *str, char *fn_name,
+		unsigned long error_code, struct pt_regs *regs, struct task_struct *tsk)
+{
+	show_excp_regs(fn_name, trapnr, signr, regs);
+	tsk->thread.error_code = error_code;
+	tsk->thread.trap_no = trapnr;
+
+	if (user_mode(regs))
+		force_sig(signr, tsk);
+
+	die_if_no_fixup(str, regs, error_code);
+}
+
+static int read_opcode(unsigned long long pc, unsigned long *result_opcode, int from_user_mode)
+{
+	int get_user_error;
+	unsigned long aligned_pc;
+	unsigned long opcode;
+
+	if ((pc & 3) == 1) {
+		/* SHmedia */
+		aligned_pc = pc & ~3;
+		if (from_user_mode) {
+			if (!access_ok(VERIFY_READ, aligned_pc, sizeof(unsigned long))) {
+				get_user_error = -EFAULT;
+			} else {
+				get_user_error = __get_user(opcode, (unsigned long *)aligned_pc);
+				*result_opcode = opcode;
+			}
+			return get_user_error;
+		} else {
+			/* If the fault was in the kernel, we can either read
+			 * this directly, or if not, we fault.
+			*/
+			*result_opcode = *(unsigned long *) aligned_pc;
+			return 0;
+		}
+	} else if ((pc & 1) == 0) {
+		/* SHcompact */
+		/* TODO : provide handling for this.  We don't really support
+		   user-mode SHcompact yet, and for a kernel fault, this would
+		   have to come from a module built for SHcompact.  */
+		return -EFAULT;
+	} else {
+		/* misaligned */
+		return -EFAULT;
+	}
+}
+
+static int address_is_sign_extended(__u64 a)
+{
+	__u64 b;
+#if (NEFF == 32)
+	b = (__u64)(__s64)(__s32)(a & 0xffffffffUL);
+	return (b == a) ? 1 : 0;
+#else
+#error "Sign extend check only works for NEFF==32"
+#endif
+}
+
+static int generate_and_check_address(struct pt_regs *regs,
+				      __u32 opcode,
+				      int displacement_not_indexed,
+				      int width_shift,
+				      __u64 *address)
+{
+	/* return -1 for fault, 0 for OK */
+
+	__u64 base_address, addr;
+	int basereg;
+
+	basereg = (opcode >> 20) & 0x3f;
+	base_address = regs->regs[basereg];
+	if (displacement_not_indexed) {
+		__s64 displacement;
+		displacement = (opcode >> 10) & 0x3ff;
+		displacement = ((displacement << 54) >> 54); /* sign extend */
+		addr = (__u64)((__s64)base_address + (displacement << width_shift));
+	} else {
+		__u64 offset;
+		int offsetreg;
+		offsetreg = (opcode >> 10) & 0x3f;
+		offset = regs->regs[offsetreg];
+		addr = base_address + offset;
+	}
+
+	/* Check sign extended */
+	if (!address_is_sign_extended(addr)) {
+		return -1;
+	}
+
+#if defined(CONFIG_SH64_USER_MISALIGNED_FIXUP)
+	/* Check accessible.  For misaligned access in the kernel, assume the
+	   address is always accessible (and if not, just fault when the
+	   load/store gets done.) */
+	if (user_mode(regs)) {
+		if (addr >= TASK_SIZE) {
+			return -1;
+		}
+		/* Do access_ok check later - it depends on whether it's a load or a store. */
+	}
+#endif
+
+	*address = addr;
+	return 0;
+}
+
+/* Default value as for sh */
+#if defined(CONFIG_SH64_USER_MISALIGNED_FIXUP)
+static int user_mode_unaligned_fixup_count = 10;
+static int user_mode_unaligned_fixup_enable = 1;
+#endif
+
+static int kernel_mode_unaligned_fixup_count = 32;
+
+static void misaligned_kernel_word_load(__u64 address, int do_sign_extend, __u64 *result)
+{
+	unsigned short x;
+	unsigned char *p, *q;
+	p = (unsigned char *) (int) address;
+	q = (unsigned char *) &x;
+	q[0] = p[0];
+	q[1] = p[1];
+
+	if (do_sign_extend) {
+		*result = (__u64)(__s64) *(short *) &x;
+	} else {
+		*result = (__u64) x;
+	}
+}
+
+static void misaligned_kernel_word_store(__u64 address, __u64 value)
+{
+	unsigned short x;
+	unsigned char *p, *q;
+	p = (unsigned char *) (int) address;
+	q = (unsigned char *) &x;
+
+	x = (__u16) value;
+	p[0] = q[0];
+	p[1] = q[1];
+}
+
+static int misaligned_load(struct pt_regs *regs,
+			   __u32 opcode,
+			   int displacement_not_indexed,
+			   int width_shift,
+			   int do_sign_extend)
+{
+	/* Return -1 for a fault, 0 for OK */
+	int error;
+	int destreg;
+	__u64 address;
+
+	error = generate_and_check_address(regs, opcode,
+			displacement_not_indexed, width_shift, &address);
+	if (error < 0) {
+		return error;
+	}
+
+	destreg = (opcode >> 4) & 0x3f;
+#if defined(CONFIG_SH64_USER_MISALIGNED_FIXUP)
+	if (user_mode(regs)) {
+		__u64 buffer;
+
+		if (!access_ok(VERIFY_READ, (unsigned long) address, 1UL<<width_shift)) {
+			return -1;
+		}
+
+		if (__copy_user(&buffer, (const void *)(int)address, (1 << width_shift)) > 0) {
+			return -1; /* fault */
+		}
+		switch (width_shift) {
+		case 1:
+			if (do_sign_extend) {
+				regs->regs[destreg] = (__u64)(__s64) *(__s16 *) &buffer;
+			} else {
+				regs->regs[destreg] = (__u64) *(__u16 *) &buffer;
+			}
+			break;
+		case 2:
+			regs->regs[destreg] = (__u64)(__s64) *(__s32 *) &buffer;
+			break;
+		case 3:
+			regs->regs[destreg] = buffer;
+			break;
+		default:
+			printk("Unexpected width_shift %d in misaligned_load, PC=%08lx\n",
+				width_shift, (unsigned long) regs->pc);
+			break;
+		}
+	} else
+#endif
+	{
+		/* kernel mode - we can take short cuts since if we fault, it's a genuine bug */
+		__u64 lo, hi;
+
+		switch (width_shift) {
+		case 1:
+			misaligned_kernel_word_load(address, do_sign_extend, &regs->regs[destreg]);
+			break;
+		case 2:
+			asm ("ldlo.l %1, 0, %0" : "=r" (lo) : "r" (address));
+			asm ("ldhi.l %1, 3, %0" : "=r" (hi) : "r" (address));
+			regs->regs[destreg] = lo | hi;
+			break;
+		case 3:
+			asm ("ldlo.q %1, 0, %0" : "=r" (lo) : "r" (address));
+			asm ("ldhi.q %1, 7, %0" : "=r" (hi) : "r" (address));
+			regs->regs[destreg] = lo | hi;
+			break;
+
+		default:
+			printk("Unexpected width_shift %d in misaligned_load, PC=%08lx\n",
+				width_shift, (unsigned long) regs->pc);
+			break;
+		}
+	}
+
+	return 0;
+
+}
+
+static int misaligned_store(struct pt_regs *regs,
+			    __u32 opcode,
+			    int displacement_not_indexed,
+			    int width_shift)
+{
+	/* Return -1 for a fault, 0 for OK */
+	int error;
+	int srcreg;
+	__u64 address;
+
+	error = generate_and_check_address(regs, opcode,
+			displacement_not_indexed, width_shift, &address);
+	if (error < 0) {
+		return error;
+	}
+
+	srcreg = (opcode >> 4) & 0x3f;
+#if defined(CONFIG_SH64_USER_MISALIGNED_FIXUP)
+	if (user_mode(regs)) {
+		__u64 buffer;
+
+		if (!access_ok(VERIFY_WRITE, (unsigned long) address, 1UL<<width_shift)) {
+			return -1;
+		}
+
+		switch (width_shift) {
+		case 1:
+			*(__u16 *) &buffer = (__u16) regs->regs[srcreg];
+			break;
+		case 2:
+			*(__u32 *) &buffer = (__u32) regs->regs[srcreg];
+			break;
+		case 3:
+			buffer = regs->regs[srcreg];
+			break;
+		default:
+			printk("Unexpected width_shift %d in misaligned_store, PC=%08lx\n",
+				width_shift, (unsigned long) regs->pc);
+			break;
+		}
+
+		if (__copy_user((void *)(int)address, &buffer, (1 << width_shift)) > 0) {
+			return -1; /* fault */
+		}
+	} else
+#endif
+	{
+		/* kernel mode - we can take short cuts since if we fault, it's a genuine bug */
+		__u64 val = regs->regs[srcreg];
+
+		switch (width_shift) {
+		case 1:
+			misaligned_kernel_word_store(address, val);
+			break;
+		case 2:
+			asm ("stlo.l %1, 0, %0" : : "r" (val), "r" (address));
+			asm ("sthi.l %1, 3, %0" : : "r" (val), "r" (address));
+			break;
+		case 3:
+			asm ("stlo.q %1, 0, %0" : : "r" (val), "r" (address));
+			asm ("sthi.q %1, 7, %0" : : "r" (val), "r" (address));
+			break;
+
+		default:
+			printk("Unexpected width_shift %d in misaligned_store, PC=%08lx\n",
+				width_shift, (unsigned long) regs->pc);
+			break;
+		}
+	}
+
+	return 0;
+
+}
+
+#if defined(CONFIG_SH64_USER_MISALIGNED_FIXUP)
+/* Never need to fix up misaligned FPU accesses within the kernel since that's a real
+   error. */
+static int misaligned_fpu_load(struct pt_regs *regs,
+			   __u32 opcode,
+			   int displacement_not_indexed,
+			   int width_shift,
+			   int do_paired_load)
+{
+	/* Return -1 for a fault, 0 for OK */
+	int error;
+	int destreg;
+	__u64 address;
+
+	error = generate_and_check_address(regs, opcode,
+			displacement_not_indexed, width_shift, &address);
+	if (error < 0) {
+		return error;
+	}
+
+	destreg = (opcode >> 4) & 0x3f;
+	if (user_mode(regs)) {
+		__u64 buffer;
+		__u32 buflo, bufhi;
+
+		if (!access_ok(VERIFY_READ, (unsigned long) address, 1UL<<width_shift)) {
+			return -1;
+		}
+
+		if (__copy_user(&buffer, (const void *)(int)address, (1 << width_shift)) > 0) {
+			return -1; /* fault */
+		}
+		/* 'current' may be the current owner of the FPU state, so
+		   context switch the registers into memory so they can be
+		   indexed by register number. */
+		if (last_task_used_math == current) {
+			grab_fpu();
+			fpsave(&current->thread.fpu.hard);
+			release_fpu();
+			last_task_used_math = NULL;
+			regs->sr |= SR_FD;
+		}
+
+		buflo = *(__u32*) &buffer;
+		bufhi = *(1 + (__u32*) &buffer);
+
+		switch (width_shift) {
+		case 2:
+			current->thread.fpu.hard.fp_regs[destreg] = buflo;
+			break;
+		case 3:
+			if (do_paired_load) {
+				current->thread.fpu.hard.fp_regs[destreg] = buflo;
+				current->thread.fpu.hard.fp_regs[destreg+1] = bufhi;
+			} else {
+#if defined(CONFIG_LITTLE_ENDIAN)
+				current->thread.fpu.hard.fp_regs[destreg] = bufhi;
+				current->thread.fpu.hard.fp_regs[destreg+1] = buflo;
+#else
+				current->thread.fpu.hard.fp_regs[destreg] = buflo;
+				current->thread.fpu.hard.fp_regs[destreg+1] = bufhi;
+#endif
+			}
+			break;
+		default:
+			printk("Unexpected width_shift %d in misaligned_fpu_load, PC=%08lx\n",
+				width_shift, (unsigned long) regs->pc);
+			break;
+		}
+		return 0;
+	} else {
+		die ("Misaligned FPU load inside kernel", regs, 0);
+		return -1;
+	}
+
+
+}
+
+static int misaligned_fpu_store(struct pt_regs *regs,
+			   __u32 opcode,
+			   int displacement_not_indexed,
+			   int width_shift,
+			   int do_paired_load)
+{
+	/* Return -1 for a fault, 0 for OK */
+	int error;
+	int srcreg;
+	__u64 address;
+
+	error = generate_and_check_address(regs, opcode,
+			displacement_not_indexed, width_shift, &address);
+	if (error < 0) {
+		return error;
+	}
+
+	srcreg = (opcode >> 4) & 0x3f;
+	if (user_mode(regs)) {
+		__u64 buffer;
+		/* Initialise these to NaNs. */
+		__u32 buflo=0xffffffffUL, bufhi=0xffffffffUL;
+
+		if (!access_ok(VERIFY_WRITE, (unsigned long) address, 1UL<<width_shift)) {
+			return -1;
+		}
+
+		/* 'current' may be the current owner of the FPU state, so
+		   context switch the registers into memory so they can be
+		   indexed by register number. */
+		if (last_task_used_math == current) {
+			grab_fpu();
+			fpsave(&current->thread.fpu.hard);
+			release_fpu();
+			last_task_used_math = NULL;
+			regs->sr |= SR_FD;
+		}
+
+		switch (width_shift) {
+		case 2:
+			buflo = current->thread.fpu.hard.fp_regs[srcreg];
+			break;
+		case 3:
+			if (do_paired_load) {
+				buflo = current->thread.fpu.hard.fp_regs[srcreg];
+				bufhi = current->thread.fpu.hard.fp_regs[srcreg+1];
+			} else {
+#if defined(CONFIG_LITTLE_ENDIAN)
+				bufhi = current->thread.fpu.hard.fp_regs[srcreg];
+				buflo = current->thread.fpu.hard.fp_regs[srcreg+1];
+#else
+				buflo = current->thread.fpu.hard.fp_regs[srcreg];
+				bufhi = current->thread.fpu.hard.fp_regs[srcreg+1];
+#endif
+			}
+			break;
+		default:
+			printk("Unexpected width_shift %d in misaligned_fpu_store, PC=%08lx\n",
+				width_shift, (unsigned long) regs->pc);
+			break;
+		}
+
+		*(__u32*) &buffer = buflo;
+		*(1 + (__u32*) &buffer) = bufhi;
+		if (__copy_user((void *)(int)address, &buffer, (1 << width_shift)) > 0) {
+			return -1; /* fault */
+		}
+		return 0;
+	} else {
+		die ("Misaligned FPU load inside kernel", regs, 0);
+		return -1;
+	}
+}
+#endif
+
+static int misaligned_fixup(struct pt_regs *regs)
+{
+	unsigned long opcode;
+	int error;
+	int major, minor;
+
+#if !defined(CONFIG_SH64_USER_MISALIGNED_FIXUP)
+	/* Never fixup user mode misaligned accesses without this option enabled. */
+	return -1;
+#else
+	if (!user_mode_unaligned_fixup_enable) return -1;
+#endif
+
+	error = read_opcode(regs->pc, &opcode, user_mode(regs));
+	if (error < 0) {
+		return error;
+	}
+	major = (opcode >> 26) & 0x3f;
+	minor = (opcode >> 16) & 0xf;
+
+#if defined(CONFIG_SH64_USER_MISALIGNED_FIXUP)
+	if (user_mode(regs) && (user_mode_unaligned_fixup_count > 0)) {
+		--user_mode_unaligned_fixup_count;
+		/* Only do 'count' worth of these reports, to remove a potential DoS against syslog */
+		printk("Fixing up unaligned userspace access in \"%s\" pid=%d pc=0x%08x ins=0x%08lx\n",
+		       current->comm, current->pid, (__u32)regs->pc, opcode);
+	} else
+#endif
+	if (!user_mode(regs) && (kernel_mode_unaligned_fixup_count > 0)) {
+		--kernel_mode_unaligned_fixup_count;
+		if (in_interrupt()) {
+			printk("Fixing up unaligned kernelspace access in interrupt pc=0x%08x ins=0x%08lx\n",
+			       (__u32)regs->pc, opcode);
+		} else {
+			printk("Fixing up unaligned kernelspace access in \"%s\" pid=%d pc=0x%08x ins=0x%08lx\n",
+			       current->comm, current->pid, (__u32)regs->pc, opcode);
+		}
+	}
+
+
+	switch (major) {
+		case (0x84>>2): /* LD.W */
+			error = misaligned_load(regs, opcode, 1, 1, 1);
+			break;
+		case (0xb0>>2): /* LD.UW */
+			error = misaligned_load(regs, opcode, 1, 1, 0);
+			break;
+		case (0x88>>2): /* LD.L */
+			error = misaligned_load(regs, opcode, 1, 2, 1);
+			break;
+		case (0x8c>>2): /* LD.Q */
+			error = misaligned_load(regs, opcode, 1, 3, 0);
+			break;
+
+		case (0xa4>>2): /* ST.W */
+			error = misaligned_store(regs, opcode, 1, 1);
+			break;
+		case (0xa8>>2): /* ST.L */
+			error = misaligned_store(regs, opcode, 1, 2);
+			break;
+		case (0xac>>2): /* ST.Q */
+			error = misaligned_store(regs, opcode, 1, 3);
+			break;
+
+		case (0x40>>2): /* indexed loads */
+			switch (minor) {
+				case 0x1: /* LDX.W */
+					error = misaligned_load(regs, opcode, 0, 1, 1);
+					break;
+				case 0x5: /* LDX.UW */
+					error = misaligned_load(regs, opcode, 0, 1, 0);
+					break;
+				case 0x2: /* LDX.L */
+					error = misaligned_load(regs, opcode, 0, 2, 1);
+					break;
+				case 0x3: /* LDX.Q */
+					error = misaligned_load(regs, opcode, 0, 3, 0);
+					break;
+				default:
+					error = -1;
+					break;
+			}
+			break;
+
+		case (0x60>>2): /* indexed stores */
+			switch (minor) {
+				case 0x1: /* STX.W */
+					error = misaligned_store(regs, opcode, 0, 1);
+					break;
+				case 0x2: /* STX.L */
+					error = misaligned_store(regs, opcode, 0, 2);
+					break;
+				case 0x3: /* STX.Q */
+					error = misaligned_store(regs, opcode, 0, 3);
+					break;
+				default:
+					error = -1;
+					break;
+			}
+			break;
+
+#if defined(CONFIG_SH64_USER_MISALIGNED_FIXUP)
+		case (0x94>>2): /* FLD.S */
+			error = misaligned_fpu_load(regs, opcode, 1, 2, 0);
+			break;
+		case (0x98>>2): /* FLD.P */
+			error = misaligned_fpu_load(regs, opcode, 1, 3, 1);
+			break;
+		case (0x9c>>2): /* FLD.D */
+			error = misaligned_fpu_load(regs, opcode, 1, 3, 0);
+			break;
+		case (0x1c>>2): /* floating indexed loads */
+			switch (minor) {
+			case 0x8: /* FLDX.S */
+				error = misaligned_fpu_load(regs, opcode, 0, 2, 0);
+				break;
+			case 0xd: /* FLDX.P */
+				error = misaligned_fpu_load(regs, opcode, 0, 3, 1);
+				break;
+			case 0x9: /* FLDX.D */
+				error = misaligned_fpu_load(regs, opcode, 0, 3, 0);
+				break;
+			default:
+				error = -1;
+				break;
+			}
+			break;
+		case (0xb4>>2): /* FLD.S */
+			error = misaligned_fpu_store(regs, opcode, 1, 2, 0);
+			break;
+		case (0xb8>>2): /* FLD.P */
+			error = misaligned_fpu_store(regs, opcode, 1, 3, 1);
+			break;
+		case (0xbc>>2): /* FLD.D */
+			error = misaligned_fpu_store(regs, opcode, 1, 3, 0);
+			break;
+		case (0x3c>>2): /* floating indexed stores */
+			switch (minor) {
+			case 0x8: /* FSTX.S */
+				error = misaligned_fpu_store(regs, opcode, 0, 2, 0);
+				break;
+			case 0xd: /* FSTX.P */
+				error = misaligned_fpu_store(regs, opcode, 0, 3, 1);
+				break;
+			case 0x9: /* FSTX.D */
+				error = misaligned_fpu_store(regs, opcode, 0, 3, 0);
+				break;
+			default:
+				error = -1;
+				break;
+			}
+			break;
+#endif
+
+		default:
+			/* Fault */
+			error = -1;
+			break;
+	}
+
+	if (error < 0) {
+		return error;
+	} else {
+		regs->pc += 4; /* Skip the instruction that's just been emulated */
+		return 0;
+	}
+
+}
+
+static ctl_table unaligned_table[] = {
+	{1, "kernel_reports", &kernel_mode_unaligned_fixup_count,
+		sizeof(int), 0644, NULL, &proc_dointvec},
+#if defined(CONFIG_SH64_USER_MISALIGNED_FIXUP)
+	{2, "user_reports", &user_mode_unaligned_fixup_count,
+		sizeof(int), 0644, NULL, &proc_dointvec},
+	{3, "user_enable", &user_mode_unaligned_fixup_enable,
+		sizeof(int), 0644, NULL, &proc_dointvec},
+#endif
+	{0}
+};
+
+static ctl_table unaligned_root[] = {
+	{1, "unaligned_fixup", NULL, 0, 0555, unaligned_table},
+	{0}
+};
+
+static ctl_table sh64_root[] = {
+	{1, "sh64", NULL, 0, 0555, unaligned_root},
+	{0}
+};
+static struct ctl_table_header *sysctl_header;
+static int __init init_sysctl(void)
+{
+	sysctl_header = register_sysctl_table(sh64_root, 0);
+	return 0;
+}
+
+__initcall(init_sysctl);
+
+
+asmlinkage void do_debug_interrupt(unsigned long code, struct pt_regs *regs)
+{
+	u64 peek_real_address_q(u64 addr);
+	u64 poke_real_address_q(u64 addr, u64 val);
+	unsigned long long DM_EXP_CAUSE_PHY = 0x0c100010;
+	unsigned long long exp_cause;
+	/* It's not worth ioremapping the debug module registers for the amount
+	   of access we make to them - just go direct to their physical
+	   addresses. */
+	exp_cause = peek_real_address_q(DM_EXP_CAUSE_PHY);
+	if (exp_cause & ~4) {
+		printk("DM.EXP_CAUSE had unexpected bits set (=%08lx)\n",
+			(unsigned long)(exp_cause & 0xffffffff));
+	}
+	show_state();
+	/* Clear all DEBUGINT causes */
+	poke_real_address_q(DM_EXP_CAUSE_PHY, 0x0);
+}
+
diff --git a/arch/sh64/kernel/unwind.c b/arch/sh64/kernel/unwind.c
new file mode 100644
index 0000000..f934f97
--- /dev/null
+++ b/arch/sh64/kernel/unwind.c
@@ -0,0 +1,326 @@
+/*
+ * arch/sh64/kernel/unwind.c
+ *
+ * Copyright (C) 2004  Paul Mundt
+ * Copyright (C) 2004  Richard Curnow
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/kallsyms.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include <asm/processor.h>
+#include <asm/io.h>
+
+static u8 regcache[63];
+
+/*
+ * Finding the previous stack frame isn't horribly straightforward as it is
+ * on some other platforms. In the sh64 case, we don't have "linked" stack
+ * frames, so we need to do a bit of work to determine the previous frame,
+ * and in turn, the previous r14/r18 pair.
+ *
+ * There are generally a few cases which determine where we can find out
+ * the r14/r18 values. In the general case, this can be determined by poking
+ * around the prologue of the symbol PC is in (note that we absolutely must
+ * have frame pointer support as well as the kernel symbol table mapped,
+ * otherwise we can't even get this far).
+ *
+ * In other cases, such as the interrupt/exception path, we can poke around
+ * the sp/fp.
+ *
+ * Notably, this entire approach is somewhat error prone, and in the event
+ * that the previous frame cannot be determined, that's all we can do.
+ * Either way, this still leaves us with a more correct backtrace then what
+ * we would be able to come up with by walking the stack (which is garbage
+ * for anything beyond the first frame).
+ *						-- PFM.
+ */
+static int lookup_prev_stack_frame(unsigned long fp, unsigned long pc,
+		      unsigned long *pprev_fp, unsigned long *pprev_pc,
+		      struct pt_regs *regs)
+{
+	const char *sym;
+	char *modname, namebuf[128];
+	unsigned long offset, size;
+	unsigned long prologue = 0;
+	unsigned long fp_displacement = 0;
+	unsigned long fp_prev = 0;
+	unsigned long offset_r14 = 0, offset_r18 = 0;
+	int i, found_prologue_end = 0;
+
+	sym = kallsyms_lookup(pc, &size, &offset, &modname, namebuf);
+	if (!sym)
+		return -EINVAL;
+
+	prologue = pc - offset;
+	if (!prologue)
+		return -EINVAL;
+
+	/* Validate fp, to avoid risk of dereferencing a bad pointer later.
+	   Assume 128Mb since that's the amount of RAM on a Cayman.  Modify
+	   when there is an SH-5 board with more. */
+	if ((fp < (unsigned long) phys_to_virt(__MEMORY_START)) ||
+	    (fp >= (unsigned long)(phys_to_virt(__MEMORY_START)) + 128*1024*1024) ||
+	    ((fp & 7) != 0)) {
+		return -EINVAL;
+	}
+
+	/*
+	 * Depth to walk, depth is completely arbitrary.
+	 */
+	for (i = 0; i < 100; i++, prologue += sizeof(unsigned long)) {
+		unsigned long op;
+		u8 major, minor;
+		u8 src, dest, disp;
+
+		op = *(unsigned long *)prologue;
+
+		major = (op >> 26) & 0x3f;
+		src   = (op >> 20) & 0x3f;
+		minor = (op >> 16) & 0xf;
+		disp  = (op >> 10) & 0x3f;
+		dest  = (op >>  4) & 0x3f;
+
+		/*
+		 * Stack frame creation happens in a number of ways.. in the
+		 * general case when the stack frame is less than 511 bytes,
+		 * it's generally created by an addi or addi.l:
+		 *
+		 *	addi/addi.l r15, -FRAME_SIZE, r15
+		 *
+		 * in the event that the frame size is bigger than this, it's
+		 * typically created using a movi/sub pair as follows:
+		 *
+		 *	movi	FRAME_SIZE, rX
+		 *	sub	r15, rX, r15
+		 */
+
+		switch (major) {
+		case (0x00 >> 2):
+			switch (minor) {
+			case 0x8: /* add.l */
+			case 0x9: /* add */
+				/* Look for r15, r63, r14 */
+				if (src == 15 && disp == 63 && dest == 14)
+					found_prologue_end = 1;
+
+				break;
+			case 0xa: /* sub.l */
+			case 0xb: /* sub */
+				if (src != 15 || dest != 15)
+					continue;
+
+				fp_displacement -= regcache[disp];
+				fp_prev = fp - fp_displacement;
+				break;
+			}
+			break;
+		case (0xa8 >> 2): /* st.l */
+			if (src != 15)
+				continue;
+
+			switch (dest) {
+			case 14:
+				if (offset_r14 || fp_displacement == 0)
+					continue;
+
+				offset_r14 = (u64)(((((s64)op >> 10) & 0x3ff) << 54) >> 54);
+				offset_r14 *= sizeof(unsigned long);
+				offset_r14 += fp_displacement;
+				break;
+			case 18:
+				if (offset_r18 || fp_displacement == 0)
+					continue;
+
+				offset_r18 = (u64)(((((s64)op >> 10) & 0x3ff) << 54) >> 54);
+				offset_r18 *= sizeof(unsigned long);
+				offset_r18 += fp_displacement;
+				break;
+			}
+
+			break;
+		case (0xcc >> 2): /* movi */
+			if (dest >= 63) {
+				printk(KERN_NOTICE "%s: Invalid dest reg %d "
+				       "specified in movi handler. Failed "
+				       "opcode was 0x%lx: ", __FUNCTION__,
+				       dest, op);
+
+				continue;
+			}
+
+			/* Sign extend */
+			regcache[dest] =
+				((((s64)(u64)op >> 10) & 0xffff) << 54) >> 54;
+			break;
+		case (0xd0 >> 2): /* addi */
+		case (0xd4 >> 2): /* addi.l */
+			/* Look for r15, -FRAME_SIZE, r15 */
+			if (src != 15 || dest != 15)
+				continue;
+
+			/* Sign extended frame size.. */
+			fp_displacement +=
+				(u64)(((((s64)op >> 10) & 0x3ff) << 54) >> 54);
+			fp_prev = fp - fp_displacement;
+			break;
+		}
+
+		if (found_prologue_end && offset_r14 && (offset_r18 || *pprev_pc) && fp_prev)
+			break;
+	}
+
+	if (offset_r14 == 0 || fp_prev == 0) {
+		if (!offset_r14)
+			pr_debug("Unable to find r14 offset\n");
+		if (!fp_prev)
+			pr_debug("Unable to find previous fp\n");
+
+		return -EINVAL;
+	}
+
+	/* For innermost leaf function, there might not be a offset_r18 */
+	if (!*pprev_pc && (offset_r18 == 0))
+		return -EINVAL;
+
+	*pprev_fp = *(unsigned long *)(fp_prev + offset_r14);
+
+	if (offset_r18)
+		*pprev_pc = *(unsigned long *)(fp_prev + offset_r18);
+
+	*pprev_pc &= ~1;
+
+	return 0;
+}
+
+/* Don't put this on the stack since we'll want to call sh64_unwind
+ * when we're close to underflowing the stack anyway. */
+static struct pt_regs here_regs;
+
+extern const char syscall_ret;
+extern const char ret_from_syscall;
+extern const char ret_from_exception;
+extern const char ret_from_irq;
+
+static void sh64_unwind_inner(struct pt_regs *regs);
+
+static void unwind_nested (unsigned long pc, unsigned long fp)
+{
+	if ((fp >= __MEMORY_START) &&
+	    ((fp & 7) == 0)) {
+		sh64_unwind_inner((struct pt_regs *) fp);
+	}
+}
+
+static void sh64_unwind_inner(struct pt_regs *regs)
+{
+	unsigned long pc, fp;
+	int ofs = 0;
+	int first_pass;
+
+	pc = regs->pc & ~1;
+	fp = regs->regs[14];
+
+	first_pass = 1;
+	for (;;) {
+		int cond;
+		unsigned long next_fp, next_pc;
+
+		if (pc == ((unsigned long) &syscall_ret & ~1)) {
+			printk("SYSCALL\n");
+			unwind_nested(pc,fp);
+			return;
+		}
+
+		if (pc == ((unsigned long) &ret_from_syscall & ~1)) {
+			printk("SYSCALL (PREEMPTED)\n");
+			unwind_nested(pc,fp);
+			return;
+		}
+
+		/* In this case, the PC is discovered by lookup_prev_stack_frame but
+		   it has 4 taken off it to look like the 'caller' */
+		if (pc == ((unsigned long) &ret_from_exception & ~1)) {
+			printk("EXCEPTION\n");
+			unwind_nested(pc,fp);
+			return;
+		}
+
+		if (pc == ((unsigned long) &ret_from_irq & ~1)) {
+			printk("IRQ\n");
+			unwind_nested(pc,fp);
+			return;
+		}
+
+		cond = ((pc >= __MEMORY_START) && (fp >= __MEMORY_START) &&
+			((pc & 3) == 0) && ((fp & 7) == 0));
+
+		pc -= ofs;
+
+		printk("[<%08lx>] ", pc);
+		print_symbol("%s\n", pc);
+
+		if (first_pass) {
+			/* If the innermost frame is a leaf function, it's
+			 * possible that r18 is never saved out to the stack.
+			 */
+			next_pc = regs->regs[18];
+		} else {
+			next_pc = 0;
+		}
+
+		if (lookup_prev_stack_frame(fp, pc, &next_fp, &next_pc, regs) == 0) {
+			ofs = sizeof(unsigned long);
+			pc = next_pc & ~1;
+			fp = next_fp;
+		} else {
+			printk("Unable to lookup previous stack frame\n");
+			break;
+		}
+		first_pass = 0;
+	}
+
+	printk("\n");
+
+}
+
+void sh64_unwind(struct pt_regs *regs)
+{
+	if (!regs) {
+		/*
+		 * Fetch current regs if we have no other saved state to back
+		 * trace from.
+		 */
+		regs = &here_regs;
+
+		__asm__ __volatile__ ("ori r14, 0, %0" : "=r" (regs->regs[14]));
+		__asm__ __volatile__ ("ori r15, 0, %0" : "=r" (regs->regs[15]));
+		__asm__ __volatile__ ("ori r18, 0, %0" : "=r" (regs->regs[18]));
+
+		__asm__ __volatile__ ("gettr tr0, %0" : "=r" (regs->tregs[0]));
+		__asm__ __volatile__ ("gettr tr1, %0" : "=r" (regs->tregs[1]));
+		__asm__ __volatile__ ("gettr tr2, %0" : "=r" (regs->tregs[2]));
+		__asm__ __volatile__ ("gettr tr3, %0" : "=r" (regs->tregs[3]));
+		__asm__ __volatile__ ("gettr tr4, %0" : "=r" (regs->tregs[4]));
+		__asm__ __volatile__ ("gettr tr5, %0" : "=r" (regs->tregs[5]));
+		__asm__ __volatile__ ("gettr tr6, %0" : "=r" (regs->tregs[6]));
+		__asm__ __volatile__ ("gettr tr7, %0" : "=r" (regs->tregs[7]));
+
+		__asm__ __volatile__ (
+			"pta 0f, tr0\n\t"
+			"blink tr0, %0\n\t"
+			"0: nop"
+			: "=r" (regs->pc)
+		);
+	}
+
+	printk("\nCall Trace:\n");
+	sh64_unwind_inner(regs);
+}
+
diff --git a/arch/sh64/kernel/vmlinux.lds.S b/arch/sh64/kernel/vmlinux.lds.S
new file mode 100644
index 0000000..7d9f7a6
--- /dev/null
+++ b/arch/sh64/kernel/vmlinux.lds.S
@@ -0,0 +1,181 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * arch/sh5/vmlinux.lds.S
+ *
+ * ld script to make ST50 Linux kernel
+ *
+ * Copyright (C) 2000, 2001  Paolo Alberelli
+ *
+ * benedict.gaster@superh.com:	 2nd May 2002
+ *    Add definition of empty_zero_page to be the first page of kernel image.
+ *
+ * benedict.gaster@superh.com:	 3rd May 2002
+ *    Added support for ramdisk, removing statically linked romfs at the same time.
+ *
+ * lethal@linux-sh.org:          9th May 2003
+ *    Kill off GLOBAL_NAME() usage and other CDC-isms.
+ *
+ * lethal@linux-sh.org:         19th May 2003
+ *    Remove support for ancient toolchains.
+ */
+
+#include <linux/config.h>
+#include <asm/page.h>
+#include <asm/cache.h>
+#include <asm/processor.h>
+#include <asm/thread_info.h>
+
+#define LOAD_OFFSET	CONFIG_CACHED_MEMORY_OFFSET
+#include <asm-generic/vmlinux.lds.h>
+
+#ifdef NOTDEF
+#ifdef CONFIG_LITTLE_ENDIAN
+OUTPUT_FORMAT("elf32-sh64l-linux", "elf32-sh64l-linux", "elf32-sh64l-linux")
+#else
+OUTPUT_FORMAT("elf32-sh64", "elf32-sh64", "elf32-sh64")
+#endif
+#endif
+
+OUTPUT_ARCH(sh:sh5)
+
+#define C_PHYS(x) AT (ADDR(x) - LOAD_OFFSET)
+
+ENTRY(__start)
+SECTIONS
+{
+  . = CONFIG_CACHED_MEMORY_OFFSET + CONFIG_MEMORY_START + PAGE_SIZE;
+  _text = .;			/* Text and read-only data */
+  text = .;			/* Text and read-only data */
+
+  .empty_zero_page : C_PHYS(.empty_zero_page) {
+	*(.empty_zero_page)
+	} = 0
+
+  .text : C_PHYS(.text) {
+	*(.text)
+	*(.text64)
+        *(.text..SHmedia32)
+	SCHED_TEXT
+	LOCK_TEXT
+	*(.fixup)
+	*(.gnu.warning)
+#ifdef CONFIG_LITTLE_ENDIAN
+	} = 0x6ff0fff0
+#else
+	} = 0xf0fff06f
+#endif
+
+  /* We likely want __ex_table to be Cache Line aligned */
+  . = ALIGN(L1_CACHE_BYTES);		/* Exception table */
+  __start___ex_table = .;
+  __ex_table : C_PHYS(__ex_table) { *(__ex_table) }
+  __stop___ex_table = .;
+
+  RODATA
+
+  _etext = .;			/* End of text section */
+
+  .data : C_PHYS(.data) {			/* Data */
+	*(.data)
+	CONSTRUCTORS
+	}
+
+  . = ALIGN(PAGE_SIZE);
+  .data.page_aligned : C_PHYS(.data.page_aligned) { *(.data.page_aligned) }
+
+  . = ALIGN(L1_CACHE_BYTES);
+  __per_cpu_start = .;
+  .data.percpu : C_PHYS(.data.percpu) { *(.data.percpu) }
+  __per_cpu_end = . ;
+  .data.cacheline_aligned : C_PHYS(.data.cacheline_aligned) { *(.data.cacheline_aligned) }
+
+  _edata = .;			/* End of data section */
+
+  . = ALIGN(THREAD_SIZE);	/* init_task: structure size aligned */
+  .data.init_task : C_PHYS(.data.init_task) { *(.data.init_task) }
+
+  . = ALIGN(PAGE_SIZE);		/* Init code and data */
+  __init_begin = .;
+  _sinittext = .;
+  .init.text : C_PHYS(.init.text) { *(.init.text) }
+  _einittext = .;
+  .init.data : C_PHYS(.init.data) { *(.init.data) }
+  . = ALIGN(L1_CACHE_BYTES);	/* Better if Cache Line aligned */
+  __setup_start = .;
+  .init.setup : C_PHYS(.init.setup) { *(.init.setup) }
+  __setup_end = .;
+  __initcall_start = .;
+  .initcall.init : C_PHYS(.initcall.init) {
+  	*(.initcall1.init)
+  	*(.initcall2.init)
+  	*(.initcall3.init)
+  	*(.initcall4.init)
+  	*(.initcall5.init)
+  	*(.initcall6.init)
+  	*(.initcall7.init)
+  }
+  __initcall_end = .;
+  __con_initcall_start = .;
+  .con_initcall.init : C_PHYS(.con_initcall.init) { *(.con_initcall.init) }
+  __con_initcall_end = .;
+  SECURITY_INIT
+  __initramfs_start = .;
+  .init.ramfs : C_PHYS(.init.ramfs) { *(.init.ramfs) }
+  __initramfs_end = .;
+  . = ALIGN(PAGE_SIZE);
+  __init_end = .;
+
+  /* Align to the biggest single data representation, head and tail */
+  . = ALIGN(8);
+  __bss_start = .;		/* BSS */
+  .bss : C_PHYS(.bss) {
+	*(.bss)
+	}
+  . = ALIGN(8);
+  _end = . ;
+
+  /* Sections to be discarded */
+  /DISCARD/ : {
+	*(.exit.text)
+	*(.exit.data)
+	*(.exitcall.exit)
+	}
+
+  /* Stabs debugging sections.  */
+  .stab 0 : C_PHYS(.stab) { *(.stab) }
+  .stabstr 0 : C_PHYS(.stabstr) { *(.stabstr) }
+  .stab.excl 0 : C_PHYS(.stab.excl) { *(.stab.excl) }
+  .stab.exclstr 0 : C_PHYS(.stab.exclstr) { *(.stab.exclstr) }
+  .stab.index 0 : C_PHYS(.stab.index) { *(.stab.index) }
+  .stab.indexstr 0 : C_PHYS(.stab.indexstr) { *(.stab.indexstr) }
+  .comment 0 : C_PHYS(.comment) { *(.comment) }
+  /* DWARF debug sections.
+     Symbols in the DWARF debugging section are relative to the beginning
+     of the section so we begin .debug at 0.  */
+  /* DWARF 1 */
+  .debug          0 : C_PHYS(.debug) { *(.debug) }
+  .line           0 : C_PHYS(.line) { *(.line) }
+  /* GNU DWARF 1 extensions */
+  .debug_srcinfo  0 : C_PHYS(.debug_srcinfo) { *(.debug_srcinfo) }
+  .debug_sfnames  0 : C_PHYS(.debug_sfnames) { *(.debug_sfnames) }
+  /* DWARF 1.1 and DWARF 2 */
+  .debug_aranges  0 : C_PHYS(.debug_aranges) { *(.debug_aranges) }
+  .debug_pubnames 0 : C_PHYS(.debug_pubnames) { *(.debug_pubnames) }
+  /* DWARF 2 */
+  .debug_info     0 : C_PHYS(.debug_info) { *(.debug_info) }
+  .debug_abbrev   0 : C_PHYS(.debug_abbrev) { *(.debug_abbrev) }
+  .debug_line     0 : C_PHYS(.debug_line) { *(.debug_line) }
+  .debug_frame    0 : C_PHYS(.debug_frame) { *(.debug_frame) }
+  .debug_str      0 : C_PHYS(.debug_str) { *(.debug_str) }
+  .debug_loc      0 : C_PHYS(.debug_loc) { *(.debug_loc) }
+  .debug_macinfo  0 : C_PHYS(.debug_macinfo) { *(.debug_macinfo) }
+  /* SGI/MIPS DWARF 2 extensions */
+  .debug_weaknames 0 : C_PHYS(.debug_weaknames) { *(.debug_weaknames) }
+  .debug_funcnames 0 : C_PHYS(.debug_funcnames) { *(.debug_funcnames) }
+  .debug_typenames 0 : C_PHYS(.debug_typenames) { *(.debug_typenames) }
+  .debug_varnames  0 : C_PHYS(.debug_varnames) { *(.debug_varnames) }
+  /* These must appear regardless of  .  */
+}
diff --git a/arch/sh64/lib/Makefile b/arch/sh64/lib/Makefile
new file mode 100644
index 0000000..6a4cc3f
--- /dev/null
+++ b/arch/sh64/lib/Makefile
@@ -0,0 +1,19 @@
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 2000, 2001  Paolo Alberelli
+# Coprygith (C) 2003  Paul Mundt
+#
+# Makefile for the SH-5 specific library files..
+#
+# Note! Dependencies are done automagically by 'make dep', which also
+# removes any old dependencies. DON'T put your own dependencies here
+# unless it's something special (ie not a .c file).
+#
+
+# Panic should really be compiled as PIC
+lib-y  := udelay.o c-checksum.o dbg.o io.o panic.o memcpy.o copy_user_memcpy.o \
+		page_copy.o page_clear.o iomap.o
+
diff --git a/arch/sh64/lib/c-checksum.c b/arch/sh64/lib/c-checksum.c
new file mode 100644
index 0000000..a82d8f1
--- /dev/null
+++ b/arch/sh64/lib/c-checksum.c
@@ -0,0 +1,231 @@
+/*
+ * arch/sh/lib/csum_parial.c
+ *
+ * This file contains network checksum routines that are better done
+ * in an architecture-specific manner due to speed..
+ */
+
+#undef DEBUG
+
+#include <linux/config.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <asm/byteorder.h>
+#include <asm/uaccess.h>
+
+static inline unsigned short from64to16(unsigned long long x)
+{
+	/* add up 32-bit words for 33 bits */
+	x = (x & 0xffffffff) + (x >> 32);
+	/* add up 16-bit and 17-bit words for 17+c bits */
+	x = (x & 0xffff) + (x >> 16);
+	/* add up 16-bit and 2-bit for 16+c bit */
+	x = (x & 0xffff) + (x >> 16);
+	/* add up carry.. */
+	x = (x & 0xffff) + (x >> 16);
+	return x;
+}
+
+static inline unsigned short foldto16(unsigned long x)
+{
+	/* add up 16-bit for 17 bits */
+	x = (x & 0xffff) + (x >> 16);
+	/* add up carry.. */
+	x = (x & 0xffff) + (x >> 16);
+	return x;
+}
+
+static inline unsigned short myfoldto16(unsigned long long x)
+{
+	/* Fold down to 32-bits so we don't loose in the typedef-less
+	   network stack.  */
+	/* 64 to 33 */
+	x = (x & 0xffffffff) + (x >> 32);
+	/* 33 to 32 */
+	x = (x & 0xffffffff) + (x >> 32);
+
+	/* add up 16-bit for 17 bits */
+	x = (x & 0xffff) + (x >> 16);
+	/* add up carry.. */
+	x = (x & 0xffff) + (x >> 16);
+	return x;
+}
+
+#define odd(x) ((x)&1)
+#define U16(x) ntohs(x)
+
+static unsigned long do_csum(const unsigned char *buff, int len)
+{
+	int odd, count;
+	unsigned long result = 0;
+
+	pr_debug("do_csum buff %p, len %d (0x%x)\n", buff, len, len);
+#ifdef DEBUG
+	for (i = 0; i < len; i++) {
+		if ((i % 26) == 0)
+			printk("\n");
+		printk("%02X ", buff[i]);
+	}
+#endif
+
+	if (len <= 0)
+		goto out;
+
+	odd = 1 & (unsigned long) buff;
+	if (odd) {
+		result = *buff << 8;
+		len--;
+		buff++;
+	}
+	count = len >> 1;	/* nr of 16-bit words.. */
+	if (count) {
+		if (2 & (unsigned long) buff) {
+			result += *(unsigned short *) buff;
+			count--;
+			len -= 2;
+			buff += 2;
+		}
+		count >>= 1;	/* nr of 32-bit words.. */
+		if (count) {
+			unsigned long carry = 0;
+			do {
+				unsigned long w = *(unsigned long *) buff;
+				buff += 4;
+				count--;
+				result += carry;
+				result += w;
+				carry = (w > result);
+			} while (count);
+			result += carry;
+			result = (result & 0xffff) + (result >> 16);
+		}
+		if (len & 2) {
+			result += *(unsigned short *) buff;
+			buff += 2;
+		}
+	}
+	if (len & 1)
+		result += *buff;
+	result = foldto16(result);
+	if (odd)
+		result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
+
+	pr_debug("\nCHECKSUM is 0x%x\n", result);
+
+      out:
+	return result;
+}
+
+/* computes the checksum of a memory block at buff, length len,
+   and adds in "sum" (32-bit)  */
+unsigned int csum_partial(const unsigned char *buff, int len, unsigned int sum)
+{
+	unsigned long long result = do_csum(buff, len);
+
+	/* add in old sum, and carry.. */
+	result += sum;
+	/* 32+c bits -> 32 bits */
+	result = (result & 0xffffffff) + (result >> 32);
+
+	pr_debug("csum_partial, buff %p len %d sum 0x%x result=0x%016Lx\n",
+		buff, len, sum, result);
+
+	return result;
+}
+
+/* Copy while checksumming, otherwise like csum_partial.  */
+unsigned int
+csum_partial_copy(const unsigned char *src, unsigned char *dst, int len, unsigned int sum)
+{
+	sum = csum_partial(src, len, sum);
+	memcpy(dst, src, len);
+
+	return sum;
+}
+
+/* Copy from userspace and compute checksum.  If we catch an exception
+   then zero the rest of the buffer.  */
+unsigned int
+csum_partial_copy_from_user(const unsigned char *src, unsigned char *dst, int len,
+			    unsigned int sum, int *err_ptr)
+{
+	int missing;
+
+	pr_debug
+	    ("csum_partial_copy_from_user src %p, dest %p, len %d, sum %08x, err_ptr %p\n",
+	     src, dst, len, sum, err_ptr);
+	missing = copy_from_user(dst, src, len);
+	pr_debug("  access_ok %d\n", __access_ok((unsigned long) src, len));
+	pr_debug("  missing %d\n", missing);
+	if (missing) {
+		memset(dst + len - missing, 0, missing);
+		*err_ptr = -EFAULT;
+	}
+
+	return csum_partial(dst, len, sum);
+}
+
+/* Copy to userspace and compute checksum.  */
+unsigned int
+csum_partial_copy_to_user(const unsigned char *src, unsigned char *dst, int len,
+			  unsigned int sum, int *err_ptr)
+{
+	sum = csum_partial(src, len, sum);
+
+	if (copy_to_user(dst, src, len))
+		*err_ptr = -EFAULT;
+
+	return sum;
+}
+
+/*
+ *	This is a version of ip_compute_csum() optimized for IP headers,
+ *	which always checksum on 4 octet boundaries.
+ */
+unsigned short ip_fast_csum(unsigned char *iph, unsigned int ihl)
+{
+	pr_debug("ip_fast_csum %p,%d\n", iph, ihl);
+
+	return ~do_csum(iph, ihl * 4);
+}
+
+unsigned int csum_tcpudp_nofold(unsigned long saddr,
+				unsigned long daddr,
+				unsigned short len,
+				unsigned short proto, unsigned int sum)
+{
+	unsigned long long result;
+
+	pr_debug("ntohs(0x%x)=0x%x\n", 0xdead, ntohs(0xdead));
+	pr_debug("htons(0x%x)=0x%x\n", 0xdead, htons(0xdead));
+
+	result = ((unsigned long long) saddr +
+		  (unsigned long long) daddr +
+		  (unsigned long long) sum +
+		  ((unsigned long long) ntohs(len) << 16) +
+		  ((unsigned long long) proto << 8));
+
+	/* Fold down to 32-bits so we don't loose in the typedef-less
+	   network stack.  */
+	/* 64 to 33 */
+	result = (result & 0xffffffff) + (result >> 32);
+	/* 33 to 32 */
+	result = (result & 0xffffffff) + (result >> 32);
+
+	pr_debug("%s saddr %x daddr %x len %x proto %x sum %x result %08Lx\n",
+		__FUNCTION__, saddr, daddr, len, proto, sum, result);
+
+	return result;
+}
+
+// Post SIM:
+unsigned int
+csum_partial_copy_nocheck(const unsigned char *src, unsigned char *dst, int len, unsigned int sum)
+{
+	//  unsigned dummy;
+	pr_debug("csum_partial_copy_nocheck src %p dst %p len %d\n", src, dst,
+		len);
+
+	return csum_partial_copy(src, dst, len, sum);
+}
diff --git a/arch/sh64/lib/copy_user_memcpy.S b/arch/sh64/lib/copy_user_memcpy.S
new file mode 100644
index 0000000..2a62816
--- /dev/null
+++ b/arch/sh64/lib/copy_user_memcpy.S
@@ -0,0 +1,217 @@
+!
+! Fast SH memcpy
+!
+! by Toshiyasu Morita (tm@netcom.com)
+! hacked by J"orn Rernnecke (joern.rennecke@superh.com) ("o for o-umlaut)
+! SH5 code Copyright 2002 SuperH Ltd.
+!
+! Entry: ARG0: destination pointer
+!        ARG1: source pointer
+!        ARG2: byte count
+!
+! Exit:  RESULT: destination pointer
+!        any other registers in the range r0-r7: trashed
+!
+! Notes: Usually one wants to do small reads and write a longword, but
+!        unfortunately it is difficult in some cases to concatanate bytes
+!        into a longword on the SH, so this does a longword read and small
+!        writes.
+!
+! This implementation makes two assumptions about how it is called:
+!
+! 1.: If the byte count is nonzero, the address of the last byte to be
+!     copied is unsigned greater than the address of the first byte to
+!     be copied.  This could be easily swapped for a signed comparison,
+!     but the algorithm used needs some comparison.
+!
+! 2.: When there are two or three bytes in the last word of an 11-or-more
+!     bytes memory chunk to b copied, the rest of the word can be read
+!     without side effects.
+!     This could be easily changed by increasing the minumum size of
+!     a fast memcpy and the amount subtracted from r7 before L_2l_loop be 2,
+!     however, this would cost a few extra cyles on average.
+!     For SHmedia, the assumption is that any quadword can be read in its
+!     enirety if at least one byte is included in the copy.
+
+/* Imported into Linux kernel by Richard Curnow.  This is used to implement the
+   __copy_user function in the general case, so it has to be a distinct
+   function from intra-kernel memcpy to allow for exception fix-ups in the
+   event that the user pointer is bad somewhere in the copy (e.g. due to
+   running off the end of the vma).
+
+   Note, this algorithm will be slightly wasteful in the case where the source
+   and destination pointers are equally aligned, because the stlo/sthi pairs
+   could then be merged back into single stores.  If there are a lot of cache
+   misses, this is probably offset by the stall lengths on the preloads.
+
+*/
+
+/* NOTE : Prefetches removed and allocos guarded by synco to avoid TAKum03020
+ * erratum.  The first two prefetches are nop-ed out to avoid upsetting the
+ * instruction counts used in the jump address calculation.
+ * */
+
+	.section .text..SHmedia32,"ax"
+	.little
+	.balign 32
+	.global copy_user_memcpy
+	.global copy_user_memcpy_end
+copy_user_memcpy:
+
+#define LDUAQ(P,O,D0,D1) ldlo.q P,O,D0; ldhi.q P,O+7,D1
+#define STUAQ(P,O,D0,D1) stlo.q P,O,D0; sthi.q P,O+7,D1
+#define LDUAL(P,O,D0,D1) ldlo.l P,O,D0; ldhi.l P,O+3,D1
+#define STUAL(P,O,D0,D1) stlo.l P,O,D0; sthi.l P,O+3,D1
+
+	nop ! ld.b r3,0,r63 ! TAKum03020
+	pta/l Large,tr0
+	movi 25,r0
+	bgeu/u r4,r0,tr0
+	nsb r4,r0
+	shlli r0,5,r0
+	movi (L1-L0+63*32 + 1) & 0xffff,r1
+	sub r1, r0, r0
+L0:	ptrel r0,tr0
+	add r2,r4,r5
+	ptabs r18,tr1
+	add r3,r4,r6
+	blink tr0,r63
+
+/* Rearranged to make cut2 safe */
+	.balign 8
+L4_7:	/* 4..7 byte memcpy cntd. */
+	stlo.l r2, 0, r0
+	or r6, r7, r6
+	sthi.l r5, -1, r6
+	stlo.l r5, -4, r6
+	blink tr1,r63
+
+	.balign 8
+L1:	/* 0 byte memcpy */
+	nop
+	blink tr1,r63
+	nop
+	nop
+	nop
+	nop
+
+L2_3:	/* 2 or 3 byte memcpy cntd. */
+	st.b r5,-1,r6
+	blink tr1,r63
+
+	/* 1 byte memcpy */
+	ld.b r3,0,r0
+	st.b r2,0,r0
+	blink tr1,r63
+
+L8_15:	/* 8..15 byte memcpy cntd. */
+	stlo.q r2, 0, r0
+	or r6, r7, r6
+	sthi.q r5, -1, r6
+	stlo.q r5, -8, r6
+	blink tr1,r63
+
+	/* 2 or 3 byte memcpy */
+	ld.b r3,0,r0
+	nop ! ld.b r2,0,r63 ! TAKum03020
+	ld.b r3,1,r1
+	st.b r2,0,r0
+	pta/l L2_3,tr0
+	ld.b r6,-1,r6
+	st.b r2,1,r1
+	blink tr0, r63
+
+	/* 4 .. 7 byte memcpy */
+	LDUAL (r3, 0, r0, r1)
+	pta L4_7, tr0
+	ldlo.l r6, -4, r7
+	or r0, r1, r0
+	sthi.l r2, 3, r0
+	ldhi.l r6, -1, r6
+	blink tr0, r63
+
+	/* 8 .. 15 byte memcpy */
+	LDUAQ (r3, 0, r0, r1)
+	pta L8_15, tr0
+	ldlo.q r6, -8, r7
+	or r0, r1, r0
+	sthi.q r2, 7, r0
+	ldhi.q r6, -1, r6
+	blink tr0, r63
+
+	/* 16 .. 24 byte memcpy */
+	LDUAQ (r3, 0, r0, r1)
+	LDUAQ (r3, 8, r8, r9)
+	or r0, r1, r0
+	sthi.q r2, 7, r0
+	or r8, r9, r8
+	sthi.q r2, 15, r8
+	ldlo.q r6, -8, r7
+	ldhi.q r6, -1, r6
+	stlo.q r2, 8, r8
+	stlo.q r2, 0, r0
+	or r6, r7, r6
+	sthi.q r5, -1, r6
+	stlo.q r5, -8, r6
+	blink tr1,r63
+
+Large:
+	! ld.b r2, 0, r63 ! TAKum03020
+	pta/l  Loop_ua, tr1
+	ori r3, -8, r7
+	sub r2, r7, r22
+	sub r3, r2, r6
+	add r2, r4, r5
+	ldlo.q r3, 0, r0
+	addi r5, -16, r5
+	movi 64+8, r27 ! could subtract r7 from that.
+	stlo.q r2, 0, r0
+	sthi.q r2, 7, r0
+	ldx.q r22, r6, r0
+	bgtu/l r27, r4, tr1
+
+	addi r5, -48, r27
+	pta/l Loop_line, tr0
+	addi r6, 64, r36
+	addi r6, -24, r19
+	addi r6, -16, r20
+	addi r6, -8, r21
+
+Loop_line:
+	! ldx.q r22, r36, r63 ! TAKum03020
+	alloco r22, 32
+	synco
+	addi r22, 32, r22
+	ldx.q r22, r19, r23
+	sthi.q r22, -25, r0
+	ldx.q r22, r20, r24
+	ldx.q r22, r21, r25
+	stlo.q r22, -32, r0
+	ldx.q r22, r6,  r0
+	sthi.q r22, -17, r23
+	sthi.q r22,  -9, r24
+	sthi.q r22,  -1, r25
+	stlo.q r22, -24, r23
+	stlo.q r22, -16, r24
+	stlo.q r22,  -8, r25
+	bgeu r27, r22, tr0
+
+Loop_ua:
+	addi r22, 8, r22
+	sthi.q r22, -1, r0
+	stlo.q r22, -8, r0
+	ldx.q r22, r6, r0
+	bgtu/l r5, r22, tr1
+
+	add r3, r4, r7
+	ldlo.q r7, -8, r1
+	sthi.q r22, 7, r0
+	ldhi.q r7, -1, r7
+	ptabs r18,tr1
+	stlo.q r22, 0, r0
+	or r1, r7, r1
+	sthi.q r5, 15, r1
+	stlo.q r5, 8, r1
+	blink tr1, r63
+copy_user_memcpy_end:
+	nop
diff --git a/arch/sh64/lib/dbg.c b/arch/sh64/lib/dbg.c
new file mode 100644
index 0000000..526feda
--- /dev/null
+++ b/arch/sh64/lib/dbg.c
@@ -0,0 +1,430 @@
+/*--------------------------------------------------------------------------
+--
+-- Identity : Linux50 Debug Funcions
+--
+-- File     : arch/sh64/lib/dbg.C
+--
+-- Copyright 2000, 2001 STMicroelectronics Limited.
+-- Copyright 2004 Richard Curnow (evt_debug etc)
+--
+--------------------------------------------------------------------------*/
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <asm/mmu_context.h>
+
+typedef u64 regType_t;
+
+static regType_t getConfigReg(u64 id)
+{
+	register u64 reg __asm__("r2");
+	asm volatile ("getcfg   %1, 0, %0":"=r" (reg):"r"(id));
+	return (reg);
+}
+
+/* ======================================================================= */
+
+static char *szTab[] = { "4k", "64k", "1M", "512M" };
+static char *protTab[] = { "----",
+	"---R",
+	"--X-",
+	"--XR",
+	"-W--",
+	"-W-R",
+	"-WX-",
+	"-WXR",
+	"U---",
+	"U--R",
+	"U-X-",
+	"U-XR",
+	"UW--",
+	"UW-R",
+	"UWX-",
+	"UWXR"
+};
+#define  ITLB_BASE	0x00000000
+#define  DTLB_BASE	0x00800000
+#define  MAX_TLBs		64
+/* PTE High */
+#define  GET_VALID(pte)        ((pte) & 0x1)
+#define  GET_SHARED(pte)       ((pte) & 0x2)
+#define  GET_ASID(pte)         ((pte >> 2) & 0x0ff)
+#define  GET_EPN(pte)          ((pte) & 0xfffff000)
+
+/* PTE Low */
+#define  GET_CBEHAVIOR(pte)    ((pte) & 0x3)
+#define  GET_PAGE_SIZE(pte)    szTab[((pte >> 3) & 0x3)]
+#define  GET_PROTECTION(pte)   protTab[((pte >> 6) & 0xf)]
+#define  GET_PPN(pte)          ((pte) & 0xfffff000)
+
+#define PAGE_1K_MASK           0x00000000
+#define PAGE_4K_MASK           0x00000010
+#define PAGE_64K_MASK          0x00000080
+#define MMU_PAGESIZE_MASK      (PAGE_64K_MASK | PAGE_4K_MASK)
+#define PAGE_1MB_MASK          MMU_PAGESIZE_MASK
+#define PAGE_1K                (1024)
+#define PAGE_4K                (1024 * 4)
+#define PAGE_64K               (1024 * 64)
+#define PAGE_1MB               (1024 * 1024)
+
+#define HOW_TO_READ_TLB_CONTENT  \
+       "[ ID]  PPN         EPN        ASID  Share  CB  P.Size   PROT.\n"
+
+void print_single_tlb(unsigned long tlb, int single_print)
+{
+	regType_t pteH;
+	regType_t pteL;
+	unsigned int valid, shared, asid, epn, cb, ppn;
+	char *pSize;
+	char *pProt;
+
+	/*
+	   ** in case of single print <single_print> is true, this implies:
+	   **   1) print the TLB in any case also if NOT VALID
+	   **   2) print out the header
+	 */
+
+	pteH = getConfigReg(tlb);
+	valid = GET_VALID(pteH);
+	if (single_print)
+		printk(HOW_TO_READ_TLB_CONTENT);
+	else if (!valid)
+		return;
+
+	pteL = getConfigReg(tlb + 1);
+
+	shared = GET_SHARED(pteH);
+	asid = GET_ASID(pteH);
+	epn = GET_EPN(pteH);
+	cb = GET_CBEHAVIOR(pteL);
+	pSize = GET_PAGE_SIZE(pteL);
+	pProt = GET_PROTECTION(pteL);
+	ppn = GET_PPN(pteL);
+	printk("[%c%2ld]  0x%08x  0x%08x  %03d   %02x    %02x   %4s    %s\n",
+	       ((valid) ? ' ' : 'u'), ((tlb & 0x0ffff) / TLB_STEP),
+	       ppn, epn, asid, shared, cb, pSize, pProt);
+}
+
+void print_dtlb(void)
+{
+	int count;
+	unsigned long tlb;
+
+	printk(" ================= SH-5 D-TLBs Status ===================\n");
+	printk(HOW_TO_READ_TLB_CONTENT);
+	tlb = DTLB_BASE;
+	for (count = 0; count < MAX_TLBs; count++, tlb += TLB_STEP)
+		print_single_tlb(tlb, 0);
+	printk
+	    (" =============================================================\n");
+}
+
+void print_itlb(void)
+{
+	int count;
+	unsigned long tlb;
+
+	printk(" ================= SH-5 I-TLBs Status ===================\n");
+	printk(HOW_TO_READ_TLB_CONTENT);
+	tlb = ITLB_BASE;
+	for (count = 0; count < MAX_TLBs; count++, tlb += TLB_STEP)
+		print_single_tlb(tlb, 0);
+	printk
+	    (" =============================================================\n");
+}
+
+/* ======================================================================= */
+
+#ifdef CONFIG_POOR_MANS_STRACE
+
+#include "syscalltab.h"
+
+struct ring_node {
+	int evt;
+	int ret_addr;
+	int event;
+	int tra;
+	int pid;
+	unsigned long sp;
+	unsigned long pc;
+};
+
+static struct ring_node event_ring[16];
+static int event_ptr = 0;
+
+struct stored_syscall_data {
+	int pid;
+	int syscall_number;
+};
+
+#define N_STORED_SYSCALLS 16
+
+static struct stored_syscall_data stored_syscalls[N_STORED_SYSCALLS];
+static int syscall_next=0;
+static int syscall_next_print=0;
+
+void evt_debug(int evt, int ret_addr, int event, int tra, struct pt_regs *regs)
+{
+	int syscallno = tra & 0xff;
+	unsigned long sp;
+	unsigned long stack_bottom;
+	int pid;
+	struct ring_node *rr;
+
+	pid = current->pid;
+	stack_bottom = (unsigned long) current->thread_info;
+	asm volatile("ori r15, 0, %0" : "=r" (sp));
+	rr = event_ring + event_ptr;
+	rr->evt = evt;
+	rr->ret_addr = ret_addr;
+	rr->event = event;
+	rr->tra = tra;
+	rr->pid = pid;
+	rr->sp = sp;
+	rr->pc = regs->pc;
+
+	if (sp < stack_bottom + 3092) {
+		printk("evt_debug : stack underflow report\n");
+		int i, j;
+		for (j=0, i = event_ptr; j<16; j++) {
+			rr = event_ring + i;
+			printk("evt=%08x event=%08x tra=%08x pid=%5d sp=%08lx pc=%08lx\n",
+				rr->evt, rr->event, rr->tra, rr->pid, rr->sp, rr->pc);
+			i--;
+			i &= 15;
+		}
+		panic("STACK UNDERFLOW\n");
+	}
+
+	event_ptr = (event_ptr + 1) & 15;
+
+	if ((event == 2) && (evt == 0x160)) {
+		if (syscallno < NUM_SYSCALL_INFO_ENTRIES) {
+			/* Store the syscall information to print later.  We
+			 * can't print this now - currently we're running with
+			 * SR.BL=1, so we can't take a tlbmiss (which could occur
+			 * in the console drivers under printk).
+			 *
+			 * Just overwrite old entries on ring overflow - this
+			 * is only for last-hope debugging. */
+			stored_syscalls[syscall_next].pid = current->pid;
+			stored_syscalls[syscall_next].syscall_number = syscallno;
+			syscall_next++;
+			syscall_next &= (N_STORED_SYSCALLS - 1);
+		}
+	}
+}
+
+static void drain_syscalls(void) {
+	while (syscall_next_print != syscall_next) {
+		printk("Task %d: %s()\n",
+			stored_syscalls[syscall_next_print].pid,
+			syscall_info_table[stored_syscalls[syscall_next_print].syscall_number].name);
+			syscall_next_print++;
+			syscall_next_print &= (N_STORED_SYSCALLS - 1);
+	}
+}
+
+void evt_debug2(unsigned int ret)
+{
+	drain_syscalls();
+	printk("Task %d: syscall returns %08x\n", current->pid, ret);
+}
+
+void evt_debug_ret_from_irq(struct pt_regs *regs)
+{
+	int pid;
+	struct ring_node *rr;
+
+	pid = current->pid;
+	rr = event_ring + event_ptr;
+	rr->evt = 0xffff;
+	rr->ret_addr = 0;
+	rr->event = 0;
+	rr->tra = 0;
+	rr->pid = pid;
+	rr->pc = regs->pc;
+	event_ptr = (event_ptr + 1) & 15;
+}
+
+void evt_debug_ret_from_exc(struct pt_regs *regs)
+{
+	int pid;
+	struct ring_node *rr;
+
+	pid = current->pid;
+	rr = event_ring + event_ptr;
+	rr->evt = 0xfffe;
+	rr->ret_addr = 0;
+	rr->event = 0;
+	rr->tra = 0;
+	rr->pid = pid;
+	rr->pc = regs->pc;
+	event_ptr = (event_ptr + 1) & 15;
+}
+
+#endif /* CONFIG_POOR_MANS_STRACE */
+
+/* ======================================================================= */
+
+void show_excp_regs(char *from, int trapnr, int signr, struct pt_regs *regs)
+{
+
+	unsigned long long ah, al, bh, bl, ch, cl;
+
+	printk("\n");
+	printk("EXCEPTION - %s: task %d; Linux trap # %d; signal = %d\n",
+	       ((from) ? from : "???"), current->pid, trapnr, signr);
+
+	asm volatile ("getcon   " __EXPEVT ", %0":"=r"(ah));
+	asm volatile ("getcon   " __EXPEVT ", %0":"=r"(al));
+	ah = (ah) >> 32;
+	al = (al) & 0xffffffff;
+	asm volatile ("getcon   " __KCR1 ", %0":"=r"(bh));
+	asm volatile ("getcon   " __KCR1 ", %0":"=r"(bl));
+	bh = (bh) >> 32;
+	bl = (bl) & 0xffffffff;
+	asm volatile ("getcon   " __INTEVT ", %0":"=r"(ch));
+	asm volatile ("getcon   " __INTEVT ", %0":"=r"(cl));
+	ch = (ch) >> 32;
+	cl = (cl) & 0xffffffff;
+	printk("EXPE: %08Lx%08Lx KCR1: %08Lx%08Lx INTE: %08Lx%08Lx\n",
+	       ah, al, bh, bl, ch, cl);
+
+	asm volatile ("getcon   " __PEXPEVT ", %0":"=r"(ah));
+	asm volatile ("getcon   " __PEXPEVT ", %0":"=r"(al));
+	ah = (ah) >> 32;
+	al = (al) & 0xffffffff;
+	asm volatile ("getcon   " __PSPC ", %0":"=r"(bh));
+	asm volatile ("getcon   " __PSPC ", %0":"=r"(bl));
+	bh = (bh) >> 32;
+	bl = (bl) & 0xffffffff;
+	asm volatile ("getcon   " __PSSR ", %0":"=r"(ch));
+	asm volatile ("getcon   " __PSSR ", %0":"=r"(cl));
+	ch = (ch) >> 32;
+	cl = (cl) & 0xffffffff;
+	printk("PEXP: %08Lx%08Lx PSPC: %08Lx%08Lx PSSR: %08Lx%08Lx\n",
+	       ah, al, bh, bl, ch, cl);
+
+	ah = (regs->pc) >> 32;
+	al = (regs->pc) & 0xffffffff;
+	bh = (regs->regs[18]) >> 32;
+	bl = (regs->regs[18]) & 0xffffffff;
+	ch = (regs->regs[15]) >> 32;
+	cl = (regs->regs[15]) & 0xffffffff;
+	printk("PC  : %08Lx%08Lx LINK: %08Lx%08Lx SP  : %08Lx%08Lx\n",
+	       ah, al, bh, bl, ch, cl);
+
+	ah = (regs->sr) >> 32;
+	al = (regs->sr) & 0xffffffff;
+	asm volatile ("getcon   " __TEA ", %0":"=r"(bh));
+	asm volatile ("getcon   " __TEA ", %0":"=r"(bl));
+	bh = (bh) >> 32;
+	bl = (bl) & 0xffffffff;
+	asm volatile ("getcon   " __KCR0 ", %0":"=r"(ch));
+	asm volatile ("getcon   " __KCR0 ", %0":"=r"(cl));
+	ch = (ch) >> 32;
+	cl = (cl) & 0xffffffff;
+	printk("SR  : %08Lx%08Lx TEA : %08Lx%08Lx KCR0: %08Lx%08Lx\n",
+	       ah, al, bh, bl, ch, cl);
+
+	ah = (regs->regs[0]) >> 32;
+	al = (regs->regs[0]) & 0xffffffff;
+	bh = (regs->regs[1]) >> 32;
+	bl = (regs->regs[1]) & 0xffffffff;
+	ch = (regs->regs[2]) >> 32;
+	cl = (regs->regs[2]) & 0xffffffff;
+	printk("R0  : %08Lx%08Lx R1  : %08Lx%08Lx R2  : %08Lx%08Lx\n",
+	       ah, al, bh, bl, ch, cl);
+
+	ah = (regs->regs[3]) >> 32;
+	al = (regs->regs[3]) & 0xffffffff;
+	bh = (regs->regs[4]) >> 32;
+	bl = (regs->regs[4]) & 0xffffffff;
+	ch = (regs->regs[5]) >> 32;
+	cl = (regs->regs[5]) & 0xffffffff;
+	printk("R3  : %08Lx%08Lx R4  : %08Lx%08Lx R5  : %08Lx%08Lx\n",
+	       ah, al, bh, bl, ch, cl);
+
+	ah = (regs->regs[6]) >> 32;
+	al = (regs->regs[6]) & 0xffffffff;
+	bh = (regs->regs[7]) >> 32;
+	bl = (regs->regs[7]) & 0xffffffff;
+	ch = (regs->regs[8]) >> 32;
+	cl = (regs->regs[8]) & 0xffffffff;
+	printk("R6  : %08Lx%08Lx R7  : %08Lx%08Lx R8  : %08Lx%08Lx\n",
+	       ah, al, bh, bl, ch, cl);
+
+	ah = (regs->regs[9]) >> 32;
+	al = (regs->regs[9]) & 0xffffffff;
+	bh = (regs->regs[10]) >> 32;
+	bl = (regs->regs[10]) & 0xffffffff;
+	ch = (regs->regs[11]) >> 32;
+	cl = (regs->regs[11]) & 0xffffffff;
+	printk("R9  : %08Lx%08Lx R10 : %08Lx%08Lx R11 : %08Lx%08Lx\n",
+	       ah, al, bh, bl, ch, cl);
+	printk("....\n");
+
+	ah = (regs->tregs[0]) >> 32;
+	al = (regs->tregs[0]) & 0xffffffff;
+	bh = (regs->tregs[1]) >> 32;
+	bl = (regs->tregs[1]) & 0xffffffff;
+	ch = (regs->tregs[2]) >> 32;
+	cl = (regs->tregs[2]) & 0xffffffff;
+	printk("T0  : %08Lx%08Lx T1  : %08Lx%08Lx T2  : %08Lx%08Lx\n",
+	       ah, al, bh, bl, ch, cl);
+	printk("....\n");
+
+	print_dtlb();
+	print_itlb();
+}
+
+/* ======================================================================= */
+
+/*
+** Depending on <base> scan the MMU, Data or Instrction side
+** looking for a valid mapping matching Eaddr & asid.
+** Return -1 if not found or the TLB id entry otherwise.
+** Note: it works only for 4k pages!
+*/
+static unsigned long
+lookup_mmu_side(unsigned long base, unsigned long Eaddr, unsigned long asid)
+{
+	regType_t pteH;
+	unsigned long epn;
+	int count;
+
+	epn = Eaddr & 0xfffff000;
+
+	for (count = 0; count < MAX_TLBs; count++, base += TLB_STEP) {
+		pteH = getConfigReg(base);
+		if (GET_VALID(pteH))
+			if ((unsigned long) GET_EPN(pteH) == epn)
+				if ((unsigned long) GET_ASID(pteH) == asid)
+					break;
+	}
+	return ((unsigned long) ((count < MAX_TLBs) ? base : -1));
+}
+
+unsigned long lookup_dtlb(unsigned long Eaddr)
+{
+	unsigned long asid = get_asid();
+	return (lookup_mmu_side((u64) DTLB_BASE, Eaddr, asid));
+}
+
+unsigned long lookup_itlb(unsigned long Eaddr)
+{
+	unsigned long asid = get_asid();
+	return (lookup_mmu_side((u64) ITLB_BASE, Eaddr, asid));
+}
+
+void print_page(struct page *page)
+{
+	printk("  page[%p] -> index 0x%lx,  count 0x%x,  flags 0x%lx\n",
+	       page, page->index, page_count(page), page->flags);
+	printk("       address_space = %p, pages =%ld\n", page->mapping,
+	       page->mapping->nrpages);
+
+}
diff --git a/arch/sh64/lib/io.c b/arch/sh64/lib/io.c
new file mode 100644
index 0000000..277e11b
--- /dev/null
+++ b/arch/sh64/lib/io.c
@@ -0,0 +1,140 @@
+/*
+ * Copyright (C) 2000 David J. Mckay (david.mckay@st.com)
+ *
+ * May be copied or modified under the terms of the GNU General Public
+ * License.  See linux/COPYING for more information.
+ *
+ * This file contains the I/O routines for use on the overdrive board
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/delay.h>
+#include <asm/system.h>
+#include <asm/processor.h>
+#include <asm/io.h>
+
+/*
+ * readX/writeX() are used to access memory mapped devices. On some
+ * architectures the memory mapped IO stuff needs to be accessed
+ * differently. On the SuperH architecture, we just read/write the
+ * memory location directly.
+ */
+
+/* This is horrible at the moment - needs more work to do something sensible */
+#define IO_DELAY()
+
+#define OUT_DELAY(x,type) \
+void out##x##_p(unsigned type value,unsigned long port){out##x(value,port);IO_DELAY();}
+
+#define IN_DELAY(x,type) \
+unsigned type in##x##_p(unsigned long port) {unsigned type tmp=in##x(port);IO_DELAY();return tmp;}
+
+#if 1
+OUT_DELAY(b, long) OUT_DELAY(w, long) OUT_DELAY(l, long)
+ IN_DELAY(b, long) IN_DELAY(w, long) IN_DELAY(l, long)
+#endif
+/*  Now for the string version of these functions */
+void outsb(unsigned long port, const void *addr, unsigned long count)
+{
+	int i;
+	unsigned char *p = (unsigned char *) addr;
+
+	for (i = 0; i < count; i++, p++) {
+		outb(*p, port);
+	}
+}
+
+void insb(unsigned long port, void *addr, unsigned long count)
+{
+	int i;
+	unsigned char *p = (unsigned char *) addr;
+
+	for (i = 0; i < count; i++, p++) {
+		*p = inb(port);
+	}
+}
+
+/* For the 16 and 32 bit string functions, we have to worry about alignment.
+ * The SH does not do unaligned accesses, so we have to read as bytes and
+ * then write as a word or dword.
+ * This can be optimised a lot more, especially in the case where the data
+ * is aligned
+ */
+
+void outsw(unsigned long port, const void *addr, unsigned long count)
+{
+	int i;
+	unsigned short tmp;
+	unsigned char *p = (unsigned char *) addr;
+
+	for (i = 0; i < count; i++, p += 2) {
+		tmp = (*p) | ((*(p + 1)) << 8);
+		outw(tmp, port);
+	}
+}
+
+void insw(unsigned long port, void *addr, unsigned long count)
+{
+	int i;
+	unsigned short tmp;
+	unsigned char *p = (unsigned char *) addr;
+
+	for (i = 0; i < count; i++, p += 2) {
+		tmp = inw(port);
+		p[0] = tmp & 0xff;
+		p[1] = (tmp >> 8) & 0xff;
+	}
+}
+
+void outsl(unsigned long port, const void *addr, unsigned long count)
+{
+	int i;
+	unsigned tmp;
+	unsigned char *p = (unsigned char *) addr;
+
+	for (i = 0; i < count; i++, p += 4) {
+		tmp = (*p) | ((*(p + 1)) << 8) | ((*(p + 2)) << 16) |
+		    ((*(p + 3)) << 24);
+		outl(tmp, port);
+	}
+}
+
+void insl(unsigned long port, void *addr, unsigned long count)
+{
+	int i;
+	unsigned tmp;
+	unsigned char *p = (unsigned char *) addr;
+
+	for (i = 0; i < count; i++, p += 4) {
+		tmp = inl(port);
+		p[0] = tmp & 0xff;
+		p[1] = (tmp >> 8) & 0xff;
+		p[2] = (tmp >> 16) & 0xff;
+		p[3] = (tmp >> 24) & 0xff;
+
+	}
+}
+
+void memcpy_toio(void __iomem *to, const void *from, long count)
+{
+	unsigned char *p = (unsigned char *) from;
+
+	while (count) {
+		count--;
+		writeb(*p++, to++);
+	}
+}
+
+void memcpy_fromio(void *to, void __iomem *from, long count)
+{
+	int i;
+	unsigned char *p = (unsigned char *) to;
+
+	for (i = 0; i < count; i++) {
+		p[i] = readb(from);
+		from++;
+	}
+}
diff --git a/arch/sh64/lib/iomap.c b/arch/sh64/lib/iomap.c
new file mode 100644
index 0000000..83c5f0c
--- /dev/null
+++ b/arch/sh64/lib/iomap.c
@@ -0,0 +1,55 @@
+/*
+ * arch/sh64/lib/iomap.c
+ *
+ * Generic sh64 iomap interface
+ *
+ * Copyright (C) 2004  Paul Mundt
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/config.h>
+#include <linux/pci.h>
+#include <asm/io.h>
+
+void __iomem *__attribute__ ((weak))
+ioport_map(unsigned long port, unsigned int len)
+{
+	return (void __iomem *)port;
+}
+
+void ioport_unmap(void __iomem *addr)
+{
+	/* Nothing .. */
+}
+
+void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max)
+{
+	unsigned long start = pci_resource_start(dev, bar);
+	unsigned long len = pci_resource_len(dev, bar);
+	unsigned long flags = pci_resource_flags(dev, bar);
+
+	if (!len)
+		return NULL;
+	if (max && len > max)
+		len = max;
+	if (flags & IORESOURCE_IO)
+		return ioport_map(start + pciio_virt, len);
+	if (flags & IORESOURCE_MEM)
+		return (void __iomem *)start;
+
+	/* What? */
+	return NULL;
+}
+
+void pci_iounmap(struct pci_dev *dev, void __iomem *addr)
+{
+	/* Nothing .. */
+}
+
+EXPORT_SYMBOL(ioport_map);
+EXPORT_SYMBOL(ioport_unmap);
+EXPORT_SYMBOL(pci_iomap);
+EXPORT_SYMBOL(pci_iounmap);
+
diff --git a/arch/sh64/lib/memcpy.c b/arch/sh64/lib/memcpy.c
new file mode 100644
index 0000000..c785d0a
--- /dev/null
+++ b/arch/sh64/lib/memcpy.c
@@ -0,0 +1,82 @@
+/*
+ * Copyright (C) 2002 Mark Debbage (Mark.Debbage@superh.com)
+ *
+ * May be copied or modified under the terms of the GNU General Public
+ * License.  See linux/COPYING for more information.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <asm/string.h>
+
+// This is a simplistic optimization of memcpy to increase the
+// granularity of access beyond one byte using aligned
+// loads and stores. This is not an optimal implementation
+// for SH-5 (especially with regard to prefetching and the cache),
+// and a better version should be provided later ...
+
+void *memcpy(void *dest, const void *src, size_t count)
+{
+	char *d = (char *) dest, *s = (char *) src;
+
+	if (count >= 32) {
+		int i = 8 - (((unsigned long) d) & 0x7);
+
+		if (i != 8)
+			while (i-- && count--) {
+				*d++ = *s++;
+			}
+
+		if (((((unsigned long) d) & 0x7) == 0) &&
+		    ((((unsigned long) s) & 0x7) == 0)) {
+			while (count >= 32) {
+				unsigned long long t1, t2, t3, t4;
+				t1 = *(unsigned long long *) (s);
+				t2 = *(unsigned long long *) (s + 8);
+				t3 = *(unsigned long long *) (s + 16);
+				t4 = *(unsigned long long *) (s + 24);
+				*(unsigned long long *) (d) = t1;
+				*(unsigned long long *) (d + 8) = t2;
+				*(unsigned long long *) (d + 16) = t3;
+				*(unsigned long long *) (d + 24) = t4;
+				d += 32;
+				s += 32;
+				count -= 32;
+			}
+			while (count >= 8) {
+				*(unsigned long long *) d =
+				    *(unsigned long long *) s;
+				d += 8;
+				s += 8;
+				count -= 8;
+			}
+		}
+
+		if (((((unsigned long) d) & 0x3) == 0) &&
+		    ((((unsigned long) s) & 0x3) == 0)) {
+			while (count >= 4) {
+				*(unsigned long *) d = *(unsigned long *) s;
+				d += 4;
+				s += 4;
+				count -= 4;
+			}
+		}
+
+		if (((((unsigned long) d) & 0x1) == 0) &&
+		    ((((unsigned long) s) & 0x1) == 0)) {
+			while (count >= 2) {
+				*(unsigned short *) d = *(unsigned short *) s;
+				d += 2;
+				s += 2;
+				count -= 2;
+			}
+		}
+	}
+
+	while (count--) {
+		*d++ = *s++;
+	}
+
+	return d;
+}
diff --git a/arch/sh64/lib/page_clear.S b/arch/sh64/lib/page_clear.S
new file mode 100644
index 0000000..ac0111d
--- /dev/null
+++ b/arch/sh64/lib/page_clear.S
@@ -0,0 +1,54 @@
+/*
+   Copyright 2003 Richard Curnow, SuperH (UK) Ltd.
+
+   This file is subject to the terms and conditions of the GNU General Public
+   License.  See the file "COPYING" in the main directory of this archive
+   for more details.
+
+   Tight version of memset for the case of just clearing a page.  It turns out
+   that having the alloco's spaced out slightly due to the increment/branch
+   pair causes them to contend less for access to the cache.  Similarly,
+   keeping the stores apart from the allocos causes less contention.  => Do two
+   separate loops.  Do multiple stores per loop to amortise the
+   increment/branch cost a little.
+
+   Parameters:
+   r2 : source effective address (start of page)
+
+   Always clears 4096 bytes.
+
+   Note : alloco guarded by synco to avoid TAKum03020 erratum
+
+*/
+
+	.section .text..SHmedia32,"ax"
+	.little
+
+	.balign 8
+	.global sh64_page_clear
+sh64_page_clear:
+	pta/l 1f, tr1
+	pta/l 2f, tr2
+	ptabs/l r18, tr0
+
+	movi 4096, r7
+	add  r2, r7, r7
+	add  r2, r63, r6
+1:
+	alloco r6, 0
+	synco	! TAKum03020
+	addi	r6, 32, r6
+	bgt/l	r7, r6, tr1
+
+	add  r2, r63, r6
+2:
+	st.q  r6,   0, r63
+	st.q  r6,   8, r63
+	st.q  r6,  16, r63
+	st.q  r6,  24, r63
+	addi r6, 32, r6
+	bgt/l r7, r6, tr2
+
+	blink tr0, r63
+
+
diff --git a/arch/sh64/lib/page_copy.S b/arch/sh64/lib/page_copy.S
new file mode 100644
index 0000000..e159c3c
--- /dev/null
+++ b/arch/sh64/lib/page_copy.S
@@ -0,0 +1,91 @@
+/*
+   Copyright 2003 Richard Curnow, SuperH (UK) Ltd.
+
+   This file is subject to the terms and conditions of the GNU General Public
+   License.  See the file "COPYING" in the main directory of this archive
+   for more details.
+
+   Tight version of mempy for the case of just copying a page.
+   Prefetch strategy empirically optimised against RTL simulations
+   of SH5-101 cut2 eval chip with Cayman board DDR memory.
+
+   Parameters:
+   r2 : source effective address (start of page)
+   r3 : destination effective address (start of page)
+
+   Always copies 4096 bytes.
+
+   Points to review.
+   * Currently the prefetch is 4 lines ahead and the alloco is 2 lines ahead.
+     It seems like the prefetch needs to be at at least 4 lines ahead to get
+     the data into the cache in time, and the allocos contend with outstanding
+     prefetches for the same cache set, so it's better to have the numbers
+     different.
+   */
+
+	.section .text..SHmedia32,"ax"
+	.little
+
+	.balign 8
+	.global sh64_page_copy
+sh64_page_copy:
+
+	/* Copy 4096 bytes worth of data from r2 to r3.
+	   Do prefetches 4 lines ahead.
+	   Do alloco 2 lines ahead */
+
+	pta 1f, tr1
+	pta 2f, tr2
+	pta 3f, tr3
+	ptabs r18, tr0
+
+#if 0
+	/* TAKum03020 */
+	ld.q r2, 0x00, r63
+	ld.q r2, 0x20, r63
+	ld.q r2, 0x40, r63
+	ld.q r2, 0x60, r63
+#endif
+	alloco r3, 0x00
+	synco		! TAKum03020
+	alloco r3, 0x20
+	synco		! TAKum03020
+
+	movi 3968, r6
+	add  r3, r6, r6
+	addi r6, 64, r7
+	addi r7, 64, r8
+	sub r2, r3, r60
+	addi r60, 8, r61
+	addi r61, 8, r62
+	addi r62, 8, r23
+	addi r60, 0x80, r22
+
+/* Minimal code size.  The extra branches inside the loop don't cost much
+   because they overlap with the time spent waiting for prefetches to
+   complete. */
+1:
+#if 0
+	/* TAKum03020 */
+	bge/u r3, r6, tr2  ! skip prefetch for last 4 lines
+	ldx.q r3, r22, r63 ! prefetch 4 lines hence
+#endif
+2:
+	bge/u r3, r7, tr3  ! skip alloco for last 2 lines
+	alloco r3, 0x40    ! alloc destination line 2 lines ahead
+	synco		! TAKum03020
+3:
+	ldx.q r3, r60, r36
+	ldx.q r3, r61, r37
+	ldx.q r3, r62, r38
+	ldx.q r3, r23, r39
+	st.q  r3,   0, r36
+	st.q  r3,   8, r37
+	st.q  r3,  16, r38
+	st.q  r3,  24, r39
+	addi r3, 32, r3
+	bgt/l r8, r3, tr1
+
+	blink tr0, r63	   ! return
+
+
diff --git a/arch/sh64/lib/panic.c b/arch/sh64/lib/panic.c
new file mode 100644
index 0000000..c9eb1cb
--- /dev/null
+++ b/arch/sh64/lib/panic.c
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2003  Richard Curnow, SuperH UK Limited
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/kernel.h>
+#include <asm/io.h>
+#include <asm/registers.h>
+
+/* THIS IS A PHYSICAL ADDRESS */
+#define HDSP2534_ADDR (0x04002100)
+
+#ifdef CONFIG_SH_CAYMAN
+
+static void poor_mans_delay(void)
+{
+	int i;
+	for (i = 0; i < 2500000; i++) {
+	}		/* poor man's delay */
+}
+
+static void show_value(unsigned long x)
+{
+	int i;
+	unsigned nibble;
+	for (i = 0; i < 8; i++) {
+		nibble = ((x >> (i * 4)) & 0xf);
+
+		ctrl_outb(nibble + ((nibble > 9) ? 55 : 48),
+			  HDSP2534_ADDR + 0xe0 + ((7 - i) << 2));
+	}
+}
+
+#endif
+
+void
+panic_handler(unsigned long panicPC, unsigned long panicSSR,
+	      unsigned long panicEXPEVT)
+{
+#ifdef CONFIG_SH_CAYMAN
+	while (1) {
+		/* This piece of code displays the PC on the LED display */
+		show_value(panicPC);
+		poor_mans_delay();
+		show_value(panicSSR);
+		poor_mans_delay();
+		show_value(panicEXPEVT);
+		poor_mans_delay();
+	}
+#endif
+
+	/* Never return from the panic handler */
+	for (;;) ;
+
+}
diff --git a/arch/sh64/lib/udelay.c b/arch/sh64/lib/udelay.c
new file mode 100644
index 0000000..dad2f25
--- /dev/null
+++ b/arch/sh64/lib/udelay.c
@@ -0,0 +1,60 @@
+/*
+ * arch/sh64/lib/udelay.c
+ *
+ * Delay routines, using a pre-computed "loops_per_jiffy" value.
+ *
+ * Copyright (C) 2000, 2001  Paolo Alberelli
+ * Copyright (C) 2003, 2004  Paul Mundt
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <asm/param.h>
+
+extern unsigned long loops_per_jiffy;
+
+/*
+ * Use only for very small delays (< 1 msec).
+ *
+ * The active part of our cycle counter is only 32-bits wide, and
+ * we're treating the difference between two marks as signed.  On
+ * a 1GHz box, that's about 2 seconds.
+ */
+
+void __delay(int loops)
+{
+	long long dummy;
+	__asm__ __volatile__("gettr	tr0, %1\n\t"
+			     "pta	$+4, tr0\n\t"
+			     "addi	%0, -1, %0\n\t"
+			     "bne	%0, r63, tr0\n\t"
+			     "ptabs	%1, tr0\n\t":"=r"(loops),
+			     "=r"(dummy)
+			     :"0"(loops));
+}
+
+void __udelay(unsigned long long usecs, unsigned long lpj)
+{
+	usecs *= (((unsigned long long) HZ << 32) / 1000000) * lpj;
+	__delay((long long) usecs >> 32);
+}
+
+void __ndelay(unsigned long long nsecs, unsigned long lpj)
+{
+	nsecs *= (((unsigned long long) HZ << 32) / 1000000000) * lpj;
+	__delay((long long) nsecs >> 32);
+}
+
+void udelay(unsigned long usecs)
+{
+	__udelay(usecs, loops_per_jiffy);
+}
+
+void ndelay(unsigned long nsecs)
+{
+	__ndelay(nsecs, loops_per_jiffy);
+}
+
diff --git a/arch/sh64/mach-cayman/Makefile b/arch/sh64/mach-cayman/Makefile
new file mode 100644
index 0000000..67a2258
--- /dev/null
+++ b/arch/sh64/mach-cayman/Makefile
@@ -0,0 +1,11 @@
+#
+# Makefile for the Hitachi Cayman specific parts of the kernel
+#
+# Note! Dependencies are done automagically by 'make dep', which also
+# removes any old dependencies. DON'T put your own dependencies here
+# unless it's something special (ie not a .c file).
+#
+
+obj-y := setup.o irq.o iomap.o
+obj-$(CONFIG_HEARTBEAT)	+= led.o
+
diff --git a/arch/sh64/mach-cayman/iomap.c b/arch/sh64/mach-cayman/iomap.c
new file mode 100644
index 0000000..d6a538c
--- /dev/null
+++ b/arch/sh64/mach-cayman/iomap.c
@@ -0,0 +1,24 @@
+/*
+ * arch/sh64/mach-cayman/iomap.c
+ *
+ * Cayman iomap interface
+ *
+ * Copyright (C) 2004  Paul Mundt
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/config.h>
+#include <linux/pci.h>
+#include <asm/io.h>
+#include <asm/cayman.h>
+
+void __iomem *ioport_map(unsigned long port, unsigned int len)
+{
+	if (port < 0x400)
+		return (void __iomem *)((port << 2) | smsc_superio_virt);
+
+	return (void __iomem *)port;
+}
+
diff --git a/arch/sh64/mach-cayman/irq.c b/arch/sh64/mach-cayman/irq.c
new file mode 100644
index 0000000..f797c84
--- /dev/null
+++ b/arch/sh64/mach-cayman/irq.c
@@ -0,0 +1,196 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * arch/sh64/kernel/irq_cayman.c
+ *
+ * SH-5 Cayman Interrupt Support
+ *
+ * This file handles the board specific parts of the Cayman interrupt system
+ *
+ * Copyright (C) 2002 Stuart Menefy
+ */
+
+#include <linux/config.h>
+#include <asm/irq.h>
+#include <asm/page.h>
+#include <asm/io.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/signal.h>
+#include <asm/cayman.h>
+
+unsigned long epld_virt;
+
+#define EPLD_BASE        0x04002000
+#define EPLD_STATUS_BASE (epld_virt + 0x10)
+#define EPLD_MASK_BASE   (epld_virt + 0x20)
+
+/* Note the SMSC SuperIO chip and SMSC LAN chip interrupts are all muxed onto
+   the same SH-5 interrupt */
+
+static irqreturn_t cayman_interrupt_smsc(int irq, void *dev_id, struct pt_regs *regs)
+{
+        printk(KERN_INFO "CAYMAN: spurious SMSC interrupt\n");
+	return IRQ_NONE;
+}
+
+static irqreturn_t cayman_interrupt_pci2(int irq, void *dev_id, struct pt_regs *regs)
+{
+        printk(KERN_INFO "CAYMAN: spurious PCI interrupt, IRQ %d\n", irq);
+	return IRQ_NONE;
+}
+
+static struct irqaction cayman_action_smsc = {
+	.name		= "Cayman SMSC Mux",
+	.handler	= cayman_interrupt_smsc,
+	.flags		= SA_INTERRUPT,
+};
+
+static struct irqaction cayman_action_pci2 = {
+	.name		= "Cayman PCI2 Mux",
+	.handler	= cayman_interrupt_pci2,
+	.flags		= SA_INTERRUPT,
+};
+
+static void enable_cayman_irq(unsigned int irq)
+{
+	unsigned long flags;
+	unsigned long mask;
+	unsigned int reg;
+	unsigned char bit;
+
+	irq -= START_EXT_IRQS;
+	reg = EPLD_MASK_BASE + ((irq / 8) << 2);
+	bit = 1<<(irq % 8);
+	local_irq_save(flags);
+	mask = ctrl_inl(reg);
+	mask |= bit;
+	ctrl_outl(mask, reg);
+	local_irq_restore(flags);
+}
+
+void disable_cayman_irq(unsigned int irq)
+{
+	unsigned long flags;
+	unsigned long mask;
+	unsigned int reg;
+	unsigned char bit;
+
+	irq -= START_EXT_IRQS;
+	reg = EPLD_MASK_BASE + ((irq / 8) << 2);
+	bit = 1<<(irq % 8);
+	local_irq_save(flags);
+	mask = ctrl_inl(reg);
+	mask &= ~bit;
+	ctrl_outl(mask, reg);
+	local_irq_restore(flags);
+}
+
+static void ack_cayman_irq(unsigned int irq)
+{
+	disable_cayman_irq(irq);
+}
+
+static void end_cayman_irq(unsigned int irq)
+{
+	if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS)))
+		enable_cayman_irq(irq);
+}
+
+static unsigned int startup_cayman_irq(unsigned int irq)
+{
+	enable_cayman_irq(irq);
+	return 0; /* never anything pending */
+}
+
+static void shutdown_cayman_irq(unsigned int irq)
+{
+	disable_cayman_irq(irq);
+}
+
+struct hw_interrupt_type cayman_irq_type = {
+	.typename	= "Cayman-IRQ",
+	.startup	= startup_cayman_irq,
+	.shutdown	= shutdown_cayman_irq,
+	.enable		= enable_cayman_irq,
+	.disable	= disable_cayman_irq,
+	.ack		= ack_cayman_irq,
+	.end		= end_cayman_irq,
+};
+
+int cayman_irq_demux(int evt)
+{
+	int irq = intc_evt_to_irq[evt];
+
+	if (irq == SMSC_IRQ) {
+		unsigned long status;
+		int i;
+
+		status = ctrl_inl(EPLD_STATUS_BASE) &
+			 ctrl_inl(EPLD_MASK_BASE) & 0xff;
+		if (status == 0) {
+			irq = -1;
+		} else {
+			for (i=0; i<8; i++) {
+				if (status & (1<<i))
+					break;
+			}
+			irq = START_EXT_IRQS + i;
+		}
+	}
+
+	if (irq == PCI2_IRQ) {
+		unsigned long status;
+		int i;
+
+		status = ctrl_inl(EPLD_STATUS_BASE + 3 * sizeof(u32)) &
+			 ctrl_inl(EPLD_MASK_BASE + 3 * sizeof(u32)) & 0xff;
+		if (status == 0) {
+			irq = -1;
+		} else {
+			for (i=0; i<8; i++) {
+				if (status & (1<<i))
+					break;
+			}
+			irq = START_EXT_IRQS + (3 * 8) + i;
+		}
+	}
+
+	return irq;
+}
+
+#if defined(CONFIG_PROC_FS) && defined(CONFIG_SYSCTL)
+int cayman_irq_describe(char* p, int irq)
+{
+	if (irq < NR_INTC_IRQS) {
+		return intc_irq_describe(p, irq);
+	} else if (irq < NR_INTC_IRQS + 8) {
+		return sprintf(p, "(SMSC %d)", irq - NR_INTC_IRQS);
+	} else if ((irq >= NR_INTC_IRQS + 24) && (irq < NR_INTC_IRQS + 32)) {
+		return sprintf(p, "(PCI2 %d)", irq - (NR_INTC_IRQS + 24));
+	}
+
+	return 0;
+}
+#endif
+
+void init_cayman_irq(void)
+{
+	int i;
+
+	epld_virt = onchip_remap(EPLD_BASE, 1024, "EPLD");
+	if (!epld_virt) {
+		printk(KERN_ERR "Cayman IRQ: Unable to remap EPLD\n");
+		return;
+	}
+
+	for (i=0; i<NR_EXT_IRQS; i++) {
+		irq_desc[START_EXT_IRQS + i].handler = &cayman_irq_type;
+	}
+
+	/* Setup the SMSC interrupt */
+	setup_irq(SMSC_IRQ, &cayman_action_smsc);
+	setup_irq(PCI2_IRQ, &cayman_action_pci2);
+}
diff --git a/arch/sh64/mach-cayman/led.c b/arch/sh64/mach-cayman/led.c
new file mode 100644
index 0000000..8b3cc4c
--- /dev/null
+++ b/arch/sh64/mach-cayman/led.c
@@ -0,0 +1,51 @@
+/*
+ * arch/sh64/kernel/led_cayman.c
+ *
+ * Copyright (C) 2002 Stuart Menefy <stuart.menefy@st.com>
+ *
+ * May be copied or modified under the terms of the GNU General Public
+ * License.  See linux/COPYING for more information.
+ *
+ * Flash the LEDs
+ */
+#include <asm/io.h>
+
+/*
+** It is supposed these functions to be used for a low level
+** debugging (via Cayman LEDs), hence to be available as soon
+** as possible.
+** Unfortunately Cayman LEDs relies on Cayman EPLD to be mapped
+** (this happen when IRQ are initialized... quite late).
+** These triky dependencies should be removed. Temporary, it
+** may be enough to NOP until EPLD is mapped.
+*/
+
+extern unsigned long epld_virt;
+
+#define LED_ADDR      (epld_virt + 0x008)
+#define HDSP2534_ADDR (epld_virt + 0x100)
+
+void mach_led(int position, int value)
+{
+	if (!epld_virt)
+		return;
+
+	if (value)
+		ctrl_outl(0, LED_ADDR);
+	else
+		ctrl_outl(1, LED_ADDR);
+
+}
+
+void mach_alphanum(int position, unsigned char value)
+{
+	if (!epld_virt)
+		return;
+
+	ctrl_outb(value, HDSP2534_ADDR + 0xe0 + (position << 2));
+}
+
+void mach_alphanum_brightness(int setting)
+{
+	ctrl_outb(setting & 7, HDSP2534_ADDR + 0xc0);
+}
diff --git a/arch/sh64/mach-cayman/setup.c b/arch/sh64/mach-cayman/setup.c
new file mode 100644
index 0000000..c793245
--- /dev/null
+++ b/arch/sh64/mach-cayman/setup.c
@@ -0,0 +1,246 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * arch/sh64/mach-cayman/setup.c
+ *
+ * SH5 Cayman support
+ *
+ * This file handles the architecture-dependent parts of initialization
+ *
+ * Copyright David J. Mckay.
+ * Needs major work!
+ *
+ * benedict.gaster@superh.com:	 3rd May 2002
+ *    Added support for ramdisk, removing statically linked romfs at the same time.
+ *
+ * lethal@linux-sh.org:          15th May 2003
+ *    Use the generic procfs cpuinfo interface, just return a valid board name.
+ */
+
+#include <linux/stddef.h>
+#include <linux/init.h>
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/bootmem.h>
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/seq_file.h>
+#include <asm/processor.h>
+#include <asm/platform.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/page.h>
+
+/*
+ * Platform Dependent Interrupt Priorities.
+ */
+
+/* Using defaults defined in irq.h */
+#define	RES NO_PRIORITY		/* Disabled */
+#define IR0 IRL0_PRIORITY	/* IRLs */
+#define IR1 IRL1_PRIORITY
+#define IR2 IRL2_PRIORITY
+#define IR3 IRL3_PRIORITY
+#define PCA INTA_PRIORITY	/* PCI Ints */
+#define PCB INTB_PRIORITY
+#define PCC INTC_PRIORITY
+#define PCD INTD_PRIORITY
+#define SER TOP_PRIORITY
+#define ERR TOP_PRIORITY
+#define PW0 TOP_PRIORITY
+#define PW1 TOP_PRIORITY
+#define PW2 TOP_PRIORITY
+#define PW3 TOP_PRIORITY
+#define DM0 NO_PRIORITY		/* DMA Ints */
+#define DM1 NO_PRIORITY
+#define DM2 NO_PRIORITY
+#define DM3 NO_PRIORITY
+#define DAE NO_PRIORITY
+#define TU0 TIMER_PRIORITY	/* TMU Ints */
+#define TU1 NO_PRIORITY
+#define TU2 NO_PRIORITY
+#define TI2 NO_PRIORITY
+#define ATI NO_PRIORITY		/* RTC Ints */
+#define PRI NO_PRIORITY
+#define CUI RTC_PRIORITY
+#define ERI SCIF_PRIORITY	/* SCIF Ints */
+#define RXI SCIF_PRIORITY
+#define BRI SCIF_PRIORITY
+#define TXI SCIF_PRIORITY
+#define ITI TOP_PRIORITY	/* WDT Ints */
+
+/* Setup for the SMSC FDC37C935 */
+#define SMSC_SUPERIO_BASE	0x04000000
+#define SMSC_CONFIG_PORT_ADDR	0x3f0
+#define SMSC_INDEX_PORT_ADDR	SMSC_CONFIG_PORT_ADDR
+#define SMSC_DATA_PORT_ADDR	0x3f1
+
+#define SMSC_ENTER_CONFIG_KEY	0x55
+#define SMSC_EXIT_CONFIG_KEY	0xaa
+
+#define SMCS_LOGICAL_DEV_INDEX	0x07
+#define SMSC_DEVICE_ID_INDEX	0x20
+#define SMSC_DEVICE_REV_INDEX	0x21
+#define SMSC_ACTIVATE_INDEX	0x30
+#define SMSC_PRIMARY_BASE_INDEX  0x60
+#define SMSC_SECONDARY_BASE_INDEX 0x62
+#define SMSC_PRIMARY_INT_INDEX	0x70
+#define SMSC_SECONDARY_INT_INDEX 0x72
+
+#define SMSC_IDE1_DEVICE	1
+#define SMSC_KEYBOARD_DEVICE	7
+#define SMSC_CONFIG_REGISTERS	8
+
+#define SMSC_SUPERIO_READ_INDEXED(index) ({ \
+	outb((index), SMSC_INDEX_PORT_ADDR); \
+	inb(SMSC_DATA_PORT_ADDR); })
+#define SMSC_SUPERIO_WRITE_INDEXED(val, index) ({ \
+	outb((index), SMSC_INDEX_PORT_ADDR); \
+	outb((val),   SMSC_DATA_PORT_ADDR); })
+
+#define IDE1_PRIMARY_BASE	0x01f0
+#define IDE1_SECONDARY_BASE	0x03f6
+
+unsigned long smsc_superio_virt;
+
+/*
+ * Platform dependent structures: maps and parms block.
+ */
+struct resource io_resources[] = {
+	/* To be updated with external devices */
+};
+
+struct resource kram_resources[] = {
+	{ "Kernel code", 0, 0 },	/* These must be last in the array */
+	{ "Kernel data", 0, 0 }		/* These must be last in the array */
+};
+
+struct resource xram_resources[] = {
+	/* To be updated with external devices */
+};
+
+struct resource rom_resources[] = {
+	/* To be updated with external devices */
+};
+
+struct sh64_platform platform_parms = {
+	.readonly_rootfs =	1,
+	.initial_root_dev =	0x0100,
+	.loader_type =		1,
+	.io_res_p =		io_resources,
+	.io_res_count =		ARRAY_SIZE(io_resources),
+	.kram_res_p =		kram_resources,
+	.kram_res_count =	ARRAY_SIZE(kram_resources),
+	.xram_res_p =		xram_resources,
+	.xram_res_count =	ARRAY_SIZE(xram_resources),
+	.rom_res_p =		rom_resources,
+	.rom_res_count =	ARRAY_SIZE(rom_resources),
+};
+
+int platform_int_priority[NR_INTC_IRQS] = {
+	IR0, IR1, IR2, IR3, PCA, PCB, PCC, PCD,	/* IRQ  0- 7 */
+	RES, RES, RES, RES, SER, ERR, PW3, PW2,	/* IRQ  8-15 */
+	PW1, PW0, DM0, DM1, DM2, DM3, DAE, RES,	/* IRQ 16-23 */
+	RES, RES, RES, RES, RES, RES, RES, RES,	/* IRQ 24-31 */
+	TU0, TU1, TU2, TI2, ATI, PRI, CUI, ERI,	/* IRQ 32-39 */
+	RXI, BRI, TXI, RES, RES, RES, RES, RES,	/* IRQ 40-47 */
+	RES, RES, RES, RES, RES, RES, RES, RES,	/* IRQ 48-55 */
+	RES, RES, RES, RES, RES, RES, RES, ITI,	/* IRQ 56-63 */
+};
+
+static int __init smsc_superio_setup(void)
+{
+	unsigned char devid, devrev;
+
+	smsc_superio_virt = onchip_remap(SMSC_SUPERIO_BASE, 1024, "SMSC SuperIO");
+	if (!smsc_superio_virt) {
+		panic("Unable to remap SMSC SuperIO\n");
+	}
+
+	/* Initially the chip is in run state */
+	/* Put it into configuration state */
+	outb(SMSC_ENTER_CONFIG_KEY, SMSC_CONFIG_PORT_ADDR);
+	outb(SMSC_ENTER_CONFIG_KEY, SMSC_CONFIG_PORT_ADDR);
+
+	/* Read device ID info */
+	devid = SMSC_SUPERIO_READ_INDEXED(SMSC_DEVICE_ID_INDEX);
+	devrev = SMSC_SUPERIO_READ_INDEXED(SMSC_DEVICE_REV_INDEX);
+	printk("SMSC SuperIO devid %02x rev %02x\n", devid, devrev);
+
+	/* Select the keyboard device */
+	SMSC_SUPERIO_WRITE_INDEXED(SMSC_KEYBOARD_DEVICE, SMCS_LOGICAL_DEV_INDEX);
+
+	/* enable it */
+	SMSC_SUPERIO_WRITE_INDEXED(1, SMSC_ACTIVATE_INDEX);
+
+	/* Select the interrupts */
+	/* On a PC keyboard is IRQ1, mouse is IRQ12 */
+	SMSC_SUPERIO_WRITE_INDEXED(1, SMSC_PRIMARY_INT_INDEX);
+	SMSC_SUPERIO_WRITE_INDEXED(12, SMSC_SECONDARY_INT_INDEX);
+
+#ifdef CONFIG_IDE
+	/*
+	 * Only IDE1 exists on the Cayman
+	 */
+
+	/* Power it on */
+	SMSC_SUPERIO_WRITE_INDEXED(1 << SMSC_IDE1_DEVICE, 0x22);
+
+	SMSC_SUPERIO_WRITE_INDEXED(SMSC_IDE1_DEVICE, SMCS_LOGICAL_DEV_INDEX);
+	SMSC_SUPERIO_WRITE_INDEXED(1, SMSC_ACTIVATE_INDEX);
+
+	SMSC_SUPERIO_WRITE_INDEXED(IDE1_PRIMARY_BASE >> 8,
+				   SMSC_PRIMARY_BASE_INDEX + 0);
+	SMSC_SUPERIO_WRITE_INDEXED(IDE1_PRIMARY_BASE & 0xff,
+				   SMSC_PRIMARY_BASE_INDEX + 1);
+
+	SMSC_SUPERIO_WRITE_INDEXED(IDE1_SECONDARY_BASE >> 8,
+				   SMSC_SECONDARY_BASE_INDEX + 0);
+	SMSC_SUPERIO_WRITE_INDEXED(IDE1_SECONDARY_BASE & 0xff,
+				   SMSC_SECONDARY_BASE_INDEX + 1);
+
+	SMSC_SUPERIO_WRITE_INDEXED(14, SMSC_PRIMARY_INT_INDEX);
+
+	SMSC_SUPERIO_WRITE_INDEXED(SMSC_CONFIG_REGISTERS,
+				   SMCS_LOGICAL_DEV_INDEX);
+
+	SMSC_SUPERIO_WRITE_INDEXED(0x00, 0xc2); /* GP42 = nIDE1_OE */
+	SMSC_SUPERIO_WRITE_INDEXED(0x01, 0xc5); /* GP45 = IDE1_IRQ */
+	SMSC_SUPERIO_WRITE_INDEXED(0x00, 0xc6); /* GP46 = nIOROP */
+	SMSC_SUPERIO_WRITE_INDEXED(0x00, 0xc7); /* GP47 = nIOWOP */
+#endif
+
+	/* Exit the configuraton state */
+	outb(SMSC_EXIT_CONFIG_KEY, SMSC_CONFIG_PORT_ADDR);
+
+	return 0;
+}
+
+/* This is grotty, but, because kernel is always referenced on the link line
+ * before any devices, this is safe.
+ */
+__initcall(smsc_superio_setup);
+
+void __init platform_setup(void)
+{
+	/* Cayman platform leaves the decision to head.S, for now */
+	platform_parms.fpu_flags = fpu_in_use;
+}
+
+void __init platform_monitor(void)
+{
+	/* Nothing yet .. */
+}
+
+void __init platform_reserve(void)
+{
+	/* Nothing yet .. */
+}
+
+const char *get_system_type(void)
+{
+	return "Hitachi Cayman";
+}
+
diff --git a/arch/sh64/mach-harp/Makefile b/arch/sh64/mach-harp/Makefile
new file mode 100644
index 0000000..63f065b
--- /dev/null
+++ b/arch/sh64/mach-harp/Makefile
@@ -0,0 +1,14 @@
+#
+# Makefile for the ST50 Harp specific parts of the kernel
+#
+# Note! Dependencies are done automagically by 'make dep', which also
+# removes any old dependencies. DON'T put your own dependencies here
+# unless it's something special (ie not a .c file).
+#
+
+O_TARGET := harp.o
+
+obj-y := setup.o
+
+include $(TOPDIR)/Rules.make
+
diff --git a/arch/sh64/mach-harp/setup.c b/arch/sh64/mach-harp/setup.c
new file mode 100644
index 0000000..3938a65
--- /dev/null
+++ b/arch/sh64/mach-harp/setup.c
@@ -0,0 +1,139 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * arch/sh64/mach-harp/setup.c
+ *
+ * SH-5 Simulator Platform Support
+ *
+ * This file handles the architecture-dependent parts of initialization
+ *
+ * Copyright (C) 2000, 2001  Paolo Alberelli
+ *
+ * benedict.gaster@superh.com:	 3rd May 2002
+ *    Added support for ramdisk, removing statically linked romfs at the same time. *
+ *
+ * lethal@linux-sh.org:          15th May 2003
+ *    Use the generic procfs cpuinfo interface, just return a valid board name.
+ */
+
+#include <linux/stddef.h>
+#include <linux/init.h>
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/bootmem.h>
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <asm/processor.h>
+#include <asm/platform.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/page.h>
+
+#define RES_COUNT(res) ((sizeof((res))/sizeof(struct resource)))
+
+/*
+ * Platform Dependent Interrupt Priorities.
+ */
+
+/* Using defaults defined in irq.h */
+#define	RES NO_PRIORITY		/* Disabled */
+#define IR0 IRL0_PRIORITY	/* IRLs */
+#define IR1 IRL1_PRIORITY
+#define IR2 IRL2_PRIORITY
+#define IR3 IRL3_PRIORITY
+#define PCA INTA_PRIORITY	/* PCI Ints */
+#define PCB INTB_PRIORITY
+#define PCC INTC_PRIORITY
+#define PCD INTD_PRIORITY
+#define SER TOP_PRIORITY
+#define ERR TOP_PRIORITY
+#define PW0 TOP_PRIORITY
+#define PW1 TOP_PRIORITY
+#define PW2 TOP_PRIORITY
+#define PW3 TOP_PRIORITY
+#define DM0 NO_PRIORITY		/* DMA Ints */
+#define DM1 NO_PRIORITY
+#define DM2 NO_PRIORITY
+#define DM3 NO_PRIORITY
+#define DAE NO_PRIORITY
+#define TU0 TIMER_PRIORITY	/* TMU Ints */
+#define TU1 NO_PRIORITY
+#define TU2 NO_PRIORITY
+#define TI2 NO_PRIORITY
+#define ATI NO_PRIORITY		/* RTC Ints */
+#define PRI NO_PRIORITY
+#define CUI RTC_PRIORITY
+#define ERI SCIF_PRIORITY	/* SCIF Ints */
+#define RXI SCIF_PRIORITY
+#define BRI SCIF_PRIORITY
+#define TXI SCIF_PRIORITY
+#define ITI TOP_PRIORITY	/* WDT Ints */
+
+/*
+ * Platform dependent structures: maps and parms block.
+ */
+struct resource io_resources[] = {
+	/* To be updated with external devices */
+};
+
+struct resource kram_resources[] = {
+	{ "Kernel code", 0, 0 },	/* These must be last in the array */
+	{ "Kernel data", 0, 0 }		/* These must be last in the array */
+};
+
+struct resource xram_resources[] = {
+	/* To be updated with external devices */
+};
+
+struct resource rom_resources[] = {
+	/* To be updated with external devices */
+};
+
+struct sh64_platform platform_parms = {
+	.readonly_rootfs =	1,
+	.initial_root_dev =	0x0100,
+	.loader_type =		1,
+	.io_res_p =		io_resources,
+	.io_res_count =		RES_COUNT(io_resources),
+	.kram_res_p =		kram_resources,
+	.kram_res_count =	RES_COUNT(kram_resources),
+	.xram_res_p =		xram_resources,
+	.xram_res_count =	RES_COUNT(xram_resources),
+	.rom_res_p =		rom_resources,
+	.rom_res_count =	RES_COUNT(rom_resources),
+};
+
+int platform_int_priority[NR_INTC_IRQS] = {
+	IR0, IR1, IR2, IR3, PCA, PCB, PCC, PCD,	/* IRQ  0- 7 */
+	RES, RES, RES, RES, SER, ERR, PW3, PW2,	/* IRQ  8-15 */
+	PW1, PW0, DM0, DM1, DM2, DM3, DAE, RES,	/* IRQ 16-23 */
+	RES, RES, RES, RES, RES, RES, RES, RES,	/* IRQ 24-31 */
+	TU0, TU1, TU2, TI2, ATI, PRI, CUI, ERI,	/* IRQ 32-39 */
+	RXI, BRI, TXI, RES, RES, RES, RES, RES,	/* IRQ 40-47 */
+	RES, RES, RES, RES, RES, RES, RES, RES,	/* IRQ 48-55 */
+	RES, RES, RES, RES, RES, RES, RES, ITI,	/* IRQ 56-63 */
+};
+
+void __init platform_setup(void)
+{
+	/* Harp platform leaves the decision to head.S, for now */
+	platform_parms.fpu_flags = fpu_in_use;
+}
+
+void __init platform_monitor(void)
+{
+	/* Nothing yet .. */
+}
+
+void __init platform_reserve(void)
+{
+	/* Nothing yet .. */
+}
+
+const char *get_system_type(void)
+{
+	return "ST50 Harp";
+}
+
diff --git a/arch/sh64/mach-romram/Makefile b/arch/sh64/mach-romram/Makefile
new file mode 100644
index 0000000..02d05c0
--- /dev/null
+++ b/arch/sh64/mach-romram/Makefile
@@ -0,0 +1,14 @@
+#
+# Makefile for the SH-5 ROM/RAM specific parts of the kernel
+#
+# Note! Dependencies are done automagically by 'make dep', which also
+# removes any old dependencies. DON'T put your own dependencies here
+# unless it's something special (ie not a .c file).
+#
+
+O_TARGET := romram.o
+
+obj-y := setup.o
+
+include $(TOPDIR)/Rules.make
+
diff --git a/arch/sh64/mach-romram/setup.c b/arch/sh64/mach-romram/setup.c
new file mode 100644
index 0000000..a9ba03f
--- /dev/null
+++ b/arch/sh64/mach-romram/setup.c
@@ -0,0 +1,142 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * arch/sh64/mach-romram/setup.c
+ *
+ * SH-5 ROM/RAM Platform Support
+ *
+ * This file handles the architecture-dependent parts of initialization
+ *
+ * Copyright (C) 2000, 2001  Paolo Alberelli
+ *
+ * benedict.gaster@superh.com:	 3rd May 2002
+ *    Added support for ramdisk, removing statically linked romfs at the same time. *
+ *
+ * lethal@linux-sh.org:          15th May 2003
+ *    Use the generic procfs cpuinfo interface, just return a valid board name.
+ *
+ * Sean.McGoogan@superh.com	17th Feb 2004
+ * 	copied from arch/sh64/mach-harp/setup.c
+ */
+
+#include <linux/stddef.h>
+#include <linux/init.h>
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/bootmem.h>
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <asm/processor.h>
+#include <asm/platform.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/page.h>
+
+#define RES_COUNT(res) ((sizeof((res))/sizeof(struct resource)))
+
+/*
+ * Platform Dependent Interrupt Priorities.
+ */
+
+/* Using defaults defined in irq.h */
+#define	RES NO_PRIORITY		/* Disabled */
+#define IR0 IRL0_PRIORITY	/* IRLs */
+#define IR1 IRL1_PRIORITY
+#define IR2 IRL2_PRIORITY
+#define IR3 IRL3_PRIORITY
+#define PCA INTA_PRIORITY	/* PCI Ints */
+#define PCB INTB_PRIORITY
+#define PCC INTC_PRIORITY
+#define PCD INTD_PRIORITY
+#define SER TOP_PRIORITY
+#define ERR TOP_PRIORITY
+#define PW0 TOP_PRIORITY
+#define PW1 TOP_PRIORITY
+#define PW2 TOP_PRIORITY
+#define PW3 TOP_PRIORITY
+#define DM0 NO_PRIORITY		/* DMA Ints */
+#define DM1 NO_PRIORITY
+#define DM2 NO_PRIORITY
+#define DM3 NO_PRIORITY
+#define DAE NO_PRIORITY
+#define TU0 TIMER_PRIORITY	/* TMU Ints */
+#define TU1 NO_PRIORITY
+#define TU2 NO_PRIORITY
+#define TI2 NO_PRIORITY
+#define ATI NO_PRIORITY		/* RTC Ints */
+#define PRI NO_PRIORITY
+#define CUI RTC_PRIORITY
+#define ERI SCIF_PRIORITY	/* SCIF Ints */
+#define RXI SCIF_PRIORITY
+#define BRI SCIF_PRIORITY
+#define TXI SCIF_PRIORITY
+#define ITI TOP_PRIORITY	/* WDT Ints */
+
+/*
+ * Platform dependent structures: maps and parms block.
+ */
+struct resource io_resources[] = {
+	/* To be updated with external devices */
+};
+
+struct resource kram_resources[] = {
+	{ "Kernel code", 0, 0 },	/* These must be last in the array */
+	{ "Kernel data", 0, 0 }		/* These must be last in the array */
+};
+
+struct resource xram_resources[] = {
+	/* To be updated with external devices */
+};
+
+struct resource rom_resources[] = {
+	/* To be updated with external devices */
+};
+
+struct sh64_platform platform_parms = {
+	.readonly_rootfs =	1,
+	.initial_root_dev =	0x0100,
+	.loader_type =		1,
+	.io_res_p =		io_resources,
+	.io_res_count =		RES_COUNT(io_resources),
+	.kram_res_p =		kram_resources,
+	.kram_res_count =	RES_COUNT(kram_resources),
+	.xram_res_p =		xram_resources,
+	.xram_res_count =	RES_COUNT(xram_resources),
+	.rom_res_p =		rom_resources,
+	.rom_res_count =	RES_COUNT(rom_resources),
+};
+
+int platform_int_priority[NR_INTC_IRQS] = {
+	IR0, IR1, IR2, IR3, PCA, PCB, PCC, PCD,	/* IRQ  0- 7 */
+	RES, RES, RES, RES, SER, ERR, PW3, PW2,	/* IRQ  8-15 */
+	PW1, PW0, DM0, DM1, DM2, DM3, DAE, RES,	/* IRQ 16-23 */
+	RES, RES, RES, RES, RES, RES, RES, RES,	/* IRQ 24-31 */
+	TU0, TU1, TU2, TI2, ATI, PRI, CUI, ERI,	/* IRQ 32-39 */
+	RXI, BRI, TXI, RES, RES, RES, RES, RES,	/* IRQ 40-47 */
+	RES, RES, RES, RES, RES, RES, RES, RES,	/* IRQ 48-55 */
+	RES, RES, RES, RES, RES, RES, RES, ITI,	/* IRQ 56-63 */
+};
+
+void __init platform_setup(void)
+{
+	/* ROM/RAM platform leaves the decision to head.S, for now */
+	platform_parms.fpu_flags = fpu_in_use;
+}
+
+void __init platform_monitor(void)
+{
+	/* Nothing yet .. */
+}
+
+void __init platform_reserve(void)
+{
+	/* Nothing yet .. */
+}
+
+const char *get_system_type(void)
+{
+	return "ROM/RAM";
+}
+
diff --git a/arch/sh64/mach-sim/Makefile b/arch/sh64/mach-sim/Makefile
new file mode 100644
index 0000000..819c407
--- /dev/null
+++ b/arch/sh64/mach-sim/Makefile
@@ -0,0 +1,14 @@
+#
+# Makefile for the SH-5 Simulator specific parts of the kernel
+#
+# Note! Dependencies are done automagically by 'make dep', which also
+# removes any old dependencies. DON'T put your own dependencies here
+# unless it's something special (ie not a .c file).
+#
+
+O_TARGET := sim.o
+
+obj-y := setup.o
+
+include $(TOPDIR)/Rules.make
+
diff --git a/arch/sh64/mach-sim/setup.c b/arch/sh64/mach-sim/setup.c
new file mode 100644
index 0000000..a68639c
--- /dev/null
+++ b/arch/sh64/mach-sim/setup.c
@@ -0,0 +1,164 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * arch/sh64/mach-sim/setup.c
+ *
+ * ST50 Simulator Platform Support
+ *
+ * This file handles the architecture-dependent parts of initialization
+ *
+ * Copyright (C) 2000, 2001  Paolo Alberelli
+ *
+ * lethal@linux-sh.org:          15th May 2003
+ *    Use the generic procfs cpuinfo interface, just return a valid board name.
+ */
+
+#include <linux/stddef.h>
+#include <linux/init.h>
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/bootmem.h>
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <asm/addrspace.h>
+#include <asm/processor.h>
+#include <asm/platform.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/page.h>
+
+#ifdef CONFIG_BLK_DEV_INITRD
+#include "../rootfs/rootfs.h"
+#endif
+
+static	__init void platform_monitor(void);
+static	__init void platform_setup(void);
+static	__init void platform_reserve(void);
+
+
+#define	PHYS_MEMORY	CONFIG_MEMORY_SIZE_IN_MB*1024*1024
+
+#if (PHYS_MEMORY < P1SEG_FOOTPRINT_RAM)
+#error "Invalid kernel configuration. Physical memory below footprint requirements."
+#endif
+
+#define RAM_DISK_START	CONFIG_MEMORY_START+P1SEG_INITRD_BLOCK	/* Top of 4MB */
+#ifdef PLATFORM_ROMFS_SIZE
+#define	RAM_DISK_SIZE	(PAGE_ALIGN(PLATFORM_ROMFS_SIZE))     /* Variable Top */
+#if ((RAM_DISK_START + RAM_DISK_SIZE) > (CONFIG_MEMORY_START + PHYS_MEMORY))
+#error "Invalid kernel configuration. ROM RootFS exceeding physical memory."
+#endif
+#else
+#define RAM_DISK_SIZE	P1SEG_INITRD_BLOCK_SIZE			/* Top of 4MB */
+#endif
+
+#define RES_COUNT(res) ((sizeof((res))/sizeof(struct resource)))
+
+/*
+ * Platform Dependent Interrupt Priorities.
+ */
+
+/* Using defaults defined in irq.h */
+#define	RES NO_PRIORITY		/* Disabled */
+#define IR0 IRL0_PRIORITY	/* IRLs */
+#define IR1 IRL1_PRIORITY
+#define IR2 IRL2_PRIORITY
+#define IR3 IRL3_PRIORITY
+#define PCA INTA_PRIORITY	/* PCI Ints */
+#define PCB INTB_PRIORITY
+#define PCC INTC_PRIORITY
+#define PCD INTD_PRIORITY
+#define SER TOP_PRIORITY
+#define ERR TOP_PRIORITY
+#define PW0 TOP_PRIORITY
+#define PW1 TOP_PRIORITY
+#define PW2 TOP_PRIORITY
+#define PW3 TOP_PRIORITY
+#define DM0 NO_PRIORITY		/* DMA Ints */
+#define DM1 NO_PRIORITY
+#define DM2 NO_PRIORITY
+#define DM3 NO_PRIORITY
+#define DAE NO_PRIORITY
+#define TU0 TIMER_PRIORITY	/* TMU Ints */
+#define TU1 NO_PRIORITY
+#define TU2 NO_PRIORITY
+#define TI2 NO_PRIORITY
+#define ATI NO_PRIORITY		/* RTC Ints */
+#define PRI NO_PRIORITY
+#define CUI RTC_PRIORITY
+#define ERI SCIF_PRIORITY	/* SCIF Ints */
+#define RXI SCIF_PRIORITY
+#define BRI SCIF_PRIORITY
+#define TXI SCIF_PRIORITY
+#define ITI TOP_PRIORITY	/* WDT Ints */
+
+/*
+ * Platform dependent structures: maps and parms block.
+ */
+struct resource io_resources[] = {
+	/* Nothing yet .. */
+};
+
+struct resource kram_resources[] = {
+	{ "Kernel code", 0, 0 },	/* These must be last in the array */
+	{ "Kernel data", 0, 0 }		/* These must be last in the array */
+};
+
+struct resource xram_resources[] = {
+	/* Nothing yet .. */
+};
+
+struct resource rom_resources[] = {
+	/* Nothing yet .. */
+};
+
+struct sh64_platform platform_parms = {
+	.readonly_rootfs =	1,
+	.initial_root_dev =	0x0100,
+	.loader_type =		1,
+	.initrd_start =		RAM_DISK_START,
+	.initrd_size =		RAM_DISK_SIZE,
+	.io_res_p =		io_resources,
+	.io_res_count =		RES_COUNT(io_resources),
+	.kram_res_p =		kram_resources,
+	.kram_res_count =	RES_COUNT(kram_resources),
+	.xram_res_p =		xram_resources,
+	.xram_res_count =	RES_COUNT(xram_resources),
+	.rom_res_p =		rom_resources,
+	.rom_res_count =	RES_COUNT(rom_resources),
+};
+
+int platform_int_priority[NR_IRQS] = {
+	IR0, IR1, IR2, IR3, PCA, PCB, PCC, PCD,	/* IRQ  0- 7 */
+	RES, RES, RES, RES, SER, ERR, PW3, PW2,	/* IRQ  8-15 */
+	PW1, PW0, DM0, DM1, DM2, DM3, DAE, RES,	/* IRQ 16-23 */
+	RES, RES, RES, RES, RES, RES, RES, RES,	/* IRQ 24-31 */
+	TU0, TU1, TU2, TI2, ATI, PRI, CUI, ERI,	/* IRQ 32-39 */
+	RXI, BRI, TXI, RES, RES, RES, RES, RES,	/* IRQ 40-47 */
+	RES, RES, RES, RES, RES, RES, RES, RES,	/* IRQ 48-55 */
+	RES, RES, RES, RES, RES, RES, RES, ITI,	/* IRQ 56-63 */
+};
+
+void __init platform_setup(void)
+{
+	/* Simulator platform leaves the decision to head.S */
+	platform_parms.fpu_flags = fpu_in_use;
+}
+
+void __init platform_monitor(void)
+{
+	/* Nothing yet .. */
+}
+
+void __init platform_reserve(void)
+{
+	/* Nothing yet .. */
+}
+
+const char *get_system_type(void)
+{
+	return "SH-5 Simulator";
+}
+
diff --git a/arch/sh64/mm/Makefile b/arch/sh64/mm/Makefile
new file mode 100644
index 0000000..ff19378
--- /dev/null
+++ b/arch/sh64/mm/Makefile
@@ -0,0 +1,44 @@
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 2000, 2001  Paolo Alberelli
+# Copyright (C) 2003, 2004  Paul Mundt
+#
+# Makefile for the sh64-specific parts of the Linux memory manager.
+#
+# Note! Dependencies are done automagically by 'make dep', which also
+# removes any old dependencies. DON'T put your own dependencies here
+# unless it's something special (ie not a .c file).
+#
+
+obj-y := init.o fault.o ioremap.o extable.o cache.o tlbmiss.o tlb.o
+
+obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
+
+# Special flags for tlbmiss.o.  This puts restrictions on the number of
+# caller-save registers that the compiler can target when building this file.
+# This is required because the code is called from a context in entry.S where
+# very few registers have been saved in the exception handler (for speed
+# reasons).
+# The caller save registers that have been saved and which can be used are
+# r2,r3,r4,r5 : argument passing
+# r15, r18 : SP and LINK
+# tr0-4 : allow all caller-save TR's.  The compiler seems to be able to make
+#         use of them, so it's probably beneficial to performance to save them
+#         and have them available for it.
+#
+# The resources not listed below are callee save, i.e. the compiler is free to
+# use any of them and will spill them to the stack itself.
+
+CFLAGS_tlbmiss.o += -ffixed-r7 \
+	-ffixed-r8 -ffixed-r9 -ffixed-r10 -ffixed-r11 -ffixed-r12 \
+	-ffixed-r13 -ffixed-r14 -ffixed-r16 -ffixed-r17 -ffixed-r19 \
+	-ffixed-r20 -ffixed-r21 -ffixed-r22 -ffixed-r23 \
+	-ffixed-r24 -ffixed-r25 -ffixed-r26 -ffixed-r27 \
+	-ffixed-r36 -ffixed-r37 -ffixed-r38 -ffixed-r39 -ffixed-r40 \
+	-ffixed-r41 -ffixed-r42 -ffixed-r43  \
+	-ffixed-r60 -ffixed-r61 -ffixed-r62 \
+	-fomit-frame-pointer
+
diff --git a/arch/sh64/mm/cache.c b/arch/sh64/mm/cache.c
new file mode 100644
index 0000000..3b87e25
--- /dev/null
+++ b/arch/sh64/mm/cache.c
@@ -0,0 +1,1041 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * arch/sh64/mm/cache.c
+ *
+ * Original version Copyright (C) 2000, 2001  Paolo Alberelli
+ * Second version Copyright (C) benedict.gaster@superh.com 2002
+ * Third version Copyright Richard.Curnow@superh.com 2003
+ * Hacks to third version Copyright (C) 2003 Paul Mundt
+ */
+
+/****************************************************************************/
+
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/threads.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/cache.h>
+#include <asm/tlb.h>
+#include <asm/io.h>
+#include <asm/uaccess.h>
+#include <asm/mmu_context.h>
+#include <asm/pgalloc.h> /* for flush_itlb_range */
+
+#include <linux/proc_fs.h>
+
+/* This function is in entry.S */
+extern unsigned long switch_and_save_asid(unsigned long new_asid);
+
+/* Wired TLB entry for the D-cache */
+static unsigned long long dtlb_cache_slot;
+
+/**
+ * sh64_cache_init()
+ *
+ * This is pretty much just a straightforward clone of the SH
+ * detect_cpu_and_cache_system().
+ *
+ * This function is responsible for setting up all of the cache
+ * info dynamically as well as taking care of CPU probing and
+ * setting up the relevant subtype data.
+ *
+ * FIXME: For the time being, we only really support the SH5-101
+ * out of the box, and don't support dynamic probing for things
+ * like the SH5-103 or even cut2 of the SH5-101. Implement this
+ * later!
+ */
+int __init sh64_cache_init(void)
+{
+	/*
+	 * First, setup some sane values for the I-cache.
+	 */
+	cpu_data->icache.ways		= 4;
+	cpu_data->icache.sets		= 256;
+	cpu_data->icache.linesz		= L1_CACHE_BYTES;
+
+	/*
+	 * FIXME: This can probably be cleaned up a bit as well.. for example,
+	 * do we really need the way shift _and_ the way_step_shift ?? Judging
+	 * by the existing code, I would guess no.. is there any valid reason
+	 * why we need to be tracking this around?
+	 */
+	cpu_data->icache.way_shift	= 13;
+	cpu_data->icache.entry_shift	= 5;
+	cpu_data->icache.set_shift	= 4;
+	cpu_data->icache.way_step_shift	= 16;
+	cpu_data->icache.asid_shift	= 2;
+
+	/*
+	 * way offset = cache size / associativity, so just don't factor in
+	 * associativity in the first place..
+	 */
+	cpu_data->icache.way_ofs	= cpu_data->icache.sets *
+					  cpu_data->icache.linesz;
+
+	cpu_data->icache.asid_mask	= 0x3fc;
+	cpu_data->icache.idx_mask	= 0x1fe0;
+	cpu_data->icache.epn_mask	= 0xffffe000;
+	cpu_data->icache.flags		= 0;
+
+	/*
+	 * Next, setup some sane values for the D-cache.
+	 *
+	 * On the SH5, these are pretty consistent with the I-cache settings,
+	 * so we just copy over the existing definitions.. these can be fixed
+	 * up later, especially if we add runtime CPU probing.
+	 *
+	 * Though in the meantime it saves us from having to duplicate all of
+	 * the above definitions..
+	 */
+	cpu_data->dcache		= cpu_data->icache;
+
+	/*
+	 * Setup any cache-related flags here
+	 */
+#if defined(CONFIG_DCACHE_WRITE_THROUGH)
+	set_bit(SH_CACHE_MODE_WT, &(cpu_data->dcache.flags));
+#elif defined(CONFIG_DCACHE_WRITE_BACK)
+	set_bit(SH_CACHE_MODE_WB, &(cpu_data->dcache.flags));
+#endif
+
+	/*
+	 * We also need to reserve a slot for the D-cache in the DTLB, so we
+	 * do this now ..
+	 */
+	dtlb_cache_slot			= sh64_get_wired_dtlb_entry();
+
+	return 0;
+}
+
+#ifdef CONFIG_DCACHE_DISABLED
+#define sh64_dcache_purge_all()					do { } while (0)
+#define sh64_dcache_purge_coloured_phy_page(paddr, eaddr)	do { } while (0)
+#define sh64_dcache_purge_user_range(mm, start, end)		do { } while (0)
+#define sh64_dcache_purge_phy_page(paddr)			do { } while (0)
+#define sh64_dcache_purge_virt_page(mm, eaddr)			do { } while (0)
+#define sh64_dcache_purge_kernel_range(start, end)		do { } while (0)
+#define sh64_dcache_wback_current_user_range(start, end)	do { } while (0)
+#endif
+
+/*##########################################################################*/
+
+/* From here onwards, a rewrite of the implementation,
+   by Richard.Curnow@superh.com.
+
+   The major changes in this compared to the old version are;
+   1. use more selective purging through OCBP instead of using ALLOCO to purge
+      by natural replacement.  This avoids purging out unrelated cache lines
+      that happen to be in the same set.
+   2. exploit the APIs copy_user_page and clear_user_page better
+   3. be more selective about I-cache purging, in particular use invalidate_all
+      more sparingly.
+
+   */
+
+/*##########################################################################
+			       SUPPORT FUNCTIONS
+  ##########################################################################*/
+
+/****************************************************************************/
+/* The following group of functions deal with mapping and unmapping a temporary
+   page into the DTLB slot that have been set aside for our exclusive use. */
+/* In order to accomplish this, we use the generic interface for adding and
+   removing a wired slot entry as defined in arch/sh64/mm/tlb.c */
+/****************************************************************************/
+
+static unsigned long slot_own_flags;
+
+static inline void sh64_setup_dtlb_cache_slot(unsigned long eaddr, unsigned long asid, unsigned long paddr)
+{
+	local_irq_save(slot_own_flags);
+	sh64_setup_tlb_slot(dtlb_cache_slot, eaddr, asid, paddr);
+}
+
+static inline void sh64_teardown_dtlb_cache_slot(void)
+{
+	sh64_teardown_tlb_slot(dtlb_cache_slot);
+	local_irq_restore(slot_own_flags);
+}
+
+/****************************************************************************/
+
+#ifndef CONFIG_ICACHE_DISABLED
+
+static void __inline__ sh64_icache_inv_all(void)
+{
+	unsigned long long addr, flag, data;
+	unsigned int flags;
+
+	addr=ICCR0;
+	flag=ICCR0_ICI;
+	data=0;
+
+	/* Make this a critical section for safety (probably not strictly necessary.) */
+	local_irq_save(flags);
+
+	/* Without %1 it gets unexplicably wrong */
+	asm volatile("getcfg	%3, 0, %0\n\t"
+			"or	%0, %2, %0\n\t"
+			"putcfg	%3, 0, %0\n\t"
+			"synci"
+			: "=&r" (data)
+			: "0" (data), "r" (flag), "r" (addr));
+
+	local_irq_restore(flags);
+}
+
+static void sh64_icache_inv_kernel_range(unsigned long start, unsigned long end)
+{
+	/* Invalidate range of addresses [start,end] from the I-cache, where
+	 * the addresses lie in the kernel superpage. */
+
+	unsigned long long ullend, addr, aligned_start;
+#if (NEFF == 32)
+	aligned_start = (unsigned long long)(signed long long)(signed long) start;
+#else
+#error "NEFF != 32"
+#endif
+	aligned_start &= L1_CACHE_ALIGN_MASK;
+	addr = aligned_start;
+#if (NEFF == 32)
+	ullend = (unsigned long long) (signed long long) (signed long) end;
+#else
+#error "NEFF != 32"
+#endif
+	while (addr <= ullend) {
+		asm __volatile__ ("icbi %0, 0" : : "r" (addr));
+		addr += L1_CACHE_BYTES;
+	}
+}
+
+static void sh64_icache_inv_user_page(struct vm_area_struct *vma, unsigned long eaddr)
+{
+	/* If we get called, we know that vma->vm_flags contains VM_EXEC.
+	   Also, eaddr is page-aligned. */
+
+	unsigned long long addr, end_addr;
+	unsigned long flags = 0;
+	unsigned long running_asid, vma_asid;
+	addr = eaddr;
+	end_addr = addr + PAGE_SIZE;
+
+	/* Check whether we can use the current ASID for the I-cache
+	   invalidation.  For example, if we're called via
+	   access_process_vm->flush_cache_page->here, (e.g. when reading from
+	   /proc), 'running_asid' will be that of the reader, not of the
+	   victim.
+
+	   Also, note the risk that we might get pre-empted between the ASID
+	   compare and blocking IRQs, and before we regain control, the
+	   pid->ASID mapping changes.  However, the whole cache will get
+	   invalidated when the mapping is renewed, so the worst that can
+	   happen is that the loop below ends up invalidating somebody else's
+	   cache entries.
+	*/
+
+	running_asid = get_asid();
+	vma_asid = (vma->vm_mm->context & MMU_CONTEXT_ASID_MASK);
+	if (running_asid != vma_asid) {
+		local_irq_save(flags);
+		switch_and_save_asid(vma_asid);
+	}
+	while (addr < end_addr) {
+		/* Worth unrolling a little */
+		asm __volatile__("icbi %0,  0" : : "r" (addr));
+		asm __volatile__("icbi %0, 32" : : "r" (addr));
+		asm __volatile__("icbi %0, 64" : : "r" (addr));
+		asm __volatile__("icbi %0, 96" : : "r" (addr));
+		addr += 128;
+	}
+	if (running_asid != vma_asid) {
+		switch_and_save_asid(running_asid);
+		local_irq_restore(flags);
+	}
+}
+
+/****************************************************************************/
+
+static void sh64_icache_inv_user_page_range(struct mm_struct *mm,
+			  unsigned long start, unsigned long end)
+{
+	/* Used for invalidating big chunks of I-cache, i.e. assume the range
+	   is whole pages.  If 'start' or 'end' is not page aligned, the code
+	   is conservative and invalidates to the ends of the enclosing pages.
+	   This is functionally OK, just a performance loss. */
+
+	/* See the comments below in sh64_dcache_purge_user_range() regarding
+	   the choice of algorithm.  However, for the I-cache option (2) isn't
+	   available because there are no physical tags so aliases can't be
+	   resolved.  The icbi instruction has to be used through the user
+	   mapping.   Because icbi is cheaper than ocbp on a cache hit, it
+	   would be cheaper to use the selective code for a large range than is
+	   possible with the D-cache.  Just assume 64 for now as a working
+	   figure.
+	   */
+
+	int n_pages;
+
+	if (!mm) return;
+
+	n_pages = ((end - start) >> PAGE_SHIFT);
+	if (n_pages >= 64) {
+		sh64_icache_inv_all();
+	} else {
+		unsigned long aligned_start;
+		unsigned long eaddr;
+		unsigned long after_last_page_start;
+		unsigned long mm_asid, current_asid;
+		unsigned long long flags = 0ULL;
+
+		mm_asid = mm->context & MMU_CONTEXT_ASID_MASK;
+		current_asid = get_asid();
+
+		if (mm_asid != current_asid) {
+			/* Switch ASID and run the invalidate loop under cli */
+			local_irq_save(flags);
+			switch_and_save_asid(mm_asid);
+		}
+
+		aligned_start = start & PAGE_MASK;
+		after_last_page_start = PAGE_SIZE + ((end - 1) & PAGE_MASK);
+
+		while (aligned_start < after_last_page_start) {
+			struct vm_area_struct *vma;
+			unsigned long vma_end;
+			vma = find_vma(mm, aligned_start);
+			if (!vma || (aligned_start <= vma->vm_end)) {
+				/* Avoid getting stuck in an error condition */
+				aligned_start += PAGE_SIZE;
+				continue;
+			}
+			vma_end = vma->vm_end;
+			if (vma->vm_flags & VM_EXEC) {
+				/* Executable */
+				eaddr = aligned_start;
+				while (eaddr < vma_end) {
+					sh64_icache_inv_user_page(vma, eaddr);
+					eaddr += PAGE_SIZE;
+				}
+			}
+			aligned_start = vma->vm_end; /* Skip to start of next region */
+		}
+		if (mm_asid != current_asid) {
+			switch_and_save_asid(current_asid);
+			local_irq_restore(flags);
+		}
+	}
+}
+
+static void sh64_icache_inv_user_small_range(struct mm_struct *mm,
+						unsigned long start, int len)
+{
+
+	/* Invalidate a small range of user context I-cache, not necessarily
+	   page (or even cache-line) aligned. */
+
+	unsigned long long eaddr = start;
+	unsigned long long eaddr_end = start + len;
+	unsigned long current_asid, mm_asid;
+	unsigned long long flags;
+	unsigned long long epage_start;
+
+	/* Since this is used inside ptrace, the ASID in the mm context
+	   typically won't match current_asid.  We'll have to switch ASID to do
+	   this.  For safety, and given that the range will be small, do all
+	   this under cli.
+
+	   Note, there is a hazard that the ASID in mm->context is no longer
+	   actually associated with mm, i.e. if the mm->context has started a
+	   new cycle since mm was last active.  However, this is just a
+	   performance issue: all that happens is that we invalidate lines
+	   belonging to another mm, so the owning process has to refill them
+	   when that mm goes live again.  mm itself can't have any cache
+	   entries because there will have been a flush_cache_all when the new
+	   mm->context cycle started. */
+
+	/* Align to start of cache line.  Otherwise, suppose len==8 and start
+	   was at 32N+28 : the last 4 bytes wouldn't get invalidated. */
+	eaddr = start & L1_CACHE_ALIGN_MASK;
+	eaddr_end = start + len;
+
+	local_irq_save(flags);
+	mm_asid = mm->context & MMU_CONTEXT_ASID_MASK;
+	current_asid = switch_and_save_asid(mm_asid);
+
+	epage_start = eaddr & PAGE_MASK;
+
+	while (eaddr < eaddr_end)
+	{
+		asm __volatile__("icbi %0, 0" : : "r" (eaddr));
+		eaddr += L1_CACHE_BYTES;
+	}
+	switch_and_save_asid(current_asid);
+	local_irq_restore(flags);
+}
+
+static void sh64_icache_inv_current_user_range(unsigned long start, unsigned long end)
+{
+	/* The icbi instruction never raises ITLBMISS.  i.e. if there's not a
+	   cache hit on the virtual tag the instruction ends there, without a
+	   TLB lookup. */
+
+	unsigned long long aligned_start;
+	unsigned long long ull_end;
+	unsigned long long addr;
+
+	ull_end = end;
+
+	/* Just invalidate over the range using the natural addresses.  TLB
+	   miss handling will be OK (TBC).  Since it's for the current process,
+	   either we're already in the right ASID context, or the ASIDs have
+	   been recycled since we were last active in which case we might just
+	   invalidate another processes I-cache entries : no worries, just a
+	   performance drop for him. */
+	aligned_start = start & L1_CACHE_ALIGN_MASK;
+	addr = aligned_start;
+	while (addr < ull_end) {
+		asm __volatile__ ("icbi %0, 0" : : "r" (addr));
+		asm __volatile__ ("nop");
+		asm __volatile__ ("nop");
+		addr += L1_CACHE_BYTES;
+	}
+}
+
+#endif /* !CONFIG_ICACHE_DISABLED */
+
+/****************************************************************************/
+
+#ifndef CONFIG_DCACHE_DISABLED
+
+/* Buffer used as the target of alloco instructions to purge data from cache
+   sets by natural eviction. -- RPC */
+#define DUMMY_ALLOCO_AREA_SIZE L1_CACHE_SIZE_BYTES + (1024 * 4)
+static unsigned char dummy_alloco_area[DUMMY_ALLOCO_AREA_SIZE] __cacheline_aligned = { 0, };
+
+/****************************************************************************/
+
+static void __inline__ sh64_dcache_purge_sets(int sets_to_purge_base, int n_sets)
+{
+	/* Purge all ways in a particular block of sets, specified by the base
+	   set number and number of sets.  Can handle wrap-around, if that's
+	   needed.  */
+
+	int dummy_buffer_base_set;
+	unsigned long long eaddr, eaddr0, eaddr1;
+	int j;
+	int set_offset;
+
+	dummy_buffer_base_set = ((int)&dummy_alloco_area & cpu_data->dcache.idx_mask) >> cpu_data->dcache.entry_shift;
+	set_offset = sets_to_purge_base - dummy_buffer_base_set;
+
+	for (j=0; j<n_sets; j++, set_offset++) {
+		set_offset &= (cpu_data->dcache.sets - 1);
+		eaddr0 = (unsigned long long)dummy_alloco_area + (set_offset << cpu_data->dcache.entry_shift);
+
+		/* Do one alloco which hits the required set per cache way.  For
+		   write-back mode, this will purge the #ways resident lines.   There's
+		   little point unrolling this loop because the allocos stall more if
+		   they're too close together. */
+		eaddr1 = eaddr0 + cpu_data->dcache.way_ofs * cpu_data->dcache.ways;
+		for (eaddr=eaddr0; eaddr<eaddr1; eaddr+=cpu_data->dcache.way_ofs) {
+			asm __volatile__ ("alloco %0, 0" : : "r" (eaddr));
+			asm __volatile__ ("synco"); /* TAKum03020 */
+		}
+
+		eaddr1 = eaddr0 + cpu_data->dcache.way_ofs * cpu_data->dcache.ways;
+		for (eaddr=eaddr0; eaddr<eaddr1; eaddr+=cpu_data->dcache.way_ofs) {
+			/* Load from each address.  Required because alloco is a NOP if
+			   the cache is write-through.  Write-through is a config option. */
+			if (test_bit(SH_CACHE_MODE_WT, &(cpu_data->dcache.flags)))
+				*(volatile unsigned char *)(int)eaddr;
+		}
+	}
+
+	/* Don't use OCBI to invalidate the lines.  That costs cycles directly.
+	   If the dummy block is just left resident, it will naturally get
+	   evicted as required.  */
+
+	return;
+}
+
+/****************************************************************************/
+
+static void sh64_dcache_purge_all(void)
+{
+	/* Purge the entire contents of the dcache.  The most efficient way to
+	   achieve this is to use alloco instructions on a region of unused
+	   memory equal in size to the cache, thereby causing the current
+	   contents to be discarded by natural eviction.  The alternative,
+	   namely reading every tag, setting up a mapping for the corresponding
+	   page and doing an OCBP for the line, would be much more expensive.
+	   */
+
+	sh64_dcache_purge_sets(0, cpu_data->dcache.sets);
+
+	return;
+
+}
+
+/****************************************************************************/
+
+static void sh64_dcache_purge_kernel_range(unsigned long start, unsigned long end)
+{
+	/* Purge the range of addresses [start,end] from the D-cache.  The
+	   addresses lie in the superpage mapping.  There's no harm if we
+	   overpurge at either end - just a small performance loss. */
+	unsigned long long ullend, addr, aligned_start;
+#if (NEFF == 32)
+	aligned_start = (unsigned long long)(signed long long)(signed long) start;
+#else
+#error "NEFF != 32"
+#endif
+	aligned_start &= L1_CACHE_ALIGN_MASK;
+	addr = aligned_start;
+#if (NEFF == 32)
+	ullend = (unsigned long long) (signed long long) (signed long) end;
+#else
+#error "NEFF != 32"
+#endif
+	while (addr <= ullend) {
+		asm __volatile__ ("ocbp %0, 0" : : "r" (addr));
+		addr += L1_CACHE_BYTES;
+	}
+	return;
+}
+
+/* Assumes this address (+ (2**n_synbits) pages up from it) aren't used for
+   anything else in the kernel */
+#define MAGIC_PAGE0_START 0xffffffffec000000ULL
+
+static void sh64_dcache_purge_coloured_phy_page(unsigned long paddr, unsigned long eaddr)
+{
+	/* Purge the physical page 'paddr' from the cache.  It's known that any
+	   cache lines requiring attention have the same page colour as the the
+	   address 'eaddr'.
+
+	   This relies on the fact that the D-cache matches on physical tags
+	   when no virtual tag matches.  So we create an alias for the original
+	   page and purge through that.  (Alternatively, we could have done
+	   this by switching ASID to match the original mapping and purged
+	   through that, but that involves ASID switching cost + probably a
+	   TLBMISS + refill anyway.)
+	   */
+
+	unsigned long long magic_page_start;
+	unsigned long long magic_eaddr, magic_eaddr_end;
+
+	magic_page_start = MAGIC_PAGE0_START + (eaddr & CACHE_OC_SYN_MASK);
+
+	/* As long as the kernel is not pre-emptible, this doesn't need to be
+	   under cli/sti. */
+
+	sh64_setup_dtlb_cache_slot(magic_page_start, get_asid(), paddr);
+
+	magic_eaddr = magic_page_start;
+	magic_eaddr_end = magic_eaddr + PAGE_SIZE;
+	while (magic_eaddr < magic_eaddr_end) {
+		/* Little point in unrolling this loop - the OCBPs are blocking
+		   and won't go any quicker (i.e. the loop overhead is parallel
+		   to part of the OCBP execution.) */
+		asm __volatile__ ("ocbp %0, 0" : : "r" (magic_eaddr));
+		magic_eaddr += L1_CACHE_BYTES;
+	}
+
+	sh64_teardown_dtlb_cache_slot();
+}
+
+/****************************************************************************/
+
+static void sh64_dcache_purge_phy_page(unsigned long paddr)
+{
+	/* Pure a page given its physical start address, by creating a
+	   temporary 1 page mapping and purging across that.  Even if we know
+	   the virtual address (& vma or mm) of the page, the method here is
+	   more elegant because it avoids issues of coping with page faults on
+	   the purge instructions (i.e. no special-case code required in the
+	   critical path in the TLB miss handling). */
+
+	unsigned long long eaddr_start, eaddr, eaddr_end;
+	int i;
+
+	/* As long as the kernel is not pre-emptible, this doesn't need to be
+	   under cli/sti. */
+
+	eaddr_start = MAGIC_PAGE0_START;
+	for (i=0; i < (1 << CACHE_OC_N_SYNBITS); i++) {
+		sh64_setup_dtlb_cache_slot(eaddr_start, get_asid(), paddr);
+
+		eaddr = eaddr_start;
+		eaddr_end = eaddr + PAGE_SIZE;
+		while (eaddr < eaddr_end) {
+			asm __volatile__ ("ocbp %0, 0" : : "r" (eaddr));
+			eaddr += L1_CACHE_BYTES;
+		}
+
+		sh64_teardown_dtlb_cache_slot();
+		eaddr_start += PAGE_SIZE;
+	}
+}
+
+static void sh64_dcache_purge_user_page(struct mm_struct *mm, unsigned long eaddr)
+{
+	pgd_t *pgd;
+	pmd_t *pmd;
+	pte_t *pte;
+	pte_t entry;
+	unsigned long paddr;
+
+	/* NOTE : all the callers of this have mm->page_table_lock held, so the
+	   following page table traversal is safe even on SMP/pre-emptible. */
+
+	if (!mm) return; /* No way to find physical address of page */
+	pgd = pgd_offset(mm, eaddr);
+	if (pgd_bad(*pgd)) return;
+
+	pmd = pmd_offset(pgd, eaddr);
+	if (pmd_none(*pmd) || pmd_bad(*pmd)) return;
+
+	pte = pte_offset_kernel(pmd, eaddr);
+	entry = *pte;
+	if (pte_none(entry) || !pte_present(entry)) return;
+
+	paddr = pte_val(entry) & PAGE_MASK;
+
+	sh64_dcache_purge_coloured_phy_page(paddr, eaddr);
+
+}
+/****************************************************************************/
+
+static void sh64_dcache_purge_user_range(struct mm_struct *mm,
+			  unsigned long start, unsigned long end)
+{
+	/* There are at least 5 choices for the implementation of this, with
+	   pros (+), cons(-), comments(*):
+
+	   1. ocbp each line in the range through the original user's ASID
+	      + no lines spuriously evicted
+	      - tlbmiss handling (must either handle faults on demand => extra
+		special-case code in tlbmiss critical path), or map the page in
+		advance (=> flush_tlb_range in advance to avoid multiple hits)
+	      - ASID switching
+	      - expensive for large ranges
+
+	   2. temporarily map each page in the range to a special effective
+	      address and ocbp through the temporary mapping; relies on the
+	      fact that SH-5 OCB* always do TLB lookup and match on ptags (they
+	      never look at the etags)
+	      + no spurious evictions
+	      - expensive for large ranges
+	      * surely cheaper than (1)
+
+	   3. walk all the lines in the cache, check the tags, if a match
+	      occurs create a page mapping to ocbp the line through
+	      + no spurious evictions
+	      - tag inspection overhead
+	      - (especially for small ranges)
+	      - potential cost of setting up/tearing down page mapping for
+		every line that matches the range
+	      * cost partly independent of range size
+
+	   4. walk all the lines in the cache, check the tags, if a match
+	      occurs use 4 * alloco to purge the line (+3 other probably
+	      innocent victims) by natural eviction
+	      + no tlb mapping overheads
+	      - spurious evictions
+	      - tag inspection overhead
+
+	   5. implement like flush_cache_all
+	      + no tag inspection overhead
+	      - spurious evictions
+	      - bad for small ranges
+
+	   (1) can be ruled out as more expensive than (2).  (2) appears best
+	   for small ranges.  The choice between (3), (4) and (5) for large
+	   ranges and the range size for the large/small boundary need
+	   benchmarking to determine.
+
+	   For now use approach (2) for small ranges and (5) for large ones.
+
+	   */
+
+	int n_pages;
+
+	n_pages = ((end - start) >> PAGE_SHIFT);
+	if (n_pages >= 64) {
+#if 1
+		sh64_dcache_purge_all();
+#else
+		unsigned long long set, way;
+		unsigned long mm_asid = mm->context & MMU_CONTEXT_ASID_MASK;
+		for (set = 0; set < cpu_data->dcache.sets; set++) {
+			unsigned long long set_base_config_addr = CACHE_OC_ADDRESS_ARRAY + (set << cpu_data->dcache.set_shift);
+			for (way = 0; way < cpu_data->dcache.ways; way++) {
+				unsigned long long config_addr = set_base_config_addr + (way << cpu_data->dcache.way_step_shift);
+				unsigned long long tag0;
+				unsigned long line_valid;
+
+				asm __volatile__("getcfg %1, 0, %0" : "=r" (tag0) : "r" (config_addr));
+				line_valid = tag0 & SH_CACHE_VALID;
+				if (line_valid) {
+					unsigned long cache_asid;
+					unsigned long epn;
+
+					cache_asid = (tag0 & cpu_data->dcache.asid_mask) >> cpu_data->dcache.asid_shift;
+					/* The next line needs some
+					   explanation.  The virtual tags
+					   encode bits [31:13] of the virtual
+					   address, bit [12] of the 'tag' being
+					   implied by the cache set index. */
+					epn = (tag0 & cpu_data->dcache.epn_mask) | ((set & 0x80) << cpu_data->dcache.entry_shift);
+
+					if ((cache_asid == mm_asid) && (start <= epn) && (epn < end)) {
+						/* TODO : could optimise this
+						   call by batching multiple
+						   adjacent sets together. */
+						sh64_dcache_purge_sets(set, 1);
+						break; /* Don't waste time inspecting other ways for this set */
+					}
+				}
+			}
+		}
+#endif
+	} else {
+		/* 'Small' range */
+		unsigned long aligned_start;
+		unsigned long eaddr;
+		unsigned long last_page_start;
+
+		aligned_start = start & PAGE_MASK;
+		/* 'end' is 1 byte beyond the end of the range */
+		last_page_start = (end - 1) & PAGE_MASK;
+
+		eaddr = aligned_start;
+		while (eaddr <= last_page_start) {
+			sh64_dcache_purge_user_page(mm, eaddr);
+			eaddr += PAGE_SIZE;
+		}
+	}
+	return;
+}
+
+static void sh64_dcache_wback_current_user_range(unsigned long start, unsigned long end)
+{
+	unsigned long long aligned_start;
+	unsigned long long ull_end;
+	unsigned long long addr;
+
+	ull_end = end;
+
+	/* Just wback over the range using the natural addresses.  TLB miss
+	   handling will be OK (TBC) : the range has just been written to by
+	   the signal frame setup code, so the PTEs must exist.
+
+	   Note, if we have CONFIG_PREEMPT and get preempted inside this loop,
+	   it doesn't matter, even if the pid->ASID mapping changes whilst
+	   we're away.  In that case the cache will have been flushed when the
+	   mapping was renewed.  So the writebacks below will be nugatory (and
+	   we'll doubtless have to fault the TLB entry/ies in again with the
+	   new ASID), but it's a rare case.
+	   */
+	aligned_start = start & L1_CACHE_ALIGN_MASK;
+	addr = aligned_start;
+	while (addr < ull_end) {
+		asm __volatile__ ("ocbwb %0, 0" : : "r" (addr));
+		addr += L1_CACHE_BYTES;
+	}
+}
+
+/****************************************************************************/
+
+/* These *MUST* lie in an area of virtual address space that's otherwise unused. */
+#define UNIQUE_EADDR_START 0xe0000000UL
+#define UNIQUE_EADDR_END   0xe8000000UL
+
+static unsigned long sh64_make_unique_eaddr(unsigned long user_eaddr, unsigned long paddr)
+{
+	/* Given a physical address paddr, and a user virtual address
+	   user_eaddr which will eventually be mapped to it, create a one-off
+	   kernel-private eaddr mapped to the same paddr.  This is used for
+	   creating special destination pages for copy_user_page and
+	   clear_user_page */
+
+	static unsigned long current_pointer = UNIQUE_EADDR_START;
+	unsigned long coloured_pointer;
+
+	if (current_pointer == UNIQUE_EADDR_END) {
+		sh64_dcache_purge_all();
+		current_pointer = UNIQUE_EADDR_START;
+	}
+
+	coloured_pointer = (current_pointer & ~CACHE_OC_SYN_MASK) | (user_eaddr & CACHE_OC_SYN_MASK);
+	sh64_setup_dtlb_cache_slot(coloured_pointer, get_asid(), paddr);
+
+	current_pointer += (PAGE_SIZE << CACHE_OC_N_SYNBITS);
+
+	return coloured_pointer;
+}
+
+/****************************************************************************/
+
+static void sh64_copy_user_page_coloured(void *to, void *from, unsigned long address)
+{
+	void *coloured_to;
+
+	/* Discard any existing cache entries of the wrong colour.  These are
+	   present quite often, if the kernel has recently used the page
+	   internally, then given it up, then it's been allocated to the user.
+	   */
+	sh64_dcache_purge_coloured_phy_page(__pa(to), (unsigned long) to);
+
+	coloured_to = (void *) sh64_make_unique_eaddr(address, __pa(to));
+	sh64_page_copy(from, coloured_to);
+
+	sh64_teardown_dtlb_cache_slot();
+}
+
+static void sh64_clear_user_page_coloured(void *to, unsigned long address)
+{
+	void *coloured_to;
+
+	/* Discard any existing kernel-originated lines of the wrong colour (as
+	   above) */
+	sh64_dcache_purge_coloured_phy_page(__pa(to), (unsigned long) to);
+
+	coloured_to = (void *) sh64_make_unique_eaddr(address, __pa(to));
+	sh64_page_clear(coloured_to);
+
+	sh64_teardown_dtlb_cache_slot();
+}
+
+#endif /* !CONFIG_DCACHE_DISABLED */
+
+/****************************************************************************/
+
+/*##########################################################################
+			    EXTERNALLY CALLABLE API.
+  ##########################################################################*/
+
+/* These functions are described in Documentation/cachetlb.txt.
+   Each one of these functions varies in behaviour depending on whether the
+   I-cache and/or D-cache are configured out.
+
+   Note that the Linux term 'flush' corresponds to what is termed 'purge' in
+   the sh/sh64 jargon for the D-cache, i.e. write back dirty data then
+   invalidate the cache lines, and 'invalidate' for the I-cache.
+   */
+
+#undef FLUSH_TRACE
+
+void flush_cache_all(void)
+{
+	/* Invalidate the entire contents of both caches, after writing back to
+	   memory any dirty data from the D-cache. */
+	sh64_dcache_purge_all();
+	sh64_icache_inv_all();
+}
+
+/****************************************************************************/
+
+void flush_cache_mm(struct mm_struct *mm)
+{
+	/* Invalidate an entire user-address space from both caches, after
+	   writing back dirty data (e.g. for shared mmap etc). */
+
+	/* This could be coded selectively by inspecting all the tags then
+	   doing 4*alloco on any set containing a match (as for
+	   flush_cache_range), but fork/exit/execve (where this is called from)
+	   are expensive anyway. */
+
+	/* Have to do a purge here, despite the comments re I-cache below.
+	   There could be odd-coloured dirty data associated with the mm still
+	   in the cache - if this gets written out through natural eviction
+	   after the kernel has reused the page there will be chaos.
+	   */
+
+	sh64_dcache_purge_all();
+
+	/* The mm being torn down won't ever be active again, so any Icache
+	   lines tagged with its ASID won't be visible for the rest of the
+	   lifetime of this ASID cycle.  Before the ASID gets reused, there
+	   will be a flush_cache_all.  Hence we don't need to touch the
+	   I-cache.  This is similar to the lack of action needed in
+	   flush_tlb_mm - see fault.c. */
+}
+
+/****************************************************************************/
+
+void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
+		       unsigned long end)
+{
+	struct mm_struct *mm = vma->vm_mm;
+
+	/* Invalidate (from both caches) the range [start,end) of virtual
+	   addresses from the user address space specified by mm, after writing
+	   back any dirty data.
+
+	   Note(1), 'end' is 1 byte beyond the end of the range to flush.
+
+	   Note(2), this is called with mm->page_table_lock held.*/
+
+	sh64_dcache_purge_user_range(mm, start, end);
+	sh64_icache_inv_user_page_range(mm, start, end);
+}
+
+/****************************************************************************/
+
+void flush_cache_page(struct vm_area_struct *vma, unsigned long eaddr, unsigned long pfn)
+{
+	/* Invalidate any entries in either cache for the vma within the user
+	   address space vma->vm_mm for the page starting at virtual address
+	   'eaddr'.   This seems to be used primarily in breaking COW.  Note,
+	   the I-cache must be searched too in case the page in question is
+	   both writable and being executed from (e.g. stack trampolines.)
+
+	   Note(1), this is called with mm->page_table_lock held.
+	   */
+
+	sh64_dcache_purge_phy_page(pfn << PAGE_SHIFT);
+
+	if (vma->vm_flags & VM_EXEC) {
+		sh64_icache_inv_user_page(vma, eaddr);
+	}
+}
+
+/****************************************************************************/
+
+#ifndef CONFIG_DCACHE_DISABLED
+
+void copy_user_page(void *to, void *from, unsigned long address, struct page *page)
+{
+	/* 'from' and 'to' are kernel virtual addresses (within the superpage
+	   mapping of the physical RAM).  'address' is the user virtual address
+	   where the copy 'to' will be mapped after.  This allows a custom
+	   mapping to be used to ensure that the new copy is placed in the
+	   right cache sets for the user to see it without having to bounce it
+	   out via memory.  Note however : the call to flush_page_to_ram in
+	   (generic)/mm/memory.c:(break_cow) undoes all this good work in that one
+	   very important case!
+
+	   TBD : can we guarantee that on every call, any cache entries for
+	   'from' are in the same colour sets as 'address' also?  i.e. is this
+	   always used just to deal with COW?  (I suspect not). */
+
+	/* There are two possibilities here for when the page 'from' was last accessed:
+	   * by the kernel : this is OK, no purge required.
+	   * by the/a user (e.g. for break_COW) : need to purge.
+
+	   If the potential user mapping at 'address' is the same colour as
+	   'from' there is no need to purge any cache lines from the 'from'
+	   page mapped into cache sets of colour 'address'.  (The copy will be
+	   accessing the page through 'from').
+	   */
+
+	if (((address ^ (unsigned long) from) & CACHE_OC_SYN_MASK) != 0) {
+		sh64_dcache_purge_coloured_phy_page(__pa(from), address);
+	}
+
+	if (((address ^ (unsigned long) to) & CACHE_OC_SYN_MASK) == 0) {
+		/* No synonym problem on destination */
+		sh64_page_copy(from, to);
+	} else {
+		sh64_copy_user_page_coloured(to, from, address);
+	}
+
+	/* Note, don't need to flush 'from' page from the cache again - it's
+	   done anyway by the generic code */
+}
+
+void clear_user_page(void *to, unsigned long address, struct page *page)
+{
+	/* 'to' is a kernel virtual address (within the superpage
+	   mapping of the physical RAM).  'address' is the user virtual address
+	   where the 'to' page will be mapped after.  This allows a custom
+	   mapping to be used to ensure that the new copy is placed in the
+	   right cache sets for the user to see it without having to bounce it
+	   out via memory.
+	*/
+
+	if (((address ^ (unsigned long) to) & CACHE_OC_SYN_MASK) == 0) {
+		/* No synonym problem on destination */
+		sh64_page_clear(to);
+	} else {
+		sh64_clear_user_page_coloured(to, address);
+	}
+}
+
+#endif /* !CONFIG_DCACHE_DISABLED */
+
+/****************************************************************************/
+
+void flush_dcache_page(struct page *page)
+{
+	sh64_dcache_purge_phy_page(page_to_phys(page));
+	wmb();
+}
+
+/****************************************************************************/
+
+void flush_icache_range(unsigned long start, unsigned long end)
+{
+	/* Flush the range [start,end] of kernel virtual adddress space from
+	   the I-cache.  The corresponding range must be purged from the
+	   D-cache also because the SH-5 doesn't have cache snooping between
+	   the caches.  The addresses will be visible through the superpage
+	   mapping, therefore it's guaranteed that there no cache entries for
+	   the range in cache sets of the wrong colour.
+
+	   Primarily used for cohering the I-cache after a module has
+	   been loaded.  */
+
+	/* We also make sure to purge the same range from the D-cache since
+	   flush_page_to_ram() won't be doing this for us! */
+
+	sh64_dcache_purge_kernel_range(start, end);
+	wmb();
+	sh64_icache_inv_kernel_range(start, end);
+}
+
+/****************************************************************************/
+
+void flush_icache_user_range(struct vm_area_struct *vma,
+			struct page *page, unsigned long addr, int len)
+{
+	/* Flush the range of user (defined by vma->vm_mm) address space
+	   starting at 'addr' for 'len' bytes from the cache.  The range does
+	   not straddle a page boundary, the unique physical page containing
+	   the range is 'page'.  This seems to be used mainly for invalidating
+	   an address range following a poke into the program text through the
+	   ptrace() call from another process (e.g. for BRK instruction
+	   insertion). */
+
+	sh64_dcache_purge_coloured_phy_page(page_to_phys(page), addr);
+	mb();
+
+	if (vma->vm_flags & VM_EXEC) {
+		sh64_icache_inv_user_small_range(vma->vm_mm, addr, len);
+	}
+}
+
+/*##########################################################################
+			ARCH/SH64 PRIVATE CALLABLE API.
+  ##########################################################################*/
+
+void flush_cache_sigtramp(unsigned long start, unsigned long end)
+{
+	/* For the address range [start,end), write back the data from the
+	   D-cache and invalidate the corresponding region of the I-cache for
+	   the current process.  Used to flush signal trampolines on the stack
+	   to make them executable. */
+
+	sh64_dcache_wback_current_user_range(start, end);
+	wmb();
+	sh64_icache_inv_current_user_range(start, end);
+}
+
diff --git a/arch/sh64/mm/extable.c b/arch/sh64/mm/extable.c
new file mode 100644
index 0000000..9da50e28b
--- /dev/null
+++ b/arch/sh64/mm/extable.c
@@ -0,0 +1,81 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * arch/sh64/mm/extable.c
+ *
+ * Copyright (C) 2003 Richard Curnow
+ * Copyright (C) 2003, 2004  Paul Mundt
+ *
+ * Cloned from the 2.5 SH version..
+ */
+#include <linux/config.h>
+#include <linux/rwsem.h>
+#include <linux/module.h>
+#include <asm/uaccess.h>
+
+extern unsigned long copy_user_memcpy, copy_user_memcpy_end;
+extern void __copy_user_fixup(void);
+
+static const struct exception_table_entry __copy_user_fixup_ex = {
+	.fixup = (unsigned long)&__copy_user_fixup,
+};
+
+/* Some functions that may trap due to a bad user-mode address have too many loads
+   and stores in them to make it at all practical to label each one and put them all in
+   the main exception table.
+
+   In particular, the fast memcpy routine is like this.  It's fix-up is just to fall back
+   to a slow byte-at-a-time copy, which is handled the conventional way.  So it's functionally
+   OK to just handle any trap occurring in the fast memcpy with that fixup. */
+static const struct exception_table_entry *check_exception_ranges(unsigned long addr)
+{
+	if ((addr >= (unsigned long)&copy_user_memcpy) &&
+	    (addr <= (unsigned long)&copy_user_memcpy_end))
+		return &__copy_user_fixup_ex;
+
+	return NULL;
+}
+
+/* Simple binary search */
+const struct exception_table_entry *
+search_extable(const struct exception_table_entry *first,
+		 const struct exception_table_entry *last,
+		 unsigned long value)
+{
+	const struct exception_table_entry *mid;
+
+	mid = check_exception_ranges(value);
+	if (mid)
+		return mid;
+
+        while (first <= last) {
+		long diff;
+
+		mid = (last - first) / 2 + first;
+		diff = mid->insn - value;
+                if (diff == 0)
+                        return mid;
+                else if (diff < 0)
+                        first = mid+1;
+                else
+                        last = mid-1;
+        }
+
+        return NULL;
+}
+
+int fixup_exception(struct pt_regs *regs)
+{
+	const struct exception_table_entry *fixup;
+
+	fixup = search_exception_tables(regs->pc);
+	if (fixup) {
+		regs->pc = fixup->fixup;
+		return 1;
+	}
+
+	return 0;
+}
+
diff --git a/arch/sh64/mm/fault.c b/arch/sh64/mm/fault.c
new file mode 100644
index 0000000..a249328
--- /dev/null
+++ b/arch/sh64/mm/fault.c
@@ -0,0 +1,601 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * arch/sh64/mm/fault.c
+ *
+ * Copyright (C) 2000, 2001  Paolo Alberelli
+ * Copyright (C) 2003  Richard Curnow (/proc/tlb, bug fixes)
+ * Copyright (C) 2003  Paul Mundt
+ *
+ */
+
+#include <linux/signal.h>
+#include <linux/rwsem.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/interrupt.h>
+
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/tlb.h>
+#include <asm/uaccess.h>
+#include <asm/pgalloc.h>
+#include <asm/mmu_context.h>
+#include <asm/registers.h>		/* required by inline asm statements */
+
+#if defined(CONFIG_SH64_PROC_TLB)
+#include <linux/init.h>
+#include <linux/proc_fs.h>
+/* Count numbers of tlb refills in each region */
+static unsigned long long calls_to_update_mmu_cache = 0ULL;
+static unsigned long long calls_to_flush_tlb_page   = 0ULL;
+static unsigned long long calls_to_flush_tlb_range  = 0ULL;
+static unsigned long long calls_to_flush_tlb_mm     = 0ULL;
+static unsigned long long calls_to_flush_tlb_all    = 0ULL;
+unsigned long long calls_to_do_slow_page_fault = 0ULL;
+unsigned long long calls_to_do_fast_page_fault = 0ULL;
+
+/* Count size of ranges for flush_tlb_range */
+static unsigned long long flush_tlb_range_1         = 0ULL;
+static unsigned long long flush_tlb_range_2         = 0ULL;
+static unsigned long long flush_tlb_range_3_4       = 0ULL;
+static unsigned long long flush_tlb_range_5_7       = 0ULL;
+static unsigned long long flush_tlb_range_8_11      = 0ULL;
+static unsigned long long flush_tlb_range_12_15     = 0ULL;
+static unsigned long long flush_tlb_range_16_up     = 0ULL;
+
+static unsigned long long page_not_present          = 0ULL;
+
+#endif
+
+extern void die(const char *,struct pt_regs *,long);
+
+#define PFLAG(val,flag)   (( (val) & (flag) ) ? #flag : "" )
+#define PPROT(flag) PFLAG(pgprot_val(prot),flag)
+
+static inline void print_prots(pgprot_t prot)
+{
+	printk("prot is 0x%08lx\n",pgprot_val(prot));
+
+	printk("%s %s %s %s %s\n",PPROT(_PAGE_SHARED),PPROT(_PAGE_READ),
+	       PPROT(_PAGE_EXECUTE),PPROT(_PAGE_WRITE),PPROT(_PAGE_USER));
+}
+
+static inline void print_vma(struct vm_area_struct *vma)
+{
+	printk("vma start 0x%08lx\n", vma->vm_start);
+	printk("vma end   0x%08lx\n", vma->vm_end);
+
+	print_prots(vma->vm_page_prot);
+	printk("vm_flags 0x%08lx\n", vma->vm_flags);
+}
+
+static inline void print_task(struct task_struct *tsk)
+{
+	printk("Task pid %d\n", tsk->pid);
+}
+
+static pte_t *lookup_pte(struct mm_struct *mm, unsigned long address)
+{
+	pgd_t *dir;
+	pmd_t *pmd;
+	pte_t *pte;
+	pte_t entry;
+
+	dir = pgd_offset(mm, address);
+	if (pgd_none(*dir)) {
+		return NULL;
+	}
+
+	pmd = pmd_offset(dir, address);
+	if (pmd_none(*pmd)) {
+		return NULL;
+	}
+
+	pte = pte_offset_kernel(pmd, address);
+	entry = *pte;
+
+	if (pte_none(entry)) {
+		return NULL;
+	}
+	if (!pte_present(entry)) {
+		return NULL;
+	}
+
+	return pte;
+}
+
+/*
+ * This routine handles page faults.  It determines the address,
+ * and the problem, and then passes it off to one of the appropriate
+ * routines.
+ */
+asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long writeaccess,
+			      unsigned long textaccess, unsigned long address)
+{
+	struct task_struct *tsk;
+	struct mm_struct *mm;
+	struct vm_area_struct * vma;
+	const struct exception_table_entry *fixup;
+	pte_t *pte;
+
+#if defined(CONFIG_SH64_PROC_TLB)
+        ++calls_to_do_slow_page_fault;
+#endif
+
+	/* SIM
+	 * Note this is now called with interrupts still disabled
+	 * This is to cope with being called for a missing IO port
+	 * address with interupts disabled. This should be fixed as
+	 * soon as we have a better 'fast path' miss handler.
+	 *
+	 * Plus take care how you try and debug this stuff.
+	 * For example, writing debug data to a port which you
+	 * have just faulted on is not going to work.
+	 */
+
+	tsk = current;
+	mm = tsk->mm;
+
+	/* Not an IO address, so reenable interrupts */
+	local_irq_enable();
+
+	/*
+	 * If we're in an interrupt or have no user
+	 * context, we must not take the fault..
+	 */
+	if (in_interrupt() || !mm)
+		goto no_context;
+
+	/* TLB misses upon some cache flushes get done under cli() */
+	down_read(&mm->mmap_sem);
+
+	vma = find_vma(mm, address);
+
+	if (!vma) {
+#ifdef DEBUG_FAULT
+		print_task(tsk);
+		printk("%s:%d fault, address is 0x%08x PC %016Lx textaccess %d writeaccess %d\n",
+		       __FUNCTION__,__LINE__,
+		       address,regs->pc,textaccess,writeaccess);
+		show_regs(regs);
+#endif
+		goto bad_area;
+	}
+	if (vma->vm_start <= address) {
+		goto good_area;
+	}
+
+	if (!(vma->vm_flags & VM_GROWSDOWN)) {
+#ifdef DEBUG_FAULT
+		print_task(tsk);
+		printk("%s:%d fault, address is 0x%08x PC %016Lx textaccess %d writeaccess %d\n",
+		       __FUNCTION__,__LINE__,
+		       address,regs->pc,textaccess,writeaccess);
+		show_regs(regs);
+
+		print_vma(vma);
+#endif
+		goto bad_area;
+	}
+	if (expand_stack(vma, address)) {
+#ifdef DEBUG_FAULT
+		print_task(tsk);
+		printk("%s:%d fault, address is 0x%08x PC %016Lx textaccess %d writeaccess %d\n",
+		       __FUNCTION__,__LINE__,
+		       address,regs->pc,textaccess,writeaccess);
+		show_regs(regs);
+#endif
+		goto bad_area;
+	}
+/*
+ * Ok, we have a good vm_area for this memory access, so
+ * we can handle it..
+ */
+good_area:
+	if (textaccess) {
+		if (!(vma->vm_flags & VM_EXEC))
+			goto bad_area;
+	} else {
+		if (writeaccess) {
+			if (!(vma->vm_flags & VM_WRITE))
+				goto bad_area;
+		} else {
+			if (!(vma->vm_flags & VM_READ))
+				goto bad_area;
+		}
+	}
+
+	/*
+	 * If for any reason at all we couldn't handle the fault,
+	 * make sure we exit gracefully rather than endlessly redo
+	 * the fault.
+	 */
+survive:
+	switch (handle_mm_fault(mm, vma, address, writeaccess)) {
+	case 1:
+		tsk->min_flt++;
+		break;
+	case 2:
+		tsk->maj_flt++;
+		break;
+	case 0:
+		goto do_sigbus;
+	default:
+		goto out_of_memory;
+	}
+	/* If we get here, the page fault has been handled.  Do the TLB refill
+	   now from the newly-setup PTE, to avoid having to fault again right
+	   away on the same instruction. */
+	pte = lookup_pte (mm, address);
+	if (!pte) {
+		/* From empirical evidence, we can get here, due to
+		   !pte_present(pte).  (e.g. if a swap-in occurs, and the page
+		   is swapped back out again before the process that wanted it
+		   gets rescheduled?) */
+		goto no_pte;
+	}
+
+	__do_tlb_refill(address, textaccess, pte);
+
+no_pte:
+
+	up_read(&mm->mmap_sem);
+	return;
+
+/*
+ * Something tried to access memory that isn't in our memory map..
+ * Fix it, but check if it's kernel or user first..
+ */
+bad_area:
+#ifdef DEBUG_FAULT
+	printk("fault:bad area\n");
+#endif
+	up_read(&mm->mmap_sem);
+
+	if (user_mode(regs)) {
+		static int count=0;
+		siginfo_t info;
+		if (count < 4) {
+			/* This is really to help debug faults when starting
+			 * usermode, so only need a few */
+			count++;
+			printk("user mode bad_area address=%08lx pid=%d (%s) pc=%08lx\n",
+				address, current->pid, current->comm,
+				(unsigned long) regs->pc);
+#if 0
+			show_regs(regs);
+#endif
+		}
+		if (tsk->pid == 1) {
+			panic("INIT had user mode bad_area\n");
+		}
+		tsk->thread.address = address;
+		tsk->thread.error_code = writeaccess;
+		info.si_signo = SIGSEGV;
+		info.si_errno = 0;
+		info.si_addr = (void *) address;
+		force_sig_info(SIGSEGV, &info, tsk);
+		return;
+	}
+
+no_context:
+#ifdef DEBUG_FAULT
+	printk("fault:No context\n");
+#endif
+	/* Are we prepared to handle this kernel fault?  */
+	fixup = search_exception_tables(regs->pc);
+	if (fixup) {
+		regs->pc = fixup->fixup;
+		return;
+	}
+
+/*
+ * Oops. The kernel tried to access some bad page. We'll have to
+ * terminate things with extreme prejudice.
+ *
+ */
+	if (address < PAGE_SIZE)
+		printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
+	else
+		printk(KERN_ALERT "Unable to handle kernel paging request");
+	printk(" at virtual address %08lx\n", address);
+	printk(KERN_ALERT "pc = %08Lx%08Lx\n", regs->pc >> 32, regs->pc & 0xffffffff);
+	die("Oops", regs, writeaccess);
+	do_exit(SIGKILL);
+
+/*
+ * We ran out of memory, or some other thing happened to us that made
+ * us unable to handle the page fault gracefully.
+ */
+out_of_memory:
+	if (current->pid == 1) {
+		panic("INIT out of memory\n");
+		yield();
+		goto survive;
+	}
+	printk("fault:Out of memory\n");
+	up_read(&mm->mmap_sem);
+	if (current->pid == 1) {
+		yield();
+		down_read(&mm->mmap_sem);
+		goto survive;
+	}
+	printk("VM: killing process %s\n", tsk->comm);
+	if (user_mode(regs))
+		do_exit(SIGKILL);
+	goto no_context;
+
+do_sigbus:
+	printk("fault:Do sigbus\n");
+	up_read(&mm->mmap_sem);
+
+	/*
+	 * Send a sigbus, regardless of whether we were in kernel
+	 * or user mode.
+	 */
+	tsk->thread.address = address;
+	tsk->thread.error_code = writeaccess;
+	tsk->thread.trap_no = 14;
+	force_sig(SIGBUS, tsk);
+
+	/* Kernel mode? Handle exceptions or die */
+	if (!user_mode(regs))
+		goto no_context;
+}
+
+
+void flush_tlb_all(void);
+
+void update_mmu_cache(struct vm_area_struct * vma,
+			unsigned long address, pte_t pte)
+{
+#if defined(CONFIG_SH64_PROC_TLB)
+	++calls_to_update_mmu_cache;
+#endif
+
+	/*
+	 * This appears to get called once for every pte entry that gets
+	 * established => I don't think it's efficient to try refilling the
+	 * TLBs with the pages - some may not get accessed even.  Also, for
+	 * executable pages, it is impossible to determine reliably here which
+	 * TLB they should be mapped into (or both even).
+	 *
+	 * So, just do nothing here and handle faults on demand.  In the
+	 * TLBMISS handling case, the refill is now done anyway after the pte
+	 * has been fixed up, so that deals with most useful cases.
+	 */
+}
+
+static void __flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
+{
+	unsigned long long match, pteh=0, lpage;
+	unsigned long tlb;
+	struct mm_struct *mm;
+
+	mm = vma->vm_mm;
+
+	if (mm->context == NO_CONTEXT)
+		return;
+
+	/*
+	 * Sign-extend based on neff.
+	 */
+	lpage = (page & NEFF_SIGN) ? (page | NEFF_MASK) : page;
+	match = ((mm->context & MMU_CONTEXT_ASID_MASK) << PTEH_ASID_SHIFT) | PTEH_VALID;
+	match |= lpage;
+
+        /* Do ITLB : don't bother for pages in non-exectutable VMAs */
+	if (vma->vm_flags & VM_EXEC) {
+		for_each_itlb_entry(tlb) {
+			asm volatile ("getcfg	%1, 0, %0"
+				      : "=r" (pteh)
+				      : "r" (tlb) );
+
+			if (pteh == match) {
+				__flush_tlb_slot(tlb);
+				break;
+			}
+
+		}
+	}
+
+        /* Do DTLB : any page could potentially be in here. */
+	for_each_dtlb_entry(tlb) {
+		asm volatile ("getcfg	%1, 0, %0"
+			      : "=r" (pteh)
+			      : "r" (tlb) );
+
+		if (pteh == match) {
+			__flush_tlb_slot(tlb);
+			break;
+		}
+
+	}
+}
+
+void flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
+{
+	unsigned long flags;
+
+#if defined(CONFIG_SH64_PROC_TLB)
+        ++calls_to_flush_tlb_page;
+#endif
+
+	if (vma->vm_mm) {
+		page &= PAGE_MASK;
+		local_irq_save(flags);
+		__flush_tlb_page(vma, page);
+		local_irq_restore(flags);
+	}
+}
+
+void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+		     unsigned long end)
+{
+	unsigned long flags;
+	unsigned long long match, pteh=0, pteh_epn, pteh_low;
+	unsigned long tlb;
+	struct mm_struct *mm;
+
+	mm = vma->vm_mm;
+
+#if defined(CONFIG_SH64_PROC_TLB)
+	++calls_to_flush_tlb_range;
+
+	{
+		unsigned long size = (end - 1) - start;
+		size >>= 12; /* divide by PAGE_SIZE */
+		size++; /* end=start+4096 => 1 page */
+		switch (size) {
+		  case  1        : flush_tlb_range_1++;     break;
+		  case  2        : flush_tlb_range_2++;     break;
+		  case  3 ...  4 : flush_tlb_range_3_4++;   break;
+		  case  5 ...  7 : flush_tlb_range_5_7++;   break;
+		  case  8 ... 11 : flush_tlb_range_8_11++;  break;
+		  case 12 ... 15 : flush_tlb_range_12_15++; break;
+		  default        : flush_tlb_range_16_up++; break;
+		}
+	}
+#endif
+
+	if (mm->context == NO_CONTEXT)
+		return;
+
+	local_irq_save(flags);
+
+	start &= PAGE_MASK;
+	end &= PAGE_MASK;
+
+	match = ((mm->context & MMU_CONTEXT_ASID_MASK) << PTEH_ASID_SHIFT) | PTEH_VALID;
+
+	/* Flush ITLB */
+	for_each_itlb_entry(tlb) {
+		asm volatile ("getcfg	%1, 0, %0"
+			      : "=r" (pteh)
+			      : "r" (tlb) );
+
+		pteh_epn = pteh & PAGE_MASK;
+		pteh_low = pteh & ~PAGE_MASK;
+
+		if (pteh_low == match && pteh_epn >= start && pteh_epn <= end)
+			__flush_tlb_slot(tlb);
+	}
+
+	/* Flush DTLB */
+	for_each_dtlb_entry(tlb) {
+		asm volatile ("getcfg	%1, 0, %0"
+			      : "=r" (pteh)
+			      : "r" (tlb) );
+
+		pteh_epn = pteh & PAGE_MASK;
+		pteh_low = pteh & ~PAGE_MASK;
+
+		if (pteh_low == match && pteh_epn >= start && pteh_epn <= end)
+			__flush_tlb_slot(tlb);
+	}
+
+	local_irq_restore(flags);
+}
+
+void flush_tlb_mm(struct mm_struct *mm)
+{
+	unsigned long flags;
+
+#if defined(CONFIG_SH64_PROC_TLB)
+	++calls_to_flush_tlb_mm;
+#endif
+
+	if (mm->context == NO_CONTEXT)
+		return;
+
+	local_irq_save(flags);
+
+	mm->context=NO_CONTEXT;
+	if(mm==current->mm)
+		activate_context(mm);
+
+	local_irq_restore(flags);
+
+}
+
+void flush_tlb_all(void)
+{
+	/* Invalidate all, including shared pages, excluding fixed TLBs */
+
+	unsigned long flags, tlb;
+
+#if defined(CONFIG_SH64_PROC_TLB)
+	++calls_to_flush_tlb_all;
+#endif
+
+	local_irq_save(flags);
+
+	/* Flush each ITLB entry */
+	for_each_itlb_entry(tlb) {
+		__flush_tlb_slot(tlb);
+	}
+
+	/* Flush each DTLB entry */
+	for_each_dtlb_entry(tlb) {
+		__flush_tlb_slot(tlb);
+	}
+
+	local_irq_restore(flags);
+}
+
+void flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+        /* FIXME: Optimize this later.. */
+        flush_tlb_all();
+}
+
+#if defined(CONFIG_SH64_PROC_TLB)
+/* Procfs interface to read the performance information */
+
+static int
+tlb_proc_info(char *buf, char **start, off_t fpos, int length, int *eof, void *data)
+{
+  int len=0;
+  len += sprintf(buf+len, "do_fast_page_fault   called %12lld times\n", calls_to_do_fast_page_fault);
+  len += sprintf(buf+len, "do_slow_page_fault   called %12lld times\n", calls_to_do_slow_page_fault);
+  len += sprintf(buf+len, "update_mmu_cache     called %12lld times\n", calls_to_update_mmu_cache);
+  len += sprintf(buf+len, "flush_tlb_page       called %12lld times\n", calls_to_flush_tlb_page);
+  len += sprintf(buf+len, "flush_tlb_range      called %12lld times\n", calls_to_flush_tlb_range);
+  len += sprintf(buf+len, "flush_tlb_mm         called %12lld times\n", calls_to_flush_tlb_mm);
+  len += sprintf(buf+len, "flush_tlb_all        called %12lld times\n", calls_to_flush_tlb_all);
+  len += sprintf(buf+len, "flush_tlb_range_sizes\n"
+                          " 1      : %12lld\n"
+                          " 2      : %12lld\n"
+                          " 3 -  4 : %12lld\n"
+                          " 5 -  7 : %12lld\n"
+                          " 8 - 11 : %12lld\n"
+                          "12 - 15 : %12lld\n"
+                          "16+     : %12lld\n",
+                          flush_tlb_range_1, flush_tlb_range_2, flush_tlb_range_3_4,
+                          flush_tlb_range_5_7, flush_tlb_range_8_11, flush_tlb_range_12_15,
+                          flush_tlb_range_16_up);
+  len += sprintf(buf+len, "page not present           %12lld times\n", page_not_present);
+  *eof = 1;
+  return len;
+}
+
+static int __init register_proc_tlb(void)
+{
+  create_proc_read_entry("tlb", 0, NULL, tlb_proc_info, NULL);
+  return 0;
+}
+
+__initcall(register_proc_tlb);
+
+#endif
diff --git a/arch/sh64/mm/hugetlbpage.c b/arch/sh64/mm/hugetlbpage.c
new file mode 100644
index 0000000..bcad2ae
--- /dev/null
+++ b/arch/sh64/mm/hugetlbpage.c
@@ -0,0 +1,264 @@
+/*
+ * arch/sh64/mm/hugetlbpage.c
+ *
+ * SuperH HugeTLB page support.
+ *
+ * Cloned from sparc64 by Paul Mundt.
+ *
+ * Copyright (C) 2002, 2003 David S. Miller (davem@redhat.com)
+ */
+
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/pagemap.h>
+#include <linux/smp_lock.h>
+#include <linux/slab.h>
+#include <linux/sysctl.h>
+
+#include <asm/mman.h>
+#include <asm/pgalloc.h>
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+#include <asm/cacheflush.h>
+
+static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
+{
+	pgd_t *pgd;
+	pmd_t *pmd;
+	pte_t *pte = NULL;
+
+	pgd = pgd_offset(mm, addr);
+	if (pgd) {
+		pmd = pmd_alloc(mm, pgd, addr);
+		if (pmd)
+			pte = pte_alloc_map(mm, pmd, addr);
+	}
+	return pte;
+}
+
+static pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+{
+	pgd_t *pgd;
+	pmd_t *pmd;
+	pte_t *pte = NULL;
+
+	pgd = pgd_offset(mm, addr);
+	if (pgd) {
+		pmd = pmd_offset(pgd, addr);
+		if (pmd)
+			pte = pte_offset_map(pmd, addr);
+	}
+	return pte;
+}
+
+#define mk_pte_huge(entry) do { pte_val(entry) |= _PAGE_SZHUGE; } while (0)
+
+static void set_huge_pte(struct mm_struct *mm, struct vm_area_struct *vma,
+			 struct page *page, pte_t * page_table, int write_access)
+{
+	unsigned long i;
+	pte_t entry;
+
+	add_mm_counter(mm, rss, HPAGE_SIZE / PAGE_SIZE);
+
+	if (write_access)
+		entry = pte_mkwrite(pte_mkdirty(mk_pte(page,
+						       vma->vm_page_prot)));
+	else
+		entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot));
+	entry = pte_mkyoung(entry);
+	mk_pte_huge(entry);
+
+	for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
+		set_pte(page_table, entry);
+		page_table++;
+
+		pte_val(entry) += PAGE_SIZE;
+	}
+}
+
+/*
+ * This function checks for proper alignment of input addr and len parameters.
+ */
+int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
+{
+	if (len & ~HPAGE_MASK)
+		return -EINVAL;
+	if (addr & ~HPAGE_MASK)
+		return -EINVAL;
+	return 0;
+}
+
+int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
+			    struct vm_area_struct *vma)
+{
+	pte_t *src_pte, *dst_pte, entry;
+	struct page *ptepage;
+	unsigned long addr = vma->vm_start;
+	unsigned long end = vma->vm_end;
+	int i;
+
+	while (addr < end) {
+		dst_pte = huge_pte_alloc(dst, addr);
+		if (!dst_pte)
+			goto nomem;
+		src_pte = huge_pte_offset(src, addr);
+		BUG_ON(!src_pte || pte_none(*src_pte));
+		entry = *src_pte;
+		ptepage = pte_page(entry);
+		get_page(ptepage);
+		for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
+			set_pte(dst_pte, entry);
+			pte_val(entry) += PAGE_SIZE;
+			dst_pte++;
+		}
+		add_mm_counter(dst, rss, HPAGE_SIZE / PAGE_SIZE);
+		addr += HPAGE_SIZE;
+	}
+	return 0;
+
+nomem:
+	return -ENOMEM;
+}
+
+int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
+			struct page **pages, struct vm_area_struct **vmas,
+			unsigned long *position, int *length, int i)
+{
+	unsigned long vaddr = *position;
+	int remainder = *length;
+
+	WARN_ON(!is_vm_hugetlb_page(vma));
+
+	while (vaddr < vma->vm_end && remainder) {
+		if (pages) {
+			pte_t *pte;
+			struct page *page;
+
+			pte = huge_pte_offset(mm, vaddr);
+
+			/* hugetlb should be locked, and hence, prefaulted */
+			BUG_ON(!pte || pte_none(*pte));
+
+			page = pte_page(*pte);
+
+			WARN_ON(!PageCompound(page));
+
+			get_page(page);
+			pages[i] = page;
+		}
+
+		if (vmas)
+			vmas[i] = vma;
+
+		vaddr += PAGE_SIZE;
+		--remainder;
+		++i;
+	}
+
+	*length = remainder;
+	*position = vaddr;
+
+	return i;
+}
+
+struct page *follow_huge_addr(struct mm_struct *mm,
+			      unsigned long address, int write)
+{
+	return ERR_PTR(-EINVAL);
+}
+
+int pmd_huge(pmd_t pmd)
+{
+	return 0;
+}
+
+struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
+			     pmd_t *pmd, int write)
+{
+	return NULL;
+}
+
+void unmap_hugepage_range(struct vm_area_struct *vma,
+			  unsigned long start, unsigned long end)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	unsigned long address;
+	pte_t *pte;
+	struct page *page;
+	int i;
+
+	BUG_ON(start & (HPAGE_SIZE - 1));
+	BUG_ON(end & (HPAGE_SIZE - 1));
+
+	for (address = start; address < end; address += HPAGE_SIZE) {
+		pte = huge_pte_offset(mm, address);
+		BUG_ON(!pte);
+		if (pte_none(*pte))
+			continue;
+		page = pte_page(*pte);
+		put_page(page);
+		for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
+			pte_clear(mm, address+(i*PAGE_SIZE), pte);
+			pte++;
+		}
+	}
+	add_mm_counter(mm, rss, -((end - start) >> PAGE_SHIFT));
+	flush_tlb_range(vma, start, end);
+}
+
+int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma)
+{
+	struct mm_struct *mm = current->mm;
+	unsigned long addr;
+	int ret = 0;
+
+	BUG_ON(vma->vm_start & ~HPAGE_MASK);
+	BUG_ON(vma->vm_end & ~HPAGE_MASK);
+
+	spin_lock(&mm->page_table_lock);
+	for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) {
+		unsigned long idx;
+		pte_t *pte = huge_pte_alloc(mm, addr);
+		struct page *page;
+
+		if (!pte) {
+			ret = -ENOMEM;
+			goto out;
+		}
+		if (!pte_none(*pte))
+			continue;
+
+		idx = ((addr - vma->vm_start) >> HPAGE_SHIFT)
+			+ (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
+		page = find_get_page(mapping, idx);
+		if (!page) {
+			/* charge the fs quota first */
+			if (hugetlb_get_quota(mapping)) {
+				ret = -ENOMEM;
+				goto out;
+			}
+			page = alloc_huge_page();
+			if (!page) {
+				hugetlb_put_quota(mapping);
+				ret = -ENOMEM;
+				goto out;
+			}
+			ret = add_to_page_cache(page, mapping, idx, GFP_ATOMIC);
+			if (! ret) {
+				unlock_page(page);
+			} else {
+				hugetlb_put_quota(mapping);
+				free_huge_page(page);
+				goto out;
+			}
+		}
+		set_huge_pte(mm, vma, page, pte, vma->vm_flags & VM_WRITE);
+	}
+out:
+	spin_unlock(&mm->page_table_lock);
+	return ret;
+}
diff --git a/arch/sh64/mm/init.c b/arch/sh64/mm/init.c
new file mode 100644
index 0000000..a65e8bb
--- /dev/null
+++ b/arch/sh64/mm/init.c
@@ -0,0 +1,196 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * arch/sh64/mm/init.c
+ *
+ * Copyright (C) 2000, 2001  Paolo Alberelli
+ * Copyright (C) 2003, 2004  Paul Mundt
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/rwsem.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/bootmem.h>
+
+#include <asm/mmu_context.h>
+#include <asm/page.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/tlb.h>
+
+#ifdef CONFIG_BLK_DEV_INITRD
+#include <linux/blk.h>
+#endif
+
+DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
+
+/*
+ * Cache of MMU context last used.
+ */
+unsigned long mmu_context_cache;
+pgd_t * mmu_pdtp_cache;
+int after_bootmem = 0;
+
+/*
+ * BAD_PAGE is the page that is used for page faults when linux
+ * is out-of-memory. Older versions of linux just did a
+ * do_exit(), but using this instead means there is less risk
+ * for a process dying in kernel mode, possibly leaving an inode
+ * unused etc..
+ *
+ * BAD_PAGETABLE is the accompanying page-table: it is initialized
+ * to point to BAD_PAGE entries.
+ *
+ * ZERO_PAGE is a special page that is used for zero-initialized
+ * data and COW.
+ */
+
+extern unsigned char empty_zero_page[PAGE_SIZE];
+extern unsigned char empty_bad_page[PAGE_SIZE];
+extern pte_t empty_bad_pte_table[PTRS_PER_PTE];
+extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
+
+extern char _text, _etext, _edata, __bss_start, _end;
+extern char __init_begin, __init_end;
+
+/* It'd be good if these lines were in the standard header file. */
+#define START_PFN	(NODE_DATA(0)->bdata->node_boot_start >> PAGE_SHIFT)
+#define MAX_LOW_PFN	(NODE_DATA(0)->bdata->node_low_pfn)
+
+
+void show_mem(void)
+{
+	int i, total = 0, reserved = 0;
+	int shared = 0, cached = 0;
+
+	printk("Mem-info:\n");
+	show_free_areas();
+	printk("Free swap:       %6ldkB\n",nr_swap_pages<<(PAGE_SHIFT-10));
+	i = max_mapnr;
+	while (i-- > 0) {
+		total++;
+		if (PageReserved(mem_map+i))
+			reserved++;
+		else if (PageSwapCache(mem_map+i))
+			cached++;
+		else if (page_count(mem_map+i))
+			shared += page_count(mem_map+i) - 1;
+	}
+	printk("%d pages of RAM\n",total);
+	printk("%d reserved pages\n",reserved);
+	printk("%d pages shared\n",shared);
+	printk("%d pages swap cached\n",cached);
+	printk("%ld pages in page table cache\n",pgtable_cache_size);
+}
+
+/*
+ * paging_init() sets up the page tables.
+ *
+ * head.S already did a lot to set up address translation for the kernel.
+ * Here we comes with:
+ * . MMU enabled
+ * . ASID set (SR)
+ * .  some 512MB regions being mapped of which the most relevant here is:
+ *   . CACHED segment (ASID 0 [irrelevant], shared AND NOT user)
+ * . possible variable length regions being mapped as:
+ *   . UNCACHED segment (ASID 0 [irrelevant], shared AND NOT user)
+ * . All of the memory regions are placed, independently from the platform
+ *   on high addresses, above 0x80000000.
+ * . swapper_pg_dir is already cleared out by the .space directive
+ *   in any case swapper does not require a real page directory since
+ *   it's all kernel contained.
+ *
+ * Those pesky NULL-reference errors in the kernel are then
+ * dealt with by not mapping address 0x00000000 at all.
+ *
+ */
+void __init paging_init(void)
+{
+	unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
+
+	pgd_init((unsigned long)swapper_pg_dir);
+	pgd_init((unsigned long)swapper_pg_dir +
+		 sizeof(pgd_t) * USER_PTRS_PER_PGD);
+
+	mmu_context_cache = MMU_CONTEXT_FIRST_VERSION;
+
+        /*
+	 * All memory is good as ZONE_NORMAL (fall-through) and ZONE_DMA.
+         */
+	zones_size[ZONE_DMA] = MAX_LOW_PFN - START_PFN;
+	NODE_DATA(0)->node_mem_map = NULL;
+	free_area_init_node(0, NODE_DATA(0), zones_size, __MEMORY_START >> PAGE_SHIFT, 0);
+}
+
+void __init mem_init(void)
+{
+	int codesize, reservedpages, datasize, initsize;
+	int tmp;
+
+	max_mapnr = num_physpages = MAX_LOW_PFN - START_PFN;
+	high_memory = (void *)__va(MAX_LOW_PFN * PAGE_SIZE);
+
+	/*
+         * Clear the zero-page.
+         * This is not required but we might want to re-use
+         * this very page to pass boot parameters, one day.
+         */
+	memset(empty_zero_page, 0, PAGE_SIZE);
+
+	/* this will put all low memory onto the freelists */
+	totalram_pages += free_all_bootmem_node(NODE_DATA(0));
+	reservedpages = 0;
+	for (tmp = 0; tmp < num_physpages; tmp++)
+		/*
+		 * Only count reserved RAM pages
+		 */
+		if (PageReserved(mem_map+tmp))
+			reservedpages++;
+
+	after_bootmem = 1;
+
+	codesize =  (unsigned long) &_etext - (unsigned long) &_text;
+	datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
+	initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;
+
+	printk("Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init)\n",
+		(unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
+		max_mapnr << (PAGE_SHIFT-10),
+		codesize >> 10,
+		reservedpages << (PAGE_SHIFT-10),
+		datasize >> 10,
+		initsize >> 10);
+}
+
+void free_initmem(void)
+{
+	unsigned long addr;
+
+	addr = (unsigned long)(&__init_begin);
+	for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
+		ClearPageReserved(virt_to_page(addr));
+		set_page_count(virt_to_page(addr), 1);
+		free_page(addr);
+		totalram_pages++;
+	}
+	printk ("Freeing unused kernel memory: %ldk freed\n", (&__init_end - &__init_begin) >> 10);
+}
+
+#ifdef CONFIG_BLK_DEV_INITRD
+void free_initrd_mem(unsigned long start, unsigned long end)
+{
+	unsigned long p;
+	for (p = start; p < end; p += PAGE_SIZE) {
+		ClearPageReserved(virt_to_page(p));
+		set_page_count(virt_to_page(p), 1);
+		free_page(p);
+		totalram_pages++;
+	}
+	printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
+}
+#endif
+
diff --git a/arch/sh64/mm/ioremap.c b/arch/sh64/mm/ioremap.c
new file mode 100644
index 0000000..f4003da
--- /dev/null
+++ b/arch/sh64/mm/ioremap.c
@@ -0,0 +1,469 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * arch/sh64/mm/ioremap.c
+ *
+ * Copyright (C) 2000, 2001  Paolo Alberelli
+ * Copyright (C) 2003, 2004  Paul Mundt
+ *
+ * Mostly derived from arch/sh/mm/ioremap.c which, in turn is mostly
+ * derived from arch/i386/mm/ioremap.c .
+ *
+ *   (C) Copyright 1995 1996 Linus Torvalds
+ */
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <asm/io.h>
+#include <asm/pgalloc.h>
+#include <asm/tlbflush.h>
+#include <linux/ioport.h>
+#include <linux/bootmem.h>
+#include <linux/proc_fs.h>
+
+static void shmedia_mapioaddr(unsigned long, unsigned long);
+static unsigned long shmedia_ioremap(struct resource *, u32, int);
+
+static inline void remap_area_pte(pte_t * pte, unsigned long address, unsigned long size,
+	unsigned long phys_addr, unsigned long flags)
+{
+	unsigned long end;
+	unsigned long pfn;
+	pgprot_t pgprot = __pgprot(_PAGE_PRESENT  | _PAGE_READ   |
+				   _PAGE_WRITE    | _PAGE_DIRTY  |
+				   _PAGE_ACCESSED | _PAGE_SHARED | flags);
+
+	address &= ~PMD_MASK;
+	end = address + size;
+	if (end > PMD_SIZE)
+		end = PMD_SIZE;
+	if (address >= end)
+		BUG();
+
+	pfn = phys_addr >> PAGE_SHIFT;
+
+	pr_debug("    %s: pte %p address %lx size %lx phys_addr %lx\n",
+		 __FUNCTION__,pte,address,size,phys_addr);
+
+	do {
+		if (!pte_none(*pte)) {
+			printk("remap_area_pte: page already exists\n");
+			BUG();
+		}
+
+		set_pte(pte, pfn_pte(pfn, pgprot));
+		address += PAGE_SIZE;
+		pfn++;
+		pte++;
+	} while (address && (address < end));
+}
+
+static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned long size,
+	unsigned long phys_addr, unsigned long flags)
+{
+	unsigned long end;
+
+	address &= ~PGDIR_MASK;
+	end = address + size;
+
+	if (end > PGDIR_SIZE)
+		end = PGDIR_SIZE;
+
+	phys_addr -= address;
+
+	if (address >= end)
+		BUG();
+
+	do {
+		pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address);
+		if (!pte)
+			return -ENOMEM;
+		remap_area_pte(pte, address, end - address, address + phys_addr, flags);
+		address = (address + PMD_SIZE) & PMD_MASK;
+		pmd++;
+	} while (address && (address < end));
+	return 0;
+}
+
+static int remap_area_pages(unsigned long address, unsigned long phys_addr,
+				 unsigned long size, unsigned long flags)
+{
+	int error;
+	pgd_t * dir;
+	unsigned long end = address + size;
+
+	phys_addr -= address;
+	dir = pgd_offset_k(address);
+	flush_cache_all();
+	if (address >= end)
+		BUG();
+	spin_lock(&init_mm.page_table_lock);
+	do {
+		pmd_t *pmd = pmd_alloc(&init_mm, dir, address);
+		error = -ENOMEM;
+		if (!pmd)
+			break;
+		if (remap_area_pmd(pmd, address, end - address,
+				   phys_addr + address, flags)) {
+			 break;
+		}
+		error = 0;
+		address = (address + PGDIR_SIZE) & PGDIR_MASK;
+		dir++;
+	} while (address && (address < end));
+	spin_unlock(&init_mm.page_table_lock);
+	flush_tlb_all();
+	return 0;
+}
+
+/*
+ * Generic mapping function (not visible outside):
+ */
+
+/*
+ * Remap an arbitrary physical address space into the kernel virtual
+ * address space. Needed when the kernel wants to access high addresses
+ * directly.
+ *
+ * NOTE! We need to allow non-page-aligned mappings too: we will obviously
+ * have to convert them into an offset in a page-aligned mapping, but the
+ * caller shouldn't need to know that small detail.
+ */
+void * __ioremap(unsigned long phys_addr, unsigned long size, unsigned long flags)
+{
+	void * addr;
+	struct vm_struct * area;
+	unsigned long offset, last_addr;
+
+	/* Don't allow wraparound or zero size */
+	last_addr = phys_addr + size - 1;
+	if (!size || last_addr < phys_addr)
+		return NULL;
+
+	/*
+	 * Mappings have to be page-aligned
+	 */
+	offset = phys_addr & ~PAGE_MASK;
+	phys_addr &= PAGE_MASK;
+	size = PAGE_ALIGN(last_addr + 1) - phys_addr;
+
+	/*
+	 * Ok, go for it..
+	 */
+	area = get_vm_area(size, VM_IOREMAP);
+	pr_debug("Get vm_area returns %p addr %p\n",area,area->addr);
+	if (!area)
+		return NULL;
+	area->phys_addr = phys_addr;
+	addr = area->addr;
+	if (remap_area_pages((unsigned long)addr, phys_addr, size, flags)) {
+		vunmap(addr);
+		return NULL;
+	}
+	return (void *) (offset + (char *)addr);
+}
+
+void iounmap(void *addr)
+{
+	struct vm_struct *area;
+
+	vfree((void *) (PAGE_MASK & (unsigned long) addr));
+	area = remove_vm_area((void *) (PAGE_MASK & (unsigned long) addr));
+	if (!area) {
+		printk(KERN_ERR "iounmap: bad address %p\n", addr);
+		return;
+	}
+
+	kfree(area);
+}
+
+static struct resource shmedia_iomap = {
+	.name	= "shmedia_iomap",
+	.start	= IOBASE_VADDR + PAGE_SIZE,
+	.end	= IOBASE_END - 1,
+};
+
+static void shmedia_mapioaddr(unsigned long pa, unsigned long va);
+static void shmedia_unmapioaddr(unsigned long vaddr);
+static unsigned long shmedia_ioremap(struct resource *res, u32 pa, int sz);
+
+/*
+ * We have the same problem as the SPARC, so lets have the same comment:
+ * Our mini-allocator...
+ * Boy this is gross! We need it because we must map I/O for
+ * timers and interrupt controller before the kmalloc is available.
+ */
+
+#define XNMLN  15
+#define XNRES  10
+
+struct xresource {
+	struct resource xres;   /* Must be first */
+	int xflag;              /* 1 == used */
+	char xname[XNMLN+1];
+};
+
+static struct xresource xresv[XNRES];
+
+static struct xresource *xres_alloc(void)
+{
+        struct xresource *xrp;
+        int n;
+
+        xrp = xresv;
+        for (n = 0; n < XNRES; n++) {
+                if (xrp->xflag == 0) {
+                        xrp->xflag = 1;
+                        return xrp;
+                }
+                xrp++;
+        }
+        return NULL;
+}
+
+static void xres_free(struct xresource *xrp)
+{
+	xrp->xflag = 0;
+}
+
+static struct resource *shmedia_find_resource(struct resource *root,
+					      unsigned long vaddr)
+{
+	struct resource *res;
+
+	for (res = root->child; res; res = res->sibling)
+		if (res->start <= vaddr && res->end >= vaddr)
+			return res;
+
+	return NULL;
+}
+
+static unsigned long shmedia_alloc_io(unsigned long phys, unsigned long size,
+				      const char *name)
+{
+        static int printed_full = 0;
+        struct xresource *xres;
+        struct resource *res;
+        char *tack;
+        int tlen;
+
+        if (name == NULL) name = "???";
+
+        if ((xres = xres_alloc()) != 0) {
+                tack = xres->xname;
+                res = &xres->xres;
+        } else {
+                if (!printed_full) {
+                        printk("%s: done with statics, switching to kmalloc\n",
+			       __FUNCTION__);
+                        printed_full = 1;
+                }
+                tlen = strlen(name);
+                tack = kmalloc(sizeof (struct resource) + tlen + 1, GFP_KERNEL);
+                if (!tack)
+			return -ENOMEM;
+                memset(tack, 0, sizeof(struct resource));
+                res = (struct resource *) tack;
+                tack += sizeof (struct resource);
+        }
+
+        strncpy(tack, name, XNMLN);
+        tack[XNMLN] = 0;
+        res->name = tack;
+
+        return shmedia_ioremap(res, phys, size);
+}
+
+static unsigned long shmedia_ioremap(struct resource *res, u32 pa, int sz)
+{
+        unsigned long offset = ((unsigned long) pa) & (~PAGE_MASK);
+	unsigned long round_sz = (offset + sz + PAGE_SIZE-1) & PAGE_MASK;
+        unsigned long va;
+        unsigned int psz;
+
+        if (allocate_resource(&shmedia_iomap, res, round_sz,
+			      shmedia_iomap.start, shmedia_iomap.end,
+			      PAGE_SIZE, NULL, NULL) != 0) {
+                panic("alloc_io_res(%s): cannot occupy\n",
+                    (res->name != NULL)? res->name: "???");
+        }
+
+        va = res->start;
+        pa &= PAGE_MASK;
+
+	psz = (res->end - res->start + (PAGE_SIZE - 1)) / PAGE_SIZE;
+
+	/* log at boot time ... */
+	printk("mapioaddr: %6s  [%2d page%s]  va 0x%08lx   pa 0x%08x\n",
+	       ((res->name != NULL) ? res->name : "???"),
+	       psz, psz == 1 ? " " : "s", va, pa);
+
+        for (psz = res->end - res->start + 1; psz != 0; psz -= PAGE_SIZE) {
+                shmedia_mapioaddr(pa, va);
+                va += PAGE_SIZE;
+                pa += PAGE_SIZE;
+        }
+
+        res->start += offset;
+        res->end = res->start + sz - 1;         /* not strictly necessary.. */
+
+        return res->start;
+}
+
+static void shmedia_free_io(struct resource *res)
+{
+	unsigned long len = res->end - res->start + 1;
+
+	BUG_ON((len & (PAGE_SIZE - 1)) != 0);
+
+	while (len) {
+		len -= PAGE_SIZE;
+		shmedia_unmapioaddr(res->start + len);
+	}
+
+	release_resource(res);
+}
+
+static void *sh64_get_page(void)
+{
+	extern int after_bootmem;
+	void *page;
+
+	if (after_bootmem) {
+		page = (void *)get_zeroed_page(GFP_ATOMIC);
+	} else {
+		page = alloc_bootmem_pages(PAGE_SIZE);
+	}
+
+	if (!page || ((unsigned long)page & ~PAGE_MASK))
+		panic("sh64_get_page: Out of memory already?\n");
+
+	return page;
+}
+
+static void shmedia_mapioaddr(unsigned long pa, unsigned long va)
+{
+	pgd_t *pgdp;
+	pmd_t *pmdp;
+	pte_t *ptep, pte;
+	pgprot_t prot;
+	unsigned long flags = 1; /* 1 = CB0-1 device */
+
+	pr_debug("shmedia_mapiopage pa %08lx va %08lx\n",  pa, va);
+
+	pgdp = pgd_offset_k(va);
+	if (pgd_none(*pgdp) || !pgd_present(*pgdp)) {
+		pmdp = (pmd_t *)sh64_get_page();
+		set_pgd(pgdp, __pgd((unsigned long)pmdp | _KERNPG_TABLE));
+	}
+
+	pmdp = pmd_offset(pgdp, va);
+	if (pmd_none(*pmdp) || !pmd_present(*pmdp) ) {
+		ptep = (pte_t *)sh64_get_page();
+		set_pmd(pmdp, __pmd((unsigned long)ptep + _PAGE_TABLE));
+	}
+
+	prot = __pgprot(_PAGE_PRESENT | _PAGE_READ     | _PAGE_WRITE  |
+			_PAGE_DIRTY   | _PAGE_ACCESSED | _PAGE_SHARED | flags);
+
+	pte = pfn_pte(pa >> PAGE_SHIFT, prot);
+	ptep = pte_offset_kernel(pmdp, va);
+
+	if (!pte_none(*ptep) &&
+	    pte_val(*ptep) != pte_val(pte))
+		pte_ERROR(*ptep);
+
+	set_pte(ptep, pte);
+
+	flush_tlb_kernel_range(va, PAGE_SIZE);
+}
+
+static void shmedia_unmapioaddr(unsigned long vaddr)
+{
+	pgd_t *pgdp;
+	pmd_t *pmdp;
+	pte_t *ptep;
+
+	pgdp = pgd_offset_k(vaddr);
+	pmdp = pmd_offset(pgdp, vaddr);
+
+	if (pmd_none(*pmdp) || pmd_bad(*pmdp))
+		return;
+
+	ptep = pte_offset_kernel(pmdp, vaddr);
+
+	if (pte_none(*ptep) || !pte_present(*ptep))
+		return;
+
+	clear_page((void *)ptep);
+	pte_clear(&init_mm, vaddr, ptep);
+}
+
+unsigned long onchip_remap(unsigned long phys, unsigned long size, const char *name)
+{
+	if (size < PAGE_SIZE)
+		size = PAGE_SIZE;
+
+	return shmedia_alloc_io(phys, size, name);
+}
+
+void onchip_unmap(unsigned long vaddr)
+{
+	struct resource *res;
+	unsigned int psz;
+
+	res = shmedia_find_resource(&shmedia_iomap, vaddr);
+	if (!res) {
+		printk(KERN_ERR "%s: Failed to free 0x%08lx\n",
+		       __FUNCTION__, vaddr);
+		return;
+	}
+
+        psz = (res->end - res->start + (PAGE_SIZE - 1)) / PAGE_SIZE;
+
+        printk(KERN_DEBUG "unmapioaddr: %6s  [%2d page%s] freed\n",
+	       res->name, psz, psz == 1 ? " " : "s");
+
+	shmedia_free_io(res);
+
+	if ((char *)res >= (char *)xresv &&
+	    (char *)res <  (char *)&xresv[XNRES]) {
+		xres_free((struct xresource *)res);
+	} else {
+		kfree(res);
+	}
+}
+
+#ifdef CONFIG_PROC_FS
+static int
+ioremap_proc_info(char *buf, char **start, off_t fpos, int length, int *eof,
+		  void *data)
+{
+	char *p = buf, *e = buf + length;
+	struct resource *r;
+	const char *nm;
+
+	for (r = ((struct resource *)data)->child; r != NULL; r = r->sibling) {
+		if (p + 32 >= e)        /* Better than nothing */
+			break;
+		if ((nm = r->name) == 0) nm = "???";
+		p += sprintf(p, "%08lx-%08lx: %s\n", r->start, r->end, nm);
+	}
+
+	return p-buf;
+}
+#endif /* CONFIG_PROC_FS */
+
+static int __init register_proc_onchip(void)
+{
+#ifdef CONFIG_PROC_FS
+	create_proc_read_entry("io_map",0,0, ioremap_proc_info, &shmedia_iomap);
+#endif
+	return 0;
+}
+
+__initcall(register_proc_onchip);
diff --git a/arch/sh64/mm/tlb.c b/arch/sh64/mm/tlb.c
new file mode 100644
index 0000000..d517e7d
--- /dev/null
+++ b/arch/sh64/mm/tlb.c
@@ -0,0 +1,166 @@
+/*
+ * arch/sh64/mm/tlb.c
+ *
+ * Copyright (C) 2003  Paul Mundt <lethal@linux-sh.org>
+ * Copyright (C) 2003  Richard Curnow <richard.curnow@superh.com>
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ */
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <asm/page.h>
+#include <asm/tlb.h>
+#include <asm/mmu_context.h>
+
+/**
+ * sh64_tlb_init
+ *
+ * Perform initial setup for the DTLB and ITLB.
+ */
+int __init sh64_tlb_init(void)
+{
+	/* Assign some sane DTLB defaults */
+	cpu_data->dtlb.entries	= 64;
+	cpu_data->dtlb.step	= 0x10;
+
+	cpu_data->dtlb.first	= DTLB_FIXED | cpu_data->dtlb.step;
+	cpu_data->dtlb.next	= cpu_data->dtlb.first;
+
+	cpu_data->dtlb.last	= DTLB_FIXED |
+				  ((cpu_data->dtlb.entries - 1) *
+				   cpu_data->dtlb.step);
+
+	/* And again for the ITLB */
+	cpu_data->itlb.entries	= 64;
+	cpu_data->itlb.step	= 0x10;
+
+	cpu_data->itlb.first	= ITLB_FIXED | cpu_data->itlb.step;
+	cpu_data->itlb.next	= cpu_data->itlb.first;
+	cpu_data->itlb.last	= ITLB_FIXED |
+				  ((cpu_data->itlb.entries - 1) *
+				   cpu_data->itlb.step);
+
+	return 0;
+}
+
+/**
+ * sh64_next_free_dtlb_entry
+ *
+ * Find the next available DTLB entry
+ */
+unsigned long long sh64_next_free_dtlb_entry(void)
+{
+	return cpu_data->dtlb.next;
+}
+
+/**
+ * sh64_get_wired_dtlb_entry
+ *
+ * Allocate a wired (locked-in) entry in the DTLB
+ */
+unsigned long long sh64_get_wired_dtlb_entry(void)
+{
+	unsigned long long entry = sh64_next_free_dtlb_entry();
+
+	cpu_data->dtlb.first += cpu_data->dtlb.step;
+	cpu_data->dtlb.next  += cpu_data->dtlb.step;
+
+	return entry;
+}
+
+/**
+ * sh64_put_wired_dtlb_entry
+ *
+ * @entry:	Address of TLB slot.
+ *
+ * Free a wired (locked-in) entry in the DTLB.
+ *
+ * Works like a stack, last one to allocate must be first one to free.
+ */
+int sh64_put_wired_dtlb_entry(unsigned long long entry)
+{
+	__flush_tlb_slot(entry);
+
+	/*
+	 * We don't do any particularly useful tracking of wired entries,
+	 * so this approach works like a stack .. last one to be allocated
+	 * has to be the first one to be freed.
+	 *
+	 * We could potentially load wired entries into a list and work on
+	 * rebalancing the list periodically (which also entails moving the
+	 * contents of a TLB entry) .. though I have a feeling that this is
+	 * more trouble than it's worth.
+	 */
+
+	/*
+	 * Entry must be valid .. we don't want any ITLB addresses!
+	 */
+	if (entry <= DTLB_FIXED)
+		return -EINVAL;
+
+	/*
+	 * Next, check if we're within range to be freed. (ie, must be the
+	 * entry beneath the first 'free' entry!
+	 */
+	if (entry < (cpu_data->dtlb.first - cpu_data->dtlb.step))
+		return -EINVAL;
+
+	/* If we are, then bring this entry back into the list */
+	cpu_data->dtlb.first	-= cpu_data->dtlb.step;
+	cpu_data->dtlb.next	= entry;
+
+	return 0;
+}
+
+/**
+ * sh64_setup_tlb_slot
+ *
+ * @config_addr:	Address of TLB slot.
+ * @eaddr:		Virtual address.
+ * @asid:		Address Space Identifier.
+ * @paddr:		Physical address.
+ *
+ * Load up a virtual<->physical translation for @eaddr<->@paddr in the
+ * pre-allocated TLB slot @config_addr (see sh64_get_wired_dtlb_entry).
+ */
+inline void sh64_setup_tlb_slot(unsigned long long config_addr,
+				unsigned long eaddr,
+				unsigned long asid,
+				unsigned long paddr)
+{
+	unsigned long long pteh, ptel;
+
+	/* Sign extension */
+#if (NEFF == 32)
+	pteh = (unsigned long long)(signed long long)(signed long) eaddr;
+#else
+#error "Can't sign extend more than 32 bits yet"
+#endif
+	pteh &= PAGE_MASK;
+	pteh |= (asid << PTEH_ASID_SHIFT) | PTEH_VALID;
+#if (NEFF == 32)
+	ptel = (unsigned long long)(signed long long)(signed long) paddr;
+#else
+#error "Can't sign extend more than 32 bits yet"
+#endif
+	ptel &= PAGE_MASK;
+	ptel |= (_PAGE_CACHABLE | _PAGE_READ | _PAGE_WRITE);
+
+	asm volatile("putcfg %0, 1, %1\n\t"
+			"putcfg %0, 0, %2\n"
+			: : "r" (config_addr), "r" (ptel), "r" (pteh));
+}
+
+/**
+ * sh64_teardown_tlb_slot
+ *
+ * @config_addr:	Address of TLB slot.
+ *
+ * Teardown any existing mapping in the TLB slot @config_addr.
+ */
+inline void sh64_teardown_tlb_slot(unsigned long long config_addr)
+	__attribute__ ((alias("__flush_tlb_slot")));
+
diff --git a/arch/sh64/mm/tlbmiss.c b/arch/sh64/mm/tlbmiss.c
new file mode 100644
index 0000000..c861595
--- /dev/null
+++ b/arch/sh64/mm/tlbmiss.c
@@ -0,0 +1,280 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * arch/sh64/mm/tlbmiss.c
+ *
+ * Original code from fault.c
+ * Copyright (C) 2000, 2001  Paolo Alberelli
+ *
+ * Fast PTE->TLB refill path
+ * Copyright (C) 2003 Richard.Curnow@superh.com
+ *
+ * IMPORTANT NOTES :
+ * The do_fast_page_fault function is called from a context in entry.S where very few registers
+ * have been saved.  In particular, the code in this file must be compiled not to use ANY
+ * caller-save regiseters that are not part of the restricted save set.  Also, it means that
+ * code in this file must not make calls to functions elsewhere in the kernel, or else the
+ * excepting context will see corruption in its caller-save registers.  Plus, the entry.S save
+ * area is non-reentrant, so this code has to run with SR.BL==1, i.e. no interrupts taken inside
+ * it and panic on any exception.
+ *
+ */
+
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/interrupt.h>
+
+#include <asm/system.h>
+#include <asm/tlb.h>
+#include <asm/io.h>
+#include <asm/uaccess.h>
+#include <asm/pgalloc.h>
+#include <asm/mmu_context.h>
+#include <asm/registers.h>		/* required by inline asm statements */
+
+/* Callable from fault.c, so not static */
+inline void __do_tlb_refill(unsigned long address,
+                            unsigned long long is_text_not_data, pte_t *pte)
+{
+	unsigned long long ptel;
+	unsigned long long pteh=0;
+	struct tlb_info *tlbp;
+	unsigned long long next;
+
+	/* Get PTEL first */
+	ptel = pte_val(*pte);
+
+	/*
+	 * Set PTEH register
+	 */
+	pteh = address & MMU_VPN_MASK;
+
+	/* Sign extend based on neff. */
+#if (NEFF == 32)
+	/* Faster sign extension */
+	pteh = (unsigned long long)(signed long long)(signed long)pteh;
+#else
+	/* General case */
+	pteh = (pteh & NEFF_SIGN) ? (pteh | NEFF_MASK) : pteh;
+#endif
+
+	/* Set the ASID. */
+	pteh |= get_asid() << PTEH_ASID_SHIFT;
+	pteh |= PTEH_VALID;
+
+	/* Set PTEL register, set_pte has performed the sign extension */
+	ptel &= _PAGE_FLAGS_HARDWARE_MASK; /* drop software flags */
+
+	tlbp = is_text_not_data ? &(cpu_data->itlb) : &(cpu_data->dtlb);
+	next = tlbp->next;
+	__flush_tlb_slot(next);
+	asm volatile ("putcfg %0,1,%2\n\n\t"
+		      "putcfg %0,0,%1\n"
+		      :  : "r" (next), "r" (pteh), "r" (ptel) );
+
+	next += TLB_STEP;
+	if (next > tlbp->last) next = tlbp->first;
+	tlbp->next = next;
+
+}
+
+static int handle_vmalloc_fault(struct mm_struct *mm, unsigned long protection_flags,
+                                unsigned long long textaccess,
+				unsigned long address)
+{
+	pgd_t *dir;
+	pmd_t *pmd;
+	static pte_t *pte;
+	pte_t entry;
+
+	dir = pgd_offset_k(address);
+	pmd = pmd_offset(dir, address);
+
+	if (pmd_none(*pmd)) {
+		return 0;
+	}
+
+	if (pmd_bad(*pmd)) {
+		pmd_clear(pmd);
+		return 0;
+	}
+
+	pte = pte_offset_kernel(pmd, address);
+	entry = *pte;
+
+	if (pte_none(entry) || !pte_present(entry)) {
+		return 0;
+	}
+
+	if ((pte_val(entry) & protection_flags) != protection_flags) {
+		return 0;
+	}
+
+        __do_tlb_refill(address, textaccess, pte);
+
+	return 1;
+}
+
+static int handle_tlbmiss(struct mm_struct *mm, unsigned long long protection_flags,
+			unsigned long long textaccess,
+			unsigned long address)
+{
+	pgd_t *dir;
+	pmd_t *pmd;
+	pte_t *pte;
+	pte_t entry;
+
+	/* NB. The PGD currently only contains a single entry - there is no
+	   page table tree stored for the top half of the address space since
+	   virtual pages in that region should never be mapped in user mode.
+	   (In kernel mode, the only things in that region are the 512Mb super
+	   page (locked in), and vmalloc (modules) +  I/O device pages (handled
+	   by handle_vmalloc_fault), so no PGD for the upper half is required
+	   by kernel mode either).
+
+	   See how mm->pgd is allocated and initialised in pgd_alloc to see why
+	   the next test is necessary.  - RPC */
+	if (address >= (unsigned long) TASK_SIZE) {
+		/* upper half - never has page table entries. */
+		return 0;
+	}
+	dir = pgd_offset(mm, address);
+	if (pgd_none(*dir)) {
+		return 0;
+	}
+	if (!pgd_present(*dir)) {
+		return 0;
+	}
+
+	pmd = pmd_offset(dir, address);
+	if (pmd_none(*pmd)) {
+		return 0;
+	}
+	if (!pmd_present(*pmd)) {
+		return 0;
+	}
+	pte = pte_offset_kernel(pmd, address);
+	entry = *pte;
+	if (pte_none(entry)) {
+		return 0;
+	}
+	if (!pte_present(entry)) {
+		return 0;
+	}
+
+	/* If the page doesn't have sufficient protection bits set to service the
+	   kind of fault being handled, there's not much point doing the TLB refill.
+	   Punt the fault to the general handler. */
+	if ((pte_val(entry) & protection_flags) != protection_flags) {
+		return 0;
+	}
+
+        __do_tlb_refill(address, textaccess, pte);
+
+	return 1;
+}
+
+/* Put all this information into one structure so that everything is just arithmetic
+   relative to a single base address.  This reduces the number of movi/shori pairs needed
+   just to load addresses of static data. */
+struct expevt_lookup {
+	unsigned short protection_flags[8];
+	unsigned char  is_text_access[8];
+	unsigned char  is_write_access[8];
+};
+
+#define PRU (1<<9)
+#define PRW (1<<8)
+#define PRX (1<<7)
+#define PRR (1<<6)
+
+#define DIRTY (_PAGE_DIRTY | _PAGE_ACCESSED)
+#define YOUNG (_PAGE_ACCESSED)
+
+/* Sized as 8 rather than 4 to allow checking the PTE's PRU bit against whether
+   the fault happened in user mode or privileged mode. */
+static struct expevt_lookup expevt_lookup_table = {
+	.protection_flags = {PRX, PRX, 0, 0, PRR, PRR, PRW, PRW},
+	.is_text_access   = {1,   1,   0, 0, 0,   0,   0,   0}
+};
+
+/*
+   This routine handles page faults that can be serviced just by refilling a
+   TLB entry from an existing page table entry.  (This case represents a very
+   large majority of page faults.) Return 1 if the fault was successfully
+   handled.  Return 0 if the fault could not be handled.  (This leads into the
+   general fault handling in fault.c which deals with mapping file-backed
+   pages, stack growth, segmentation faults, swapping etc etc)
+ */
+asmlinkage int do_fast_page_fault(unsigned long long ssr_md, unsigned long long expevt,
+			          unsigned long address)
+{
+	struct task_struct *tsk;
+	struct mm_struct *mm;
+	unsigned long long textaccess;
+	unsigned long long protection_flags;
+	unsigned long long index;
+	unsigned long long expevt4;
+
+	/* The next few lines implement a way of hashing EXPEVT into a small array index
+	   which can be used to lookup parameters specific to the type of TLBMISS being
+	   handled.  Note:
+	   ITLBMISS has EXPEVT==0xa40
+	   RTLBMISS has EXPEVT==0x040
+	   WTLBMISS has EXPEVT==0x060
+	*/
+
+	expevt4 = (expevt >> 4);
+	/* TODO : xor ssr_md into this expression too.  Then we can check that PRU is set
+	   when it needs to be. */
+	index = expevt4 ^ (expevt4 >> 5);
+	index &= 7;
+	protection_flags = expevt_lookup_table.protection_flags[index];
+	textaccess       = expevt_lookup_table.is_text_access[index];
+
+#ifdef CONFIG_SH64_PROC_TLB
+	++calls_to_do_fast_page_fault;
+#endif
+
+	/* SIM
+	 * Note this is now called with interrupts still disabled
+	 * This is to cope with being called for a missing IO port
+	 * address with interupts disabled. This should be fixed as
+	 * soon as we have a better 'fast path' miss handler.
+	 *
+	 * Plus take care how you try and debug this stuff.
+	 * For example, writing debug data to a port which you
+	 * have just faulted on is not going to work.
+	 */
+
+	tsk = current;
+	mm = tsk->mm;
+
+	if ((address >= VMALLOC_START && address < VMALLOC_END) ||
+	    (address >= IOBASE_VADDR  && address < IOBASE_END)) {
+		if (ssr_md) {
+			/* Process-contexts can never have this address range mapped */
+			if (handle_vmalloc_fault(mm, protection_flags, textaccess, address)) {
+				return 1;
+			}
+		}
+	} else if (!in_interrupt() && mm) {
+		if (handle_tlbmiss(mm, protection_flags, textaccess, address)) {
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
diff --git a/arch/sh64/oprofile/Kconfig b/arch/sh64/oprofile/Kconfig
new file mode 100644
index 0000000..19d3773
--- /dev/null
+++ b/arch/sh64/oprofile/Kconfig
@@ -0,0 +1,23 @@
+
+menu "Profiling support"
+	depends on EXPERIMENTAL
+
+config PROFILING
+	bool "Profiling support (EXPERIMENTAL)"
+	help
+	  Say Y here to enable the extended profiling support mechanisms used
+	  by profilers such as OProfile.
+
+
+config OPROFILE
+	tristate "OProfile system profiling (EXPERIMENTAL)"
+	depends on PROFILING
+	help
+	  OProfile is a profiling system capable of profiling the
+	  whole system, include the kernel, kernel modules, libraries,
+	  and applications.
+
+	  If unsure, say N.
+
+endmenu
+
diff --git a/arch/sh64/oprofile/Makefile b/arch/sh64/oprofile/Makefile
new file mode 100644
index 0000000..11a451f
--- /dev/null
+++ b/arch/sh64/oprofile/Makefile
@@ -0,0 +1,12 @@
+obj-$(CONFIG_OPROFILE) += oprofile.o
+
+DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
+		oprof.o cpu_buffer.o buffer_sync.o \
+		event_buffer.o oprofile_files.o \
+		oprofilefs.o oprofile_stats.o \
+		timer_int.o )
+
+profdrvr-y				:= op_model_null.o
+
+oprofile-y				:= $(DRIVER_OBJS) $(profdrvr-y)
+
diff --git a/arch/sh64/oprofile/op_model_null.c b/arch/sh64/oprofile/op_model_null.c
new file mode 100644
index 0000000..a845b08
--- /dev/null
+++ b/arch/sh64/oprofile/op_model_null.c
@@ -0,0 +1,23 @@
+/*
+ * arch/sh/oprofile/op_model_null.c
+ *
+ * Copyright (C) 2003  Paul Mundt
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/kernel.h>
+#include <linux/oprofile.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+
+int __init oprofile_arch_init(struct oprofile_operations *ops)
+{
+	return -ENODEV;
+}
+
+void oprofile_arch_exit(void)
+{
+}
+