Merge branch 'next' of git://git.monstr.eu/linux-2.6-microblaze

* 'next' of git://git.monstr.eu/linux-2.6-microblaze:
  microblaze: remove obsolete DEBUG_BOOTMEM
  microblaze: trivial: Fix removed the part of line
  microblaze: Fix __muldi3 function for little-endian.
  microblaze: Clear i/dcache for sw breakpoints
  microblaze: Remove useless early_init_dt_check_for_initrd
  microblaze: Fix unaligned exception for little endian platform
  microblaze: Add PVR for Microblaze v8.00.b
  microblaze: Correct PVR access macros
  Revert "microblaze: Simplify syscall rutine"
  microblaze: Fix initramfs
  arch/microblaze: Remove unnecessary semicolons
diff --git a/arch/microblaze/Kconfig.debug b/arch/microblaze/Kconfig.debug
index e66e25c..012e377 100644
--- a/arch/microblaze/Kconfig.debug
+++ b/arch/microblaze/Kconfig.debug
@@ -23,8 +23,4 @@
 	  This option turns on/off heart beat kernel functionality.
 	  First GPIO node is taken.
 
-config DEBUG_BOOTMEM
-	depends on DEBUG_KERNEL
-	bool "Debug BOOTMEM initialization"
-
 endmenu
diff --git a/arch/microblaze/Makefile b/arch/microblaze/Makefile
index 15f1f1d..6f432e6 100644
--- a/arch/microblaze/Makefile
+++ b/arch/microblaze/Makefile
@@ -17,7 +17,7 @@
 # The various CONFIG_XILINX cpu features options are integers 0/1/2...
 # rather than bools y/n
 
-# Work out HW multipler support.  This is icky.
+# Work out HW multipler support. This is tricky.
 # 1. Spartan2 has no HW multiplers.
 # 2. MicroBlaze v3.x always uses them, except in Spartan 2
 # 3. All other FPGa/CPU ver combos, we can trust the CONFIG_ settings
diff --git a/arch/microblaze/configs/mmu_defconfig b/arch/microblaze/configs/mmu_defconfig
index 8b422b1..ab8fbe7 100644
--- a/arch/microblaze/configs/mmu_defconfig
+++ b/arch/microblaze/configs/mmu_defconfig
@@ -66,5 +66,4 @@
 CONFIG_DEBUG_INFO=y
 # CONFIG_RCU_CPU_STALL_DETECTOR is not set
 CONFIG_EARLY_PRINTK=y
-CONFIG_DEBUG_BOOTMEM=y
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/microblaze/include/asm/pvr.h b/arch/microblaze/include/asm/pvr.h
index 37db96a..a10bec6 100644
--- a/arch/microblaze/include/asm/pvr.h
+++ b/arch/microblaze/include/asm/pvr.h
@@ -1,9 +1,9 @@
 /*
  * Support for the MicroBlaze PVR (Processor Version Register)
  *
- * Copyright (C) 2009 Michal Simek <monstr@monstr.eu>
+ * Copyright (C) 2009 - 2011 Michal Simek <monstr@monstr.eu>
  * Copyright (C) 2007 John Williams <john.williams@petalogix.com>
- * Copyright (C) 2007 - 2009 PetaLogix
+ * Copyright (C) 2007 - 2011 PetaLogix
  *
  * This file is subject to the terms and conditions of the GNU General
  * Public License. See the file COPYING in the main directory of this
@@ -46,11 +46,11 @@
 #define PVR2_I_LMB_MASK			0x10000000
 #define PVR2_INTERRUPT_IS_EDGE_MASK	0x08000000
 #define PVR2_EDGE_IS_POSITIVE_MASK	0x04000000
-#define PVR2_D_PLB_MASK			0x02000000	/* new */
-#define PVR2_I_PLB_MASK			0x01000000	/* new */
-#define PVR2_INTERCONNECT		0x00800000	/* new */
-#define PVR2_USE_EXTEND_FSL		0x00080000	/* new */
-#define PVR2_USE_FSL_EXC		0x00040000	/* new */
+#define PVR2_D_PLB_MASK			0x02000000 /* new */
+#define PVR2_I_PLB_MASK			0x01000000 /* new */
+#define PVR2_INTERCONNECT		0x00800000 /* new */
+#define PVR2_USE_EXTEND_FSL		0x00080000 /* new */
+#define PVR2_USE_FSL_EXC		0x00040000 /* new */
 #define PVR2_USE_MSR_INSTR		0x00020000
 #define PVR2_USE_PCMP_INSTR		0x00010000
 #define PVR2_AREA_OPTIMISED		0x00008000
@@ -59,7 +59,7 @@
 #define PVR2_USE_HW_MUL_MASK		0x00001000
 #define PVR2_USE_FPU_MASK		0x00000800
 #define PVR2_USE_MUL64_MASK		0x00000400
-#define PVR2_USE_FPU2_MASK		0x00000200	/* new */
+#define PVR2_USE_FPU2_MASK		0x00000200 /* new */
 #define PVR2_USE_IPLBEXC 		0x00000100
 #define PVR2_USE_DPLBEXC		0x00000080
 #define PVR2_OPCODE_0x0_ILL_MASK	0x00000040
@@ -122,96 +122,103 @@
 
 
 /* PVR access macros */
-#define PVR_IS_FULL(pvr)		(pvr.pvr[0] & PVR0_PVR_FULL_MASK)
-#define PVR_USE_BARREL(pvr)		(pvr.pvr[0] & PVR0_USE_BARREL_MASK)
-#define PVR_USE_DIV(pvr)		(pvr.pvr[0] & PVR0_USE_DIV_MASK)
-#define PVR_USE_HW_MUL(pvr)		(pvr.pvr[0] & PVR0_USE_HW_MUL_MASK)
-#define PVR_USE_FPU(pvr)		(pvr.pvr[0] & PVR0_USE_FPU_MASK)
-#define PVR_USE_FPU2(pvr)		(pvr.pvr[2] & PVR2_USE_FPU2_MASK)
-#define PVR_USE_ICACHE(pvr)		(pvr.pvr[0] & PVR0_USE_ICACHE_MASK)
-#define PVR_USE_DCACHE(pvr)		(pvr.pvr[0] & PVR0_USE_DCACHE_MASK)
-#define PVR_VERSION(pvr)	((pvr.pvr[0] & PVR0_VERSION_MASK) >> 8)
-#define PVR_USER1(pvr)			(pvr.pvr[0] & PVR0_USER1_MASK)
-#define PVR_USER2(pvr)			(pvr.pvr[1] & PVR1_USER2_MASK)
+#define PVR_IS_FULL(_pvr)	(_pvr.pvr[0] & PVR0_PVR_FULL_MASK)
+#define PVR_USE_BARREL(_pvr)	(_pvr.pvr[0] & PVR0_USE_BARREL_MASK)
+#define PVR_USE_DIV(_pvr)	(_pvr.pvr[0] & PVR0_USE_DIV_MASK)
+#define PVR_USE_HW_MUL(_pvr)	(_pvr.pvr[0] & PVR0_USE_HW_MUL_MASK)
+#define PVR_USE_FPU(_pvr)	(_pvr.pvr[0] & PVR0_USE_FPU_MASK)
+#define PVR_USE_FPU2(_pvr)	(_pvr.pvr[2] & PVR2_USE_FPU2_MASK)
+#define PVR_USE_ICACHE(_pvr)	(_pvr.pvr[0] & PVR0_USE_ICACHE_MASK)
+#define PVR_USE_DCACHE(_pvr)	(_pvr.pvr[0] & PVR0_USE_DCACHE_MASK)
+#define PVR_VERSION(_pvr)	((_pvr.pvr[0] & PVR0_VERSION_MASK) >> 8)
+#define PVR_USER1(_pvr)		(_pvr.pvr[0] & PVR0_USER1_MASK)
+#define PVR_USER2(_pvr)		(_pvr.pvr[1] & PVR1_USER2_MASK)
 
-#define PVR_D_OPB(pvr)			(pvr.pvr[2] & PVR2_D_OPB_MASK)
-#define PVR_D_LMB(pvr)			(pvr.pvr[2] & PVR2_D_LMB_MASK)
-#define PVR_I_OPB(pvr)			(pvr.pvr[2] & PVR2_I_OPB_MASK)
-#define PVR_I_LMB(pvr)			(pvr.pvr[2] & PVR2_I_LMB_MASK)
-#define PVR_INTERRUPT_IS_EDGE(pvr) \
-			(pvr.pvr[2] & PVR2_INTERRUPT_IS_EDGE_MASK)
-#define PVR_EDGE_IS_POSITIVE(pvr) \
-			(pvr.pvr[2] & PVR2_EDGE_IS_POSITIVE_MASK)
-#define PVR_USE_MSR_INSTR(pvr)		(pvr.pvr[2] & PVR2_USE_MSR_INSTR)
-#define PVR_USE_PCMP_INSTR(pvr)		(pvr.pvr[2] & PVR2_USE_PCMP_INSTR)
-#define PVR_AREA_OPTIMISED(pvr)		(pvr.pvr[2] & PVR2_AREA_OPTIMISED)
-#define PVR_USE_MUL64(pvr)		(pvr.pvr[2] & PVR2_USE_MUL64_MASK)
-#define PVR_OPCODE_0x0_ILLEGAL(pvr) \
-			(pvr.pvr[2] & PVR2_OPCODE_0x0_ILL_MASK)
-#define PVR_UNALIGNED_EXCEPTION(pvr) \
-			(pvr.pvr[2] & PVR2_UNALIGNED_EXC_MASK)
-#define PVR_ILL_OPCODE_EXCEPTION(pvr) \
-			(pvr.pvr[2] & PVR2_ILL_OPCODE_EXC_MASK)
-#define PVR_IOPB_BUS_EXCEPTION(pvr) \
-			(pvr.pvr[2] & PVR2_IOPB_BUS_EXC_MASK)
-#define PVR_DOPB_BUS_EXCEPTION(pvr) \
-			(pvr.pvr[2] & PVR2_DOPB_BUS_EXC_MASK)
-#define PVR_DIV_ZERO_EXCEPTION(pvr) \
-			(pvr.pvr[2] & PVR2_DIV_ZERO_EXC_MASK)
-#define PVR_FPU_EXCEPTION(pvr)		(pvr.pvr[2] & PVR2_FPU_EXC_MASK)
-#define PVR_FSL_EXCEPTION(pvr)		(pvr.pvr[2] & PVR2_USE_EXTEND_FSL)
+#define PVR_D_OPB(_pvr)		(_pvr.pvr[2] & PVR2_D_OPB_MASK)
+#define PVR_D_LMB(_pvr)		(_pvr.pvr[2] & PVR2_D_LMB_MASK)
+#define PVR_I_OPB(_pvr)		(_pvr.pvr[2] & PVR2_I_OPB_MASK)
+#define PVR_I_LMB(_pvr)		(_pvr.pvr[2] & PVR2_I_LMB_MASK)
+#define PVR_INTERRUPT_IS_EDGE(_pvr) \
+			(_pvr.pvr[2] & PVR2_INTERRUPT_IS_EDGE_MASK)
+#define PVR_EDGE_IS_POSITIVE(_pvr) \
+			(_pvr.pvr[2] & PVR2_EDGE_IS_POSITIVE_MASK)
+#define PVR_USE_MSR_INSTR(_pvr)		(_pvr.pvr[2] & PVR2_USE_MSR_INSTR)
+#define PVR_USE_PCMP_INSTR(_pvr)	(_pvr.pvr[2] & PVR2_USE_PCMP_INSTR)
+#define PVR_AREA_OPTIMISED(_pvr)	(_pvr.pvr[2] & PVR2_AREA_OPTIMISED)
+#define PVR_USE_MUL64(_pvr)		(_pvr.pvr[2] & PVR2_USE_MUL64_MASK)
+#define PVR_OPCODE_0x0_ILLEGAL(_pvr) \
+			(_pvr.pvr[2] & PVR2_OPCODE_0x0_ILL_MASK)
+#define PVR_UNALIGNED_EXCEPTION(_pvr) \
+			(_pvr.pvr[2] & PVR2_UNALIGNED_EXC_MASK)
+#define PVR_ILL_OPCODE_EXCEPTION(_pvr) \
+			(_pvr.pvr[2] & PVR2_ILL_OPCODE_EXC_MASK)
+#define PVR_IOPB_BUS_EXCEPTION(_pvr) \
+			(_pvr.pvr[2] & PVR2_IOPB_BUS_EXC_MASK)
+#define PVR_DOPB_BUS_EXCEPTION(_pvr) \
+			(_pvr.pvr[2] & PVR2_DOPB_BUS_EXC_MASK)
+#define PVR_DIV_ZERO_EXCEPTION(_pvr) \
+			(_pvr.pvr[2] & PVR2_DIV_ZERO_EXC_MASK)
+#define PVR_FPU_EXCEPTION(_pvr)		(_pvr.pvr[2] & PVR2_FPU_EXC_MASK)
+#define PVR_FSL_EXCEPTION(_pvr)		(_pvr.pvr[2] & PVR2_USE_EXTEND_FSL)
 
-#define PVR_DEBUG_ENABLED(pvr)		(pvr.pvr[3] & PVR3_DEBUG_ENABLED_MASK)
-#define PVR_NUMBER_OF_PC_BRK(pvr) \
-			((pvr.pvr[3] & PVR3_NUMBER_OF_PC_BRK_MASK) >> 25)
-#define PVR_NUMBER_OF_RD_ADDR_BRK(pvr) \
-			((pvr.pvr[3] & PVR3_NUMBER_OF_RD_ADDR_BRK_MASK) >> 19)
-#define PVR_NUMBER_OF_WR_ADDR_BRK(pvr) \
-			((pvr.pvr[3] & PVR3_NUMBER_OF_WR_ADDR_BRK_MASK) >> 13)
-#define PVR_FSL_LINKS(pvr)	((pvr.pvr[3] & PVR3_FSL_LINKS_MASK) >> 7)
+#define PVR_DEBUG_ENABLED(_pvr)		(_pvr.pvr[3] & PVR3_DEBUG_ENABLED_MASK)
+#define PVR_NUMBER_OF_PC_BRK(_pvr) \
+			((_pvr.pvr[3] & PVR3_NUMBER_OF_PC_BRK_MASK) >> 25)
+#define PVR_NUMBER_OF_RD_ADDR_BRK(_pvr) \
+			((_pvr.pvr[3] & PVR3_NUMBER_OF_RD_ADDR_BRK_MASK) >> 19)
+#define PVR_NUMBER_OF_WR_ADDR_BRK(_pvr) \
+			((_pvr.pvr[3] & PVR3_NUMBER_OF_WR_ADDR_BRK_MASK) >> 13)
+#define PVR_FSL_LINKS(_pvr)	((_pvr.pvr[3] & PVR3_FSL_LINKS_MASK) >> 7)
 
-#define PVR_ICACHE_ADDR_TAG_BITS(pvr) \
-			((pvr.pvr[4] & PVR4_ICACHE_ADDR_TAG_BITS_MASK) >> 26)
-#define PVR_ICACHE_USE_FSL(pvr)		(pvr.pvr[4] & PVR4_ICACHE_USE_FSL_MASK)
-#define PVR_ICACHE_ALLOW_WR(pvr)	(pvr.pvr[4] & PVR4_ICACHE_ALLOW_WR_MASK)
-#define PVR_ICACHE_LINE_LEN(pvr) \
-			(1 << ((pvr.pvr[4] & PVR4_ICACHE_LINE_LEN_MASK) >> 21))
-#define PVR_ICACHE_BYTE_SIZE(pvr) \
-			(1 << ((pvr.pvr[4] & PVR4_ICACHE_BYTE_SIZE_MASK) >> 16))
+#define PVR_ICACHE_ADDR_TAG_BITS(_pvr) \
+		((_pvr.pvr[4] & PVR4_ICACHE_ADDR_TAG_BITS_MASK) >> 26)
+#define PVR_ICACHE_USE_FSL(_pvr) \
+		(_pvr.pvr[4] & PVR4_ICACHE_USE_FSL_MASK)
+#define PVR_ICACHE_ALLOW_WR(_pvr) \
+		(_pvr.pvr[4] & PVR4_ICACHE_ALLOW_WR_MASK)
+#define PVR_ICACHE_LINE_LEN(_pvr) \
+		(1 << ((_pvr.pvr[4] & PVR4_ICACHE_LINE_LEN_MASK) >> 21))
+#define PVR_ICACHE_BYTE_SIZE(_pvr) \
+		(1 << ((_pvr.pvr[4] & PVR4_ICACHE_BYTE_SIZE_MASK) >> 16))
 
-#define PVR_DCACHE_ADDR_TAG_BITS(pvr) \
-			((pvr.pvr[5] & PVR5_DCACHE_ADDR_TAG_BITS_MASK) >> 26)
-#define PVR_DCACHE_USE_FSL(pvr)		(pvr.pvr[5] & PVR5_DCACHE_USE_FSL_MASK)
-#define PVR_DCACHE_ALLOW_WR(pvr)	(pvr.pvr[5] & PVR5_DCACHE_ALLOW_WR_MASK)
+#define PVR_DCACHE_ADDR_TAG_BITS(_pvr) \
+			((_pvr.pvr[5] & PVR5_DCACHE_ADDR_TAG_BITS_MASK) >> 26)
+#define PVR_DCACHE_USE_FSL(_pvr)	(_pvr.pvr[5] & PVR5_DCACHE_USE_FSL_MASK)
+#define PVR_DCACHE_ALLOW_WR(_pvr) \
+			(_pvr.pvr[5] & PVR5_DCACHE_ALLOW_WR_MASK)
 /* FIXME two shifts on one line needs any comment */
-#define PVR_DCACHE_LINE_LEN(pvr) \
-			(1 << ((pvr.pvr[5] & PVR5_DCACHE_LINE_LEN_MASK) >> 21))
-#define PVR_DCACHE_BYTE_SIZE(pvr) \
-			(1 << ((pvr.pvr[5] & PVR5_DCACHE_BYTE_SIZE_MASK) >> 16))
+#define PVR_DCACHE_LINE_LEN(_pvr) \
+		(1 << ((_pvr.pvr[5] & PVR5_DCACHE_LINE_LEN_MASK) >> 21))
+#define PVR_DCACHE_BYTE_SIZE(_pvr) \
+		(1 << ((_pvr.pvr[5] & PVR5_DCACHE_BYTE_SIZE_MASK) >> 16))
 
-#define PVR_DCACHE_USE_WRITEBACK(pvr) \
-			((pvr.pvr[5] & PVR5_DCACHE_USE_WRITEBACK) >> 14)
+#define PVR_DCACHE_USE_WRITEBACK(_pvr) \
+			((_pvr.pvr[5] & PVR5_DCACHE_USE_WRITEBACK) >> 14)
 
-#define PVR_ICACHE_BASEADDR(pvr)	(pvr.pvr[6] & PVR6_ICACHE_BASEADDR_MASK)
-#define PVR_ICACHE_HIGHADDR(pvr)	(pvr.pvr[7] & PVR7_ICACHE_HIGHADDR_MASK)
+#define PVR_ICACHE_BASEADDR(_pvr) \
+			(_pvr.pvr[6] & PVR6_ICACHE_BASEADDR_MASK)
+#define PVR_ICACHE_HIGHADDR(_pvr) \
+			(_pvr.pvr[7] & PVR7_ICACHE_HIGHADDR_MASK)
+#define PVR_DCACHE_BASEADDR(_pvr) \
+			(_pvr.pvr[8] & PVR8_DCACHE_BASEADDR_MASK)
+#define PVR_DCACHE_HIGHADDR(_pvr) \
+			(_pvr.pvr[9] & PVR9_DCACHE_HIGHADDR_MASK)
 
-#define PVR_DCACHE_BASEADDR(pvr)	(pvr.pvr[8] & PVR8_DCACHE_BASEADDR_MASK)
-#define PVR_DCACHE_HIGHADDR(pvr)	(pvr.pvr[9] & PVR9_DCACHE_HIGHADDR_MASK)
+#define PVR_TARGET_FAMILY(_pvr) \
+			((_pvr.pvr[10] & PVR10_TARGET_FAMILY_MASK) >> 24)
 
-#define PVR_TARGET_FAMILY(pvr)	((pvr.pvr[10] & PVR10_TARGET_FAMILY_MASK) >> 24)
-
-#define PVR_MSR_RESET_VALUE(pvr) \
-				(pvr.pvr[11] & PVR11_MSR_RESET_VALUE_MASK)
+#define PVR_MSR_RESET_VALUE(_pvr) \
+			(_pvr.pvr[11] & PVR11_MSR_RESET_VALUE_MASK)
 
 /* mmu */
-#define PVR_USE_MMU(pvr)	((pvr.pvr[11] & PVR11_USE_MMU) >> 30)
-#define PVR_MMU_ITLB_SIZE(pvr)	(pvr.pvr[11] & PVR11_MMU_ITLB_SIZE)
-#define PVR_MMU_DTLB_SIZE(pvr)	(pvr.pvr[11] & PVR11_MMU_DTLB_SIZE)
-#define PVR_MMU_TLB_ACCESS(pvr)	(pvr.pvr[11] & PVR11_MMU_TLB_ACCESS)
-#define PVR_MMU_ZONES(pvr)	(pvr.pvr[11] & PVR11_MMU_ZONES)
+#define PVR_USE_MMU(_pvr)		((_pvr.pvr[11] & PVR11_USE_MMU) >> 30)
+#define PVR_MMU_ITLB_SIZE(_pvr)		(_pvr.pvr[11] & PVR11_MMU_ITLB_SIZE)
+#define PVR_MMU_DTLB_SIZE(_pvr)		(_pvr.pvr[11] & PVR11_MMU_DTLB_SIZE)
+#define PVR_MMU_TLB_ACCESS(_pvr)	(_pvr.pvr[11] & PVR11_MMU_TLB_ACCESS)
+#define PVR_MMU_ZONES(_pvr)		(_pvr.pvr[11] & PVR11_MMU_ZONES)
 
 /* endian */
-#define PVR_ENDIAN(pvr)	(pvr.pvr[0] & PVR0_ENDI)
+#define PVR_ENDIAN(_pvr)	(_pvr.pvr[0] & PVR0_ENDI)
 
 int cpu_has_pvr(void);
 void get_pvr(struct pvr_s *pvr);
diff --git a/arch/microblaze/kernel/cpu/cpuinfo.c b/arch/microblaze/kernel/cpu/cpuinfo.c
index 87c79fa..2c309fc 100644
--- a/arch/microblaze/kernel/cpu/cpuinfo.c
+++ b/arch/microblaze/kernel/cpu/cpuinfo.c
@@ -32,6 +32,7 @@
 	{"7.30.a", 0x10},
 	{"7.30.b", 0x11},
 	{"8.00.a", 0x12},
+	{"8.00.b", 0x13},
 	{NULL, 0},
 };
 
diff --git a/arch/microblaze/kernel/entry.S b/arch/microblaze/kernel/entry.S
index 819238b..41c30cd 100644
--- a/arch/microblaze/kernel/entry.S
+++ b/arch/microblaze/kernel/entry.S
@@ -287,25 +287,44 @@
  * are masked. This is nice, means we don't have to CLI before state save
  */
 C_ENTRY(_user_exception):
-	addi	r14, r14, 4	/* return address is 4 byte after call */
 	swi	r1, r0, TOPHYS(PER_CPU(ENTRY_SP)) /* save stack */
+	addi	r14, r14, 4	/* return address is 4 byte after call */
 
+	mfs	r1, rmsr
+	nop
+	andi	r1, r1, MSR_UMS
+	bnei	r1, 1f
+
+/* Kernel-mode state save - kernel execve */
+	lwi	r1, r0, TOPHYS(PER_CPU(ENTRY_SP)); /* Reload kernel stack-ptr*/
+	tophys(r1,r1);
+
+	addik	r1, r1, -STATE_SAVE_SIZE; /* Make room on the stack. */
+	SAVE_REGS
+
+	swi	r1, r1, PTO + PT_MODE; /* pt_regs -> kernel mode */
+	brid	2f;
+	nop;				/* Fill delay slot */
+
+/* User-mode state save.  */
+1:
 	lwi	r1, r0, TOPHYS(PER_CPU(CURRENT_SAVE)); /* get saved current */
 	tophys(r1,r1);
 	lwi	r1, r1, TS_THREAD_INFO;	/* get stack from task_struct */
-	/* MS these three instructions can be added to one */
-	/* addik	r1, r1, THREAD_SIZE; */
-	/* tophys(r1,r1); */
-	/* addik	r1, r1, -STATE_SAVE_SIZE; */
-	addik r1, r1, THREAD_SIZE + CONFIG_KERNEL_BASE_ADDR - CONFIG_KERNEL_START - STATE_SAVE_SIZE;
+/* calculate kernel stack pointer from task struct 8k */
+	addik	r1, r1, THREAD_SIZE;
+	tophys(r1,r1);
+
+	addik	r1, r1, -STATE_SAVE_SIZE; /* Make room on the stack.  */
 	SAVE_REGS
 	swi	r0, r1, PTO + PT_R3
 	swi	r0, r1, PTO + PT_R4
 
+	swi	r0, r1, PTO + PT_MODE;			/* Was in user-mode. */
 	lwi	r11, r0, TOPHYS(PER_CPU(ENTRY_SP));
 	swi	r11, r1, PTO+PT_R1;		/* Store user SP.  */
 	clear_ums;
-	lwi	CURRENT_TASK, r0, TOPHYS(PER_CPU(CURRENT_SAVE));
+2:	lwi	CURRENT_TASK, r0, TOPHYS(PER_CPU(CURRENT_SAVE));
 	/* Save away the syscall number.  */
 	swi	r12, r1, PTO+PT_R0;
 	tovirt(r1,r1)
@@ -375,6 +394,9 @@
 	swi	r3, r1, PTO + PT_R3
 	swi	r4, r1, PTO + PT_R4
 
+	lwi	r11, r1, PTO + PT_MODE;
+/* See if returning to kernel mode, if so, skip resched &c.  */
+	bnei	r11, 2f;
 	/* We're returning to user mode, so check for various conditions that
 	 * trigger rescheduling. */
 	/* FIXME: Restructure all these flag checks. */
@@ -417,6 +439,16 @@
 	RESTORE_REGS;
 	addik	r1, r1, STATE_SAVE_SIZE		/* Clean up stack space.  */
 	lwi	r1, r1, PT_R1 - PT_SIZE;/* Restore user stack pointer. */
+	bri	6f;
+
+/* Return to kernel state.  */
+2:	set_bip;			/*  Ints masked for state restore */
+	VM_OFF;
+	tophys(r1,r1);
+	RESTORE_REGS;
+	addik	r1, r1, STATE_SAVE_SIZE		/* Clean up stack space.  */
+	tovirt(r1,r1);
+6:
 TRAP_return:		/* Make global symbol for debugging */
 	rtbd	r14, 0;	/* Instructions to return from an IRQ */
 	nop;
diff --git a/arch/microblaze/kernel/exceptions.c b/arch/microblaze/kernel/exceptions.c
index 478f294..a7fa6ae7 100644
--- a/arch/microblaze/kernel/exceptions.c
+++ b/arch/microblaze/kernel/exceptions.c
@@ -25,6 +25,7 @@
 #include <linux/errno.h>
 #include <linux/ptrace.h>
 #include <asm/current.h>
+#include <asm/cacheflush.h>
 
 #define MICROBLAZE_ILL_OPCODE_EXCEPTION	0x02
 #define MICROBLAZE_IBUS_EXCEPTION	0x03
@@ -52,6 +53,8 @@
 void sw_exception(struct pt_regs *regs)
 {
 	_exception(SIGTRAP, regs, TRAP_BRKPT, regs->r16);
+	flush_dcache_range(regs->r16, regs->r16 + 0x4);
+	flush_icache_range(regs->r16, regs->r16 + 0x4);
 }
 
 void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
diff --git a/arch/microblaze/kernel/hw_exception_handler.S b/arch/microblaze/kernel/hw_exception_handler.S
index 7811954..25f6e07 100644
--- a/arch/microblaze/kernel/hw_exception_handler.S
+++ b/arch/microblaze/kernel/hw_exception_handler.S
@@ -945,11 +945,20 @@
 store4:	sbi	r3, r4, 3;	/* Delay slot */
 ex_shw_vm:
 	/* Store the lower half-word, byte-by-byte into destination address */
+#ifdef __MICROBLAZEEL__
+	lbui	r3, r5, 0;
+store5:	sbi	r3, r4, 0;
+	lbui	r3, r5, 1;
+	brid	ret_from_exc;
+store6:	sbi	r3, r4, 1;	/* Delay slot */
+#else
 	lbui	r3, r5, 2;
 store5:	sbi	r3, r4, 0;
 	lbui	r3, r5, 3;
 	brid	ret_from_exc;
 store6:	sbi	r3, r4, 1;	/* Delay slot */
+#endif
+
 ex_sw_end_vm:			/* Exception handling of store word, ends. */
 
 /* We have to prevent cases that get/put_user macros get unaligned pointer
diff --git a/arch/microblaze/kernel/prom.c b/arch/microblaze/kernel/prom.c
index a105301..c881393 100644
--- a/arch/microblaze/kernel/prom.c
+++ b/arch/microblaze/kernel/prom.c
@@ -61,14 +61,12 @@
 	char *p;
 	int *addr;
 
-	pr_debug("search \"chosen\", depth: %d, uname: %s\n", depth, uname);
+	pr_debug("search \"serial\", depth: %d, uname: %s\n", depth, uname);
 
 /* find all serial nodes */
 	if (strncmp(uname, "serial", 6) != 0)
 		return 0;
 
-	early_init_dt_check_for_initrd(node);
-
 /* find compatible node with uartlite */
 	p = of_get_flat_dt_prop(node, "compatible", &l);
 	if ((strncmp(p, "xlnx,xps-uartlite", 17) != 0) &&
diff --git a/arch/microblaze/kernel/vmlinux.lds.S b/arch/microblaze/kernel/vmlinux.lds.S
index 96a88c3..3451bde 100644
--- a/arch/microblaze/kernel/vmlinux.lds.S
+++ b/arch/microblaze/kernel/vmlinux.lds.S
@@ -123,20 +123,10 @@
 
 	__init_end_before_initramfs = .;
 
-	.init.ramfs ALIGN(PAGE_SIZE) : AT(ADDR(.init.ramfs) - LOAD_OFFSET) {
-		__initramfs_start = .;
-		*(.init.ramfs)
-		__initramfs_end = .;
-		. = ALIGN(4);
-		LONG(0);
-/*
- * FIXME this can break initramfs for MMU.
- * Pad init.ramfs up to page boundary,
- * so that __init_end == __bss_start. This will make image.elf
- * consistent with the image.bin
- */
-		/* . = ALIGN(PAGE_SIZE); */
+	.init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) {
+		INIT_RAM_FS
 	}
+
 	__init_end = .;
 
 	.bss ALIGN (PAGE_SIZE) : AT(ADDR(.bss) - LOAD_OFFSET) {
diff --git a/arch/microblaze/lib/memmove.c b/arch/microblaze/lib/memmove.c
index 123e361..810fd68 100644
--- a/arch/microblaze/lib/memmove.c
+++ b/arch/microblaze/lib/memmove.c
@@ -182,7 +182,7 @@
 			for (; c >= 4; c -= 4) {
 				value = *--i_src;
 				*--i_dst = buf_hold | ((value & 0xFF000000)>> 24);
-				buf_hold = (value & 0xFFFFFF) << 8;;
+				buf_hold = (value & 0xFFFFFF) << 8;
 			}
 #endif
 			/* Realign the source */
diff --git a/arch/microblaze/lib/muldi3.S b/arch/microblaze/lib/muldi3.S
deleted file mode 100644
index ceeaa8c..0000000
--- a/arch/microblaze/lib/muldi3.S
+++ /dev/null
@@ -1,121 +0,0 @@
-#include <linux/linkage.h>
-
-/*
- * Multiply operation for 64 bit integers, for devices with hard multiply
- *	Input :	Operand1[H] in Reg r5
- *		Operand1[L] in Reg r6
- *		Operand2[H] in Reg r7
- *		Operand2[L] in Reg r8
- *	Output: Result[H] in Reg r3
- *		Result[L] in Reg r4
- *
- * Explaination:
- *
- *	Both the input numbers are divided into 16 bit number as follows
- *		op1 = A B C D
- *		op2 = E F G H
- *	result = D * H
- *		 + (C * H + D * G) << 16
- *		 + (B * H + C * G + D * F) << 32
- *		 + (A * H + B * G + C * F + D * E) << 48
- *
- *	Only 64 bits of the output are considered
- */
-
-	.text
-	.globl	__muldi3
-	.type __muldi3, @function
-	.ent __muldi3
-
-__muldi3:
-	addi	r1, r1, -40
-
-/* Save the input operands on the caller's stack */
-	swi	r5, r1, 44
-	swi	r6, r1, 48
-	swi	r7, r1, 52
-	swi	r8, r1, 56
-
-/* Store all the callee saved registers */
-	sw	r20, r1, r0
-	swi	r21, r1, 4
-	swi	r22, r1, 8
-	swi	r23, r1, 12
-	swi	r24, r1, 16
-	swi	r25, r1, 20
-	swi	r26, r1, 24
-	swi	r27, r1, 28
-
-/* Load all the 16 bit values for A thru H */
-	lhui	r20, r1, 44 /* A */
-	lhui	r21, r1, 46 /* B */
-	lhui	r22, r1, 48 /* C */
-	lhui	r23, r1, 50 /* D */
-	lhui	r24, r1, 52 /* E */
-	lhui	r25, r1, 54 /* F */
-	lhui	r26, r1, 56 /* G */
-	lhui	r27, r1, 58 /* H */
-
-/* D * H ==> LSB of the result on stack ==> Store1 */
-	mul	r9, r23, r27
-	swi	r9, r1, 36 /* Pos2 and Pos3 */
-
-/* Hi (Store1) + C * H + D * G ==> Store2 ==> Pos1 and Pos2 */
-/* Store the carry generated in position 2 for Pos 3 */
-	lhui	r11, r1, 36 /* Pos2 */
-	mul	r9, r22, r27 /* C * H */
-	mul	r10, r23, r26 /* D * G */
-	add	r9, r9, r10
-	addc	r12, r0, r0
-	add	r9, r9, r11
-	addc	r12, r12, r0 /* Store the Carry */
-	shi	r9, r1, 36 /* Store Pos2 */
-	swi	r9, r1, 32
-	lhui	r11, r1, 32
-	shi	r11, r1, 34 /* Store Pos1 */
-
-/* Hi (Store2) + B * H + C * G + D * F ==> Store3 ==> Pos0 and Pos1 */
-	mul	r9, r21, r27 /* B * H */
-	mul	r10, r22, r26 /* C * G */
-	mul	r7, r23, r25 /* D * F */
-	add	r9, r9, r11
-	add	r9, r9, r10
-	add	r9, r9, r7
-	swi	r9, r1, 32 /* Pos0 and Pos1 */
-
-/* Hi (Store3) + A * H + B * G + C * F + D * E ==> Store3 ==> Pos0 */
-	lhui	r11, r1, 32 /* Pos0 */
-	mul	r9, r20, r27 /* A * H */
-	mul	r10, r21, r26 /* B * G */
-	mul	r7, r22, r25 /* C * F */
-	mul	r8, r23, r24 /* D * E */
-	add	r9, r9, r11
-	add 	r9, r9, r10
-	add	r9, r9, r7
-	add	r9, r9, r8
-	sext16	r9, r9 /* Sign extend the MSB */
-	shi	r9, r1, 32
-
-/* Move results to r3 and r4 */
-	lhui	r3, r1, 32
-	add	r3, r3, r12
-	shi	r3, r1, 32
-	lwi	r3, r1, 32 /* Hi Part */
-	lwi	r4, r1, 36 /* Lo Part */
-
-/* Restore Callee saved registers */
-	lw	r20, r1, r0
-	lwi	r21, r1, 4
-	lwi	r22, r1, 8
-	lwi	r23, r1, 12
-	lwi	r24, r1, 16
-	lwi	r25, r1, 20
-	lwi	r26, r1, 24
-	lwi	r27, r1, 28
-
-/* Restore Frame and return */
-	rtsd	r15, 8
-	addi	r1, r1, 40
-
-.size __muldi3, . - __muldi3
-.end __muldi3
diff --git a/arch/microblaze/lib/muldi3.c b/arch/microblaze/lib/muldi3.c
new file mode 100644
index 0000000..d4860e1
--- /dev/null
+++ b/arch/microblaze/lib/muldi3.c
@@ -0,0 +1,60 @@
+#include <linux/module.h>
+
+#include "libgcc.h"
+
+#define DWtype long long
+#define UWtype unsigned long
+#define UHWtype unsigned short
+
+#define W_TYPE_SIZE 32
+
+#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
+#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
+#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))
+
+/* If we still don't have umul_ppmm, define it using plain C.  */
+#if !defined(umul_ppmm)
+#define umul_ppmm(w1, w0, u, v)						\
+	do {								\
+		UWtype __x0, __x1, __x2, __x3;				\
+		UHWtype __ul, __vl, __uh, __vh;				\
+									\
+		__ul = __ll_lowpart(u);					\
+		__uh = __ll_highpart(u);				\
+		__vl = __ll_lowpart(v);					\
+		__vh = __ll_highpart(v);				\
+									\
+		__x0 = (UWtype) __ul * __vl;				\
+		__x1 = (UWtype) __ul * __vh;				\
+		__x2 = (UWtype) __uh * __vl;				\
+		__x3 = (UWtype) __uh * __vh;				\
+									\
+		__x1 += __ll_highpart(__x0); /* this can't give carry */\
+		__x1 += __x2; /* but this indeed can */			\
+		if (__x1 < __x2) /* did we get it? */			\
+		__x3 += __ll_B; /* yes, add it in the proper pos */	\
+									\
+		(w1) = __x3 + __ll_highpart(__x1);			\
+		(w0) = __ll_lowpart(__x1) * __ll_B + __ll_lowpart(__x0);\
+	} while (0)
+#endif
+
+#if !defined(__umulsidi3)
+#define __umulsidi3(u, v) ({				\
+	DWunion __w;					\
+	umul_ppmm(__w.s.high, __w.s.low, u, v);		\
+	__w.ll;						\
+	})
+#endif
+
+DWtype __muldi3(DWtype u, DWtype v)
+{
+	const DWunion uu = {.ll = u};
+	const DWunion vv = {.ll = v};
+	DWunion w = {.ll = __umulsidi3(uu.s.low, vv.s.low)};
+
+	w.s.high += ((UWtype) uu.s.low * (UWtype) vv.s.high
+		+ (UWtype) uu.s.high * (UWtype) vv.s.low);
+
+	return w.ll;
+}