Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc

* 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc: (151 commits)
  powerpc: Fix usage of 64-bit instruction in 32-bit altivec code
  MAINTAINERS: Add PowerPC patterns
  powerpc/pseries: Track previous CPPR values to correctly EOI interrupts
  powerpc/pseries: Correct pseries/dlpar.c build break without CONFIG_SMP
  powerpc: Make "intspec" pointers in irq_host->xlate() const
  powerpc/8xx: DTLB Miss cleanup
  powerpc/8xx: Remove DIRTY pte handling in DTLB Error.
  powerpc/8xx: Start using dcbX instructions in various copy routines
  powerpc/8xx: Restore _PAGE_WRITETHRU
  powerpc/8xx: Add missing Guarded setting in DTLB Error.
  powerpc/8xx: Fixup DAR from buggy dcbX instructions.
  powerpc/8xx: Tag DAR with 0x00f0 to catch buggy instructions.
  powerpc/8xx: Update TLB asm so it behaves as linux mm expects.
  powerpc/8xx: Invalidate non present TLBs
  powerpc/pseries: Serialize cpu hotplug operations during deactivate Vs deallocate
  pseries/pseries: Add code to online/offline CPUs of a DLPAR node
  powerpc: stop_this_cpu: remove the cpu from the online map.
  powerpc/pseries: Add kernel based CPU DLPAR handling
  sysfs/cpu: Add probe/release files
  powerpc/pseries: Kernel DLPAR Infrastructure
  ...
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index 974e29f..2aae06f 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -62,6 +62,21 @@
 		See Documentation/cputopology.txt for more information.
 
 
+What:		/sys/devices/system/cpu/probe
+		/sys/devices/system/cpu/release
+Date:		November 2009
+Contact:	Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description:	Dynamic addition and removal of CPU's.  This is not hotplug
+		removal, this is meant complete removal/addition of the CPU
+		from the system.
+
+		probe: writes to this file will dynamically add a CPU to the
+		system.  Information written to the file to add CPU's is
+		architecture specific.
+
+		release: writes to this file dynamically remove a CPU from
+		the system.  Information writtento the file to remove CPU's
+		is architecture specific.
 
 What:		/sys/devices/system/cpu/cpu#/node
 Date:		October 2009
diff --git a/Documentation/cpu-hotplug.txt b/Documentation/cpu-hotplug.txt
index 9d620c1..4d4a644b 100644
--- a/Documentation/cpu-hotplug.txt
+++ b/Documentation/cpu-hotplug.txt
@@ -49,6 +49,12 @@
 additional_cpus=n (*)	Use this to limit hotpluggable cpus. This option sets
   			cpu_possible_map = cpu_present_map + additional_cpus
 
+cede_offline={"off","on"}  Use this option to disable/enable putting offlined
+		            processors to an extended H_CEDE state on
+			    supported pseries platforms.
+			    If nothing is specified,
+			    cede_offline is set to "on".
+
 (*) Option valid only for following architectures
 - ia64
 
diff --git a/Documentation/powerpc/dts-bindings/fsl/board.txt b/Documentation/powerpc/dts-bindings/fsl/board.txt
index e8b5bc2..39e9415 100644
--- a/Documentation/powerpc/dts-bindings/fsl/board.txt
+++ b/Documentation/powerpc/dts-bindings/fsl/board.txt
@@ -20,12 +20,16 @@
 - compatible : should be "fsl,fpga-pixis".
 - reg : should contain the address and the length of the FPPGA register
   set.
+- interrupt-parent: should specify phandle for the interrupt controller.
+- interrupts : should specify event (wakeup) IRQ.
 
 Example (MPC8610HPCD):
 
 	board-control@e8000000 {
 		compatible = "fsl,fpga-pixis";
 		reg = <0xe8000000 32>;
+		interrupt-parent = <&mpic>;
+		interrupts = <8 8>;
 	};
 
 * Freescale BCSR GPIO banks
diff --git a/Documentation/powerpc/dts-bindings/fsl/mpc5200.txt b/Documentation/powerpc/dts-bindings/fsl/mpc5200.txt
index cabc780..5c6602d 100644
--- a/Documentation/powerpc/dts-bindings/fsl/mpc5200.txt
+++ b/Documentation/powerpc/dts-bindings/fsl/mpc5200.txt
@@ -103,7 +103,22 @@
 ---------------------
 On the mpc5200 and 5200b, GPT0 has a watchdog timer function.  If the board
 design supports the internal wdt, then the device node for GPT0 should
-include the empty property 'fsl,has-wdt'.
+include the empty property 'fsl,has-wdt'.  Note that this does not activate
+the watchdog.  The timer will function as a GPT if the timer api is used, and
+it will function as watchdog if the watchdog device is used.  The watchdog
+mode has priority over the gpt mode, i.e. if the watchdog is activated, any
+gpt api call to this timer will fail with -EBUSY.
+
+If you add the property
+	fsl,wdt-on-boot = <n>;
+GPT0 will be marked as in-use watchdog, i.e. blocking every gpt access to it.
+If n>0, the watchdog is started with a timeout of n seconds.  If n=0, the
+configuration of the watchdog is not touched.  This is useful in two cases:
+- just mark GPT0 as watchdog, blocking gpt accesses, and configure it later;
+- do not touch a configuration assigned by the boot loader which supervises
+  the boot process itself.
+
+The watchdog will respect the CONFIG_WATCHDOG_NOWAYOUT option.
 
 An mpc5200-gpt can be used as a single line GPIO controller.  To do so,
 add the following properties to the gpt node:
diff --git a/MAINTAINERS b/MAINTAINERS
index d58fa70..cff133b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3260,6 +3260,7 @@
 M:	Paul Mackerras <paulus@au.ibm.com>
 W:	http://www.ibm.com/linux/ltc/projects/ppc
 S:	Supported
+F:	arch/powerpc/boot/rs6000.h
 
 LINUX FOR POWERPC (32-BIT AND 64-BIT)
 M:	Benjamin Herrenschmidt <benh@kernel.crashing.org>
@@ -3268,18 +3269,24 @@
 L:	linuxppc-dev@ozlabs.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc.git
 S:	Supported
+F:	Documentation/powerpc/
+F:	arch/powerpc/
 
 LINUX FOR POWER MACINTOSH
 M:	Benjamin Herrenschmidt <benh@kernel.crashing.org>
 W:	http://www.penguinppc.org/
 L:	linuxppc-dev@ozlabs.org
 S:	Maintained
+F:	arch/powerpc/platforms/powermac/
+F:	drivers/macintosh/
 
 LINUX FOR POWERPC EMBEDDED MPC5XXX
 M:	Grant Likely <grant.likely@secretlab.ca>
 L:	linuxppc-dev@ozlabs.org
 T:	git git://git.secretlab.ca/git/linux-2.6.git
 S:	Maintained
+F:	arch/powerpc/platforms/512x/
+F:	arch/powerpc/platforms/52xx/
 
 LINUX FOR POWERPC EMBEDDED PPC4XX
 M:	Josh Boyer <jwboyer@linux.vnet.ibm.com>
@@ -3288,6 +3295,8 @@
 L:	linuxppc-dev@ozlabs.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/jwboyer/powerpc-4xx.git
 S:	Maintained
+F:	arch/powerpc/platforms/40x/
+F:	arch/powerpc/platforms/44x/
 
 LINUX FOR POWERPC EMBEDDED XILINX VIRTEX
 M:	Grant Likely <grant.likely@secretlab.ca>
@@ -3295,6 +3304,8 @@
 L:	linuxppc-dev@ozlabs.org
 T:	git git://git.secretlab.ca/git/linux-2.6.git
 S:	Maintained
+F:	arch/powerpc/*/*virtex*
+F:	arch/powerpc/*/*/*virtex*
 
 LINUX FOR POWERPC EMBEDDED PPC8XX
 M:	Vitaly Bordug <vitb@kernel.crashing.org>
@@ -3308,12 +3319,16 @@
 W:	http://www.penguinppc.org/
 L:	linuxppc-dev@ozlabs.org
 S:	Maintained
+F:	arch/powerpc/platforms/83xx/
 
 LINUX FOR POWERPC PA SEMI PWRFICIENT
 M:	Olof Johansson <olof@lixom.net>
 W:	http://www.pasemi.com/
 L:	linuxppc-dev@ozlabs.org
 S:	Supported
+F:	arch/powerpc/platforms/pasemi/
+F:	drivers/*/*pasemi*
+F:	drivers/*/*/*pasemi*
 
 LINUX SECURITY MODULE (LSM) FRAMEWORK
 M:	Chris Wright <chrisw@sous-sol.org>
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 2ba14e7..0df5746 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -56,6 +56,16 @@
 	bool
 	default y
 
+config NR_IRQS
+	int "Number of virtual interrupt numbers"
+	range 32 512
+	default "512"
+	help
+	  This defines the number of virtual interrupt numbers the kernel
+	  can manage. Virtual interrupt numbers are what you see in
+	  /proc/interrupts. If you configure your system to have too few,
+	  drivers will fail to load or worse - handle with care.
+
 config STACKTRACE_SUPPORT
 	bool
 	default y
@@ -199,24 +209,14 @@
 config REDBOOT
 	bool
 
-config HIBERNATE_32
-	bool
-	depends on (PPC_PMAC && !SMP) || BROKEN
-	default y
-
-config HIBERNATE_64
-	bool
-	depends on BROKEN || (PPC_PMAC64 && EXPERIMENTAL)
-	default y
-
 config ARCH_HIBERNATION_POSSIBLE
 	bool
-	depends on (PPC64 && HIBERNATE_64) || (PPC32 && HIBERNATE_32)
 	default y
 
 config ARCH_SUSPEND_POSSIBLE
 	def_bool y
-	depends on ADB_PMU || PPC_EFIKA || PPC_LITE5200 || PPC_83xx
+	depends on ADB_PMU || PPC_EFIKA || PPC_LITE5200 || PPC_83xx || \
+		   PPC_85xx || PPC_86xx
 
 config PPC_DCR_NATIVE
 	bool
@@ -320,6 +320,10 @@
 
 	  Say N if you are unsure.
 
+config ARCH_CPU_PROBE_RELEASE
+	def_bool y
+	depends on HOTPLUG_CPU
+
 config ARCH_ENABLE_MEMORY_HOTPLUG
 	def_bool y
 
@@ -378,6 +382,19 @@
 	  CPU.  Generally saying Y is safe, although some problems have been
 	  reported with SMP Power Macintoshes with this option enabled.
 
+config SPARSE_IRQ
+	bool "Support sparse irq numbering"
+	default y
+	help
+	  This enables support for sparse irqs. This is useful for distro
+	  kernels that want to define a high CONFIG_NR_CPUS value but still
+	  want to have low kernel memory footprint on smaller machines.
+
+	  ( Sparse IRQs can also be beneficial on NUMA boxes, as they spread
+	    out the irq_desc[] array in a more NUMA-friendly way. )
+
+	  If you don't know what to do here, say Y.
+
 config NUMA
 	bool "NUMA support"
 	depends on PPC64
@@ -652,6 +669,14 @@
 	select PPC_INDIRECT_PCI
 	select PCI_QUIRKS
 
+config FSL_PMC
+	bool
+	default y
+	depends on SUSPEND && (PPC_85xx || PPC_86xx)
+	help
+	  Freescale MPC85xx/MPC86xx power management controller support
+	  (suspend/resume). For MPC83xx see platforms/83xx/suspend.c
+
 config 4xx_SOC
 	bool
 
diff --git a/arch/powerpc/boot/dts/canyonlands.dts b/arch/powerpc/boot/dts/canyonlands.dts
index c920170..cd56bb5 100644
--- a/arch/powerpc/boot/dts/canyonlands.dts
+++ b/arch/powerpc/boot/dts/canyonlands.dts
@@ -352,6 +352,7 @@
 				max-frame-size = <9000>;
 				rx-fifo-size = <4096>;
 				tx-fifo-size = <2048>;
+				rx-fifo-size-gige = <16384>;
 				phy-mode = "rgmii";
 				phy-map = <0x00000000>;
 				rgmii-device = <&RGMII0>;
@@ -381,6 +382,7 @@
 				max-frame-size = <9000>;
 				rx-fifo-size = <4096>;
 				tx-fifo-size = <2048>;
+				rx-fifo-size-gige = <16384>;
 				phy-mode = "rgmii";
 				phy-map = <0x00000000>;
 				rgmii-device = <&RGMII0>;
diff --git a/arch/powerpc/boot/dts/eiger.dts b/arch/powerpc/boot/dts/eiger.dts
index c4a934f..48bcf71 100644
--- a/arch/powerpc/boot/dts/eiger.dts
+++ b/arch/powerpc/boot/dts/eiger.dts
@@ -316,6 +316,7 @@
 				max-frame-size = <9000>;
 				rx-fifo-size = <4096>;
 				tx-fifo-size = <2048>;
+				rx-fifo-size-gige = <16384>;
 				phy-mode = "rgmii";
 				phy-map = <0x00000000>;
 				rgmii-device = <&RGMII0>;
@@ -345,6 +346,7 @@
 				max-frame-size = <9000>;
 				rx-fifo-size = <4096>;
 				tx-fifo-size = <2048>;
+				rx-fifo-size-gige = <16384>;
 				phy-mode = "rgmii";
 				phy-map = <0x00000000>;
 				rgmii-device = <&RGMII0>;
@@ -375,6 +377,8 @@
 				max-frame-size = <9000>;
 				rx-fifo-size = <4096>;
 				tx-fifo-size = <2048>;
+				rx-fifo-size-gige = <16384>;
+				tx-fifo-size-gige = <16384>; /* emac2&3 only */
 				phy-mode = "rgmii";
 				phy-map = <0x00000000>;
 				rgmii-device = <&RGMII1>;
@@ -403,6 +407,8 @@
 				max-frame-size = <9000>;
 				rx-fifo-size = <4096>;
 				tx-fifo-size = <2048>;
+				rx-fifo-size-gige = <16384>;
+				tx-fifo-size-gige = <16384>; /* emac2&3 only */
 				phy-mode = "rgmii";
 				phy-map = <0x00000000>;
 				rgmii-device = <&RGMII1>;
diff --git a/arch/powerpc/boot/dts/gef_ppc9a.dts b/arch/powerpc/boot/dts/gef_ppc9a.dts
index 910944e..c86114e 100644
--- a/arch/powerpc/boot/dts/gef_ppc9a.dts
+++ b/arch/powerpc/boot/dts/gef_ppc9a.dts
@@ -118,6 +118,12 @@
 			};
 		};
 
+		nvram@3,0 {
+			device_type = "nvram";
+			compatible = "simtek,stk14ca8";
+			reg = <0x3 0x0 0x20000>;
+		};
+
 		fpga@4,0 {
 			compatible = "gef,ppc9a-fpga-regs";
 			reg = <0x4 0x0 0x40>;
diff --git a/arch/powerpc/boot/dts/gef_sbc310.dts b/arch/powerpc/boot/dts/gef_sbc310.dts
index 2107d3c..820c2b3 100644
--- a/arch/powerpc/boot/dts/gef_sbc310.dts
+++ b/arch/powerpc/boot/dts/gef_sbc310.dts
@@ -115,6 +115,12 @@
 			};
 		};
 
+		nvram@3,0 {
+			device_type = "nvram";
+			compatible = "simtek,stk14ca8";
+			reg = <0x3 0x0 0x20000>;
+		};
+
 		fpga@4,0 {
 			compatible = "gef,fpga-regs";
 			reg = <0x4 0x0 0x40>;
diff --git a/arch/powerpc/boot/dts/gef_sbc610.dts b/arch/powerpc/boot/dts/gef_sbc610.dts
index 35a6318..30911ad 100644
--- a/arch/powerpc/boot/dts/gef_sbc610.dts
+++ b/arch/powerpc/boot/dts/gef_sbc610.dts
@@ -84,6 +84,12 @@
 			  6 0 0xfd000000 0x00800000     // IO FPGA (8-bit)
 			  7 0 0xfd800000 0x00800000>;   // IO FPGA (32-bit)
 
+		nvram@3,0 {
+			device_type = "nvram";
+			compatible = "simtek,stk14ca8";
+			reg = <0x3 0x0 0x20000>;
+		};
+
 		fpga@4,0 {
 			compatible = "gef,fpga-regs";
 			reg = <0x4 0x0 0x40>;
diff --git a/arch/powerpc/boot/dts/glacier.dts b/arch/powerpc/boot/dts/glacier.dts
index f3787a2..f6f6189 100644
--- a/arch/powerpc/boot/dts/glacier.dts
+++ b/arch/powerpc/boot/dts/glacier.dts
@@ -292,6 +292,7 @@
 				max-frame-size = <9000>;
 				rx-fifo-size = <4096>;
 				tx-fifo-size = <2048>;
+				rx-fifo-size-gige = <16384>;
 				phy-mode = "rgmii";
 				phy-map = <0x00000000>;
 				rgmii-device = <&RGMII0>;
@@ -321,6 +322,7 @@
 				max-frame-size = <9000>;
 				rx-fifo-size = <4096>;
 				tx-fifo-size = <2048>;
+				rx-fifo-size-gige = <16384>;
 				phy-mode = "rgmii";
 				phy-map = <0x00000000>;
 				rgmii-device = <&RGMII0>;
@@ -351,6 +353,8 @@
 				max-frame-size = <9000>;
 				rx-fifo-size = <4096>;
 				tx-fifo-size = <2048>;
+				rx-fifo-size-gige = <16384>;
+				tx-fifo-size-gige = <16384>; /* emac2&3 only */
 				phy-mode = "rgmii";
 				phy-map = <0x00000000>;
 				rgmii-device = <&RGMII1>;
@@ -379,6 +383,8 @@
 				max-frame-size = <9000>;
 				rx-fifo-size = <4096>;
 				tx-fifo-size = <2048>;
+				rx-fifo-size-gige = <16384>;
+				tx-fifo-size-gige = <16384>; /* emac2&3 only */
 				phy-mode = "rgmii";
 				phy-map = <0x00000000>;
 				rgmii-device = <&RGMII1>;
diff --git a/arch/powerpc/boot/dts/haleakala.dts b/arch/powerpc/boot/dts/haleakala.dts
index 5b2a494..2b25669 100644
--- a/arch/powerpc/boot/dts/haleakala.dts
+++ b/arch/powerpc/boot/dts/haleakala.dts
@@ -226,6 +226,8 @@
 				max-frame-size = <9000>;
 				rx-fifo-size = <4096>;
 				tx-fifo-size = <2048>;
+				rx-fifo-size-gige = <16384>;
+				tx-fifo-size-gige = <16384>;
 				phy-mode = "rgmii";
 				phy-map = <0x00000000>;
 				rgmii-device = <&RGMII0>;
diff --git a/arch/powerpc/boot/dts/katmai.dts b/arch/powerpc/boot/dts/katmai.dts
index 077819b..51eb6ed 100644
--- a/arch/powerpc/boot/dts/katmai.dts
+++ b/arch/powerpc/boot/dts/katmai.dts
@@ -16,7 +16,7 @@
 
 / {
 	#address-cells = <2>;
-	#size-cells = <1>;
+	#size-cells = <2>;
 	model = "amcc,katmai";
 	compatible = "amcc,katmai";
 	dcr-parent = <&{/cpus/cpu@0}>;
@@ -49,7 +49,7 @@
 
 	memory {
 		device_type = "memory";
-		reg = <0x00000000 0x00000000 0x00000000>; /* Filled in by zImage */
+		reg = <0x0 0x00000000 0x0 0x00000000>; /* Filled in by U-Boot */
 	};
 
 	UIC0: interrupt-controller0 {
@@ -112,7 +112,15 @@
 		compatible = "ibm,plb-440spe", "ibm,plb-440gp", "ibm,plb4";
 		#address-cells = <2>;
 		#size-cells = <1>;
-		ranges;
+		/*        addr-child     addr-parent    size */
+		ranges = <0x4 0xe0000000 0x4 0xe0000000 0x20000000
+			  0xc 0x00000000 0xc 0x00000000 0x20000000
+			  0xd 0x00000000 0xd 0x00000000 0x80000000
+			  0xd 0x80000000 0xd 0x80000000 0x80000000
+			  0xe 0x00000000 0xe 0x00000000 0x80000000
+			  0xe 0x80000000 0xe 0x80000000 0x80000000
+			  0xf 0x00000000 0xf 0x00000000 0x80000000
+			  0xf 0x80000000 0xf 0x80000000 0x80000000>;
 		clock-frequency = <0>; /* Filled in by zImage */
 
 		SDRAM0: sdram {
@@ -245,8 +253,8 @@
 			ranges = <0x02000000 0x00000000 0x80000000 0x0000000d 0x80000000 0x00000000 0x80000000
 				  0x01000000 0x00000000 0x00000000 0x0000000c 0x08000000 0x00000000 0x00010000>;
 
-			/* Inbound 2GB range starting at 0 */
-			dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x0 0x80000000>;
+			/* Inbound 4GB range starting at 0 */
+			dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x1 0x00000000>;
 
 			/* This drives busses 0 to 0xf */
 			bus-range = <0x0 0xf>;
@@ -289,10 +297,10 @@
 			ranges = <0x02000000 0x00000000 0x80000000 0x0000000e 0x00000000 0x00000000 0x80000000
 				  0x01000000 0x00000000 0x00000000 0x0000000f 0x80000000 0x00000000 0x00010000>;
 
-			/* Inbound 2GB range starting at 0 */
-			dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x0 0x80000000>;
+			/* Inbound 4GB range starting at 0 */
+			dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x1 0x00000000>;
 
-			/* This drives busses 10 to 0x1f */
+			/* This drives busses 0x10 to 0x1f */
 			bus-range = <0x10 0x1f>;
 
 			/* Legacy interrupts (note the weird polarity, the bridge seems
@@ -330,10 +338,10 @@
 			ranges = <0x02000000 0x00000000 0x80000000 0x0000000e 0x80000000 0x00000000 0x80000000
 				  0x01000000 0x00000000 0x00000000 0x0000000f 0x80010000 0x00000000 0x00010000>;
 
-			/* Inbound 2GB range starting at 0 */
-			dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x0 0x80000000>;
+			/* Inbound 4GB range starting at 0 */
+			dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x1 0x00000000>;
 
-			/* This drives busses 10 to 0x1f */
+			/* This drives busses 0x20 to 0x2f */
 			bus-range = <0x20 0x2f>;
 
 			/* Legacy interrupts (note the weird polarity, the bridge seems
@@ -371,10 +379,10 @@
 			ranges = <0x02000000 0x00000000 0x80000000 0x0000000f 0x00000000 0x00000000 0x80000000
 				  0x01000000 0x00000000 0x00000000 0x0000000f 0x80020000 0x00000000 0x00010000>;
 
-			/* Inbound 2GB range starting at 0 */
-			dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x0 0x80000000>;
+			/* Inbound 4GB range starting at 0 */
+			dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x1 0x00000000>;
 
-			/* This drives busses 10 to 0x1f */
+			/* This drives busses 0x30 to 0x3f */
 			bus-range = <0x30 0x3f>;
 
 			/* Legacy interrupts (note the weird polarity, the bridge seems
diff --git a/arch/powerpc/boot/dts/kilauea.dts b/arch/powerpc/boot/dts/kilauea.dts
index c465614..083e68e 100644
--- a/arch/powerpc/boot/dts/kilauea.dts
+++ b/arch/powerpc/boot/dts/kilauea.dts
@@ -272,6 +272,8 @@
 				max-frame-size = <9000>;
 				rx-fifo-size = <4096>;
 				tx-fifo-size = <2048>;
+				rx-fifo-size-gige = <16384>;
+				tx-fifo-size-gige = <16384>;
 				phy-mode = "rgmii";
 				phy-map = <0x00000000>;
 				rgmii-device = <&RGMII0>;
@@ -300,6 +302,8 @@
 				max-frame-size = <9000>;
 				rx-fifo-size = <4096>;
 				tx-fifo-size = <2048>;
+				rx-fifo-size-gige = <16384>;
+				tx-fifo-size-gige = <16384>;
 				phy-mode = "rgmii";
 				phy-map = <0x00000000>;
 				rgmii-device = <&RGMII0>;
diff --git a/arch/powerpc/boot/dts/kmeter1.dts b/arch/powerpc/boot/dts/kmeter1.dts
index 167044f..65b8b4f 100644
--- a/arch/powerpc/boot/dts/kmeter1.dts
+++ b/arch/powerpc/boot/dts/kmeter1.dts
@@ -59,6 +59,13 @@
 		reg = <0xe0000000 0x00000200>;
 		bus-frequency = <0>;	/* Filled in by U-Boot */
 
+		pmc: power@b00 {
+			compatible = "fsl,mpc8360-pmc", "fsl,mpc8349-pmc";
+			reg = <0xb00 0x100 0xa00 0x100>;
+			interrupts = <80 0x8>;
+			interrupt-parent = <&ipic>;
+		};
+
 		i2c@3000 {
 			#address-cells = <1>;
 			#size-cells = <0>;
diff --git a/arch/powerpc/boot/dts/makalu.dts b/arch/powerpc/boot/dts/makalu.dts
index ffc246e..63d48b6 100644
--- a/arch/powerpc/boot/dts/makalu.dts
+++ b/arch/powerpc/boot/dts/makalu.dts
@@ -227,6 +227,8 @@
 				max-frame-size = <9000>;
 				rx-fifo-size = <4096>;
 				tx-fifo-size = <2048>;
+				rx-fifo-size-gige = <16384>;
+				tx-fifo-size-gige = <16384>;
 				phy-mode = "rgmii";
 				phy-map = <0x0000003f>;	/* Start at 6 */
 				rgmii-device = <&RGMII0>;
@@ -255,6 +257,8 @@
 				max-frame-size = <9000>;
 				rx-fifo-size = <4096>;
 				tx-fifo-size = <2048>;
+                                rx-fifo-size-gige = <16384>;
+                                tx-fifo-size-gige = <16384>;
 				phy-mode = "rgmii";
 				phy-map = <0x00000000>;
 				rgmii-device = <&RGMII0>;
diff --git a/arch/powerpc/boot/dts/mpc832x_mds.dts b/arch/powerpc/boot/dts/mpc832x_mds.dts
index 436c9c6..05ad8c9 100644
--- a/arch/powerpc/boot/dts/mpc832x_mds.dts
+++ b/arch/powerpc/boot/dts/mpc832x_mds.dts
@@ -79,6 +79,13 @@
 			reg = <0x200 0x100>;
 		};
 
+		pmc: power@b00 {
+			compatible = "fsl,mpc8323-pmc", "fsl,mpc8349-pmc";
+			reg = <0xb00 0x100 0xa00 0x100>;
+			interrupts = <80 0x8>;
+			interrupt-parent = <&ipic>;
+		};
+
 		i2c@3000 {
 			#address-cells = <1>;
 			#size-cells = <0>;
@@ -163,6 +170,7 @@
 			fsl,channel-fifo-len = <24>;
 			fsl,exec-units-mask = <0x4c>;
 			fsl,descriptor-types-mask = <0x0122003f>;
+			sleep = <&pmc 0x03000000>;
 		};
 
 		ipic: pic@700 {
@@ -428,5 +436,6 @@
 		       0xe0008300 0x8>;		/* config space access registers */
 		compatible = "fsl,mpc8349-pci";
 		device_type = "pci";
+		sleep = <&pmc 0x00010000>;
 	};
 };
diff --git a/arch/powerpc/boot/dts/mpc832x_rdb.dts b/arch/powerpc/boot/dts/mpc832x_rdb.dts
index 9a0952f..f4fadb23a 100644
--- a/arch/powerpc/boot/dts/mpc832x_rdb.dts
+++ b/arch/powerpc/boot/dts/mpc832x_rdb.dts
@@ -62,6 +62,13 @@
 			reg = <0x200 0x100>;
 		};
 
+		pmc: power@b00 {
+			compatible = "fsl,mpc8323-pmc", "fsl,mpc8349-pmc";
+			reg = <0xb00 0x100 0xa00 0x100>;
+			interrupts = <80 0x8>;
+			interrupt-parent = <&ipic>;
+		};
+
 		i2c@3000 {
 			#address-cells = <1>;
 			#size-cells = <0>;
@@ -141,6 +148,7 @@
 			fsl,channel-fifo-len = <24>;
 			fsl,exec-units-mask = <0x4c>;
 			fsl,descriptor-types-mask = <0x0122003f>;
+			sleep = <&pmc 0x03000000>;
 		};
 
 		ipic:pic@700 {
@@ -360,5 +368,6 @@
 		       0xe0008300 0x8>;		/* config space access registers */
 		compatible = "fsl,mpc8349-pci";
 		device_type = "pci";
+		sleep = <&pmc 0x00010000>;
 	};
 };
diff --git a/arch/powerpc/boot/dts/mpc836x_mds.dts b/arch/powerpc/boot/dts/mpc836x_mds.dts
index 39ff4c8..45cfa1c5 100644
--- a/arch/powerpc/boot/dts/mpc836x_mds.dts
+++ b/arch/powerpc/boot/dts/mpc836x_mds.dts
@@ -99,6 +99,13 @@
 			reg = <0x200 0x100>;
 		};
 
+		pmc: power@b00 {
+			compatible = "fsl,mpc8360-pmc", "fsl,mpc8349-pmc";
+			reg = <0xb00 0x100 0xa00 0x100>;
+			interrupts = <80 0x8>;
+			interrupt-parent = <&ipic>;
+		};
+
 		i2c@3000 {
 			#address-cells = <1>;
 			#size-cells = <0>;
@@ -194,6 +201,7 @@
 			fsl,channel-fifo-len = <24>;
 			fsl,exec-units-mask = <0x7e>;
 			fsl,descriptor-types-mask = <0x01010ebf>;
+			sleep = <&pmc 0x03000000>;
 		};
 
 		ipic: pic@700 {
@@ -470,5 +478,6 @@
 		       0xe0008300 0x8>;		/* config space access registers */
 		compatible = "fsl,mpc8349-pci";
 		device_type = "pci";
+		sleep = <&pmc 0x00010000>;
 	};
 };
diff --git a/arch/powerpc/boot/dts/mpc836x_rdk.dts b/arch/powerpc/boot/dts/mpc836x_rdk.dts
index 6315d6f..bdf4459 100644
--- a/arch/powerpc/boot/dts/mpc836x_rdk.dts
+++ b/arch/powerpc/boot/dts/mpc836x_rdk.dts
@@ -71,6 +71,13 @@
 			reg = <0x200 0x100>;
 		};
 
+		pmc: power@b00 {
+			compatible = "fsl,mpc8360-pmc", "fsl,mpc8349-pmc";
+			reg = <0xb00 0x100 0xa00 0x100>;
+			interrupts = <80 0x8>;
+			interrupt-parent = <&ipic>;
+		};
+
 		i2c@3000 {
 			#address-cells = <1>;
 			#size-cells = <0>;
@@ -161,6 +168,7 @@
 			fsl,channel-fifo-len = <24>;
 			fsl,exec-units-mask = <0x7e>;
 			fsl,descriptor-types-mask = <0x01010ebf>;
+			sleep = <&pmc 0x03000000>;
 		};
 
 		ipic: interrupt-controller@700 {
@@ -455,6 +463,7 @@
 				 0xa800 0 0 2 &ipic 20 8
 				 0xa800 0 0 3 &ipic 21 8
 				 0xa800 0 0 4 &ipic 18 8>;
+		sleep = <&pmc 0x00010000>;
 		/* filled by u-boot */
 		bus-range = <0 0>;
 		clock-frequency = <0>;
diff --git a/arch/powerpc/boot/dts/mpc8568mds.dts b/arch/powerpc/boot/dts/mpc8568mds.dts
index 00c2bbd..6d892ba 100644
--- a/arch/powerpc/boot/dts/mpc8568mds.dts
+++ b/arch/powerpc/boot/dts/mpc8568mds.dts
@@ -40,6 +40,8 @@
 			i-cache-line-size = <32>;	// 32 bytes
 			d-cache-size = <0x8000>;		// L1, 32K
 			i-cache-size = <0x8000>;		// L1, 32K
+			sleep = <&pmc 0x00008000	// core
+				 &pmc 0x00004000>;	// timebase
 			timebase-frequency = <0>;
 			bus-frequency = <0>;
 			clock-frequency = <0>;
@@ -94,31 +96,41 @@
 			interrupts = <16 2>;
 		};
 
-		i2c@3000 {
+		i2c-sleep-nexus {
 			#address-cells = <1>;
-			#size-cells = <0>;
-			cell-index = <0>;
-			compatible = "fsl-i2c";
-			reg = <0x3000 0x100>;
-			interrupts = <43 2>;
-			interrupt-parent = <&mpic>;
-			dfsrr;
+			#size-cells = <1>;
+			compatible = "simple-bus";
+			sleep = <&pmc 0x00000004>;
+			ranges;
 
-			rtc@68 {
-				compatible = "dallas,ds1374";
-				reg = <0x68>;
+			i2c@3000 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				cell-index = <0>;
+				compatible = "fsl-i2c";
+				reg = <0x3000 0x100>;
+				interrupts = <43 2>;
+				interrupt-parent = <&mpic>;
+				dfsrr;
+
+				rtc@68 {
+					compatible = "dallas,ds1374";
+					reg = <0x68>;
+					interrupts = <3 1>;
+					interrupt-parent = <&mpic>;
+				};
 			};
-		};
 
-		i2c@3100 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			cell-index = <1>;
-			compatible = "fsl-i2c";
-			reg = <0x3100 0x100>;
-			interrupts = <43 2>;
-			interrupt-parent = <&mpic>;
-			dfsrr;
+			i2c@3100 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				cell-index = <1>;
+				compatible = "fsl-i2c";
+				reg = <0x3100 0x100>;
+				interrupts = <43 2>;
+				interrupt-parent = <&mpic>;
+				dfsrr;
+			};
 		};
 
 		dma@21300 {
@@ -128,6 +140,8 @@
 			reg = <0x21300 0x4>;
 			ranges = <0x0 0x21100 0x200>;
 			cell-index = <0>;
+			sleep = <&pmc 0x00000400>;
+
 			dma-channel@0 {
 				compatible = "fsl,mpc8568-dma-channel",
 						"fsl,eloplus-dma-channel";
@@ -176,6 +190,7 @@
 			interrupt-parent = <&mpic>;
 			tbi-handle = <&tbi0>;
 			phy-handle = <&phy2>;
+			sleep = <&pmc 0x00000080>;
 
 			mdio@520 {
 				#address-cells = <1>;
@@ -228,6 +243,7 @@
 			interrupt-parent = <&mpic>;
 			tbi-handle = <&tbi1>;
 			phy-handle = <&phy3>;
+			sleep = <&pmc 0x00000040>;
 
 			mdio@520 {
 				#address-cells = <1>;
@@ -242,30 +258,47 @@
 			};
 		};
 
-		serial0: serial@4500 {
-			cell-index = <0>;
-			device_type = "serial";
-			compatible = "ns16550";
-			reg = <0x4500 0x100>;
-			clock-frequency = <0>;
-			interrupts = <42 2>;
-			interrupt-parent = <&mpic>;
+		duart-sleep-nexus {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "simple-bus";
+			sleep = <&pmc 0x00000002>;
+			ranges;
+
+			serial0: serial@4500 {
+				cell-index = <0>;
+				device_type = "serial";
+				compatible = "ns16550";
+				reg = <0x4500 0x100>;
+				clock-frequency = <0>;
+				interrupts = <42 2>;
+				interrupt-parent = <&mpic>;
+			};
+
+			serial1: serial@4600 {
+				cell-index = <1>;
+				device_type = "serial";
+				compatible = "ns16550";
+				reg = <0x4600 0x100>;
+				clock-frequency = <0>;
+				interrupts = <42 2>;
+				interrupt-parent = <&mpic>;
+			};
 		};
 
-		global-utilities@e0000 {	//global utilities block
-			compatible = "fsl,mpc8548-guts";
+		global-utilities@e0000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,mpc8568-guts", "fsl,mpc8548-guts";
 			reg = <0xe0000 0x1000>;
+			ranges = <0 0xe0000 0x1000>;
 			fsl,has-rstcr;
-		};
 
-		serial1: serial@4600 {
-			cell-index = <1>;
-			device_type = "serial";
-			compatible = "ns16550";
-			reg = <0x4600 0x100>;
-			clock-frequency = <0>;
-			interrupts = <42 2>;
-			interrupt-parent = <&mpic>;
+			pmc: power@70 {
+				compatible = "fsl,mpc8568-pmc",
+					     "fsl,mpc8548-pmc";
+				reg = <0x70 0x20>;
+			};
 		};
 
 		crypto@30000 {
@@ -277,6 +310,7 @@
 			fsl,channel-fifo-len = <24>;
 			fsl,exec-units-mask = <0xfe>;
 			fsl,descriptor-types-mask = <0x12b0ebf>;
+			sleep = <&pmc 0x01000000>;
 		};
 
 		mpic: pic@40000 {
@@ -376,6 +410,7 @@
 		compatible = "fsl,qe";
 		ranges = <0x0 0xe0080000 0x40000>;
 		reg = <0xe0080000 0x480>;
+		sleep = <&pmc 0x00000800>;
 		brg-frequency = <0>;
 		bus-frequency = <396000000>;
 		fsl,qe-num-riscs = <2>;
@@ -509,6 +544,7 @@
 		bus-range = <0 255>;
 		ranges = <0x2000000 0x0 0x80000000 0x80000000 0x0 0x20000000
 			  0x1000000 0x0 0x0 0xe2000000 0x0 0x800000>;
+		sleep = <&pmc 0x80000000>;
 		clock-frequency = <66666666>;
 		#interrupt-cells = <1>;
 		#size-cells = <2>;
@@ -534,6 +570,7 @@
 		bus-range = <0 255>;
 		ranges = <0x2000000 0x0 0xa0000000 0xa0000000 0x0 0x10000000
 			  0x1000000 0x0 0x0 0xe2800000 0x0 0x800000>;
+		sleep = <&pmc 0x20000000>;
 		clock-frequency = <33333333>;
 		#interrupt-cells = <1>;
 		#size-cells = <2>;
@@ -570,5 +607,7 @@
 			      55 2 /* msg2_tx   */
 			      56 2 /* msg2_rx   */>;
 		interrupt-parent = <&mpic>;
+		sleep = <&pmc 0x00080000   /* controller */
+			 &pmc 0x00040000>; /* message unit */
 	};
 };
diff --git a/arch/powerpc/boot/dts/mpc8569mds.dts b/arch/powerpc/boot/dts/mpc8569mds.dts
index 1e3ec8f..795eb36 100644
--- a/arch/powerpc/boot/dts/mpc8569mds.dts
+++ b/arch/powerpc/boot/dts/mpc8569mds.dts
@@ -41,6 +41,8 @@
 			i-cache-line-size = <32>;	// 32 bytes
 			d-cache-size = <0x8000>;		// L1, 32K
 			i-cache-size = <0x8000>;		// L1, 32K
+			sleep = <&pmc 0x00008000	// core
+				 &pmc 0x00004000>;	// timebase
 			timebase-frequency = <0>;
 			bus-frequency = <0>;
 			clock-frequency = <0>;
@@ -59,6 +61,7 @@
 		reg = <0xe0005000 0x1000>;
 		interrupts = <19 2>;
 		interrupt-parent = <&mpic>;
+		sleep = <&pmc 0x08000000>;
 
 		ranges = <0x0 0x0 0xfe000000 0x02000000
 			  0x1 0x0 0xf8000000 0x00008000
@@ -158,51 +161,69 @@
 			interrupts = <18 2>;
 		};
 
-		i2c@3000 {
+		i2c-sleep-nexus {
 			#address-cells = <1>;
-			#size-cells = <0>;
-			cell-index = <0>;
-			compatible = "fsl-i2c";
-			reg = <0x3000 0x100>;
-			interrupts = <43 2>;
-			interrupt-parent = <&mpic>;
-			dfsrr;
+			#size-cells = <1>;
+			compatible = "simple-bus";
+			sleep = <&pmc 0x00000004>;
+			ranges;
 
-			rtc@68 {
-				compatible = "dallas,ds1374";
-				reg = <0x68>;
+			i2c@3000 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				cell-index = <0>;
+				compatible = "fsl-i2c";
+				reg = <0x3000 0x100>;
+				interrupts = <43 2>;
+				interrupt-parent = <&mpic>;
+				dfsrr;
+
+				rtc@68 {
+					compatible = "dallas,ds1374";
+					reg = <0x68>;
+					interrupts = <3 1>;
+					interrupt-parent = <&mpic>;
+				};
+			};
+
+			i2c@3100 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				cell-index = <1>;
+				compatible = "fsl-i2c";
+				reg = <0x3100 0x100>;
+				interrupts = <43 2>;
+				interrupt-parent = <&mpic>;
+				dfsrr;
 			};
 		};
 
-		i2c@3100 {
+		duart-sleep-nexus {
 			#address-cells = <1>;
-			#size-cells = <0>;
-			cell-index = <1>;
-			compatible = "fsl-i2c";
-			reg = <0x3100 0x100>;
-			interrupts = <43 2>;
-			interrupt-parent = <&mpic>;
-			dfsrr;
-		};
+			#size-cells = <1>;
+			compatible = "simple-bus";
+			sleep = <&pmc 0x00000002>;
+			ranges;
 
-		serial0: serial@4500 {
-			cell-index = <0>;
-			device_type = "serial";
-			compatible = "ns16550";
-			reg = <0x4500 0x100>;
-			clock-frequency = <0>;
-			interrupts = <42 2>;
-			interrupt-parent = <&mpic>;
-		};
+			serial0: serial@4500 {
+				cell-index = <0>;
+				device_type = "serial";
+				compatible = "ns16550";
+				reg = <0x4500 0x100>;
+				clock-frequency = <0>;
+				interrupts = <42 2>;
+				interrupt-parent = <&mpic>;
+			};
 
-		serial1: serial@4600 {
-			cell-index = <1>;
-			device_type = "serial";
-			compatible = "ns16550";
-			reg = <0x4600 0x100>;
-			clock-frequency = <0>;
-			interrupts = <42 2>;
-			interrupt-parent = <&mpic>;
+			serial1: serial@4600 {
+				cell-index = <1>;
+				device_type = "serial";
+				compatible = "ns16550";
+				reg = <0x4600 0x100>;
+				clock-frequency = <0>;
+				interrupts = <42 2>;
+				interrupt-parent = <&mpic>;
+			};
 		};
 
 		L2: l2-cache-controller@20000 {
@@ -260,6 +281,7 @@
 			reg = <0x2e000 0x1000>;
 			interrupts = <72 0x8>;
 			interrupt-parent = <&mpic>;
+			sleep = <&pmc 0x00200000>;
 			/* Filled in by U-Boot */
 			clock-frequency = <0>;
 			status = "disabled";
@@ -276,6 +298,7 @@
 			fsl,channel-fifo-len = <24>;
 			fsl,exec-units-mask = <0xbfe>;
 			fsl,descriptor-types-mask = <0x3ab0ebf>;
+			sleep = <&pmc 0x01000000>;
 		};
 
 		mpic: pic@40000 {
@@ -304,9 +327,18 @@
 		};
 
 		global-utilities@e0000 {
-			compatible = "fsl,mpc8569-guts";
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,mpc8569-guts", "fsl,mpc8548-guts";
 			reg = <0xe0000 0x1000>;
+			ranges = <0 0xe0000 0x1000>;
 			fsl,has-rstcr;
+
+			pmc: power@70 {
+				compatible = "fsl,mpc8569-pmc",
+					     "fsl,mpc8548-pmc";
+				reg = <0x70 0x20>;
+			};
 		};
 
 		par_io@e0100 {
@@ -422,6 +454,7 @@
 		compatible = "fsl,qe";
 		ranges = <0x0 0xe0080000 0x40000>;
 		reg = <0xe0080000 0x480>;
+		sleep = <&pmc 0x00000800>;
 		brg-frequency = <0>;
 		bus-frequency = <0>;
 		fsl,qe-num-riscs = <4>;
@@ -684,6 +717,7 @@
 		bus-range = <0 255>;
 		ranges = <0x2000000 0x0 0xa0000000 0xa0000000 0x0 0x10000000
 			  0x1000000 0x0 0x00000000 0xe2800000 0x0 0x00800000>;
+		sleep = <&pmc 0x20000000>;
 		clock-frequency = <33333333>;
 		pcie@0 {
 			reg = <0x0 0x0 0x0 0x0 0x0>;
@@ -714,5 +748,6 @@
 			      55 2 /* msg2_tx   */
 			      56 2 /* msg2_rx   */>;
 		interrupt-parent = <&mpic>;
+		sleep = <&pmc 0x00080000>;
 	};
 };
diff --git a/arch/powerpc/boot/dts/mpc8610_hpcd.dts b/arch/powerpc/boot/dts/mpc8610_hpcd.dts
index f468d21..9535ce6 100644
--- a/arch/powerpc/boot/dts/mpc8610_hpcd.dts
+++ b/arch/powerpc/boot/dts/mpc8610_hpcd.dts
@@ -35,6 +35,8 @@
 			i-cache-line-size = <32>;
 			d-cache-size = <32768>;		// L1
 			i-cache-size = <32768>;		// L1
+			sleep = <&pmc 0x00008000 0	// core
+				 &pmc 0x00004000 0>;	// timebase
 			timebase-frequency = <0>;	// From uboot
 			bus-frequency = <0>;		// From uboot
 			clock-frequency = <0>;		// From uboot
@@ -60,6 +62,7 @@
 			  5 0 0xe8480000 0x00008000
 			  6 0 0xe84c0000 0x00008000
 			  3 0 0xe8000000 0x00000020>;
+		sleep = <&pmc 0x08000000 0>;
 
 		flash@0,0 {
 			compatible = "cfi-flash";
@@ -105,6 +108,8 @@
 			compatible = "fsl,fpga-pixis";
 			reg = <3 0 0x20>;
 			ranges = <0 3 0 0x20>;
+			interrupt-parent = <&mpic>;
+			interrupts = <8 8>;
 
 			sdcsr_pio: gpio-controller@a {
 				#gpio-cells = <2>;
@@ -163,6 +168,7 @@
 			reg = <0x3100 0x100>;
 			interrupts = <43 2>;
 			interrupt-parent = <&mpic>;
+			sleep = <&pmc 0x00000004 0>;
 			dfsrr;
 		};
 
@@ -174,6 +180,7 @@
 			clock-frequency = <0>;
 			interrupts = <42 2>;
 			interrupt-parent = <&mpic>;
+			sleep = <&pmc 0x00000002 0>;
 		};
 
 		serial1: serial@4600 {
@@ -184,6 +191,7 @@
 			clock-frequency = <0>;
 			interrupts = <42 2>;
 			interrupt-parent = <&mpic>;
+			sleep = <&pmc 0x00000008 0>;
 		};
 
 		spi@7000 {
@@ -196,6 +204,7 @@
 			interrupt-parent = <&mpic>;
 			mode = "cpu";
 			gpios = <&sdcsr_pio 7 0>;
+			sleep = <&pmc 0x00000800 0>;
 
 			mmc-slot@0 {
 				compatible = "fsl,mpc8610hpcd-mmc-slot",
@@ -213,6 +222,7 @@
 			reg = <0x2c000 100>;
 			interrupts = <72 2>;
 			interrupt-parent = <&mpic>;
+			sleep = <&pmc 0x04000000 0>;
 		};
 
 		mpic: interrupt-controller@40000 {
@@ -241,9 +251,18 @@
 		};
 
 		global-utilities@e0000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
 			compatible = "fsl,mpc8610-guts";
 			reg = <0xe0000 0x1000>;
+			ranges = <0 0xe0000 0x1000>;
 			fsl,has-rstcr;
+
+			pmc: power@70 {
+				compatible = "fsl,mpc8610-pmc",
+					     "fsl,mpc8641d-pmc";
+				reg = <0x70 0x20>;
+			};
 		};
 
 		wdt@e4000 {
@@ -262,6 +281,7 @@
 			fsl,playback-dma = <&dma00>;
 			fsl,capture-dma = <&dma01>;
 			fsl,fifo-depth = <8>;
+			sleep = <&pmc 0 0x08000000>;
 		};
 
 		ssi@16100 {
@@ -271,6 +291,7 @@
 			interrupt-parent = <&mpic>;
 			interrupts = <63 2>;
 			fsl,fifo-depth = <8>;
+			sleep = <&pmc 0 0x04000000>;
 		};
 
 		dma@21300 {
@@ -280,6 +301,7 @@
 			cell-index = <0>;
 			reg = <0x21300 0x4>; /* DMA general status register */
 			ranges = <0x0 0x21100 0x200>;
+			sleep = <&pmc 0x00000400 0>;
 
 			dma00: dma-channel@0 {
 				compatible = "fsl,mpc8610-dma-channel",
@@ -322,6 +344,7 @@
 			cell-index = <1>;
 			reg = <0xc300 0x4>; /* DMA general status register */
 			ranges = <0x0 0xc100 0x200>;
+			sleep = <&pmc 0x00000200 0>;
 
 			dma-channel@0 {
 				compatible = "fsl,mpc8610-dma-channel",
@@ -369,6 +392,7 @@
 		bus-range = <0 0>;
 		ranges = <0x02000000 0x0 0x80000000 0x80000000 0x0 0x10000000
 			  0x01000000 0x0 0x00000000 0xe1000000 0x0 0x00100000>;
+		sleep = <&pmc 0x80000000 0>;
 		clock-frequency = <33333333>;
 		interrupt-parent = <&mpic>;
 		interrupts = <24 2>;
@@ -398,6 +422,7 @@
 		bus-range = <1 3>;
 		ranges = <0x02000000 0x0 0xa0000000 0xa0000000 0x0 0x10000000
 			  0x01000000 0x0 0x00000000 0xe3000000 0x0 0x00100000>;
+		sleep = <&pmc 0x40000000 0>;
 		clock-frequency = <33333333>;
 		interrupt-parent = <&mpic>;
 		interrupts = <26 2>;
@@ -474,6 +499,7 @@
 				 0x0000 0 0 4 &mpic 7 1>;
 		interrupt-parent = <&mpic>;
 		interrupts = <25 2>;
+		sleep = <&pmc 0x20000000 0>;
 		clock-frequency = <33333333>;
 	};
 };
diff --git a/arch/powerpc/boot/dts/p1020rdb.dts b/arch/powerpc/boot/dts/p1020rdb.dts
new file mode 100644
index 0000000..df52690
--- /dev/null
+++ b/arch/powerpc/boot/dts/p1020rdb.dts
@@ -0,0 +1,477 @@
+/*
+ * P1020 RDB Device Tree Source
+ *
+ * Copyright 2009 Freescale Semiconductor Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+/dts-v1/;
+/ {
+	model = "fsl,P1020";
+	compatible = "fsl,P1020RDB";
+	#address-cells = <2>;
+	#size-cells = <2>;
+
+	aliases {
+		serial0 = &serial0;
+		serial1 = &serial1;
+		pci0 = &pci0;
+		pci1 = &pci1;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,P1020@0 {
+			device_type = "cpu";
+			reg = <0x0>;
+			next-level-cache = <&L2>;
+		};
+
+		PowerPC,P1020@1 {
+			device_type = "cpu";
+			reg = <0x1>;
+			next-level-cache = <&L2>;
+		};
+	};
+
+	memory {
+		device_type = "memory";
+	};
+
+	localbus@ffe05000 {
+		#address-cells = <2>;
+		#size-cells = <1>;
+		compatible = "fsl,p1020-elbc", "fsl,elbc", "simple-bus";
+		reg = <0 0xffe05000 0 0x1000>;
+		interrupts = <19 2>;
+		interrupt-parent = <&mpic>;
+
+		/* NOR, NAND Flashes and Vitesse 5 port L2 switch */
+		ranges = <0x0 0x0 0x0 0xef000000 0x01000000
+			  0x1 0x0 0x0 0xffa00000 0x00040000
+			  0x2 0x0 0x0 0xffb00000 0x00020000>;
+
+		nor@0,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "cfi-flash";
+			reg = <0x0 0x0 0x1000000>;
+			bank-width = <2>;
+			device-width = <1>;
+
+			partition@0 {
+				/* This location must not be altered  */
+				/* 256KB for Vitesse 7385 Switch firmware */
+				reg = <0x0 0x00040000>;
+				label = "NOR (RO) Vitesse-7385 Firmware";
+				read-only;
+			};
+
+			partition@40000 {
+				/* 256KB for DTB Image */
+				reg = <0x00040000 0x00040000>;
+				label = "NOR (RO) DTB Image";
+				read-only;
+			};
+
+			partition@80000 {
+				/* 3.5 MB for Linux Kernel Image */
+				reg = <0x00080000 0x00380000>;
+				label = "NOR (RO) Linux Kernel Image";
+				read-only;
+			};
+
+			partition@400000 {
+				/* 11MB for JFFS2 based Root file System */
+				reg = <0x00400000 0x00b00000>;
+				label = "NOR (RW) JFFS2 Root File System";
+			};
+
+			partition@f00000 {
+				/* This location must not be altered  */
+				/* 512KB for u-boot Bootloader Image */
+				/* 512KB for u-boot Environment Variables */
+				reg = <0x00f00000 0x00100000>;
+				label = "NOR (RO) U-Boot Image";
+				read-only;
+			};
+		};
+
+		nand@1,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,p1020-fcm-nand",
+				     "fsl,elbc-fcm-nand";
+			reg = <0x1 0x0 0x40000>;
+
+			partition@0 {
+				/* This location must not be altered  */
+				/* 1MB for u-boot Bootloader Image */
+				reg = <0x0 0x00100000>;
+				label = "NAND (RO) U-Boot Image";
+				read-only;
+			};
+
+			partition@100000 {
+				/* 1MB for DTB Image */
+				reg = <0x00100000 0x00100000>;
+				label = "NAND (RO) DTB Image";
+				read-only;
+			};
+
+			partition@200000 {
+				/* 4MB for Linux Kernel Image */
+				reg = <0x00200000 0x00400000>;
+				label = "NAND (RO) Linux Kernel Image";
+				read-only;
+			};
+
+			partition@600000 {
+				/* 4MB for Compressed Root file System Image */
+				reg = <0x00600000 0x00400000>;
+				label = "NAND (RO) Compressed RFS Image";
+				read-only;
+			};
+
+			partition@a00000 {
+				/* 7MB for JFFS2 based Root file System */
+				reg = <0x00a00000 0x00700000>;
+				label = "NAND (RW) JFFS2 Root File System";
+			};
+
+			partition@1100000 {
+				/* 15MB for JFFS2 based Root file System */
+				reg = <0x01100000 0x00f00000>;
+				label = "NAND (RW) Writable User area";
+			};
+		};
+
+		L2switch@2,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "vitesse-7385";
+			reg = <0x2 0x0 0x20000>;
+		};
+
+	};
+
+	soc@ffe00000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		device_type = "soc";
+		compatible = "fsl,p1020-immr", "simple-bus";
+		ranges = <0x0  0x0 0xffe00000 0x100000>;
+		bus-frequency = <0>;		// Filled out by uboot.
+
+		ecm-law@0 {
+			compatible = "fsl,ecm-law";
+			reg = <0x0 0x1000>;
+			fsl,num-laws = <12>;
+		};
+
+		ecm@1000 {
+			compatible = "fsl,p1020-ecm", "fsl,ecm";
+			reg = <0x1000 0x1000>;
+			interrupts = <16 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		memory-controller@2000 {
+			compatible = "fsl,p1020-memory-controller";
+			reg = <0x2000 0x1000>;
+			interrupt-parent = <&mpic>;
+			interrupts = <16 2>;
+		};
+
+		i2c@3000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <0>;
+			compatible = "fsl-i2c";
+			reg = <0x3000 0x100>;
+			interrupts = <43 2>;
+			interrupt-parent = <&mpic>;
+			dfsrr;
+			rtc@68 {
+				compatible = "dallas,ds1339";
+				reg = <0x68>;
+			};
+		};
+
+		i2c@3100 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <1>;
+			compatible = "fsl-i2c";
+			reg = <0x3100 0x100>;
+			interrupts = <43 2>;
+			interrupt-parent = <&mpic>;
+			dfsrr;
+		};
+
+		serial0: serial@4500 {
+			cell-index = <0>;
+			device_type = "serial";
+			compatible = "ns16550";
+			reg = <0x4500 0x100>;
+			clock-frequency = <0>;
+			interrupts = <42 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		serial1: serial@4600 {
+			cell-index = <1>;
+			device_type = "serial";
+			compatible = "ns16550";
+			reg = <0x4600 0x100>;
+			clock-frequency = <0>;
+			interrupts = <42 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		spi@7000 {
+			cell-index = <0>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,espi";
+			reg = <0x7000 0x1000>;
+			interrupts = <59 0x2>;
+			interrupt-parent = <&mpic>;
+			mode = "cpu";
+
+			fsl_m25p80@0 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				compatible = "fsl,espi-flash";
+				reg = <0>;
+				linux,modalias = "fsl_m25p80";
+				modal = "s25sl128b";
+				spi-max-frequency = <50000000>;
+				mode = <0>;
+
+				partition@0 {
+					/* 512KB for u-boot Bootloader Image */
+					reg = <0x0 0x00080000>;
+					label = "SPI (RO) U-Boot Image";
+					read-only;
+				};
+
+				partition@80000 {
+					/* 512KB for DTB Image */
+					reg = <0x00080000 0x00080000>;
+					label = "SPI (RO) DTB Image";
+					read-only;
+				};
+
+				partition@100000 {
+					/* 4MB for Linux Kernel Image */
+					reg = <0x00100000 0x00400000>;
+					label = "SPI (RO) Linux Kernel Image";
+					read-only;
+				};
+
+				partition@500000 {
+					/* 4MB for Compressed RFS Image */
+					reg = <0x00500000 0x00400000>;
+					label = "SPI (RO) Compressed RFS Image";
+					read-only;
+				};
+
+				partition@900000 {
+					/* 7MB for JFFS2 based RFS */
+					reg = <0x00900000 0x00700000>;
+					label = "SPI (RW) JFFS2 RFS";
+				};
+			};
+		};
+
+		gpio: gpio-controller@f000 {
+			#gpio-cells = <2>;
+			compatible = "fsl,mpc8572-gpio";
+			reg = <0xf000 0x100>;
+			interrupts = <47 0x2>;
+			interrupt-parent = <&mpic>;
+			gpio-controller;
+		};
+
+		L2: l2-cache-controller@20000 {
+			compatible = "fsl,p1020-l2-cache-controller";
+			reg = <0x20000 0x1000>;
+			cache-line-size = <32>;	// 32 bytes
+			cache-size = <0x40000>; // L2,256K
+			interrupt-parent = <&mpic>;
+			interrupts = <16 2>;
+		};
+
+		dma@21300 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,eloplus-dma";
+			reg = <0x21300 0x4>;
+			ranges = <0x0 0x21100 0x200>;
+			cell-index = <0>;
+			dma-channel@0 {
+				compatible = "fsl,eloplus-dma-channel";
+				reg = <0x0 0x80>;
+				cell-index = <0>;
+				interrupt-parent = <&mpic>;
+				interrupts = <20 2>;
+			};
+			dma-channel@80 {
+				compatible = "fsl,eloplus-dma-channel";
+				reg = <0x80 0x80>;
+				cell-index = <1>;
+				interrupt-parent = <&mpic>;
+				interrupts = <21 2>;
+			};
+			dma-channel@100 {
+				compatible = "fsl,eloplus-dma-channel";
+				reg = <0x100 0x80>;
+				cell-index = <2>;
+				interrupt-parent = <&mpic>;
+				interrupts = <22 2>;
+			};
+			dma-channel@180 {
+				compatible = "fsl,eloplus-dma-channel";
+				reg = <0x180 0x80>;
+				cell-index = <3>;
+				interrupt-parent = <&mpic>;
+				interrupts = <23 2>;
+			};
+		};
+
+		usb@22000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl-usb2-dr";
+			reg = <0x22000 0x1000>;
+			interrupt-parent = <&mpic>;
+			interrupts = <28 0x2>;
+			phy_type = "ulpi";
+		};
+
+		usb@23000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl-usb2-dr";
+			reg = <0x23000 0x1000>;
+			interrupt-parent = <&mpic>;
+			interrupts = <46 0x2>;
+			phy_type = "ulpi";
+		};
+
+		sdhci@2e000 {
+			compatible = "fsl,p1020-esdhc", "fsl,esdhc";
+			reg = <0x2e000 0x1000>;
+			interrupts = <72 0x2>;
+			interrupt-parent = <&mpic>;
+			/* Filled in by U-Boot */
+			clock-frequency = <0>;
+		};
+
+		crypto@30000 {
+			compatible = "fsl,sec3.1", "fsl,sec3.0", "fsl,sec2.4",
+				     "fsl,sec2.2", "fsl,sec2.1", "fsl,sec2.0";
+			reg = <0x30000 0x10000>;
+			interrupts = <45 2 58 2>;
+			interrupt-parent = <&mpic>;
+			fsl,num-channels = <4>;
+			fsl,channel-fifo-len = <24>;
+			fsl,exec-units-mask = <0xbfe>;
+			fsl,descriptor-types-mask = <0x3ab0ebf>;
+		};
+
+		mpic: pic@40000 {
+			interrupt-controller;
+			#address-cells = <0>;
+			#interrupt-cells = <2>;
+			reg = <0x40000 0x40000>;
+			compatible = "chrp,open-pic";
+			device_type = "open-pic";
+		};
+
+		msi@41600 {
+			compatible = "fsl,p1020-msi", "fsl,mpic-msi";
+			reg = <0x41600 0x80>;
+			msi-available-ranges = <0 0x100>;
+			interrupts = <
+				0xe0 0
+				0xe1 0
+				0xe2 0
+				0xe3 0
+				0xe4 0
+				0xe5 0
+				0xe6 0
+				0xe7 0>;
+			interrupt-parent = <&mpic>;
+		};
+
+		global-utilities@e0000 {	//global utilities block
+			compatible = "fsl,p1020-guts";
+			reg = <0xe0000 0x1000>;
+			fsl,has-rstcr;
+		};
+	};
+
+	pci0: pcie@ffe09000 {
+		compatible = "fsl,mpc8548-pcie";
+		device_type = "pci";
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		reg = <0 0xffe09000 0 0x1000>;
+		bus-range = <0 255>;
+		ranges = <0x2000000 0x0 0xa0000000 0 0xa0000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0 0xffc30000 0x0 0x10000>;
+		clock-frequency = <33333333>;
+		interrupt-parent = <&mpic>;
+		interrupts = <16 2>;
+		pcie@0 {
+			reg = <0x0 0x0 0x0 0x0 0x0>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			device_type = "pci";
+			ranges = <0x2000000 0x0 0xa0000000
+				  0x2000000 0x0 0xa0000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+
+	pci1: pcie@ffe0a000 {
+		compatible = "fsl,mpc8548-pcie";
+		device_type = "pci";
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		reg = <0 0xffe0a000 0 0x1000>;
+		bus-range = <0 255>;
+		ranges = <0x2000000 0x0 0xc0000000 0 0xc0000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0 0xffc20000 0x0 0x10000>;
+		clock-frequency = <33333333>;
+		interrupt-parent = <&mpic>;
+		interrupts = <16 2>;
+		pcie@0 {
+			reg = <0x0 0x0 0x0 0x0 0x0>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			device_type = "pci";
+			ranges = <0x2000000 0x0 0xc0000000
+				  0x2000000 0x0 0xc0000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/p2020rdb_camp_core0.dts b/arch/powerpc/boot/dts/p2020rdb_camp_core0.dts
new file mode 100644
index 0000000..0fe93d0
--- /dev/null
+++ b/arch/powerpc/boot/dts/p2020rdb_camp_core0.dts
@@ -0,0 +1,363 @@
+/*
+ * P2020 RDB  Core0 Device Tree Source in CAMP mode.
+ *
+ * In CAMP mode, each core needs to have its own dts. Only mpic and L2 cache
+ * can be shared, all the other devices must be assigned to one core only.
+ * This dts file allows core0 to have memory, l2, i2c, spi, gpio, dma1, usb,
+ * eth1, eth2, sdhc, crypto, global-util, pci0.
+ *
+ * Copyright 2009 Freescale Semiconductor Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+/dts-v1/;
+/ {
+	model = "fsl,P2020";
+	compatible = "fsl,P2020RDB", "fsl,MPC85XXRDB-CAMP";
+	#address-cells = <2>;
+	#size-cells = <2>;
+
+	aliases {
+		ethernet1 = &enet1;
+		ethernet2 = &enet2;
+		serial0 = &serial0;
+		pci0 = &pci0;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,P2020@0 {
+			device_type = "cpu";
+			reg = <0x0>;
+			next-level-cache = <&L2>;
+		};
+	};
+
+	memory {
+		device_type = "memory";
+	};
+
+	soc@ffe00000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		device_type = "soc";
+		compatible = "fsl,p2020-immr", "simple-bus";
+		ranges = <0x0  0x0 0xffe00000 0x100000>;
+		bus-frequency = <0>;		// Filled out by uboot.
+
+		ecm-law@0 {
+			compatible = "fsl,ecm-law";
+			reg = <0x0 0x1000>;
+			fsl,num-laws = <12>;
+		};
+
+		ecm@1000 {
+			compatible = "fsl,p2020-ecm", "fsl,ecm";
+			reg = <0x1000 0x1000>;
+			interrupts = <17 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		memory-controller@2000 {
+			compatible = "fsl,p2020-memory-controller";
+			reg = <0x2000 0x1000>;
+			interrupt-parent = <&mpic>;
+			interrupts = <18 2>;
+		};
+
+		i2c@3000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <0>;
+			compatible = "fsl-i2c";
+			reg = <0x3000 0x100>;
+			interrupts = <43 2>;
+			interrupt-parent = <&mpic>;
+			dfsrr;
+			rtc@68 {
+				compatible = "dallas,ds1339";
+				reg = <0x68>;
+			};
+		};
+
+		i2c@3100 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <1>;
+			compatible = "fsl-i2c";
+			reg = <0x3100 0x100>;
+			interrupts = <43 2>;
+			interrupt-parent = <&mpic>;
+			dfsrr;
+		};
+
+		serial0: serial@4500 {
+			cell-index = <0>;
+			device_type = "serial";
+			compatible = "ns16550";
+			reg = <0x4500 0x100>;
+			clock-frequency = <0>;
+		};
+
+		spi@7000 {
+			cell-index = <0>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,espi";
+			reg = <0x7000 0x1000>;
+			interrupts = <59 0x2>;
+			interrupt-parent = <&mpic>;
+			mode = "cpu";
+
+			fsl_m25p80@0 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				compatible = "fsl,espi-flash";
+				reg = <0>;
+				linux,modalias = "fsl_m25p80";
+				modal = "s25sl128b";
+				spi-max-frequency = <50000000>;
+				mode = <0>;
+
+				partition@0 {
+					/* 512KB for u-boot Bootloader Image */
+					reg = <0x0 0x00080000>;
+					label = "SPI (RO) U-Boot Image";
+					read-only;
+				};
+
+				partition@80000 {
+					/* 512KB for DTB Image */
+					reg = <0x00080000 0x00080000>;
+					label = "SPI (RO) DTB Image";
+					read-only;
+				};
+
+				partition@100000 {
+					/* 4MB for Linux Kernel Image */
+					reg = <0x00100000 0x00400000>;
+					label = "SPI (RO) Linux Kernel Image";
+					read-only;
+				};
+
+				partition@500000 {
+					/* 4MB for Compressed RFS Image */
+					reg = <0x00500000 0x00400000>;
+					label = "SPI (RO) Compressed RFS Image";
+					read-only;
+				};
+
+				partition@900000 {
+					/* 7MB for JFFS2 based RFS */
+					reg = <0x00900000 0x00700000>;
+					label = "SPI (RW) JFFS2 RFS";
+				};
+			};
+		};
+
+		gpio: gpio-controller@f000 {
+			#gpio-cells = <2>;
+			compatible = "fsl,mpc8572-gpio";
+			reg = <0xf000 0x100>;
+			interrupts = <47 0x2>;
+			interrupt-parent = <&mpic>;
+			gpio-controller;
+		};
+
+		L2: l2-cache-controller@20000 {
+			compatible = "fsl,p2020-l2-cache-controller";
+			reg = <0x20000 0x1000>;
+			cache-line-size = <32>;	// 32 bytes
+			cache-size = <0x80000>; // L2,512K
+			interrupt-parent = <&mpic>;
+			interrupts = <16 2>;
+		};
+
+		dma@21300 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,eloplus-dma";
+			reg = <0x21300 0x4>;
+			ranges = <0x0 0x21100 0x200>;
+			cell-index = <0>;
+			dma-channel@0 {
+				compatible = "fsl,eloplus-dma-channel";
+				reg = <0x0 0x80>;
+				cell-index = <0>;
+				interrupt-parent = <&mpic>;
+				interrupts = <20 2>;
+			};
+			dma-channel@80 {
+				compatible = "fsl,eloplus-dma-channel";
+				reg = <0x80 0x80>;
+				cell-index = <1>;
+				interrupt-parent = <&mpic>;
+				interrupts = <21 2>;
+			};
+			dma-channel@100 {
+				compatible = "fsl,eloplus-dma-channel";
+				reg = <0x100 0x80>;
+				cell-index = <2>;
+				interrupt-parent = <&mpic>;
+				interrupts = <22 2>;
+			};
+			dma-channel@180 {
+				compatible = "fsl,eloplus-dma-channel";
+				reg = <0x180 0x80>;
+				cell-index = <3>;
+				interrupt-parent = <&mpic>;
+				interrupts = <23 2>;
+			};
+		};
+
+		usb@22000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl-usb2-dr";
+			reg = <0x22000 0x1000>;
+			interrupt-parent = <&mpic>;
+			interrupts = <28 0x2>;
+			phy_type = "ulpi";
+		};
+
+		mdio@24520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,gianfar-mdio";
+			reg = <0x24520 0x20>;
+
+			phy0: ethernet-phy@0 {
+				interrupt-parent = <&mpic>;
+				interrupts = <3 1>;
+				reg = <0x0>;
+			};
+			phy1: ethernet-phy@1 {
+				interrupt-parent = <&mpic>;
+				interrupts = <3 1>;
+				reg = <0x1>;
+			};
+		};
+
+		mdio@25520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,gianfar-tbi";
+			reg = <0x26520 0x20>;
+
+			tbi0: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		enet1: ethernet@25000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <1>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x25000 0x1000>;
+			ranges = <0x0 0x25000 0x1000>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <35 2 36 2 40 2>;
+			interrupt-parent = <&mpic>;
+			tbi-handle = <&tbi0>;
+			phy-handle = <&phy0>;
+			phy-connection-type = "sgmii";
+
+		};
+
+		enet2: ethernet@26000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <2>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x26000 0x1000>;
+			ranges = <0x0 0x26000 0x1000>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <31 2 32 2 33 2>;
+			interrupt-parent = <&mpic>;
+			phy-handle = <&phy1>;
+			phy-connection-type = "rgmii-id";
+		};
+
+		sdhci@2e000 {
+			compatible = "fsl,p2020-esdhc", "fsl,esdhc";
+			reg = <0x2e000 0x1000>;
+			interrupts = <72 0x2>;
+			interrupt-parent = <&mpic>;
+			/* Filled in by U-Boot */
+			clock-frequency = <0>;
+		};
+
+		crypto@30000 {
+			compatible = "fsl,sec3.1", "fsl,sec3.0", "fsl,sec2.4",
+				     "fsl,sec2.2", "fsl,sec2.1", "fsl,sec2.0";
+			reg = <0x30000 0x10000>;
+			interrupts = <45 2 58 2>;
+			interrupt-parent = <&mpic>;
+			fsl,num-channels = <4>;
+			fsl,channel-fifo-len = <24>;
+			fsl,exec-units-mask = <0xbfe>;
+			fsl,descriptor-types-mask = <0x3ab0ebf>;
+		};
+
+		mpic: pic@40000 {
+			interrupt-controller;
+			#address-cells = <0>;
+			#interrupt-cells = <2>;
+			reg = <0x40000 0x40000>;
+			compatible = "chrp,open-pic";
+			device_type = "open-pic";
+			protected-sources = <
+			42 76 77 78 79 /* serial1 , dma2 */
+			29 30 34 26 /* enet0, pci1 */
+			0xe0 0xe1 0xe2 0xe3 /* msi */
+			0xe4 0xe5 0xe6 0xe7
+			>;
+		};
+
+		global-utilities@e0000 {
+			compatible = "fsl,p2020-guts";
+			reg = <0xe0000 0x1000>;
+			fsl,has-rstcr;
+		};
+	};
+
+	pci0: pcie@ffe09000 {
+		compatible = "fsl,mpc8548-pcie";
+		device_type = "pci";
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		reg = <0 0xffe09000 0 0x1000>;
+		bus-range = <0 255>;
+		ranges = <0x2000000 0x0 0xa0000000 0 0xa0000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0 0xffc30000 0x0 0x10000>;
+		clock-frequency = <33333333>;
+		interrupt-parent = <&mpic>;
+		interrupts = <25 2>;
+		pcie@0 {
+			reg = <0x0 0x0 0x0 0x0 0x0>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			device_type = "pci";
+			ranges = <0x2000000 0x0 0xa0000000
+				  0x2000000 0x0 0xa0000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/p2020rdb_camp_core1.dts b/arch/powerpc/boot/dts/p2020rdb_camp_core1.dts
new file mode 100644
index 0000000..e95a512
--- /dev/null
+++ b/arch/powerpc/boot/dts/p2020rdb_camp_core1.dts
@@ -0,0 +1,184 @@
+/*
+ * P2020 RDB Core1 Device Tree Source in CAMP mode.
+ *
+ * In CAMP mode, each core needs to have its own dts. Only mpic and L2 cache
+ * can be shared, all the other devices must be assigned to one core only.
+ * This dts allows core1 to have l2, dma2, eth0, pci1, msi.
+ *
+ * Please note to add "-b 1" for core1's dts compiling.
+ *
+ * Copyright 2009 Freescale Semiconductor Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+/dts-v1/;
+/ {
+	model = "fsl,P2020";
+	compatible = "fsl,P2020RDB", "fsl,MPC85XXRDB-CAMP";
+	#address-cells = <2>;
+	#size-cells = <2>;
+
+	aliases {
+		ethernet0 = &enet0;
+		serial0 = &serial0;
+		pci1 = &pci1;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,P2020@1 {
+			device_type = "cpu";
+			reg = <0x1>;
+			next-level-cache = <&L2>;
+		};
+	};
+
+	memory {
+		device_type = "memory";
+	};
+
+	soc@ffe00000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		device_type = "soc";
+		compatible = "fsl,p2020-immr", "simple-bus";
+		ranges = <0x0  0x0 0xffe00000 0x100000>;
+		bus-frequency = <0>;		// Filled out by uboot.
+
+		serial0: serial@4600 {
+			cell-index = <1>;
+			device_type = "serial";
+			compatible = "ns16550";
+			reg = <0x4600 0x100>;
+			clock-frequency = <0>;
+		};
+
+		dma@c300 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,eloplus-dma";
+			reg = <0xc300 0x4>;
+			ranges = <0x0 0xc100 0x200>;
+			cell-index = <1>;
+			dma-channel@0 {
+				compatible = "fsl,eloplus-dma-channel";
+				reg = <0x0 0x80>;
+				cell-index = <0>;
+				interrupt-parent = <&mpic>;
+				interrupts = <76 2>;
+			};
+			dma-channel@80 {
+				compatible = "fsl,eloplus-dma-channel";
+				reg = <0x80 0x80>;
+				cell-index = <1>;
+				interrupt-parent = <&mpic>;
+				interrupts = <77 2>;
+			};
+			dma-channel@100 {
+				compatible = "fsl,eloplus-dma-channel";
+				reg = <0x100 0x80>;
+				cell-index = <2>;
+				interrupt-parent = <&mpic>;
+				interrupts = <78 2>;
+			};
+			dma-channel@180 {
+				compatible = "fsl,eloplus-dma-channel";
+				reg = <0x180 0x80>;
+				cell-index = <3>;
+				interrupt-parent = <&mpic>;
+				interrupts = <79 2>;
+			};
+		};
+
+		L2: l2-cache-controller@20000 {
+			compatible = "fsl,p2020-l2-cache-controller";
+			reg = <0x20000 0x1000>;
+			cache-line-size = <32>;	// 32 bytes
+			cache-size = <0x80000>; // L2,512K
+			interrupt-parent = <&mpic>;
+		};
+
+
+		enet0: ethernet@24000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <0>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x24000 0x1000>;
+			ranges = <0x0 0x24000 0x1000>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <29 2 30 2 34 2>;
+			interrupt-parent = <&mpic>;
+			fixed-link = <1 1 1000 0 0>;
+			phy-connection-type = "rgmii-id";
+
+		};
+
+		mpic: pic@40000 {
+			interrupt-controller;
+			#address-cells = <0>;
+			#interrupt-cells = <2>;
+			reg = <0x40000 0x40000>;
+			compatible = "chrp,open-pic";
+			device_type = "open-pic";
+			protected-sources = <
+			17 18 43 42 59 47 /*ecm, mem, i2c, serial0, spi,gpio */
+			16 20 21 22 23 28 	/* L2, dma1, USB */
+			03 35 36 40 31 32 33 	/* mdio, enet1, enet2 */
+			72 45 58 25 		/* sdhci, crypto , pci */
+			>;
+		};
+
+		msi@41600 {
+			compatible = "fsl,p2020-msi", "fsl,mpic-msi";
+			reg = <0x41600 0x80>;
+			msi-available-ranges = <0 0x100>;
+			interrupts = <
+				0xe0 0
+				0xe1 0
+				0xe2 0
+				0xe3 0
+				0xe4 0
+				0xe5 0
+				0xe6 0
+				0xe7 0>;
+			interrupt-parent = <&mpic>;
+		};
+	};
+
+	pci1: pcie@ffe0a000 {
+		compatible = "fsl,mpc8548-pcie";
+		device_type = "pci";
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		reg = <0 0xffe0a000 0 0x1000>;
+		bus-range = <0 255>;
+		ranges = <0x2000000 0x0 0xc0000000 0 0xc0000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0 0xffc20000 0x0 0x10000>;
+		clock-frequency = <33333333>;
+		interrupt-parent = <&mpic>;
+		interrupts = <26 2>;
+		pcie@0 {
+			reg = <0x0 0x0 0x0 0x0 0x0>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			device_type = "pci";
+			ranges = <0x2000000 0x0 0xc0000000
+				  0x2000000 0x0 0xc0000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/p4080ds.dts b/arch/powerpc/boot/dts/p4080ds.dts
new file mode 100644
index 0000000..6b29eab
--- /dev/null
+++ b/arch/powerpc/boot/dts/p4080ds.dts
@@ -0,0 +1,554 @@
+/*
+ * P4080DS Device Tree Source
+ *
+ * Copyright 2009 Freescale Semiconductor Inc.
+ *
+ * This program is free software; you can redistribute	it and/or modify it
+ * under  the terms of	the GNU General	 Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+/dts-v1/;
+
+/ {
+	model = "fsl,P4080DS";
+	compatible = "fsl,P4080DS";
+	#address-cells = <2>;
+	#size-cells = <2>;
+
+	aliases {
+		ccsr = &soc;
+
+		serial0 = &serial0;
+		serial1 = &serial1;
+		serial2 = &serial2;
+		serial3 = &serial3;
+		pci0 = &pci0;
+		pci1 = &pci1;
+		pci2 = &pci2;
+		usb0 = &usb0;
+		usb1 = &usb1;
+		dma0 = &dma0;
+		dma1 = &dma1;
+		sdhc = &sdhc;
+
+		rio0 = &rapidio0;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu0: PowerPC,4080@0 {
+			device_type = "cpu";
+			reg = <0>;
+			next-level-cache = <&L2_0>;
+			L2_0: l2-cache {
+			};
+		};
+		cpu1: PowerPC,4080@1 {
+			device_type = "cpu";
+			reg = <1>;
+			next-level-cache = <&L2_1>;
+			L2_1: l2-cache {
+			};
+		};
+		cpu2: PowerPC,4080@2 {
+			device_type = "cpu";
+			reg = <2>;
+			next-level-cache = <&L2_2>;
+			L2_2: l2-cache {
+			};
+		};
+		cpu3: PowerPC,4080@3 {
+			device_type = "cpu";
+			reg = <3>;
+			next-level-cache = <&L2_3>;
+			L2_3: l2-cache {
+			};
+		};
+		cpu4: PowerPC,4080@4 {
+			device_type = "cpu";
+			reg = <4>;
+			next-level-cache = <&L2_4>;
+			L2_4: l2-cache {
+			};
+		};
+		cpu5: PowerPC,4080@5 {
+			device_type = "cpu";
+			reg = <5>;
+			next-level-cache = <&L2_5>;
+			L2_5: l2-cache {
+			};
+		};
+		cpu6: PowerPC,4080@6 {
+			device_type = "cpu";
+			reg = <6>;
+			next-level-cache = <&L2_6>;
+			L2_6: l2-cache {
+			};
+		};
+		cpu7: PowerPC,4080@7 {
+			device_type = "cpu";
+			reg = <7>;
+			next-level-cache = <&L2_7>;
+			L2_7: l2-cache {
+			};
+		};
+	};
+
+	memory {
+		device_type = "memory";
+	};
+
+	soc: soc@ffe000000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		device_type = "soc";
+		compatible = "simple-bus";
+		ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
+		reg = <0xf 0xfe000000 0 0x00001000>;
+
+		corenet-law@0 {
+			compatible = "fsl,corenet-law";
+			reg = <0x0 0x1000>;
+			fsl,num-laws = <32>;
+		};
+
+		memory-controller@8000 {
+			compatible = "fsl,p4080-memory-controller";
+			reg = <0x8000 0x1000>;
+			interrupt-parent = <&mpic>;
+			interrupts = <0x12 2>;
+		};
+
+		memory-controller@9000 {
+			compatible = "fsl,p4080-memory-controller";
+			reg = <0x9000 0x1000>;
+			interrupt-parent = <&mpic>;
+			interrupts = <0x12 2>;
+		};
+
+		corenet-cf@18000 {
+			compatible = "fsl,corenet-cf";
+			reg = <0x18000 0x1000>;
+			fsl,ccf-num-csdids = <32>;
+			fsl,ccf-num-snoopids = <32>;
+		};
+
+		iommu@20000 {
+			compatible = "fsl,p4080-pamu";
+			reg = <0x20000 0x10000>;
+			interrupts = <24 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		mpic: pic@40000 {
+			interrupt-controller;
+			#address-cells = <0>;
+			#interrupt-cells = <2>;
+			reg = <0x40000 0x40000>;
+			compatible = "chrp,open-pic";
+			device_type = "open-pic";
+		};
+
+		dma0: dma@100300 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,p4080-dma", "fsl,eloplus-dma";
+			reg = <0x100300 0x4>;
+			ranges = <0x0 0x100100 0x200>;
+			cell-index = <0>;
+			dma-channel@0 {
+				compatible = "fsl,p4080-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x0 0x80>;
+				cell-index = <0>;
+				interrupt-parent = <&mpic>;
+				interrupts = <28 2>;
+			};
+			dma-channel@80 {
+				compatible = "fsl,p4080-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x80 0x80>;
+				cell-index = <1>;
+				interrupt-parent = <&mpic>;
+				interrupts = <29 2>;
+			};
+			dma-channel@100 {
+				compatible = "fsl,p4080-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x100 0x80>;
+				cell-index = <2>;
+				interrupt-parent = <&mpic>;
+				interrupts = <30 2>;
+			};
+			dma-channel@180 {
+				compatible = "fsl,p4080-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x180 0x80>;
+				cell-index = <3>;
+				interrupt-parent = <&mpic>;
+				interrupts = <31 2>;
+			};
+		};
+
+		dma1: dma@101300 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,p4080-dma", "fsl,eloplus-dma";
+			reg = <0x101300 0x4>;
+			ranges = <0x0 0x101100 0x200>;
+			cell-index = <1>;
+			dma-channel@0 {
+				compatible = "fsl,p4080-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x0 0x80>;
+				cell-index = <0>;
+				interrupt-parent = <&mpic>;
+				interrupts = <32 2>;
+			};
+			dma-channel@80 {
+				compatible = "fsl,p4080-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x80 0x80>;
+				cell-index = <1>;
+				interrupt-parent = <&mpic>;
+				interrupts = <33 2>;
+			};
+			dma-channel@100 {
+				compatible = "fsl,p4080-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x100 0x80>;
+				cell-index = <2>;
+				interrupt-parent = <&mpic>;
+				interrupts = <34 2>;
+			};
+			dma-channel@180 {
+				compatible = "fsl,p4080-dma-channel",
+						"fsl,eloplus-dma-channel";
+				reg = <0x180 0x80>;
+				cell-index = <3>;
+				interrupt-parent = <&mpic>;
+				interrupts = <35 2>;
+			};
+		};
+
+		spi@110000 {
+			cell-index = <0>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,espi";
+			reg = <0x110000 0x1000>;
+			interrupts = <53 0x2>;
+			interrupt-parent = <&mpic>;
+			espi,num-ss-bits = <4>;
+			mode = "cpu";
+
+			fsl_m25p80@0 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				compatible = "fsl,espi-flash";
+				reg = <0>;
+				linux,modalias = "fsl_m25p80";
+				spi-max-frequency = <40000000>; /* input clock */
+				partition@u-boot {
+					label = "u-boot";
+					reg = <0x00000000 0x00100000>;
+					read-only;
+				};
+				partition@kernel {
+					label = "kernel";
+					reg = <0x00100000 0x00500000>;
+					read-only;
+				};
+				partition@dtb {
+					label = "dtb";
+					reg = <0x00600000 0x00100000>;
+					read-only;
+				};
+				partition@fs {
+					label = "file system";
+					reg = <0x00700000 0x00900000>;
+				};
+			};
+		};
+
+		sdhc: sdhc@114000 {
+			compatible = "fsl,p4080-esdhc", "fsl,esdhc";
+			reg = <0x114000 0x1000>;
+			interrupts = <48 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		i2c@118000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <0>;
+			compatible = "fsl-i2c";
+			reg = <0x118000 0x100>;
+			interrupts = <38 2>;
+			interrupt-parent = <&mpic>;
+			dfsrr;
+		};
+
+		i2c@118100 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <1>;
+			compatible = "fsl-i2c";
+			reg = <0x118100 0x100>;
+			interrupts = <38 2>;
+			interrupt-parent = <&mpic>;
+			dfsrr;
+			eeprom@51 {
+				compatible = "at24,24c256";
+				reg = <0x51>;
+			};
+			eeprom@52 {
+				compatible = "at24,24c256";
+				reg = <0x52>;
+			};
+			rtc@68 {
+				compatible = "dallas,ds3232";
+				reg = <0x68>;
+				interrupts = <0 0x1>;
+				interrupt-parent = <&mpic>;
+			};
+		};
+
+		i2c@119000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <2>;
+			compatible = "fsl-i2c";
+			reg = <0x119000 0x100>;
+			interrupts = <39 2>;
+			interrupt-parent = <&mpic>;
+			dfsrr;
+		};
+
+		i2c@119100 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <3>;
+			compatible = "fsl-i2c";
+			reg = <0x119100 0x100>;
+			interrupts = <39 2>;
+			interrupt-parent = <&mpic>;
+			dfsrr;
+		};
+
+		serial0: serial@11c500 {
+			cell-index = <0>;
+			device_type = "serial";
+			compatible = "ns16550";
+			reg = <0x11c500 0x100>;
+			clock-frequency = <0>;
+			interrupts = <36 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		serial1: serial@11c600 {
+			cell-index = <1>;
+			device_type = "serial";
+			compatible = "ns16550";
+			reg = <0x11c600 0x100>;
+			clock-frequency = <0>;
+			interrupts = <36 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		serial2: serial@11d500 {
+			cell-index = <2>;
+			device_type = "serial";
+			compatible = "ns16550";
+			reg = <0x11d500 0x100>;
+			clock-frequency = <0>;
+			interrupts = <37 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		serial3: serial@11d600 {
+			cell-index = <3>;
+			device_type = "serial";
+			compatible = "ns16550";
+			reg = <0x11d600 0x100>;
+			clock-frequency = <0>;
+			interrupts = <37 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		gpio0: gpio@130000 {
+			compatible = "fsl,p4080-gpio";
+			reg = <0x130000 0x1000>;
+			interrupts = <55 2>;
+			interrupt-parent = <&mpic>;
+			#gpio-cells = <2>;
+			gpio-controller;
+		};
+
+		usb0: usb@210000 {
+			compatible = "fsl,p4080-usb2-mph",
+					"fsl,mpc85xx-usb2-mph", "fsl-usb2-mph";
+			reg = <0x210000 0x1000>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			interrupt-parent = <&mpic>;
+			interrupts = <44 0x2>;
+			phy_type = "ulpi";
+		};
+
+		usb1: usb@211000 {
+			compatible = "fsl,p4080-usb2-dr",
+					"fsl,mpc85xx-usb2-dr", "fsl-usb2-dr";
+			reg = <0x211000 0x1000>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			interrupt-parent = <&mpic>;
+			interrupts = <45 0x2>;
+			dr_mode = "host";
+			phy_type = "ulpi";
+		};
+	};
+
+	rapidio0: rapidio@ffe0c0000 {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		compatible = "fsl,rapidio-delta";
+		reg = <0xf 0xfe0c0000 0 0x20000>;
+		ranges = <0 0 0xf 0xf5000000 0 0x01000000>;
+		interrupt-parent = <&mpic>;
+		/* err_irq bell_outb_irq bell_inb_irq
+			msg1_tx_irq msg1_rx_irq	msg2_tx_irq msg2_rx_irq */
+		interrupts = <16 2 56 2 57 2 60 2 61 2 62 2 63 2>;
+	};
+
+	localbus@ffe124000 {
+		compatible = "fsl,p4080-elbc", "fsl,elbc", "simple-bus";
+		reg = <0xf 0xfe124000 0 0x1000>;
+		interrupts = <25 2>;
+		#address-cells = <2>;
+		#size-cells = <1>;
+
+		ranges = <0 0 0xf 0xe8000000 0x08000000>;
+
+		flash@0,0 {
+			compatible = "cfi-flash";
+			reg = <0 0 0x08000000>;
+			bank-width = <2>;
+			device-width = <2>;
+		};
+	};
+
+	pci0: pcie@ffe200000 {
+		compatible = "fsl,p4080-pcie";
+		device_type = "pci";
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		reg = <0xf 0xfe200000 0 0x1000>;
+		bus-range = <0x0 0xff>;
+		ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0x0 0x20000000
+			  0x01000000 0 0x00000000 0xf 0xf8000000 0x0 0x00010000>;
+		clock-frequency = <0x1fca055>;
+		interrupt-parent = <&mpic>;
+		interrupts = <16 2>;
+
+		interrupt-map-mask = <0xf800 0 0 7>;
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0 0 1 &mpic 40 1
+			0000 0 0 2 &mpic 1 1
+			0000 0 0 3 &mpic 2 1
+			0000 0 0 4 &mpic 3 1
+			>;
+		pcie@0 {
+			reg = <0 0 0 0 0>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			device_type = "pci";
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x20000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+
+	pci1: pcie@ffe201000 {
+		compatible = "fsl,p4080-pcie";
+		device_type = "pci";
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		reg = <0xf 0xfe201000 0 0x1000>;
+		bus-range = <0 0xff>;
+		ranges = <0x02000000 0x0 0xe0000000 0xc 0x20000000 0x0 0x20000000
+			  0x01000000 0x0 0x00000000 0xf 0xf8010000 0x0 0x00010000>;
+		clock-frequency = <0x1fca055>;
+		interrupt-parent = <&mpic>;
+		interrupts = <16 2>;
+		interrupt-map-mask = <0xf800 0 0 7>;
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0 0 1 &mpic 41 1
+			0000 0 0 2 &mpic 5 1
+			0000 0 0 3 &mpic 6 1
+			0000 0 0 4 &mpic 7 1
+			>;
+		pcie@0 {
+			reg = <0 0 0 0 0>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			device_type = "pci";
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x20000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+
+	pci2: pcie@ffe202000 {
+		compatible = "fsl,p4080-pcie";
+		device_type = "pci";
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		reg = <0xf 0xfe202000 0 0x1000>;
+		bus-range = <0x0 0xff>;
+		ranges = <0x02000000 0 0xe0000000 0xc 0x40000000 0 0x20000000
+			  0x01000000 0 0x00000000 0xf 0xf8020000 0 0x00010000>;
+		clock-frequency = <0x1fca055>;
+		interrupt-parent = <&mpic>;
+		interrupts = <16 2>;
+		interrupt-map-mask = <0xf800 0 0 7>;
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0 0 1 &mpic 42 1
+			0000 0 0 2 &mpic 9 1
+			0000 0 0 3 &mpic 10 1
+			0000 0 0 4 &mpic 11 1
+			>;
+		pcie@0 {
+			reg = <0 0 0 0 0>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			device_type = "pci";
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x20000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+
+};
diff --git a/arch/powerpc/boot/dts/redwood.dts b/arch/powerpc/boot/dts/redwood.dts
index ad402c4..d2af32e 100644
--- a/arch/powerpc/boot/dts/redwood.dts
+++ b/arch/powerpc/boot/dts/redwood.dts
@@ -226,6 +226,7 @@
 				max-frame-size = <9000>;
 				rx-fifo-size = <4096>;
 				tx-fifo-size = <2048>;
+				rx-fifo-size-gige = <16384>;
 				phy-mode = "rgmii";
 				phy-map = <0x00000000>;
 				rgmii-device = <&RGMII0>;
diff --git a/arch/powerpc/boot/dts/yosemite.dts b/arch/powerpc/boot/dts/yosemite.dts
index 1fa3cb4..6492324 100644
--- a/arch/powerpc/boot/dts/yosemite.dts
+++ b/arch/powerpc/boot/dts/yosemite.dts
@@ -282,20 +282,10 @@
 			/* Inbound 2GB range starting at 0 */
 			dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x0 0x80000000>;
 
-			/* Bamboo has all 4 IRQ pins tied together per slot */
 			interrupt-map-mask = <0xf800 0x0 0x0 0x0>;
 			interrupt-map = <
-				/* IDSEL 1 */
-				0x800 0x0 0x0 0x0 &UIC0 0x1c 0x8
-
-				/* IDSEL 2 */
-				0x1000 0x0 0x0 0x0 &UIC0 0x1b 0x8
-
-				/* IDSEL 3 */
-				0x1800 0x0 0x0 0x0 &UIC0 0x1a 0x8
-
-				/* IDSEL 4 */
-				0x2000 0x0 0x0 0x0 &UIC0 0x19 0x8
+				/* IDSEL 12 */
+				0x6000 0x0 0x0 0x0 &UIC0 0x19 0x8
 			>;
 		};
 	};
diff --git a/arch/powerpc/configs/86xx/gef_ppc9a_defconfig b/arch/powerpc/configs/86xx/gef_ppc9a_defconfig
index 2898073..6cd2cd6 100644
--- a/arch/powerpc/configs/86xx/gef_ppc9a_defconfig
+++ b/arch/powerpc/configs/86xx/gef_ppc9a_defconfig
@@ -218,7 +218,7 @@
 # CONFIG_MPIC_WEIRD is not set
 # CONFIG_PPC_I8259 is not set
 # CONFIG_PPC_RTAS is not set
-# CONFIG_MMIO_NVRAM is not set
+CONFIG_MMIO_NVRAM=y
 # CONFIG_PPC_MPC106 is not set
 # CONFIG_PPC_970_NAP is not set
 # CONFIG_PPC_INDIRECT_IO is not set
diff --git a/arch/powerpc/configs/86xx/gef_sbc310_defconfig b/arch/powerpc/configs/86xx/gef_sbc310_defconfig
index e199d1c..a6a3768 100644
--- a/arch/powerpc/configs/86xx/gef_sbc310_defconfig
+++ b/arch/powerpc/configs/86xx/gef_sbc310_defconfig
@@ -218,7 +218,7 @@
 # CONFIG_MPIC_WEIRD is not set
 # CONFIG_PPC_I8259 is not set
 # CONFIG_PPC_RTAS is not set
-# CONFIG_MMIO_NVRAM is not set
+CONFIG_MMIO_NVRAM=y
 # CONFIG_PPC_MPC106 is not set
 # CONFIG_PPC_970_NAP is not set
 # CONFIG_PPC_INDIRECT_IO is not set
diff --git a/arch/powerpc/configs/86xx/gef_sbc610_defconfig b/arch/powerpc/configs/86xx/gef_sbc610_defconfig
index 3b0fbfb..1975d41 100644
--- a/arch/powerpc/configs/86xx/gef_sbc610_defconfig
+++ b/arch/powerpc/configs/86xx/gef_sbc610_defconfig
@@ -219,7 +219,7 @@
 # CONFIG_MPIC_WEIRD is not set
 # CONFIG_PPC_I8259 is not set
 # CONFIG_PPC_RTAS is not set
-# CONFIG_MMIO_NVRAM is not set
+CONFIG_MMIO_NVRAM=y
 # CONFIG_PPC_MPC106 is not set
 # CONFIG_PPC_970_NAP is not set
 # CONFIG_PPC_INDIRECT_IO is not set
@@ -1124,7 +1124,7 @@
 # CONFIG_IPMI_HANDLER is not set
 CONFIG_HW_RANDOM=y
 # CONFIG_HW_RANDOM_TIMERIOMEM is not set
-# CONFIG_NVRAM is not set
+CONFIG_NVRAM=y
 # CONFIG_R3964 is not set
 # CONFIG_APPLICOM is not set
 # CONFIG_RAW_DRIVER is not set
diff --git a/arch/powerpc/include/asm/cpm.h b/arch/powerpc/include/asm/cpm.h
index 24d79e3..0835eb9 100644
--- a/arch/powerpc/include/asm/cpm.h
+++ b/arch/powerpc/include/asm/cpm.h
@@ -3,8 +3,47 @@
 
 #include <linux/compiler.h>
 #include <linux/types.h>
+#include <linux/errno.h>
 #include <linux/of.h>
 
+/*
+ * USB Controller pram common to QE and CPM.
+ */
+struct usb_ctlr {
+	u8	usb_usmod;
+	u8	usb_usadr;
+	u8	usb_uscom;
+	u8	res1[1];
+	__be16	usb_usep[4];
+	u8	res2[4];
+	__be16	usb_usber;
+	u8	res3[2];
+	__be16	usb_usbmr;
+	u8	res4[1];
+	u8	usb_usbs;
+	/* Fields down below are QE-only */
+	__be16	usb_ussft;
+	u8	res5[2];
+	__be16	usb_usfrn;
+	u8	res6[0x22];
+} __attribute__ ((packed));
+
+/*
+ * Function code bits, usually generic to devices.
+ */
+#ifdef CONFIG_CPM1
+#define CPMFCR_GBL	((u_char)0x00)	/* Flag doesn't exist in CPM1 */
+#define CPMFCR_TC2	((u_char)0x00)	/* Flag doesn't exist in CPM1 */
+#define CPMFCR_DTB	((u_char)0x00)	/* Flag doesn't exist in CPM1 */
+#define CPMFCR_BDB	((u_char)0x00)	/* Flag doesn't exist in CPM1 */
+#else
+#define CPMFCR_GBL	((u_char)0x20)	/* Set memory snooping */
+#define CPMFCR_TC2	((u_char)0x04)	/* Transfer code 2 value */
+#define CPMFCR_DTB	((u_char)0x02)	/* Use local bus for data when set */
+#define CPMFCR_BDB	((u_char)0x01)	/* Use local bus for BD when set */
+#endif
+#define CPMFCR_EB	((u_char)0x10)	/* Set big endian byte order */
+
 /* Opcodes common to CPM1 and CPM2
 */
 #define CPM_CR_INIT_TRX		((ushort)0x0000)
@@ -93,13 +132,56 @@
 #define BD_I2C_START		(0x0400)
 
 int cpm_muram_init(void);
+
+#if defined(CONFIG_CPM) || defined(CONFIG_QUICC_ENGINE)
 unsigned long cpm_muram_alloc(unsigned long size, unsigned long align);
 int cpm_muram_free(unsigned long offset);
 unsigned long cpm_muram_alloc_fixed(unsigned long offset, unsigned long size);
 void __iomem *cpm_muram_addr(unsigned long offset);
 unsigned long cpm_muram_offset(void __iomem *addr);
 dma_addr_t cpm_muram_dma(void __iomem *addr);
+#else
+static inline unsigned long cpm_muram_alloc(unsigned long size,
+					    unsigned long align)
+{
+	return -ENOSYS;
+}
+
+static inline int cpm_muram_free(unsigned long offset)
+{
+	return -ENOSYS;
+}
+
+static inline unsigned long cpm_muram_alloc_fixed(unsigned long offset,
+						  unsigned long size)
+{
+	return -ENOSYS;
+}
+
+static inline void __iomem *cpm_muram_addr(unsigned long offset)
+{
+	return NULL;
+}
+
+static inline unsigned long cpm_muram_offset(void __iomem *addr)
+{
+	return -ENOSYS;
+}
+
+static inline dma_addr_t cpm_muram_dma(void __iomem *addr)
+{
+	return 0;
+}
+#endif /* defined(CONFIG_CPM) || defined(CONFIG_QUICC_ENGINE) */
+
+#ifdef CONFIG_CPM
 int cpm_command(u32 command, u8 opcode);
+#else
+static inline int cpm_command(u32 command, u8 opcode)
+{
+	return -ENOSYS;
+}
+#endif /* CONFIG_CPM */
 
 int cpm2_gpiochip_add32(struct device_node *np);
 
diff --git a/arch/powerpc/include/asm/cpm1.h b/arch/powerpc/include/asm/cpm1.h
index 7685ffd..81b0119 100644
--- a/arch/powerpc/include/asm/cpm1.h
+++ b/arch/powerpc/include/asm/cpm1.h
@@ -478,51 +478,6 @@
 	char	res2[2];	/* Reserved */
 } iic_t;
 
-/* SPI parameter RAM.
-*/
-typedef struct spi {
-	ushort	spi_rbase;	/* Rx Buffer descriptor base address */
-	ushort	spi_tbase;	/* Tx Buffer descriptor base address */
-	u_char	spi_rfcr;	/* Rx function code */
-	u_char	spi_tfcr;	/* Tx function code */
-	ushort	spi_mrblr;	/* Max receive buffer length */
-	uint	spi_rstate;	/* Internal */
-	uint	spi_rdp;	/* Internal */
-	ushort	spi_rbptr;	/* Internal */
-	ushort	spi_rbc;	/* Internal */
-	uint	spi_rxtmp;	/* Internal */
-	uint	spi_tstate;	/* Internal */
-	uint	spi_tdp;	/* Internal */
-	ushort	spi_tbptr;	/* Internal */
-	ushort	spi_tbc;	/* Internal */
-	uint	spi_txtmp;	/* Internal */
-	uint	spi_res;
-	ushort	spi_rpbase;	/* Relocation pointer */
-	ushort	spi_res2;
-} spi_t;
-
-/* SPI Mode register.
-*/
-#define SPMODE_LOOP	((ushort)0x4000)	/* Loopback */
-#define SPMODE_CI	((ushort)0x2000)	/* Clock Invert */
-#define SPMODE_CP	((ushort)0x1000)	/* Clock Phase */
-#define SPMODE_DIV16	((ushort)0x0800)	/* BRG/16 mode */
-#define SPMODE_REV	((ushort)0x0400)	/* Reversed Data */
-#define SPMODE_MSTR	((ushort)0x0200)	/* SPI Master */
-#define SPMODE_EN	((ushort)0x0100)	/* Enable */
-#define SPMODE_LENMSK	((ushort)0x00f0)	/* character length */
-#define SPMODE_LEN4	((ushort)0x0030)	/*  4 bits per char */
-#define SPMODE_LEN8	((ushort)0x0070)	/*  8 bits per char */
-#define SPMODE_LEN16	((ushort)0x00f0)	/* 16 bits per char */
-#define SPMODE_PMMSK	((ushort)0x000f)	/* prescale modulus */
-
-/* SPIE fields */
-#define SPIE_MME	0x20
-#define SPIE_TXE	0x10
-#define SPIE_BSY	0x04
-#define SPIE_TXB	0x02
-#define SPIE_RXB	0x01
-
 /*
  * RISC Controller Configuration Register definitons
  */
diff --git a/arch/powerpc/include/asm/cpm2.h b/arch/powerpc/include/asm/cpm2.h
index 990ff19..f42e9ba 100644
--- a/arch/powerpc/include/asm/cpm2.h
+++ b/arch/powerpc/include/asm/cpm2.h
@@ -124,14 +124,6 @@
 	__cpm2_setbrg(brg, rate, CPM2_BRG_INT_CLK, div16, CPM_BRG_EXTC_INT);
 }
 
-/* Function code bits, usually generic to devices.
-*/
-#define CPMFCR_GBL	((u_char)0x20)	/* Set memory snooping */
-#define CPMFCR_EB	((u_char)0x10)	/* Set big endian byte order */
-#define CPMFCR_TC2	((u_char)0x04)	/* Transfer code 2 value */
-#define CPMFCR_DTB	((u_char)0x02)	/* Use local bus for data when set */
-#define CPMFCR_BDB	((u_char)0x01)	/* Use local bus for BD when set */
-
 /* Parameter RAM offsets from the base.
 */
 #define PROFF_SCC1		((uint)0x8000)
@@ -654,45 +646,6 @@
 	uint	iic_txtmp;	/* Internal */
 } iic_t;
 
-/* SPI parameter RAM.
-*/
-typedef struct spi {
-	ushort	spi_rbase;	/* Rx Buffer descriptor base address */
-	ushort	spi_tbase;	/* Tx Buffer descriptor base address */
-	u_char	spi_rfcr;	/* Rx function code */
-	u_char	spi_tfcr;	/* Tx function code */
-	ushort	spi_mrblr;	/* Max receive buffer length */
-	uint	spi_rstate;	/* Internal */
-	uint	spi_rdp;	/* Internal */
-	ushort	spi_rbptr;	/* Internal */
-	ushort	spi_rbc;	/* Internal */
-	uint	spi_rxtmp;	/* Internal */
-	uint	spi_tstate;	/* Internal */
-	uint	spi_tdp;	/* Internal */
-	ushort	spi_tbptr;	/* Internal */
-	ushort	spi_tbc;	/* Internal */
-	uint	spi_txtmp;	/* Internal */
-	uint	spi_res;	/* Tx temp. */
-	uint	spi_res1[4];	/* SDMA temp. */
-} spi_t;
-
-/* SPI Mode register.
-*/
-#define SPMODE_LOOP	((ushort)0x4000)	/* Loopback */
-#define SPMODE_CI	((ushort)0x2000)	/* Clock Invert */
-#define SPMODE_CP	((ushort)0x1000)	/* Clock Phase */
-#define SPMODE_DIV16	((ushort)0x0800)	/* BRG/16 mode */
-#define SPMODE_REV	((ushort)0x0400)	/* Reversed Data */
-#define SPMODE_MSTR	((ushort)0x0200)	/* SPI Master */
-#define SPMODE_EN	((ushort)0x0100)	/* Enable */
-#define SPMODE_LENMSK	((ushort)0x00f0)	/* character length */
-#define SPMODE_PMMSK	((ushort)0x000f)	/* prescale modulus */
-
-#define SPMODE_LEN(x)	((((x)-1)&0xF)<<4)
-#define SPMODE_PM(x)	((x) &0xF)
-
-#define SPI_EB		((u_char)0x10)		/* big endian byte order */
-
 /* IDMA parameter RAM
 */
 typedef struct idma {
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index a98653b..57c4000 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -147,6 +147,7 @@
 	.globl label##_pSeries;				\
 label##_pSeries:					\
 	HMT_MEDIUM;					\
+	DO_KVM	n;					\
 	mtspr	SPRN_SPRG_SCRATCH0,r13;		/* save r13 */	\
 	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common)
 
@@ -170,6 +171,7 @@
 	.globl label##_pSeries;						\
 label##_pSeries:							\
 	HMT_MEDIUM;							\
+	DO_KVM	n;							\
 	mtspr	SPRN_SPRG_SCRATCH0,r13;	/* save r13 */			\
 	mfspr	r13,SPRN_SPRG_PACA;	/* get paca address into r13 */	\
 	std	r9,PACA_EXGEN+EX_R9(r13);	/* save r9, r10 */	\
diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index b1dafb6..5856a66 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -3,6 +3,10 @@
 
 #include <asm/page.h>
 
+pte_t *huge_pte_offset_and_shift(struct mm_struct *mm,
+				 unsigned long addr, unsigned *shift);
+
+void flush_dcache_icache_hugepage(struct page *page);
 
 int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
 			   unsigned long len);
@@ -11,12 +15,6 @@
 			    unsigned long end, unsigned long floor,
 			    unsigned long ceiling);
 
-void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
-		     pte_t *ptep, pte_t pte);
-
-pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
-			      pte_t *ptep);
-
 /*
  * The version of vma_mmu_pagesize() in arch/powerpc/mm/hugetlbpage.c needs
  * to override the version in mm/hugetlb.c
@@ -42,9 +40,26 @@
 {
 }
 
+
+static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+				   pte_t *ptep, pte_t pte)
+{
+	set_pte_at(mm, addr, ptep, pte);
+}
+
+static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+					    unsigned long addr, pte_t *ptep)
+{
+	unsigned long old = pte_update(mm, addr, ptep, ~0UL, 1);
+	return __pte(old);
+}
+
 static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
 					 unsigned long addr, pte_t *ptep)
 {
+	pte_t pte;
+	pte = huge_ptep_get_and_clear(vma->vm_mm, addr, ptep);
+	flush_tlb_page(vma, addr);
 }
 
 static inline int huge_pte_none(pte_t pte)
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index c27caac..f027581 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -212,6 +212,19 @@
 #define H_QUERY_INT_STATE       0x1E4
 #define H_POLL_PENDING		0x1D8
 #define H_ILLAN_ATTRIBUTES	0x244
+#define H_MODIFY_HEA_QP		0x250
+#define H_QUERY_HEA_QP		0x254
+#define H_QUERY_HEA		0x258
+#define H_QUERY_HEA_PORT	0x25C
+#define H_MODIFY_HEA_PORT	0x260
+#define H_REG_BCMC		0x264
+#define H_DEREG_BCMC		0x268
+#define H_REGISTER_HEA_RPAGES	0x26C
+#define H_DISABLE_AND_GET_HEA	0x270
+#define H_GET_HEA_INFO		0x274
+#define H_ALLOC_HEA_RESOURCE	0x278
+#define H_ADD_CONN		0x284
+#define H_DEL_CONN		0x288
 #define H_JOIN			0x298
 #define H_VASI_STATE            0x2A4
 #define H_ENABLE_CRQ		0x2B0
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index abbc2aa..9f4c9d4f 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -64,11 +64,6 @@
 		get_paca()->hard_enabled = 0;	\
 	} while(0)
 
-static inline int irqs_disabled_flags(unsigned long flags)
-{
-	return flags == 0;
-}
-
 #else
 
 #if defined(CONFIG_BOOKE)
diff --git a/arch/powerpc/include/asm/immap_cpm2.h b/arch/powerpc/include/asm/immap_cpm2.h
index d4f069b..7c64fda 100644
--- a/arch/powerpc/include/asm/immap_cpm2.h
+++ b/arch/powerpc/include/asm/immap_cpm2.h
@@ -549,7 +549,7 @@
 
 /* USB Controller.
 */
-typedef struct usb_ctlr {
+typedef struct cpm_usb_ctlr {
 	u8	usb_usmod;
 	u8	usb_usadr;
 	u8	usb_uscom;
diff --git a/arch/powerpc/include/asm/immap_qe.h b/arch/powerpc/include/asm/immap_qe.h
index c346d0b..4e10f50 100644
--- a/arch/powerpc/include/asm/immap_qe.h
+++ b/arch/powerpc/include/asm/immap_qe.h
@@ -210,7 +210,7 @@
 } __attribute__ ((packed));
 
 /* USB Controller */
-struct usb_ctlr {
+struct qe_usb_ctlr {
 	u8	usb_usmod;
 	u8	usb_usadr;
 	u8	usb_uscom;
@@ -229,7 +229,7 @@
 } __attribute__ ((packed));
 
 /* MCC */
-struct mcc {
+struct qe_mcc {
 	__be32	mcce;		/* MCC event register */
 	__be32	mccm;		/* MCC mask register */
 	__be32	mccf;		/* MCC configuration register */
@@ -431,9 +431,9 @@
 	struct qe_mux		qmx;		/* QE Multiplexer */
 	struct qe_timers	qet;		/* QE Timers */
 	struct spi		spi[0x2];	/* spi */
-	struct mcc		mcc;		/* mcc */
+	struct qe_mcc		mcc;		/* mcc */
 	struct qe_brg		brg;		/* brg */
-	struct usb_ctlr		usb;		/* USB */
+	struct qe_usb_ctlr	usb;		/* USB */
 	struct si1		si1;		/* SI */
 	u8			res11[0x800];
 	struct sir		sir;		/* SI Routing Tables */
diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h
index bbcd1aa..e054bae 100644
--- a/arch/powerpc/include/asm/irq.h
+++ b/arch/powerpc/include/asm/irq.h
@@ -17,8 +17,6 @@
 #include <asm/atomic.h>
 
 
-#define get_irq_desc(irq) (&irq_desc[(irq)])
-
 /* Define a way to iterate across irqs. */
 #define for_each_irq(i) \
 	for ((i) = 0; (i) < NR_IRQS; ++(i))
@@ -34,12 +32,15 @@
  */
 #define NO_IRQ_IGNORE		((unsigned int)-1)
 
-/* Total number of virq in the platform (make it a CONFIG_* option ? */
-#define NR_IRQS		512
+/* Total number of virq in the platform */
+#define NR_IRQS		CONFIG_NR_IRQS
 
 /* Number of irqs reserved for the legacy controller */
 #define NUM_ISA_INTERRUPTS	16
 
+/* Same thing, used by the generic IRQ code */
+#define NR_IRQS_LEGACY		NUM_ISA_INTERRUPTS
+
 /* This type is the placeholder for a hardware interrupt number. It has to
  * be big enough to enclose whatever representation is used by a given
  * platform.
@@ -99,7 +100,7 @@
 	 * interrupt controller has for that line)
 	 */
 	int (*xlate)(struct irq_host *h, struct device_node *ctrler,
-		     u32 *intspec, unsigned int intsize,
+		     const u32 *intspec, unsigned int intsize,
 		     irq_hw_number_t *out_hwirq, unsigned int *out_type);
 };
 
@@ -313,7 +314,7 @@
  * of the of_irq_map_*() functions.
  */
 extern unsigned int irq_create_of_mapping(struct device_node *controller,
-					  u32 *intspec, unsigned int intsize);
+					  const u32 *intspec, unsigned int intsize);
 
 /**
  * irq_of_parse_and_map - Parse and Map an interrupt into linux virq space
diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h
index bb2de6a..81f3b0b 100644
--- a/arch/powerpc/include/asm/kvm.h
+++ b/arch/powerpc/include/asm/kvm.h
@@ -46,6 +46,24 @@
 };
 
 struct kvm_sregs {
+	__u32 pvr;
+	union {
+		struct {
+			__u64 sdr1;
+			struct {
+				struct {
+					__u64 slbe;
+					__u64 slbv;
+				} slb[64];
+			} ppc64;
+			struct {
+				__u32 sr[16];
+				__u64 ibat[8]; 
+				__u64 dbat[8]; 
+			} ppc32;
+		} s;
+		__u8 pad[1020];
+	} u;
 };
 
 struct kvm_fpu {
diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
index 56bfae5..af2abe7 100644
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -49,6 +49,46 @@
 #define BOOKE_INTERRUPT_SPE_FP_ROUND 34
 #define BOOKE_INTERRUPT_PERFORMANCE_MONITOR 35
 
+/* book3s */
+
+#define BOOK3S_INTERRUPT_SYSTEM_RESET	0x100
+#define BOOK3S_INTERRUPT_MACHINE_CHECK	0x200
+#define BOOK3S_INTERRUPT_DATA_STORAGE	0x300
+#define BOOK3S_INTERRUPT_DATA_SEGMENT	0x380
+#define BOOK3S_INTERRUPT_INST_STORAGE	0x400
+#define BOOK3S_INTERRUPT_INST_SEGMENT	0x480
+#define BOOK3S_INTERRUPT_EXTERNAL	0x500
+#define BOOK3S_INTERRUPT_ALIGNMENT	0x600
+#define BOOK3S_INTERRUPT_PROGRAM	0x700
+#define BOOK3S_INTERRUPT_FP_UNAVAIL	0x800
+#define BOOK3S_INTERRUPT_DECREMENTER	0x900
+#define BOOK3S_INTERRUPT_SYSCALL	0xc00
+#define BOOK3S_INTERRUPT_TRACE		0xd00
+#define BOOK3S_INTERRUPT_PERFMON	0xf00
+#define BOOK3S_INTERRUPT_ALTIVEC	0xf20
+#define BOOK3S_INTERRUPT_VSX		0xf40
+
+#define BOOK3S_IRQPRIO_SYSTEM_RESET		0
+#define BOOK3S_IRQPRIO_DATA_SEGMENT		1
+#define BOOK3S_IRQPRIO_INST_SEGMENT		2
+#define BOOK3S_IRQPRIO_DATA_STORAGE		3
+#define BOOK3S_IRQPRIO_INST_STORAGE		4
+#define BOOK3S_IRQPRIO_ALIGNMENT		5
+#define BOOK3S_IRQPRIO_PROGRAM			6
+#define BOOK3S_IRQPRIO_FP_UNAVAIL		7
+#define BOOK3S_IRQPRIO_ALTIVEC			8
+#define BOOK3S_IRQPRIO_VSX			9
+#define BOOK3S_IRQPRIO_SYSCALL			10
+#define BOOK3S_IRQPRIO_MACHINE_CHECK		11
+#define BOOK3S_IRQPRIO_DEBUG			12
+#define BOOK3S_IRQPRIO_EXTERNAL			13
+#define BOOK3S_IRQPRIO_DECREMENTER		14
+#define BOOK3S_IRQPRIO_PERFORMANCE_MONITOR	15
+#define BOOK3S_IRQPRIO_MAX			16
+
+#define BOOK3S_HFLAG_DCBZ32			0x1
+#define BOOK3S_HFLAG_SLB			0x2
+
 #define RESUME_FLAG_NV          (1<<0)  /* Reload guest nonvolatile state? */
 #define RESUME_FLAG_HOST        (1<<1)  /* Resume host? */
 
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
new file mode 100644
index 0000000..74b7369
--- /dev/null
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -0,0 +1,139 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright SUSE Linux Products GmbH 2009
+ *
+ * Authors: Alexander Graf <agraf@suse.de>
+ */
+
+#ifndef __ASM_KVM_BOOK3S_H__
+#define __ASM_KVM_BOOK3S_H__
+
+#include <linux/types.h>
+#include <linux/kvm_host.h>
+#include <asm/kvm_ppc.h>
+
+struct kvmppc_slb {
+	u64 esid;
+	u64 vsid;
+	u64 orige;
+	u64 origv;
+	bool valid;
+	bool Ks;
+	bool Kp;
+	bool nx;
+	bool large;
+	bool class;
+};
+
+struct kvmppc_sr {
+	u32 raw;
+	u32 vsid;
+	bool Ks;
+	bool Kp;
+	bool nx;
+};
+
+struct kvmppc_bat {
+	u64 raw;
+	u32 bepi;
+	u32 bepi_mask;
+	bool vs;
+	bool vp;
+	u32 brpn;
+	u8 wimg;
+	u8 pp;
+};
+
+struct kvmppc_sid_map {
+	u64 guest_vsid;
+	u64 guest_esid;
+	u64 host_vsid;
+	bool valid;
+};
+
+#define SID_MAP_BITS    9
+#define SID_MAP_NUM     (1 << SID_MAP_BITS)
+#define SID_MAP_MASK    (SID_MAP_NUM - 1)
+
+struct kvmppc_vcpu_book3s {
+	struct kvm_vcpu vcpu;
+	struct kvmppc_sid_map sid_map[SID_MAP_NUM];
+	struct kvmppc_slb slb[64];
+	struct {
+		u64 esid;
+		u64 vsid;
+	} slb_shadow[64];
+	u8 slb_shadow_max;
+	struct kvmppc_sr sr[16];
+	struct kvmppc_bat ibat[8];
+	struct kvmppc_bat dbat[8];
+	u64 hid[6];
+	int slb_nr;
+	u64 sdr1;
+	u64 dsisr;
+	u64 hior;
+	u64 msr_mask;
+	u64 vsid_first;
+	u64 vsid_next;
+	u64 vsid_max;
+	int context_id;
+};
+
+#define CONTEXT_HOST		0
+#define CONTEXT_GUEST		1
+#define CONTEXT_GUEST_END	2
+
+#define VSID_REAL	0xfffffffffff00000
+#define VSID_REAL_DR	0xffffffffffe00000
+#define VSID_REAL_IR	0xffffffffffd00000
+#define VSID_BAT	0xffffffffffc00000
+#define VSID_PR		0x8000000000000000
+
+extern void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, u64 ea, u64 ea_mask);
+extern void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 vp, u64 vp_mask);
+extern void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, u64 pa_start, u64 pa_end);
+extern void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 new_msr);
+extern void kvmppc_mmu_book3s_64_init(struct kvm_vcpu *vcpu);
+extern void kvmppc_mmu_book3s_32_init(struct kvm_vcpu *vcpu);
+extern int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte);
+extern int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr);
+extern void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu);
+extern struct kvmppc_pte *kvmppc_mmu_find_pte(struct kvm_vcpu *vcpu, u64 ea, bool data);
+extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong eaddr, int size, void *ptr, bool data);
+extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong eaddr, int size, void *ptr);
+extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec);
+extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat,
+			   bool upper, u32 val);
+
+extern u32 kvmppc_trampoline_lowmem;
+extern u32 kvmppc_trampoline_enter;
+
+static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu)
+{
+	return container_of(vcpu, struct kvmppc_vcpu_book3s, vcpu);
+}
+
+static inline ulong dsisr(void)
+{
+	ulong r;
+	asm ( "mfdsisr %0 " : "=r" (r) );
+	return r;
+}
+
+extern void kvm_return_point(void);
+
+#define INS_DCBZ			0x7c0007ec
+
+#endif /* __ASM_KVM_BOOK3S_H__ */
diff --git a/arch/powerpc/include/asm/kvm_book3s_64_asm.h b/arch/powerpc/include/asm/kvm_book3s_64_asm.h
new file mode 100644
index 0000000..2e06ee8
--- /dev/null
+++ b/arch/powerpc/include/asm/kvm_book3s_64_asm.h
@@ -0,0 +1,58 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright SUSE Linux Products GmbH 2009
+ *
+ * Authors: Alexander Graf <agraf@suse.de>
+ */
+
+#ifndef __ASM_KVM_BOOK3S_ASM_H__
+#define __ASM_KVM_BOOK3S_ASM_H__
+
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
+
+#include <asm/kvm_asm.h>
+
+.macro DO_KVM intno
+	.if (\intno == BOOK3S_INTERRUPT_SYSTEM_RESET) || \
+	    (\intno == BOOK3S_INTERRUPT_MACHINE_CHECK) || \
+	    (\intno == BOOK3S_INTERRUPT_DATA_STORAGE) || \
+	    (\intno == BOOK3S_INTERRUPT_INST_STORAGE) || \
+	    (\intno == BOOK3S_INTERRUPT_DATA_SEGMENT) || \
+	    (\intno == BOOK3S_INTERRUPT_INST_SEGMENT) || \
+	    (\intno == BOOK3S_INTERRUPT_EXTERNAL) || \
+	    (\intno == BOOK3S_INTERRUPT_ALIGNMENT) || \
+	    (\intno == BOOK3S_INTERRUPT_PROGRAM) || \
+	    (\intno == BOOK3S_INTERRUPT_FP_UNAVAIL) || \
+	    (\intno == BOOK3S_INTERRUPT_DECREMENTER) || \
+	    (\intno == BOOK3S_INTERRUPT_SYSCALL) || \
+	    (\intno == BOOK3S_INTERRUPT_TRACE) || \
+	    (\intno == BOOK3S_INTERRUPT_PERFMON) || \
+	    (\intno == BOOK3S_INTERRUPT_ALTIVEC) || \
+	    (\intno == BOOK3S_INTERRUPT_VSX)
+
+	b	kvmppc_trampoline_\intno
+kvmppc_resume_\intno:
+
+	.endif
+.endm
+
+#else
+
+.macro DO_KVM intno
+.endm
+
+#endif /* CONFIG_KVM_BOOK3S_64_HANDLER */
+
+#endif /* __ASM_KVM_BOOK3S_ASM_H__ */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index c9c930e..1201f62 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -21,7 +21,8 @@
 #define __POWERPC_KVM_HOST_H__
 
 #include <linux/mutex.h>
-#include <linux/timer.h>
+#include <linux/hrtimer.h>
+#include <linux/interrupt.h>
 #include <linux/types.h>
 #include <linux/kvm_types.h>
 #include <asm/kvm_asm.h>
@@ -37,6 +38,8 @@
 #define KVM_NR_PAGE_SIZES	1
 #define KVM_PAGES_PER_HPAGE(x)	(1UL<<31)
 
+#define HPTEG_CACHE_NUM 1024
+
 struct kvm;
 struct kvm_run;
 struct kvm_vcpu;
@@ -63,6 +66,17 @@
 	u32 dec_exits;
 	u32 ext_intr_exits;
 	u32 halt_wakeup;
+#ifdef CONFIG_PPC64
+	u32 pf_storage;
+	u32 pf_instruc;
+	u32 sp_storage;
+	u32 sp_instruc;
+	u32 queue_intr;
+	u32 ld;
+	u32 ld_slow;
+	u32 st;
+	u32 st_slow;
+#endif
 };
 
 enum kvm_exit_types {
@@ -109,9 +123,53 @@
 struct kvm_arch {
 };
 
+struct kvmppc_pte {
+	u64 eaddr;
+	u64 vpage;
+	u64 raddr;
+	bool may_read;
+	bool may_write;
+	bool may_execute;
+};
+
+struct kvmppc_mmu {
+	/* book3s_64 only */
+	void (*slbmte)(struct kvm_vcpu *vcpu, u64 rb, u64 rs);
+	u64  (*slbmfee)(struct kvm_vcpu *vcpu, u64 slb_nr);
+	u64  (*slbmfev)(struct kvm_vcpu *vcpu, u64 slb_nr);
+	void (*slbie)(struct kvm_vcpu *vcpu, u64 slb_nr);
+	void (*slbia)(struct kvm_vcpu *vcpu);
+	/* book3s */
+	void (*mtsrin)(struct kvm_vcpu *vcpu, u32 srnum, ulong value);
+	u32  (*mfsrin)(struct kvm_vcpu *vcpu, u32 srnum);
+	int  (*xlate)(struct kvm_vcpu *vcpu, gva_t eaddr, struct kvmppc_pte *pte, bool data);
+	void (*reset_msr)(struct kvm_vcpu *vcpu);
+	void (*tlbie)(struct kvm_vcpu *vcpu, ulong addr, bool large);
+	int  (*esid_to_vsid)(struct kvm_vcpu *vcpu, u64 esid, u64 *vsid);
+	u64  (*ea_to_vp)(struct kvm_vcpu *vcpu, gva_t eaddr, bool data);
+	bool (*is_dcbz32)(struct kvm_vcpu *vcpu);
+};
+
+struct hpte_cache {
+	u64 host_va;
+	u64 pfn;
+	ulong slot;
+	struct kvmppc_pte pte;
+};
+
 struct kvm_vcpu_arch {
-	u32 host_stack;
+	ulong host_stack;
 	u32 host_pid;
+#ifdef CONFIG_PPC64
+	ulong host_msr;
+	ulong host_r2;
+	void *host_retip;
+	ulong trampoline_lowmem;
+	ulong trampoline_enter;
+	ulong highmem_handler;
+	ulong host_paca_phys;
+	struct kvmppc_mmu mmu;
+#endif
 
 	u64 fpr[32];
 	ulong gpr[32];
@@ -123,6 +181,10 @@
 	ulong xer;
 
 	ulong msr;
+#ifdef CONFIG_PPC64
+	ulong shadow_msr;
+	ulong hflags;
+#endif
 	u32 mmucr;
 	ulong sprg0;
 	ulong sprg1;
@@ -149,6 +211,7 @@
 	u32 ivor[64];
 	ulong ivpr;
 	u32 pir;
+	u32 pvr;
 
 	u32 shadow_pid;
 	u32 pid;
@@ -174,6 +237,9 @@
 #endif
 
 	u32 last_inst;
+#ifdef CONFIG_PPC64
+	ulong fault_dsisr;
+#endif
 	ulong fault_dear;
 	ulong fault_esr;
 	gpa_t paddr_accessed;
@@ -185,8 +251,15 @@
 
 	u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */
 
-	struct timer_list dec_timer;
+	struct hrtimer dec_timer;
+	struct tasklet_struct tasklet;
+	u64 dec_jiffies;
 	unsigned long pending_exceptions;
+
+#ifdef CONFIG_PPC64
+	struct hpte_cache hpte_cache[HPTEG_CACHE_NUM];
+	int hpte_cache_offset;
+#endif
 };
 
 #endif /* __POWERPC_KVM_HOST_H__ */
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 2c6ee34..269ee46 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -39,6 +39,7 @@
 extern int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
 extern char kvmppc_handlers_start[];
 extern unsigned long kvmppc_handler_len;
+extern void kvmppc_handler_highmem(void);
 
 extern void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu);
 extern int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h
index f78f65c..14b592d 100644
--- a/arch/powerpc/include/asm/lppaca.h
+++ b/arch/powerpc/include/asm/lppaca.h
@@ -100,7 +100,14 @@
 	// Used to pass parms from the OS to PLIC for SetAsrAndRfid
 	u64	saved_gpr3;		// Saved GPR3                   x20-x27
 	u64	saved_gpr4;		// Saved GPR4                   x28-x2F
-	u64	saved_gpr5;		// Saved GPR5                   x30-x37
+	union {
+		u64	saved_gpr5;	/* Saved GPR5               x30-x37 */
+		struct {
+			u8	cede_latency_hint;  /*			x30 */
+			u8	reserved[7];        /*		    x31-x36 */
+		} fields;
+	} gpr5_dword;
+
 
 	u8	dtl_enable_mask;	// Dispatch Trace Log mask	x38-x38
 	u8	donate_dedicated_cpu;	// Donate dedicated CPU cycles  x39-x39
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index 9efa2be..9f0fc9e 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -266,6 +266,11 @@
 	void (*suspend_disable_irqs)(void);
 	void (*suspend_enable_irqs)(void);
 #endif
+
+#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
+	ssize_t (*cpu_probe)(const char *, size_t);
+	ssize_t (*cpu_release)(const char *, size_t);
+#endif
 };
 
 extern void e500_idle(void);
diff --git a/arch/powerpc/include/asm/macio.h b/arch/powerpc/include/asm/macio.h
index 079c06e..a062c57 100644
--- a/arch/powerpc/include/asm/macio.h
+++ b/arch/powerpc/include/asm/macio.h
@@ -39,6 +39,7 @@
 	struct macio_bus	*bus;		/* macio bus this device is on */
 	struct macio_dev	*media_bay;	/* Device is part of a media bay */
 	struct of_device	ofdev;
+	struct device_dma_parameters dma_parms; /* ide needs that */
 	int			n_resources;
 	struct resource		resource[MACIO_DEV_COUNT_RESOURCES];
 	int			n_interrupts;
@@ -78,6 +79,8 @@
 	return res->end - res->start + 1;
 }
 
+extern int macio_enable_devres(struct macio_dev *dev);
+
 extern int macio_request_resource(struct macio_dev *dev, int resource_no, const char *name);
 extern void macio_release_resource(struct macio_dev *dev, int resource_no);
 extern int macio_request_resources(struct macio_dev *dev, const char *name);
@@ -131,6 +134,9 @@
 	int	(*resume)(struct macio_dev* dev);
 	int	(*shutdown)(struct macio_dev* dev);
 
+#ifdef CONFIG_PMAC_MEDIABAY
+	void	(*mediabay_event)(struct macio_dev* dev, int mb_state);
+#endif
 	struct device_driver	driver;
 };
 #define	to_macio_driver(drv) container_of(drv,struct macio_driver, driver)
diff --git a/arch/powerpc/include/asm/mediabay.h b/arch/powerpc/include/asm/mediabay.h
index b2efb33..11037a4 100644
--- a/arch/powerpc/include/asm/mediabay.h
+++ b/arch/powerpc/include/asm/mediabay.h
@@ -17,26 +17,31 @@
 #define MB_POWER	6	/* media bay contains a Power device (???) */
 #define MB_NO		7	/* media bay contains nothing */
 
-/* Number of bays in the machine or 0 */
-extern int media_bay_count;
+struct macio_dev;
 
-#ifdef CONFIG_BLK_DEV_IDE_PMAC
-#include <linux/ide.h>
+#ifdef CONFIG_PMAC_MEDIABAY
 
-int check_media_bay_by_base(unsigned long base, int what);
-/* called by IDE PMAC host driver to register IDE controller for media bay */
-int media_bay_set_ide_infos(struct device_node *which_bay, unsigned long base,
-			    int irq, ide_hwif_t *hwif);
+/* Check the content type of the bay, returns MB_NO if the bay is still
+ * transitionning
+ */
+extern int check_media_bay(struct macio_dev *bay);
 
-int check_media_bay(struct device_node *which_bay, int what);
+/* The ATA driver uses the calls below to temporarily hold on the
+ * media bay callbacks while initializing the interface
+ */
+extern void lock_media_bay(struct macio_dev *bay);
+extern void unlock_media_bay(struct macio_dev *bay);
 
 #else
 
-static inline int check_media_bay(struct device_node *which_bay, int what)
+static inline int check_media_bay(struct macio_dev *bay)
 {
-	return -ENODEV;
+	return MB_NO;
 }
 
+static inline void lock_media_bay(struct macio_dev *bay) { }
+static inline void unlock_media_bay(struct macio_dev *bay) { }
+
 #endif
 
 #endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h
index bebe31c..2102b21 100644
--- a/arch/powerpc/include/asm/mmu-hash64.h
+++ b/arch/powerpc/include/asm/mmu-hash64.h
@@ -173,14 +173,6 @@
  */
 extern int mmu_ci_restrictions;
 
-#ifdef CONFIG_HUGETLB_PAGE
-/*
- * The page size indexes of the huge pages for use by hugetlbfs
- */
-extern unsigned int mmu_huge_psizes[MMU_PAGE_COUNT];
-
-#endif /* CONFIG_HUGETLB_PAGE */
-
 /*
  * This function sets the AVPN and L fields of the HPTE  appropriately
  * for the page size
@@ -253,10 +245,11 @@
 			   unsigned long vsid, pte_t *ptep, unsigned long trap,
 			   unsigned int local, int ssize);
 struct mm_struct;
+unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap);
 extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap);
-extern int hash_huge_page(struct mm_struct *mm, unsigned long access,
-			  unsigned long ea, unsigned long vsid, int local,
-			  unsigned long trap);
+int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
+		     pte_t *ptep, unsigned long trap, int local, int ssize,
+		     unsigned int shift, unsigned int mmu_psize);
 
 extern int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
 			     unsigned long pstart, unsigned long prot,
@@ -380,6 +373,38 @@
 
 #ifndef __ASSEMBLY__
 
+#ifdef CONFIG_PPC_SUBPAGE_PROT
+/*
+ * For the sub-page protection option, we extend the PGD with one of
+ * these.  Basically we have a 3-level tree, with the top level being
+ * the protptrs array.  To optimize speed and memory consumption when
+ * only addresses < 4GB are being protected, pointers to the first
+ * four pages of sub-page protection words are stored in the low_prot
+ * array.
+ * Each page of sub-page protection words protects 1GB (4 bytes
+ * protects 64k).  For the 3-level tree, each page of pointers then
+ * protects 8TB.
+ */
+struct subpage_prot_table {
+	unsigned long maxaddr;	/* only addresses < this are protected */
+	unsigned int **protptrs[2];
+	unsigned int *low_prot[4];
+};
+
+#define SBP_L1_BITS		(PAGE_SHIFT - 2)
+#define SBP_L2_BITS		(PAGE_SHIFT - 3)
+#define SBP_L1_COUNT		(1 << SBP_L1_BITS)
+#define SBP_L2_COUNT		(1 << SBP_L2_BITS)
+#define SBP_L2_SHIFT		(PAGE_SHIFT + SBP_L1_BITS)
+#define SBP_L3_SHIFT		(SBP_L2_SHIFT + SBP_L2_BITS)
+
+extern void subpage_prot_free(struct mm_struct *mm);
+extern void subpage_prot_init_new_context(struct mm_struct *mm);
+#else
+static inline void subpage_prot_free(struct mm_struct *mm) {}
+static inline void subpage_prot_init_new_context(struct mm_struct *mm) { }
+#endif /* CONFIG_PPC_SUBPAGE_PROT */
+
 typedef unsigned long mm_context_id_t;
 
 typedef struct {
@@ -393,6 +418,9 @@
 	u16 sllp;		/* SLB page size encoding */
 #endif
 	unsigned long vdso_base;
+#ifdef CONFIG_PPC_SUBPAGE_PROT
+	struct subpage_prot_table spt;
+#endif /* CONFIG_PPC_SUBPAGE_PROT */
 } mm_context_t;
 
 
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index b34e94d..26383e0 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -23,6 +23,8 @@
 extern void set_context(unsigned long id, pgd_t *pgd);
 
 #ifdef CONFIG_PPC_BOOK3S_64
+extern int __init_new_context(void);
+extern void __destroy_context(int context_id);
 static inline void mmu_context_init(void) { }
 #else
 extern void mmu_context_init(void);
diff --git a/arch/powerpc/include/asm/mpc52xx.h b/arch/powerpc/include/asm/mpc52xx.h
index 1b4f697..b664ce7 100644
--- a/arch/powerpc/include/asm/mpc52xx.h
+++ b/arch/powerpc/include/asm/mpc52xx.h
@@ -276,6 +276,53 @@
 extern unsigned int mpc52xx_get_xtal_freq(struct device_node *node);
 extern void mpc52xx_restart(char *cmd);
 
+/* mpc52xx_gpt.c */
+struct mpc52xx_gpt_priv;
+extern struct mpc52xx_gpt_priv *mpc52xx_gpt_from_irq(int irq);
+extern int mpc52xx_gpt_start_timer(struct mpc52xx_gpt_priv *gpt, u64 period,
+                            int continuous);
+extern u64 mpc52xx_gpt_timer_period(struct mpc52xx_gpt_priv *gpt);
+extern int mpc52xx_gpt_stop_timer(struct mpc52xx_gpt_priv *gpt);
+
+/* mpc52xx_lpbfifo.c */
+#define MPC52XX_LPBFIFO_FLAG_READ		(0)
+#define MPC52XX_LPBFIFO_FLAG_WRITE		(1<<0)
+#define MPC52XX_LPBFIFO_FLAG_NO_INCREMENT	(1<<1)
+#define MPC52XX_LPBFIFO_FLAG_NO_DMA		(1<<2)
+#define MPC52XX_LPBFIFO_FLAG_POLL_DMA		(1<<3)
+
+struct mpc52xx_lpbfifo_request {
+	struct list_head list;
+
+	/* localplus bus address */
+	unsigned int cs;
+	size_t offset;
+
+	/* Memory address */
+	void *data;
+	phys_addr_t data_phys;
+
+	/* Details of transfer */
+	size_t size;
+	size_t pos;	/* current position of transfer */
+	int flags;
+
+	/* What to do when finished */
+	void (*callback)(struct mpc52xx_lpbfifo_request *);
+
+	void *priv;		/* Driver private data */
+
+	/* statistics */
+	int irq_count;
+	int irq_ticks;
+	u8 last_byte;
+	int buffer_not_done_cnt;
+};
+
+extern int mpc52xx_lpbfifo_submit(struct mpc52xx_lpbfifo_request *req);
+extern void mpc52xx_lpbfifo_abort(struct mpc52xx_lpbfifo_request *req);
+extern void mpc52xx_lpbfifo_poll(void);
+
 /* mpc52xx_pic.c */
 extern void mpc52xx_init_irq(void);
 extern unsigned int mpc52xx_get_irq(void);
diff --git a/arch/powerpc/include/asm/nvram.h b/arch/powerpc/include/asm/nvram.h
index 6c587ed..850b72f 100644
--- a/arch/powerpc/include/asm/nvram.h
+++ b/arch/powerpc/include/asm/nvram.h
@@ -73,7 +73,6 @@
 extern int nvram_read_error_log(char * buff, int length,
 					 unsigned int * err_type, unsigned int *err_seq);
 extern int nvram_clear_error_log(void);
-extern struct nvram_partition *nvram_find_partition(int sig, const char *name);
 
 extern int pSeries_nvram_init(void);
 
diff --git a/arch/powerpc/include/asm/pSeries_reconfig.h b/arch/powerpc/include/asm/pSeries_reconfig.h
index e482e53..d4b4bfa 100644
--- a/arch/powerpc/include/asm/pSeries_reconfig.h
+++ b/arch/powerpc/include/asm/pSeries_reconfig.h
@@ -17,6 +17,7 @@
 #ifdef CONFIG_PPC_PSERIES
 extern int pSeries_reconfig_notifier_register(struct notifier_block *);
 extern void pSeries_reconfig_notifier_unregister(struct notifier_block *);
+extern struct blocking_notifier_head pSeries_reconfig_chain;
 #else /* !CONFIG_PPC_PSERIES */
 static inline int pSeries_reconfig_notifier_register(struct notifier_block *nb)
 {
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 7d8514c..5e9b4ef 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -129,6 +129,15 @@
 	u64 system_time;		/* accumulated system TB ticks */
 	u64 startpurr;			/* PURR/TB value snapshot */
 	u64 startspurr;			/* SPURR value snapshot */
+
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
+	struct  {
+		u64     esid;
+		u64     vsid;
+	} kvm_slb[64];			/* guest SLB */
+	u8 kvm_slb_max;			/* highest used guest slb entry */
+	u8 kvm_in_guest;		/* are we inside the guest? */
+#endif
 };
 
 extern struct paca_struct paca[];
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index ff24254..e96d52a 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -229,6 +229,20 @@
 
 #endif
 
+typedef struct { signed long pd; } hugepd_t;
+#define HUGEPD_SHIFT_MASK     0x3f
+
+#ifdef CONFIG_HUGETLB_PAGE
+static inline int hugepd_ok(hugepd_t hpd)
+{
+	return (hpd.pd > 0);
+}
+
+#define is_hugepd(pdep)               (hugepd_ok(*((hugepd_t *)(pdep))))
+#else /* CONFIG_HUGETLB_PAGE */
+#define is_hugepd(pdep)			0
+#endif /* CONFIG_HUGETLB_PAGE */
+
 struct page;
 extern void clear_user_page(void *page, unsigned long vaddr, struct page *pg);
 extern void copy_user_page(void *to, void *from, unsigned long vaddr,
diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h
index 3f17b83..bfc4e02 100644
--- a/arch/powerpc/include/asm/page_64.h
+++ b/arch/powerpc/include/asm/page_64.h
@@ -90,7 +90,7 @@
 #define HPAGE_SIZE		((1UL) << HPAGE_SHIFT)
 #define HPAGE_MASK		(~(HPAGE_SIZE - 1))
 #define HUGETLB_PAGE_ORDER	(HPAGE_SHIFT - PAGE_SHIFT)
-#define HUGE_MAX_HSTATE		3
+#define HUGE_MAX_HSTATE		(MMU_PAGE_COUNT-1)
 
 #endif /* __ASSEMBLY__ */
 
diff --git a/arch/powerpc/include/asm/pgalloc-32.h b/arch/powerpc/include/asm/pgalloc-32.h
index c9500d6..580cf73 100644
--- a/arch/powerpc/include/asm/pgalloc-32.h
+++ b/arch/powerpc/include/asm/pgalloc-32.h
@@ -3,7 +3,8 @@
 
 #include <linux/threads.h>
 
-#define PTE_NONCACHE_NUM	0  /* dummy for now to share code w/ppc64 */
+/* For 32-bit, all levels of page tables are just drawn from get_free_page() */
+#define MAX_PGTABLE_INDEX_SIZE	0
 
 extern void __bad_pte(pmd_t *pmd);
 
@@ -36,11 +37,10 @@
 extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
 extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr);
 
-static inline void pgtable_free(pgtable_free_t pgf)
+static inline void pgtable_free(void *table, unsigned index_size)
 {
-	void *p = (void *)(pgf.val & ~PGF_CACHENUM_MASK);
-
-	free_page((unsigned long)p);
+	BUG_ON(index_size); /* 32-bit doesn't use this */
+	free_page((unsigned long)table);
 }
 
 #define check_pgt_cache()	do { } while (0)
diff --git a/arch/powerpc/include/asm/pgalloc-64.h b/arch/powerpc/include/asm/pgalloc-64.h
index e6f069c..605f5c5 100644
--- a/arch/powerpc/include/asm/pgalloc-64.h
+++ b/arch/powerpc/include/asm/pgalloc-64.h
@@ -11,27 +11,34 @@
 #include <linux/cpumask.h>
 #include <linux/percpu.h>
 
-#ifndef CONFIG_PPC_SUBPAGE_PROT
-static inline void subpage_prot_free(pgd_t *pgd) {}
-#endif
+/*
+ * Functions that deal with pagetables that could be at any level of
+ * the table need to be passed an "index_size" so they know how to
+ * handle allocation.  For PTE pages (which are linked to a struct
+ * page for now, and drawn from the main get_free_pages() pool), the
+ * allocation size will be (2^index_size * sizeof(pointer)) and
+ * allocations are drawn from the kmem_cache in PGT_CACHE(index_size).
+ *
+ * The maximum index size needs to be big enough to allow any
+ * pagetable sizes we need, but small enough to fit in the low bits of
+ * any page table pointer.  In other words all pagetables, even tiny
+ * ones, must be aligned to allow at least enough low 0 bits to
+ * contain this value.  This value is also used as a mask, so it must
+ * be one less than a power of two.
+ */
+#define MAX_PGTABLE_INDEX_SIZE	0xf
 
 extern struct kmem_cache *pgtable_cache[];
-
-#define PGD_CACHE_NUM		0
-#define PUD_CACHE_NUM		1
-#define PMD_CACHE_NUM		1
-#define HUGEPTE_CACHE_NUM	2
-#define PTE_NONCACHE_NUM	7  /* from GFP rather than kmem_cache */
+#define PGT_CACHE(shift) (pgtable_cache[(shift)-1])
 
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
-	return kmem_cache_alloc(pgtable_cache[PGD_CACHE_NUM], GFP_KERNEL);
+	return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), GFP_KERNEL);
 }
 
 static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 {
-	subpage_prot_free(pgd);
-	kmem_cache_free(pgtable_cache[PGD_CACHE_NUM], pgd);
+	kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd);
 }
 
 #ifndef CONFIG_PPC_64K_PAGES
@@ -40,13 +47,13 @@
 
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-	return kmem_cache_alloc(pgtable_cache[PUD_CACHE_NUM],
+	return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE),
 				GFP_KERNEL|__GFP_REPEAT);
 }
 
 static inline void pud_free(struct mm_struct *mm, pud_t *pud)
 {
-	kmem_cache_free(pgtable_cache[PUD_CACHE_NUM], pud);
+	kmem_cache_free(PGT_CACHE(PUD_INDEX_SIZE), pud);
 }
 
 static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
@@ -78,13 +85,13 @@
 
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-	return kmem_cache_alloc(pgtable_cache[PMD_CACHE_NUM],
+	return kmem_cache_alloc(PGT_CACHE(PMD_INDEX_SIZE),
 				GFP_KERNEL|__GFP_REPEAT);
 }
 
 static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 {
-	kmem_cache_free(pgtable_cache[PMD_CACHE_NUM], pmd);
+	kmem_cache_free(PGT_CACHE(PMD_INDEX_SIZE), pmd);
 }
 
 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
@@ -107,24 +114,22 @@
 	return page;
 }
 
-static inline void pgtable_free(pgtable_free_t pgf)
+static inline void pgtable_free(void *table, unsigned index_size)
 {
-	void *p = (void *)(pgf.val & ~PGF_CACHENUM_MASK);
-	int cachenum = pgf.val & PGF_CACHENUM_MASK;
-
-	if (cachenum == PTE_NONCACHE_NUM)
-		free_page((unsigned long)p);
-	else
-		kmem_cache_free(pgtable_cache[cachenum], p);
+	if (!index_size)
+		free_page((unsigned long)table);
+	else {
+		BUG_ON(index_size > MAX_PGTABLE_INDEX_SIZE);
+		kmem_cache_free(PGT_CACHE(index_size), table);
+	}
 }
 
-#define __pmd_free_tlb(tlb, pmd,addr)		      \
-	pgtable_free_tlb(tlb, pgtable_free_cache(pmd, \
-		PMD_CACHE_NUM, PMD_TABLE_SIZE-1))
+#define __pmd_free_tlb(tlb, pmd, addr)		      \
+	pgtable_free_tlb(tlb, pmd, PMD_INDEX_SIZE)
 #ifndef CONFIG_PPC_64K_PAGES
 #define __pud_free_tlb(tlb, pud, addr)		      \
-	pgtable_free_tlb(tlb, pgtable_free_cache(pud, \
-		PUD_CACHE_NUM, PUD_TABLE_SIZE-1))
+	pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE)
+
 #endif /* CONFIG_PPC_64K_PAGES */
 
 #define check_pgt_cache()	do { } while (0)
diff --git a/arch/powerpc/include/asm/pgalloc.h b/arch/powerpc/include/asm/pgalloc.h
index f2e812d..abe8532 100644
--- a/arch/powerpc/include/asm/pgalloc.h
+++ b/arch/powerpc/include/asm/pgalloc.h
@@ -24,25 +24,6 @@
 	__free_page(ptepage);
 }
 
-typedef struct pgtable_free {
-	unsigned long val;
-} pgtable_free_t;
-
-/* This needs to be big enough to allow for MMU_PAGE_COUNT + 2 to be stored
- * and small enough to fit in the low bits of any naturally aligned page
- * table cache entry. Arbitrarily set to 0x1f, that should give us some
- * room to grow
- */
-#define PGF_CACHENUM_MASK	0x1f
-
-static inline pgtable_free_t pgtable_free_cache(void *p, int cachenum,
-						unsigned long mask)
-{
-	BUG_ON(cachenum > PGF_CACHENUM_MASK);
-
-	return (pgtable_free_t){.val = ((unsigned long) p & ~mask) | cachenum};
-}
-
 #ifdef CONFIG_PPC64
 #include <asm/pgalloc-64.h>
 #else
@@ -50,12 +31,12 @@
 #endif
 
 #ifdef CONFIG_SMP
-extern void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf);
+extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, unsigned shift);
 extern void pte_free_finish(void);
 #else /* CONFIG_SMP */
-static inline void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf)
+static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, unsigned shift)
 {
-	pgtable_free(pgf);
+	pgtable_free(table, shift);
 }
 static inline void pte_free_finish(void) { }
 #endif /* !CONFIG_SMP */
@@ -63,12 +44,9 @@
 static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *ptepage,
 				  unsigned long address)
 {
-	pgtable_free_t pgf = pgtable_free_cache(page_address(ptepage),
-						PTE_NONCACHE_NUM,
-						PTE_TABLE_SIZE-1);
 	tlb_flush_pgtable(tlb, address);
 	pgtable_page_dtor(ptepage);
-	pgtable_free_tlb(tlb, pgf);
+	pgtable_free_tlb(tlb, page_address(ptepage), 0);
 }
 
 #endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h
index 806abe7..4986504 100644
--- a/arch/powerpc/include/asm/pgtable-ppc64.h
+++ b/arch/powerpc/include/asm/pgtable-ppc64.h
@@ -354,6 +354,7 @@
 #define pgoff_to_pte(off)	((pte_t) {((off) << PTE_RPN_SHIFT)|_PAGE_FILE})
 #define PTE_FILE_MAX_BITS	(BITS_PER_LONG - PTE_RPN_SHIFT)
 
+void pgtable_cache_add(unsigned shift, void (*ctor)(void *));
 void pgtable_cache_init(void);
 
 /*
@@ -378,7 +379,18 @@
 	return pt;
 }
 
-pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long address);
+#ifdef CONFIG_HUGETLB_PAGE
+pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
+				 unsigned *shift);
+#else
+static inline pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
+					       unsigned *shift)
+{
+	if (shift)
+		*shift = 0;
+	return find_linux_pte(pgdir, ea);
+}
+#endif /* !CONFIG_HUGETLB_PAGE */
 
 #endif /* __ASSEMBLY__ */
 
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index 2a5da06..21207e5 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -211,6 +211,9 @@
  */
 extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t);
 
+extern int gup_hugepd(hugepd_t *hugepd, unsigned pdshift, unsigned long addr,
+		      unsigned long end, int write, struct page **pages, int *nr);
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/pte-8xx.h b/arch/powerpc/include/asm/pte-8xx.h
index dd5ea95..d44826e 100644
--- a/arch/powerpc/include/asm/pte-8xx.h
+++ b/arch/powerpc/include/asm/pte-8xx.h
@@ -33,21 +33,21 @@
 #define _PAGE_NO_CACHE	0x0002	/* I: cache inhibit */
 #define _PAGE_SHARED	0x0004	/* No ASID (context) compare */
 #define _PAGE_SPECIAL	0x0008	/* SW entry, forced to 0 by the TLB miss */
+#define _PAGE_DIRTY	0x0100	/* C: page changed */
 
-/* These five software bits must be masked out when the entry is loaded
- * into the TLB.
+/* These 4 software bits must be masked out when the entry is loaded
+ * into the TLB, 1 SW bit left(0x0080).
  */
 #define _PAGE_GUARDED	0x0010	/* software: guarded access */
-#define _PAGE_DIRTY	0x0020	/* software: page changed */
-#define _PAGE_RW	0x0040	/* software: user write access allowed */
-#define _PAGE_ACCESSED	0x0080	/* software: page referenced */
+#define _PAGE_ACCESSED	0x0020	/* software: page referenced */
+#define _PAGE_WRITETHRU	0x0040	/* software: caching is write through */
 
 /* Setting any bits in the nibble with the follow two controls will
  * require a TLB exception handler change.  It is assumed unused bits
  * are always zero.
  */
-#define _PAGE_HWWRITE	0x0100	/* h/w write enable: never set in Linux PTE */
-#define _PAGE_USER	0x0800	/* One of the PP bits, the other is USER&~RW */
+#define _PAGE_RW	0x0400	/* lsb PP bits, inverted in HW */
+#define _PAGE_USER	0x0800	/* msb PP bits */
 
 #define _PMD_PRESENT	0x0001
 #define _PMD_BAD	0x0ff0
diff --git a/arch/powerpc/include/asm/pte-hash64-64k.h b/arch/powerpc/include/asm/pte-hash64-64k.h
index 82b72207..c4490f9 100644
--- a/arch/powerpc/include/asm/pte-hash64-64k.h
+++ b/arch/powerpc/include/asm/pte-hash64-64k.h
@@ -76,41 +76,4 @@
 	remap_pfn_range((vma), (addr), (pfn), PAGE_SIZE,		\
 			__pgprot(pgprot_val((prot)) | _PAGE_4K_PFN))
 
-
-#ifdef CONFIG_PPC_SUBPAGE_PROT
-/*
- * For the sub-page protection option, we extend the PGD with one of
- * these.  Basically we have a 3-level tree, with the top level being
- * the protptrs array.  To optimize speed and memory consumption when
- * only addresses < 4GB are being protected, pointers to the first
- * four pages of sub-page protection words are stored in the low_prot
- * array.
- * Each page of sub-page protection words protects 1GB (4 bytes
- * protects 64k).  For the 3-level tree, each page of pointers then
- * protects 8TB.
- */
-struct subpage_prot_table {
-	unsigned long maxaddr;	/* only addresses < this are protected */
-	unsigned int **protptrs[2];
-	unsigned int *low_prot[4];
-};
-
-#undef PGD_TABLE_SIZE
-#define PGD_TABLE_SIZE		((sizeof(pgd_t) << PGD_INDEX_SIZE) + \
-				 sizeof(struct subpage_prot_table))
-
-#define SBP_L1_BITS		(PAGE_SHIFT - 2)
-#define SBP_L2_BITS		(PAGE_SHIFT - 3)
-#define SBP_L1_COUNT		(1 << SBP_L1_BITS)
-#define SBP_L2_COUNT		(1 << SBP_L2_BITS)
-#define SBP_L2_SHIFT		(PAGE_SHIFT + SBP_L1_BITS)
-#define SBP_L3_SHIFT		(SBP_L2_SHIFT + SBP_L2_BITS)
-
-extern void subpage_prot_free(pgd_t *pgd);
-
-static inline struct subpage_prot_table *pgd_subpage_prot(pgd_t *pgd)
-{
-	return (struct subpage_prot_table *)(pgd + PTRS_PER_PGD);
-}
-#endif /* CONFIG_PPC_SUBPAGE_PROT */
 #endif	/* __ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/qe.h b/arch/powerpc/include/asm/qe.h
index f388f0a..0947b36 100644
--- a/arch/powerpc/include/asm/qe.h
+++ b/arch/powerpc/include/asm/qe.h
@@ -87,7 +87,7 @@
 
 /* Export QE common operations */
 #ifdef CONFIG_QUICC_ENGINE
-extern void __init qe_reset(void);
+extern void qe_reset(void);
 #else
 static inline void qe_reset(void) {}
 #endif
@@ -145,8 +145,17 @@
 static inline void qe_pin_set_dedicated(struct qe_pin *pin) {}
 #endif /* CONFIG_QE_GPIO */
 
-/* QE internal API */
+#ifdef CONFIG_QUICC_ENGINE
 int qe_issue_cmd(u32 cmd, u32 device, u8 mcn_protocol, u32 cmd_input);
+#else
+static inline int qe_issue_cmd(u32 cmd, u32 device, u8 mcn_protocol,
+			       u32 cmd_input)
+{
+	return -ENOSYS;
+}
+#endif /* CONFIG_QUICC_ENGINE */
+
+/* QE internal API */
 enum qe_clock qe_clock_source(const char *source);
 unsigned int qe_get_brg_clk(void);
 int qe_setbrg(enum qe_clock brg, unsigned int rate, unsigned int multiplier);
@@ -154,7 +163,28 @@
 void qe_put_snum(u8 snum);
 unsigned int qe_get_num_of_risc(void);
 unsigned int qe_get_num_of_snums(void);
-int qe_alive_during_sleep(void);
+
+static inline int qe_alive_during_sleep(void)
+{
+	/*
+	 * MPC8568E reference manual says:
+	 *
+	 * "...power down sequence waits for all I/O interfaces to become idle.
+	 *  In some applications this may happen eventually without actively
+	 *  shutting down interfaces, but most likely, software will have to
+	 *  take steps to shut down the eTSEC, QUICC Engine Block, and PCI
+	 *  interfaces before issuing the command (either the write to the core
+	 *  MSR[WE] as described above or writing to POWMGTCSR) to put the
+	 *  device into sleep state."
+	 *
+	 * MPC8569E reference manual has a similar paragraph.
+	 */
+#ifdef CONFIG_PPC_85xx
+	return 0;
+#else
+	return 1;
+#endif
+}
 
 /* we actually use cpm_muram implementation, define this for convenience */
 #define qe_muram_init cpm_muram_init
@@ -210,8 +240,15 @@
 	u64 extended_modes;	/* Extended modes */
 };
 
+#ifdef CONFIG_QUICC_ENGINE
 /* Upload a firmware to the QE */
 int qe_upload_firmware(const struct qe_firmware *firmware);
+#else
+static inline int qe_upload_firmware(const struct qe_firmware *firmware)
+{
+	return -ENOSYS;
+}
+#endif /* CONFIG_QUICC_ENGINE */
 
 /* Obtain information on the uploaded firmware */
 struct qe_firmware_info *qe_get_firmware_info(void);
diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h
index c7d671a..07d2d19 100644
--- a/arch/powerpc/include/asm/systbl.h
+++ b/arch/powerpc/include/asm/systbl.h
@@ -145,7 +145,7 @@
 SYSCALL_SPU(setfsgid)
 SYSCALL_SPU(llseek)
 COMPAT_SYS_SPU(getdents)
-SYSX_SPU(sys_select,ppc32_select,ppc_select)
+SYSX_SPU(sys_select,ppc32_select,sys_select)
 SYSCALL_SPU(flock)
 SYSCALL_SPU(msync)
 COMPAT_SYS_SPU(readv)
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index b23664a..c002b04 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -42,10 +42,11 @@
 obj-$(CONFIG_PPC_970_NAP)	+= idle_power4.o
 obj-$(CONFIG_PPC_OF)		+= of_device.o of_platform.o prom_parse.o
 obj-$(CONFIG_PPC_CLOCK)		+= clock.o
-procfs-$(CONFIG_PPC64)		:= proc_ppc64.o
+procfs-y			:= proc_powerpc.o
 obj-$(CONFIG_PROC_FS)		+= $(procfs-y)
 rtaspci-$(CONFIG_PPC64)-$(CONFIG_PCI)	:= rtas_pci.o
 obj-$(CONFIG_PPC_RTAS)		+= rtas.o rtas-rtc.o $(rtaspci-y-y)
+obj-$(CONFIG_PPC_RTAS_DAEMON)	+= rtasd.o
 obj-$(CONFIG_RTAS_FLASH)	+= rtas_flash.o
 obj-$(CONFIG_RTAS_PROC)		+= rtas-proc.o
 obj-$(CONFIG_LPARCFG)		+= lparcfg.o
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 0812b0f..a6c2b63 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -190,6 +190,11 @@
 	DEFINE(PACA_SYSTEM_TIME, offsetof(struct paca_struct, system_time));
 	DEFINE(PACA_DATA_OFFSET, offsetof(struct paca_struct, data_offset));
 	DEFINE(PACA_TRAP_SAVE, offsetof(struct paca_struct, trap_save));
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
+	DEFINE(PACA_KVM_IN_GUEST, offsetof(struct paca_struct, kvm_in_guest));
+	DEFINE(PACA_KVM_SLB, offsetof(struct paca_struct, kvm_slb));
+	DEFINE(PACA_KVM_SLB_MAX, offsetof(struct paca_struct, kvm_slb_max));
+#endif
 #endif /* CONFIG_PPC64 */
 
 	/* RTAS */
@@ -398,14 +403,24 @@
 	DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst));
 	DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear));
 	DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr));
+
+	/* book3s_64 */
+#ifdef CONFIG_PPC64
+	DEFINE(VCPU_FAULT_DSISR, offsetof(struct kvm_vcpu, arch.fault_dsisr));
+	DEFINE(VCPU_HOST_RETIP, offsetof(struct kvm_vcpu, arch.host_retip));
+	DEFINE(VCPU_HOST_R2, offsetof(struct kvm_vcpu, arch.host_r2));
+	DEFINE(VCPU_HOST_MSR, offsetof(struct kvm_vcpu, arch.host_msr));
+	DEFINE(VCPU_SHADOW_MSR, offsetof(struct kvm_vcpu, arch.shadow_msr));
+	DEFINE(VCPU_TRAMPOLINE_LOWMEM, offsetof(struct kvm_vcpu, arch.trampoline_lowmem));
+	DEFINE(VCPU_TRAMPOLINE_ENTER, offsetof(struct kvm_vcpu, arch.trampoline_enter));
+	DEFINE(VCPU_HIGHMEM_HANDLER, offsetof(struct kvm_vcpu, arch.highmem_handler));
+	DEFINE(VCPU_HFLAGS, offsetof(struct kvm_vcpu, arch.hflags));
+#endif
 #endif
 #ifdef CONFIG_44x
 	DEFINE(PGD_T_LOG2, PGD_T_LOG2);
 	DEFINE(PTE_T_LOG2, PTE_T_LOG2);
 #endif
-#ifdef CONFIG_FSL_BOOKE
-	DEFINE(TLBCAM_SIZE, sizeof(struct tlbcam));
-#endif
 
 #ifdef CONFIG_KVM_EXIT_TIMING
 	DEFINE(VCPU_TIMING_EXIT_TBU, offsetof(struct kvm_vcpu,
diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c
index 0a8439a..6f4613d 100644
--- a/arch/powerpc/kernel/crash.c
+++ b/arch/powerpc/kernel/crash.c
@@ -373,7 +373,7 @@
 	hard_irq_disable();
 
 	for_each_irq(i) {
-		struct irq_desc *desc = irq_desc + i;
+		struct irq_desc *desc = irq_to_desc(i);
 
 		if (desc->status & IRQ_INPROGRESS)
 			desc->chip->eoi(i);
diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c
index e96cbbd..59c9285 100644
--- a/arch/powerpc/kernel/dma-swiotlb.c
+++ b/arch/powerpc/kernel/dma-swiotlb.c
@@ -21,7 +21,6 @@
 #include <asm/dma.h>
 #include <asm/abs_addr.h>
 
-int swiotlb __read_mostly;
 unsigned int ppc_swiotlb_enable;
 
 /*
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index c7eb4e0..e3be98f 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -41,6 +41,7 @@
 	. = 0x200
 _machine_check_pSeries:
 	HMT_MEDIUM
+	DO_KVM	0x200
 	mtspr	SPRN_SPRG_SCRATCH0,r13		/* save r13 */
 	EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common)
 
@@ -48,6 +49,7 @@
 	.globl data_access_pSeries
 data_access_pSeries:
 	HMT_MEDIUM
+	DO_KVM	0x300
 	mtspr	SPRN_SPRG_SCRATCH0,r13
 BEGIN_FTR_SECTION
 	mfspr	r13,SPRN_SPRG_PACA
@@ -77,6 +79,7 @@
 	.globl data_access_slb_pSeries
 data_access_slb_pSeries:
 	HMT_MEDIUM
+	DO_KVM	0x380
 	mtspr	SPRN_SPRG_SCRATCH0,r13
 	mfspr	r13,SPRN_SPRG_PACA		/* get paca address into r13 */
 	std	r3,PACA_EXSLB+EX_R3(r13)
@@ -115,6 +118,7 @@
 	.globl instruction_access_slb_pSeries
 instruction_access_slb_pSeries:
 	HMT_MEDIUM
+	DO_KVM	0x480
 	mtspr	SPRN_SPRG_SCRATCH0,r13
 	mfspr	r13,SPRN_SPRG_PACA		/* get paca address into r13 */
 	std	r3,PACA_EXSLB+EX_R3(r13)
@@ -154,6 +158,7 @@
 	.globl	system_call_pSeries
 system_call_pSeries:
 	HMT_MEDIUM
+	DO_KVM	0xc00
 BEGIN_FTR_SECTION
 	cmpdi	r0,0x1ebe
 	beq-	1f
@@ -187,14 +192,17 @@
 	 */
 performance_monitor_pSeries_1:
 	. = 0xf00
+	DO_KVM	0xf00
 	b	performance_monitor_pSeries
 
 altivec_unavailable_pSeries_1:
 	. = 0xf20
+	DO_KVM	0xf20
 	b	altivec_unavailable_pSeries
 
 vsx_unavailable_pSeries_1:
 	. = 0xf40
+	DO_KVM	0xf40
 	b	vsx_unavailable_pSeries
 
 #ifdef CONFIG_CBE_RAS
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index c38afdb..9258074 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -37,6 +37,7 @@
 #include <asm/firmware.h>
 #include <asm/page_64.h>
 #include <asm/irqflags.h>
+#include <asm/kvm_book3s_64_asm.h>
 
 /* The physical memory is layed out such that the secondary processor
  * spin code sits at 0x0000...0x00ff. On server, the vectors follow
@@ -165,6 +166,12 @@
 #include "exceptions-64s.S"
 #endif
 
+/* KVM trampoline code needs to be close to the interrupt handlers */
+
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
+#include "../kvm/book3s_64_rmhandlers.S"
+#endif
+
 _GLOBAL(generic_secondary_thread_init)
 	mr	r24,r3
 
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 6ded19d..678f98c 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -206,6 +206,8 @@
 	EXCEPTION_PROLOG
 	mfspr r4,SPRN_DAR
 	stw r4,_DAR(r11)
+	li r5,0x00f0
+	mtspr SPRN_DAR,r5	/* Tag DAR, to be used in DTLB Error */
 	mfspr r5,SPRN_DSISR
 	stw r5,_DSISR(r11)
 	addi r3,r1,STACK_FRAME_OVERHEAD
@@ -222,6 +224,8 @@
 	stw	r10,_DSISR(r11)
 	mr	r5,r10
 	mfspr	r4,SPRN_DAR
+	li	r10,0x00f0
+	mtspr	SPRN_DAR,r10	/* Tag DAR, to be used in DTLB Error */
 	EXC_XFER_EE_LITE(0x300, handle_page_fault)
 
 /* Instruction access exception.
@@ -244,6 +248,8 @@
 	EXCEPTION_PROLOG
 	mfspr	r4,SPRN_DAR
 	stw	r4,_DAR(r11)
+	li	r5,0x00f0
+	mtspr	SPRN_DAR,r5	/* Tag DAR, to be used in DTLB Error */
 	mfspr	r5,SPRN_DSISR
 	stw	r5,_DSISR(r11)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
@@ -333,26 +339,20 @@
 	mfspr	r11, SPRN_MD_TWC	/* ....and get the pte address */
 	lwz	r10, 0(r11)	/* Get the pte */
 
-#ifdef CONFIG_SWAP
-	/* do not set the _PAGE_ACCESSED bit of a non-present page */
-	andi.	r11, r10, _PAGE_PRESENT
-	beq	4f
-	ori	r10, r10, _PAGE_ACCESSED
-	mfspr	r11, SPRN_MD_TWC	/* get the pte address again */
-	stw	r10, 0(r11)
-4:
-#else
-	ori	r10, r10, _PAGE_ACCESSED
-	stw	r10, 0(r11)
-#endif
+	andi.	r11, r10, _PAGE_ACCESSED | _PAGE_PRESENT
+	cmpwi	cr0, r11, _PAGE_ACCESSED | _PAGE_PRESENT
+	bne-	cr0, 2f
+
+	/* Clear PP lsb, 0x400 */
+	rlwinm 	r10, r10, 0, 22, 20
 
 	/* The Linux PTE won't go exactly into the MMU TLB.
-	 * Software indicator bits 21, 22 and 28 must be clear.
+	 * Software indicator bits 22 and 28 must be clear.
 	 * Software indicator bits 24, 25, 26, and 27 must be
 	 * set.  All other Linux PTE bits control the behavior
 	 * of the MMU.
 	 */
-2:	li	r11, 0x00f0
+	li	r11, 0x00f0
 	rlwimi	r10, r11, 0, 24, 28	/* Set 24-27, clear 28 */
 	DO_8xx_CPU6(0x2d80, r3)
 	mtspr	SPRN_MI_RPN, r10	/* Update TLB entry */
@@ -365,6 +365,22 @@
 	lwz	r3, 8(r0)
 #endif
 	rfi
+2:
+	mfspr	r11, SPRN_SRR1
+	/* clear all error bits as TLB Miss
+	 * sets a few unconditionally
+	*/
+	rlwinm	r11, r11, 0, 0xffff
+	mtspr	SPRN_SRR1, r11
+
+	mfspr	r10, SPRN_M_TW	/* Restore registers */
+	lwz	r11, 0(r0)
+	mtcr	r11
+	lwz	r11, 4(r0)
+#ifdef CONFIG_8xx_CPU6
+	lwz	r3, 8(r0)
+#endif
+	b	InstructionAccess
 
 	. = 0x1200
 DataStoreTLBMiss:
@@ -406,29 +422,45 @@
 	 * above.
 	 */
 	rlwimi	r11, r10, 0, 27, 27
+	/* Insert the WriteThru flag into the TWC from the Linux PTE.
+	 * It is bit 25 in the Linux PTE and bit 30 in the TWC
+	 */
+	rlwimi	r11, r10, 32-5, 30, 30
 	DO_8xx_CPU6(0x3b80, r3)
 	mtspr	SPRN_MD_TWC, r11
 
-#ifdef CONFIG_SWAP
-	/* do not set the _PAGE_ACCESSED bit of a non-present page */
-	andi.	r11, r10, _PAGE_PRESENT
-	beq	4f
-	ori	r10, r10, _PAGE_ACCESSED
-4:
-	/* and update pte in table */
-#else
-	ori	r10, r10, _PAGE_ACCESSED
-#endif
-	mfspr	r11, SPRN_MD_TWC	/* get the pte address again */
-	stw	r10, 0(r11)
+	/* Both _PAGE_ACCESSED and _PAGE_PRESENT has to be set.
+	 * We also need to know if the insn is a load/store, so:
+	 * Clear _PAGE_PRESENT and load that which will
+	 * trap into DTLB Error with store bit set accordinly.
+	 */
+	/* PRESENT=0x1, ACCESSED=0x20
+	 * r11 = ((r10 & PRESENT) & ((r10 & ACCESSED) >> 5));
+	 * r10 = (r10 & ~PRESENT) | r11;
+	 */
+	rlwinm	r11, r10, 32-5, _PAGE_PRESENT
+	and	r11, r11, r10
+	rlwimi	r10, r11, 0, _PAGE_PRESENT
+
+	/* Honour kernel RO, User NA */
+	/* 0x200 == Extended encoding, bit 22 */
+	/* r11 =  (r10 & _PAGE_USER) >> 2 */
+	rlwinm	r11, r10, 32-2, 0x200
+	or	r10, r11, r10
+	/* r11 =  (r10 & _PAGE_RW) >> 1 */
+	rlwinm	r11, r10, 32-1, 0x200
+	or	r10, r11, r10
+	/* invert RW and 0x200 bits */
+	xori	r10, r10, _PAGE_RW | 0x200
 
 	/* The Linux PTE won't go exactly into the MMU TLB.
-	 * Software indicator bits 21, 22 and 28 must be clear.
+	 * Software indicator bits 22 and 28 must be clear.
 	 * Software indicator bits 24, 25, 26, and 27 must be
 	 * set.  All other Linux PTE bits control the behavior
 	 * of the MMU.
 	 */
 2:	li	r11, 0x00f0
+	mtspr	SPRN_DAR,r11	/* Tag DAR */
 	rlwimi	r10, r11, 0, 24, 28	/* Set 24-27, clear 28 */
 	DO_8xx_CPU6(0x3d80, r3)
 	mtspr	SPRN_MD_RPN, r10	/* Update TLB entry */
@@ -469,97 +501,10 @@
 	stw	r10, 0(r0)
 	stw	r11, 4(r0)
 
-	/* First, make sure this was a store operation.
-	*/
-	mfspr	r10, SPRN_DSISR
-	andis.	r11, r10, 0x0200	/* If set, indicates store op */
-	beq	2f
-
-	/* The EA of a data TLB miss is automatically stored in the MD_EPN
-	 * register.  The EA of a data TLB error is automatically stored in
-	 * the DAR, but not the MD_EPN register.  We must copy the 20 most
-	 * significant bits of the EA from the DAR to MD_EPN before we
-	 * start walking the page tables.  We also need to copy the CASID
-	 * value from the M_CASID register.
-	 * Addendum:  The EA of a data TLB error is _supposed_ to be stored
-	 * in DAR, but it seems that this doesn't happen in some cases, such
-	 * as when the error is due to a dcbi instruction to a page with a
-	 * TLB that doesn't have the changed bit set.  In such cases, there
-	 * does not appear to be any way  to recover the EA of the error
-	 * since it is neither in DAR nor MD_EPN.  As a workaround, the
-	 * _PAGE_HWWRITE bit is set for all kernel data pages when the PTEs
-	 * are initialized in mapin_ram().  This will avoid the problem,
-	 * assuming we only use the dcbi instruction on kernel addresses.
-	 */
 	mfspr	r10, SPRN_DAR
-	rlwinm	r11, r10, 0, 0, 19
-	ori	r11, r11, MD_EVALID
-	mfspr	r10, SPRN_M_CASID
-	rlwimi	r11, r10, 0, 28, 31
-	DO_8xx_CPU6(0x3780, r3)
-	mtspr	SPRN_MD_EPN, r11
-
-	mfspr	r10, SPRN_M_TWB	/* Get level 1 table entry address */
-
-	/* If we are faulting a kernel address, we have to use the
-	 * kernel page tables.
-	 */
-	andi.	r11, r10, 0x0800
-	beq	3f
-	lis	r11, swapper_pg_dir@h
-	ori	r11, r11, swapper_pg_dir@l
-	rlwimi	r10, r11, 0, 2, 19
-3:
-	lwz	r11, 0(r10)	/* Get the level 1 entry */
-	rlwinm.	r10, r11,0,0,19	/* Extract page descriptor page address */
-	beq	2f		/* If zero, bail */
-
-	/* We have a pte table, so fetch the pte from the table.
-	 */
-	ori	r11, r11, 1		/* Set valid bit in physical L2 page */
-	DO_8xx_CPU6(0x3b80, r3)
-	mtspr	SPRN_MD_TWC, r11		/* Load pte table base address */
-	mfspr	r11, SPRN_MD_TWC		/* ....and get the pte address */
-	lwz	r10, 0(r11)		/* Get the pte */
-
-	andi.	r11, r10, _PAGE_RW	/* Is it writeable? */
-	beq	2f			/* Bail out if not */
-
-	/* Update 'changed', among others.
-	*/
-#ifdef CONFIG_SWAP
-	ori	r10, r10, _PAGE_DIRTY|_PAGE_HWWRITE
-	/* do not set the _PAGE_ACCESSED bit of a non-present page */
-	andi.	r11, r10, _PAGE_PRESENT
-	beq	4f
-	ori	r10, r10, _PAGE_ACCESSED
-4:
-#else
-	ori	r10, r10, _PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_HWWRITE
-#endif
-	mfspr	r11, SPRN_MD_TWC		/* Get pte address again */
-	stw	r10, 0(r11)		/* and update pte in table */
-
-	/* The Linux PTE won't go exactly into the MMU TLB.
-	 * Software indicator bits 21, 22 and 28 must be clear.
-	 * Software indicator bits 24, 25, 26, and 27 must be
-	 * set.  All other Linux PTE bits control the behavior
-	 * of the MMU.
-	 */
-	li	r11, 0x00f0
-	rlwimi	r10, r11, 0, 24, 28	/* Set 24-27, clear 28 */
-	DO_8xx_CPU6(0x3d80, r3)
-	mtspr	SPRN_MD_RPN, r10	/* Update TLB entry */
-
-	mfspr	r10, SPRN_M_TW	/* Restore registers */
-	lwz	r11, 0(r0)
-	mtcr	r11
-	lwz	r11, 4(r0)
-#ifdef CONFIG_8xx_CPU6
-	lwz	r3, 8(r0)
-#endif
-	rfi
-2:
+	cmpwi	cr0, r10, 0x00f0
+	beq-	FixupDAR	/* must be a buggy dcbX, icbi insn. */
+DARFixed:/* Return from dcbx instruction bug workaround, r10 holds value of DAR */
 	mfspr	r10, SPRN_M_TW	/* Restore registers */
 	lwz	r11, 0(r0)
 	mtcr	r11
@@ -588,6 +533,140 @@
 
 	. = 0x2000
 
+/* This is the procedure to calculate the data EA for buggy dcbx,dcbi instructions
+ * by decoding the registers used by the dcbx instruction and adding them.
+ * DAR is set to the calculated address and r10 also holds the EA on exit.
+ */
+ /* define if you don't want to use self modifying code */
+#define NO_SELF_MODIFYING_CODE
+FixupDAR:/* Entry point for dcbx workaround. */
+	/* fetch instruction from memory. */
+	mfspr	r10, SPRN_SRR0
+	DO_8xx_CPU6(0x3780, r3)
+	mtspr	SPRN_MD_EPN, r10
+	mfspr	r11, SPRN_M_TWB	/* Get level 1 table entry address */
+	cmplwi	cr0, r11, 0x0800
+	blt-	3f		/* Branch if user space */
+	lis	r11, (swapper_pg_dir-PAGE_OFFSET)@h
+	ori	r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l
+	rlwimi	r11, r10, 32-20, 0xffc /* r11 = r11&~0xffc|(r10>>20)&0xffc */
+3:	lwz	r11, 0(r11)	/* Get the level 1 entry */
+	DO_8xx_CPU6(0x3b80, r3)
+	mtspr	SPRN_MD_TWC, r11	/* Load pte table base address */
+	mfspr	r11, SPRN_MD_TWC	/* ....and get the pte address */
+	lwz	r11, 0(r11)	/* Get the pte */
+	/* concat physical page address(r11) and page offset(r10) */
+	rlwimi	r11, r10, 0, 20, 31
+	lwz	r11,0(r11)
+/* Check if it really is a dcbx instruction. */
+/* dcbt and dcbtst does not generate DTLB Misses/Errors,
+ * no need to include them here */
+	srwi	r10, r11, 26	/* check if major OP code is 31 */
+	cmpwi	cr0, r10, 31
+	bne-	141f
+	rlwinm	r10, r11, 0, 21, 30
+	cmpwi	cr0, r10, 2028	/* Is dcbz? */
+	beq+	142f
+	cmpwi	cr0, r10, 940	/* Is dcbi? */
+	beq+	142f
+	cmpwi	cr0, r10, 108	/* Is dcbst? */
+	beq+	144f		/* Fix up store bit! */
+	cmpwi	cr0, r10, 172	/* Is dcbf? */
+	beq+	142f
+	cmpwi	cr0, r10, 1964	/* Is icbi? */
+	beq+	142f
+141:	mfspr	r10, SPRN_DAR	/* r10 must hold DAR at exit */
+	b	DARFixed	/* Nope, go back to normal TLB processing */
+
+144:	mfspr	r10, SPRN_DSISR
+	rlwinm	r10, r10,0,7,5	/* Clear store bit for buggy dcbst insn */
+	mtspr	SPRN_DSISR, r10
+142:	/* continue, it was a dcbx, dcbi instruction. */
+#ifdef CONFIG_8xx_CPU6
+	lwz	r3, 8(r0)	/* restore r3 from memory */
+#endif
+#ifndef NO_SELF_MODIFYING_CODE
+	andis.	r10,r11,0x1f	/* test if reg RA is r0 */
+	li	r10,modified_instr@l
+	dcbtst	r0,r10		/* touch for store */
+	rlwinm	r11,r11,0,0,20	/* Zero lower 10 bits */
+	oris	r11,r11,640	/* Transform instr. to a "add r10,RA,RB" */
+	ori	r11,r11,532
+	stw	r11,0(r10)	/* store add/and instruction */
+	dcbf	0,r10		/* flush new instr. to memory. */
+	icbi	0,r10		/* invalidate instr. cache line */
+	lwz	r11, 4(r0)	/* restore r11 from memory */
+	mfspr	r10, SPRN_M_TW	/* restore r10 from M_TW */
+	isync			/* Wait until new instr is loaded from memory */
+modified_instr:
+	.space	4		/* this is where the add instr. is stored */
+	bne+	143f
+	subf	r10,r0,r10	/* r10=r10-r0, only if reg RA is r0 */
+143:	mtdar	r10		/* store faulting EA in DAR */
+	b	DARFixed	/* Go back to normal TLB handling */
+#else
+	mfctr	r10
+	mtdar	r10			/* save ctr reg in DAR */
+	rlwinm	r10, r11, 24, 24, 28	/* offset into jump table for reg RB */
+	addi	r10, r10, 150f@l	/* add start of table */
+	mtctr	r10			/* load ctr with jump address */
+	xor	r10, r10, r10		/* sum starts at zero */
+	bctr				/* jump into table */
+150:
+	add	r10, r10, r0	;b	151f
+	add	r10, r10, r1	;b	151f
+	add	r10, r10, r2	;b	151f
+	add	r10, r10, r3	;b	151f
+	add	r10, r10, r4	;b	151f
+	add	r10, r10, r5	;b	151f
+	add	r10, r10, r6	;b	151f
+	add	r10, r10, r7	;b	151f
+	add	r10, r10, r8	;b	151f
+	add	r10, r10, r9	;b	151f
+	mtctr	r11	;b	154f	/* r10 needs special handling */
+	mtctr	r11	;b	153f	/* r11 needs special handling */
+	add	r10, r10, r12	;b	151f
+	add	r10, r10, r13	;b	151f
+	add	r10, r10, r14	;b	151f
+	add	r10, r10, r15	;b	151f
+	add	r10, r10, r16	;b	151f
+	add	r10, r10, r17	;b	151f
+	add	r10, r10, r18	;b	151f
+	add	r10, r10, r19	;b	151f
+	add	r10, r10, r20	;b	151f
+	add	r10, r10, r21	;b	151f
+	add	r10, r10, r22	;b	151f
+	add	r10, r10, r23	;b	151f
+	add	r10, r10, r24	;b	151f
+	add	r10, r10, r25	;b	151f
+	add	r10, r10, r26	;b	151f
+	add	r10, r10, r27	;b	151f
+	add	r10, r10, r28	;b	151f
+	add	r10, r10, r29	;b	151f
+	add	r10, r10, r30	;b	151f
+	add	r10, r10, r31
+151:
+	rlwinm. r11,r11,19,24,28	/* offset into jump table for reg RA */
+	beq	152f			/* if reg RA is zero, don't add it */
+	addi	r11, r11, 150b@l	/* add start of table */
+	mtctr	r11			/* load ctr with jump address */
+	rlwinm	r11,r11,0,16,10		/* make sure we don't execute this more than once */
+	bctr				/* jump into table */
+152:
+	mfdar	r11
+	mtctr	r11			/* restore ctr reg from DAR */
+	mtdar	r10			/* save fault EA to DAR */
+	b	DARFixed		/* Go back to normal TLB handling */
+
+	/* special handling for r10,r11 since these are modified already */
+153:	lwz	r11, 4(r0)	/* load r11 from memory */
+	b	155f
+154:	mfspr	r11, SPRN_M_TW	/* load r10 from M_TW */
+155:	add	r10, r10, r11	/* add it */
+	mfctr	r11		/* restore r11 */
+	b	151b
+#endif
+
 	.globl	giveup_fpu
 giveup_fpu:
 	blr
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index 975788c..7f4bd7f 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -944,28 +944,6 @@
 	blr
 
 /*
- * extern void loadcam_entry(unsigned int index)
- *
- * Load TLBCAM[index] entry in to the L2 CAM MMU
- */
-_GLOBAL(loadcam_entry)
-	lis	r4,TLBCAM@ha
-	addi	r4,r4,TLBCAM@l
-	mulli	r5,r3,TLBCAM_SIZE
-	add	r3,r5,r4
-	lwz	r4,0(r3)
-	mtspr	SPRN_MAS0,r4
-	lwz	r4,4(r3)
-	mtspr	SPRN_MAS1,r4
-	lwz	r4,8(r3)
-	mtspr	SPRN_MAS2,r4
-	lwz	r4,12(r3)
-	mtspr	SPRN_MAS3,r4
-	tlbwe
-	isync
-	blr
-
-/*
  * extern void giveup_altivec(struct task_struct *prev)
  *
  * The e500 core does not have an AltiVec unit.
diff --git a/arch/powerpc/kernel/io.c b/arch/powerpc/kernel/io.c
index 1882bf4..8dc7547 100644
--- a/arch/powerpc/kernel/io.c
+++ b/arch/powerpc/kernel/io.c
@@ -161,7 +161,7 @@
 		dest++;
 		n--;
 	}
-	while(n > 4) {
+	while(n >= 4) {
 		*((u32 *)dest) = *((volatile u32 *)vsrc);
 		eieio();
 		vsrc += 4;
@@ -190,7 +190,7 @@
 		vdest++;
 		n--;
 	}
-	while(n > 4) {
+	while(n >= 4) {
 		*((volatile u32 *)vdest) = *((volatile u32 *)src);
 		src += 4;
 		vdest += 4;
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 02a3346..f6dca4f 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -87,7 +87,10 @@
 #endif /* CONFIG_PPC32 */
 
 #ifdef CONFIG_PPC64
+
+#ifndef CONFIG_SPARSE_IRQ
 EXPORT_SYMBOL(irq_desc);
+#endif
 
 int distribute_irqs = 1;
 
@@ -189,33 +192,7 @@
 		for_each_online_cpu(j)
 			seq_printf(p, "CPU%d       ", j);
 		seq_putc(p, '\n');
-	}
-
-	if (i < NR_IRQS) {
-		desc = get_irq_desc(i);
-		spin_lock_irqsave(&desc->lock, flags);
-		action = desc->action;
-		if (!action || !action->handler)
-			goto skip;
-		seq_printf(p, "%3d: ", i);
-#ifdef CONFIG_SMP
-		for_each_online_cpu(j)
-			seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
-#else
-		seq_printf(p, "%10u ", kstat_irqs(i));
-#endif /* CONFIG_SMP */
-		if (desc->chip)
-			seq_printf(p, " %s ", desc->chip->typename);
-		else
-			seq_puts(p, "  None      ");
-		seq_printf(p, "%s", (desc->status & IRQ_LEVEL) ? "Level " : "Edge  ");
-		seq_printf(p, "    %s", action->name);
-		for (action = action->next; action; action = action->next)
-			seq_printf(p, ", %s", action->name);
-		seq_putc(p, '\n');
-skip:
-		spin_unlock_irqrestore(&desc->lock, flags);
-	} else if (i == NR_IRQS) {
+	} else if (i == nr_irqs) {
 #if defined(CONFIG_PPC32) && defined(CONFIG_TAU_INT)
 		if (tau_initialized){
 			seq_puts(p, "TAU: ");
@@ -225,30 +202,68 @@
 		}
 #endif /* CONFIG_PPC32 && CONFIG_TAU_INT*/
 		seq_printf(p, "BAD: %10u\n", ppc_spurious_interrupts);
+
+		return 0;
 	}
+
+	desc = irq_to_desc(i);
+	if (!desc)
+		return 0;
+
+	spin_lock_irqsave(&desc->lock, flags);
+
+	action = desc->action;
+	if (!action || !action->handler)
+		goto skip;
+
+	seq_printf(p, "%3d: ", i);
+#ifdef CONFIG_SMP
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
+#else
+	seq_printf(p, "%10u ", kstat_irqs(i));
+#endif /* CONFIG_SMP */
+
+	if (desc->chip)
+		seq_printf(p, " %s ", desc->chip->name);
+	else
+		seq_puts(p, "  None      ");
+
+	seq_printf(p, "%s", (desc->status & IRQ_LEVEL) ? "Level " : "Edge  ");
+	seq_printf(p, "    %s", action->name);
+
+	for (action = action->next; action; action = action->next)
+		seq_printf(p, ", %s", action->name);
+	seq_putc(p, '\n');
+
+skip:
+	spin_unlock_irqrestore(&desc->lock, flags);
+
 	return 0;
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
 void fixup_irqs(cpumask_t map)
 {
+	struct irq_desc *desc;
 	unsigned int irq;
 	static int warned;
 
 	for_each_irq(irq) {
 		cpumask_t mask;
 
-		if (irq_desc[irq].status & IRQ_PER_CPU)
+		desc = irq_to_desc(irq);
+		if (desc && desc->status & IRQ_PER_CPU)
 			continue;
 
-		cpumask_and(&mask, irq_desc[irq].affinity, &map);
+		cpumask_and(&mask, desc->affinity, &map);
 		if (any_online_cpu(mask) == NR_CPUS) {
 			printk("Breaking affinity for irq %i\n", irq);
 			mask = map;
 		}
-		if (irq_desc[irq].chip->set_affinity)
-			irq_desc[irq].chip->set_affinity(irq, &mask);
-		else if (irq_desc[irq].action && !(warned++))
+		if (desc->chip->set_affinity)
+			desc->chip->set_affinity(irq, &mask);
+		else if (desc->action && !(warned++))
 			printk("Cannot set affinity for irq %i\n", irq);
 	}
 
@@ -275,7 +290,7 @@
 		return;
 	}
 
-	desc = irq_desc + irq;
+	desc = irq_to_desc(irq);
 	saved_sp_limit = current->thread.ksp_limit;
 
 	irqtp->task = curtp->task;
@@ -541,7 +556,7 @@
 			smp_wmb();
 
 			/* Clear norequest flags */
-			get_irq_desc(i)->status &= ~IRQ_NOREQUEST;
+			irq_to_desc(i)->status &= ~IRQ_NOREQUEST;
 
 			/* Legacy flags are left to default at this point,
 			 * one can then use irq_create_mapping() to
@@ -607,8 +622,16 @@
 static int irq_setup_virq(struct irq_host *host, unsigned int virq,
 			    irq_hw_number_t hwirq)
 {
+	struct irq_desc *desc;
+
+	desc = irq_to_desc_alloc_node(virq, 0);
+	if (!desc) {
+		pr_debug("irq: -> allocating desc failed\n");
+		goto error;
+	}
+
 	/* Clear IRQ_NOREQUEST flag */
-	get_irq_desc(virq)->status &= ~IRQ_NOREQUEST;
+	desc->status &= ~IRQ_NOREQUEST;
 
 	/* map it */
 	smp_wmb();
@@ -617,11 +640,14 @@
 
 	if (host->ops->map(host, virq, hwirq)) {
 		pr_debug("irq: -> mapping failed, freeing\n");
-		irq_free_virt(virq, 1);
-		return -1;
+		goto error;
 	}
 
 	return 0;
+
+error:
+	irq_free_virt(virq, 1);
+	return -1;
 }
 
 unsigned int irq_create_direct_mapping(struct irq_host *host)
@@ -705,7 +731,7 @@
 EXPORT_SYMBOL_GPL(irq_create_mapping);
 
 unsigned int irq_create_of_mapping(struct device_node *controller,
-				   u32 *intspec, unsigned int intsize)
+				   const u32 *intspec, unsigned int intsize)
 {
 	struct irq_host *host;
 	irq_hw_number_t hwirq;
@@ -738,7 +764,7 @@
 
 	/* Set type if specified and different than the current one */
 	if (type != IRQ_TYPE_NONE &&
-	    type != (get_irq_desc(virq)->status & IRQF_TRIGGER_MASK))
+	    type != (irq_to_desc(virq)->status & IRQF_TRIGGER_MASK))
 		set_irq_type(virq, type);
 	return virq;
 }
@@ -810,7 +836,7 @@
 	irq_map[virq].hwirq = host->inval_irq;
 
 	/* Set some flags */
-	get_irq_desc(virq)->status |= IRQ_NOREQUEST;
+	irq_to_desc(virq)->status |= IRQ_NOREQUEST;
 
 	/* Free it */
 	irq_free_virt(virq, 1);
@@ -1002,12 +1028,24 @@
 	spin_unlock_irqrestore(&irq_big_lock, flags);
 }
 
-void irq_early_init(void)
+int arch_early_irq_init(void)
 {
-	unsigned int i;
+	struct irq_desc *desc;
+	int i;
 
-	for (i = 0; i < NR_IRQS; i++)
-		get_irq_desc(i)->status |= IRQ_NOREQUEST;
+	for (i = 0; i < NR_IRQS; i++) {
+		desc = irq_to_desc(i);
+		if (desc)
+			desc->status |= IRQ_NOREQUEST;
+	}
+
+	return 0;
+}
+
+int arch_init_chip_data(struct irq_desc *desc, int node)
+{
+	desc->status |= IRQ_NOREQUEST;
+	return 0;
 }
 
 /* We need to create the radix trees late */
@@ -1069,16 +1107,19 @@
 	seq_printf(m, "%-5s  %-7s  %-15s  %s\n", "virq", "hwirq",
 		      "chip name", "host name");
 
-	for (i = 1; i < NR_IRQS; i++) {
-		desc = get_irq_desc(i);
+	for (i = 1; i < nr_irqs; i++) {
+		desc = irq_to_desc(i);
+		if (!desc)
+			continue;
+
 		spin_lock_irqsave(&desc->lock, flags);
 
 		if (desc->action && desc->action->handler) {
 			seq_printf(m, "%5d  ", i);
 			seq_printf(m, "0x%05lx  ", virq_to_hw(i));
 
-			if (desc->chip && desc->chip->typename)
-				p = desc->chip->typename;
+			if (desc->chip && desc->chip->name)
+				p = desc->chip->name;
 			else
 				p = none;
 			seq_printf(m, "%-15s  ", p);
diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c
index ed0ac4e..79a00bb 100644
--- a/arch/powerpc/kernel/lparcfg.c
+++ b/arch/powerpc/kernel/lparcfg.c
@@ -781,9 +781,9 @@
 			!firmware_has_feature(FW_FEATURE_ISERIES))
 		mode |= S_IWUSR;
 
-	ent = proc_create("ppc64/lparcfg", mode, NULL, &lparcfg_fops);
+	ent = proc_create("powerpc/lparcfg", mode, NULL, &lparcfg_fops);
 	if (!ent) {
-		printk(KERN_ERR "Failed to create ppc64/lparcfg\n");
+		printk(KERN_ERR "Failed to create powerpc/lparcfg\n");
 		return -EIO;
 	}
 
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index da9c0c4..8649f53 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -502,15 +502,7 @@
 	li	r0,PAGE_SIZE/L1_CACHE_BYTES
 	slw	r0,r0,r4
 	mtctr	r0
-#ifdef CONFIG_8xx
-	li	r4, 0
-1:	stw	r4, 0(r3)
-	stw	r4, 4(r3)
-	stw	r4, 8(r3)
-	stw	r4, 12(r3)
-#else
 1:	dcbz	0,r3
-#endif
 	addi	r3,r3,L1_CACHE_BYTES
 	bdnz	1b
 	blr
@@ -535,15 +527,6 @@
 	addi	r3,r3,-4
 	addi	r4,r4,-4
 
-#ifdef CONFIG_8xx
-	/* don't use prefetch on 8xx */
-    	li	r0,4096/L1_CACHE_BYTES
-	mtctr	r0
-1:	COPY_16_BYTES
-	bdnz	1b
-	blr
-
-#else	/* not 8xx, we can prefetch */
 	li	r5,4
 
 #if MAX_COPY_PREFETCH > 1
@@ -584,7 +567,6 @@
 	li	r0,MAX_COPY_PREFETCH
 	li	r11,4
 	b	2b
-#endif	/* CONFIG_8xx */
 
 /*
  * void atomic_clear_mask(atomic_t mask, atomic_t *addr)
diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c
index 0ed31f2..ad461e7 100644
--- a/arch/powerpc/kernel/nvram_64.c
+++ b/arch/powerpc/kernel/nvram_64.c
@@ -139,8 +139,8 @@
 
 }
 
-static int dev_nvram_ioctl(struct inode *inode, struct file *file,
-	unsigned int cmd, unsigned long arg)
+static long dev_nvram_ioctl(struct file *file, unsigned int cmd,
+			    unsigned long arg)
 {
 	switch(cmd) {
 #ifdef CONFIG_PPC_PMAC
@@ -169,11 +169,11 @@
 }
 
 const struct file_operations nvram_fops = {
-	.owner =	THIS_MODULE,
-	.llseek =	dev_nvram_llseek,
-	.read =		dev_nvram_read,
-	.write =	dev_nvram_write,
-	.ioctl =	dev_nvram_ioctl,
+	.owner		= THIS_MODULE,
+	.llseek		= dev_nvram_llseek,
+	.read		= dev_nvram_read,
+	.write		= dev_nvram_write,
+	.unlocked_ioctl	= dev_nvram_ioctl,
 };
 
 static struct miscdevice nvram_dev = {
@@ -184,7 +184,7 @@
 
 
 #ifdef DEBUG_NVRAM
-static void nvram_print_partitions(char * label)
+static void __init nvram_print_partitions(char * label)
 {
 	struct list_head * p;
 	struct nvram_partition * tmp_part;
@@ -202,7 +202,7 @@
 #endif
 
 
-static int nvram_write_header(struct nvram_partition * part)
+static int __init nvram_write_header(struct nvram_partition * part)
 {
 	loff_t tmp_index;
 	int rc;
@@ -214,7 +214,7 @@
 }
 
 
-static unsigned char nvram_checksum(struct nvram_header *p)
+static unsigned char __init nvram_checksum(struct nvram_header *p)
 {
 	unsigned int c_sum, c_sum2;
 	unsigned short *sp = (unsigned short *)p->name; /* assume 6 shorts */
@@ -228,32 +228,7 @@
 	return c_sum;
 }
 
-
-/*
- * Find an nvram partition, sig can be 0 for any
- * partition or name can be NULL for any name, else
- * tries to match both
- */
-struct nvram_partition *nvram_find_partition(int sig, const char *name)
-{
-	struct nvram_partition * part;
-	struct list_head * p;
-
-	list_for_each(p, &nvram_part->partition) {
-		part = list_entry(p, struct nvram_partition, partition);
-
-		if (sig && part->header.signature != sig)
-			continue;
-		if (name && 0 != strncmp(name, part->header.name, 12))
-			continue;
-		return part; 
-	}
-	return NULL;
-}
-EXPORT_SYMBOL(nvram_find_partition);
-
-
-static int nvram_remove_os_partition(void)
+static int __init nvram_remove_os_partition(void)
 {
 	struct list_head *i;
 	struct list_head *j;
@@ -319,7 +294,7 @@
  * Will create a partition starting at the first free
  * space found if space has enough room.
  */
-static int nvram_create_os_partition(void)
+static int __init nvram_create_os_partition(void)
 {
 	struct nvram_partition *part;
 	struct nvram_partition *new_part;
@@ -422,7 +397,7 @@
  * 5.) If the max chunk cannot be allocated then try finding a chunk
  * that will satisfy the minum needed (NVRAM_MIN_REQ).
  */
-static int nvram_setup_partition(void)
+static int __init nvram_setup_partition(void)
 {
 	struct list_head * p;
 	struct nvram_partition * part;
@@ -480,7 +455,7 @@
 }
 
 
-static int nvram_scan_partitions(void)
+static int __init nvram_scan_partitions(void)
 {
 	loff_t cur_index = 0;
 	struct nvram_header phead;
@@ -706,6 +681,9 @@
 	int clear_word = ERR_FLAG_ALREADY_LOGGED;
 	int rc;
 
+	if (nvram_error_log_index == -1)
+		return -1;
+
 	tmp_index = nvram_error_log_index;
 	
 	rc = ppc_md.nvram_write((char *)&clear_word, sizeof(int), &tmp_index);
diff --git a/arch/powerpc/kernel/perf_callchain.c b/arch/powerpc/kernel/perf_callchain.c
index 0a03cf7..936f04d 100644
--- a/arch/powerpc/kernel/perf_callchain.c
+++ b/arch/powerpc/kernel/perf_callchain.c
@@ -119,13 +119,6 @@
 }
 
 #ifdef CONFIG_PPC64
-
-#ifdef CONFIG_HUGETLB_PAGE
-#define is_huge_psize(pagesize)	(HPAGE_SHIFT && mmu_huge_psizes[pagesize])
-#else
-#define is_huge_psize(pagesize)	0
-#endif
-
 /*
  * On 64-bit we don't want to invoke hash_page on user addresses from
  * interrupt context, so if the access faults, we read the page tables
@@ -135,7 +128,7 @@
 {
 	pgd_t *pgdir;
 	pte_t *ptep, pte;
-	int pagesize;
+	unsigned shift;
 	unsigned long addr = (unsigned long) ptr;
 	unsigned long offset;
 	unsigned long pfn;
@@ -145,17 +138,14 @@
 	if (!pgdir)
 		return -EFAULT;
 
-	pagesize = get_slice_psize(current->mm, addr);
+	ptep = find_linux_pte_or_hugepte(pgdir, addr, &shift);
+	if (!shift)
+		shift = PAGE_SHIFT;
 
 	/* align address to page boundary */
-	offset = addr & ((1ul << mmu_psize_defs[pagesize].shift) - 1);
+	offset = addr & ((1UL << shift) - 1);
 	addr -= offset;
 
-	if (is_huge_psize(pagesize))
-		ptep = huge_pte_offset(current->mm, addr);
-	else
-		ptep = find_linux_pte(pgdir, addr);
-
 	if (ptep == NULL)
 		return -EFAULT;
 	pte = *ptep;
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index c8b27bb..4254514 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -96,8 +96,6 @@
 EXPORT_SYMBOL(isa_io_base);
 EXPORT_SYMBOL(isa_mem_base);
 EXPORT_SYMBOL(pci_dram_offset);
-EXPORT_SYMBOL(pci_alloc_consistent);
-EXPORT_SYMBOL(pci_free_consistent);
 #endif /* CONFIG_PCI */
 
 EXPORT_SYMBOL(start_thread);
@@ -162,7 +160,6 @@
 
 #ifdef CONFIG_PPC32
 EXPORT_SYMBOL(timer_interrupt);
-EXPORT_SYMBOL(irq_desc);
 EXPORT_SYMBOL(tb_ticks_per_jiffy);
 EXPORT_SYMBOL(cacheable_memcpy);
 EXPORT_SYMBOL(cacheable_memzero);
diff --git a/arch/powerpc/kernel/proc_ppc64.c b/arch/powerpc/kernel/proc_powerpc.c
similarity index 86%
rename from arch/powerpc/kernel/proc_ppc64.c
rename to arch/powerpc/kernel/proc_powerpc.c
index c647dde..1ed3b8d 100644
--- a/arch/powerpc/kernel/proc_ppc64.c
+++ b/arch/powerpc/kernel/proc_powerpc.c
@@ -28,55 +28,7 @@
 #include <asm/uaccess.h>
 #include <asm/prom.h>
 
-static loff_t  page_map_seek( struct file *file, loff_t off, int whence);
-static ssize_t page_map_read( struct file *file, char __user *buf, size_t nbytes,
-			      loff_t *ppos);
-static int     page_map_mmap( struct file *file, struct vm_area_struct *vma );
-
-static const struct file_operations page_map_fops = {
-	.llseek	= page_map_seek,
-	.read	= page_map_read,
-	.mmap	= page_map_mmap
-};
-
-/*
- * Create the ppc64 and ppc64/rtas directories early. This allows us to
- * assume that they have been previously created in drivers.
- */
-static int __init proc_ppc64_create(void)
-{
-	struct proc_dir_entry *root;
-
-	root = proc_mkdir("ppc64", NULL);
-	if (!root)
-		return 1;
-
-	if (!of_find_node_by_path("/rtas"))
-		return 0;
-
-	if (!proc_mkdir("rtas", root))
-		return 1;
-
-	if (!proc_symlink("rtas", NULL, "ppc64/rtas"))
-		return 1;
-
-	return 0;
-}
-core_initcall(proc_ppc64_create);
-
-static int __init proc_ppc64_init(void)
-{
-	struct proc_dir_entry *pde;
-
-	pde = proc_create_data("ppc64/systemcfg", S_IFREG|S_IRUGO, NULL,
-			       &page_map_fops, vdso_data);
-	if (!pde)
-		return 1;
-	pde->size = PAGE_SIZE;
-
-	return 0;
-}
-__initcall(proc_ppc64_init);
+#ifdef CONFIG_PPC64
 
 static loff_t page_map_seek( struct file *file, loff_t off, int whence)
 {
@@ -120,3 +72,55 @@
 	return 0;
 }
 
+static const struct file_operations page_map_fops = {
+	.llseek	= page_map_seek,
+	.read	= page_map_read,
+	.mmap	= page_map_mmap
+};
+
+
+static int __init proc_ppc64_init(void)
+{
+	struct proc_dir_entry *pde;
+
+	pde = proc_create_data("powerpc/systemcfg", S_IFREG|S_IRUGO, NULL,
+			       &page_map_fops, vdso_data);
+	if (!pde)
+		return 1;
+	pde->size = PAGE_SIZE;
+
+	return 0;
+}
+__initcall(proc_ppc64_init);
+
+#endif /* CONFIG_PPC64 */
+
+/*
+ * Create the ppc64 and ppc64/rtas directories early. This allows us to
+ * assume that they have been previously created in drivers.
+ */
+static int __init proc_ppc64_create(void)
+{
+	struct proc_dir_entry *root;
+
+	root = proc_mkdir("powerpc", NULL);
+	if (!root)
+		return 1;
+
+#ifdef CONFIG_PPC64
+	if (!proc_symlink("ppc64", NULL, "powerpc"))
+		pr_err("Failed to create link /proc/ppc64 -> /proc/powerpc\n");
+#endif
+
+	if (!of_find_node_by_path("/rtas"))
+		return 0;
+
+	if (!proc_mkdir("rtas", root))
+		return 1;
+
+	if (!proc_symlink("rtas", NULL, "powerpc/rtas"))
+		return 1;
+
+	return 0;
+}
+core_initcall(proc_ppc64_create);
diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c
index 13011a9..a85117d 100644
--- a/arch/powerpc/kernel/rtas_flash.c
+++ b/arch/powerpc/kernel/rtas_flash.c
@@ -6,7 +6,7 @@
  *      as published by the Free Software Foundation; either version
  *      2 of the License, or (at your option) any later version.
  *
- * /proc/ppc64/rtas/firmware_flash interface
+ * /proc/powerpc/rtas/firmware_flash interface
  *
  * This file implements a firmware_flash interface to pump a firmware
  * image into the kernel.  At reboot time rtas_restart() will see the
@@ -740,7 +740,7 @@
 		return 1;
 	}
 
-	firmware_flash_pde = create_flash_pde("ppc64/rtas/"
+	firmware_flash_pde = create_flash_pde("powerpc/rtas/"
 					      FIRMWARE_FLASH_NAME,
 					      &rtas_flash_operations);
 	if (firmware_flash_pde == NULL) {
@@ -754,7 +754,7 @@
 	if (rc != 0)
 		goto cleanup;
 
-	firmware_update_pde = create_flash_pde("ppc64/rtas/"
+	firmware_update_pde = create_flash_pde("powerpc/rtas/"
 					       FIRMWARE_UPDATE_NAME,
 					       &rtas_flash_operations);
 	if (firmware_update_pde == NULL) {
@@ -768,7 +768,7 @@
 	if (rc != 0)
 		goto cleanup;
 
-	validate_pde = create_flash_pde("ppc64/rtas/" VALIDATE_FLASH_NAME,
+	validate_pde = create_flash_pde("powerpc/rtas/" VALIDATE_FLASH_NAME,
 			      		&validate_flash_operations);
 	if (validate_pde == NULL) {
 		rc = -ENOMEM;
@@ -781,7 +781,7 @@
 	if (rc != 0)
 		goto cleanup;
 
-	manage_pde = create_flash_pde("ppc64/rtas/" MANAGE_FLASH_NAME,
+	manage_pde = create_flash_pde("powerpc/rtas/" MANAGE_FLASH_NAME,
 				      &manage_flash_operations);
 	if (manage_pde == NULL) {
 		rc = -ENOMEM;
diff --git a/arch/powerpc/platforms/pseries/rtasd.c b/arch/powerpc/kernel/rtasd.c
similarity index 95%
rename from arch/powerpc/platforms/pseries/rtasd.c
rename to arch/powerpc/kernel/rtasd.c
index b3cbac8..2e4832a 100644
--- a/arch/powerpc/platforms/pseries/rtasd.c
+++ b/arch/powerpc/kernel/rtasd.c
@@ -39,6 +39,7 @@
 static unsigned long rtas_log_size;
 
 static int surveillance_timeout = -1;
+
 static unsigned int rtas_error_log_max;
 static unsigned int rtas_error_log_buffer_max;
 
@@ -213,9 +214,11 @@
 		return;
 	}
 
+#ifdef CONFIG_PPC64
 	/* Write error to NVRAM */
 	if (logging_enabled && !(err_type & ERR_FLAG_BOOT))
 		nvram_write_error_log(buf, len, err_type, error_log_cnt);
+#endif /* CONFIG_PPC64 */
 
 	/*
 	 * rtas errors can occur during boot, and we do want to capture
@@ -264,7 +267,6 @@
 
 }
 
-
 static int rtas_log_open(struct inode * inode, struct file * file)
 {
 	return 0;
@@ -300,6 +302,7 @@
 		return -ENOMEM;
 
 	spin_lock_irqsave(&rtasd_log_lock, s);
+
 	/* if it's 0, then we know we got the last one (the one in NVRAM) */
 	while (rtas_log_size == 0) {
 		if (file->f_flags & O_NONBLOCK) {
@@ -313,7 +316,9 @@
 			error = -ENODATA;
 			goto out;
 		}
+#ifdef CONFIG_PPC64
 		nvram_clear_error_log();
+#endif /* CONFIG_PPC64 */
 
 		spin_unlock_irqrestore(&rtasd_log_lock, s);
 		error = wait_event_interruptible(rtas_log_wait, rtas_log_size);
@@ -427,14 +432,11 @@
 	put_online_cpus();
 }
 
-static void start_event_scan(void)
+#ifdef CONFIG_PPC64
+static void retreive_nvram_error_log(void)
 {
-	unsigned int err_type;
-	int rc;
-
-	printk(KERN_DEBUG "RTAS daemon started\n");
-	pr_debug("rtasd: will sleep for %d milliseconds\n",
-		 (30000 / rtas_event_scan_rate));
+	unsigned int err_type ;
+	int rc ;
 
 	/* See if we have any error stored in NVRAM */
 	memset(logdata, 0, rtas_error_log_max);
@@ -442,12 +444,26 @@
 	                          &err_type, &error_log_cnt);
 	/* We can use rtas_log_buf now */
 	logging_enabled = 1;
-
 	if (!rc) {
 		if (err_type != ERR_FLAG_ALREADY_LOGGED) {
 			pSeries_log_error(logdata, err_type | ERR_FLAG_BOOT, 0);
 		}
 	}
+}
+#else /* CONFIG_PPC64 */
+static void retreive_nvram_error_log(void)
+{
+}
+#endif /* CONFIG_PPC64 */
+
+static void start_event_scan(void)
+{
+	printk(KERN_DEBUG "RTAS daemon started\n");
+	pr_debug("rtasd: will sleep for %d milliseconds\n",
+		 (30000 / rtas_event_scan_rate));
+
+	/* Retreive errors from nvram if any */
+	retreive_nvram_error_log();
 
 	schedule_delayed_work_on(first_cpu(cpu_online_map), &event_scan_work,
 				 event_scan_delay);
@@ -457,13 +473,13 @@
 {
 	struct proc_dir_entry *entry;
 
-	if (!machine_is(pseries))
+	if (!machine_is(pseries) && !machine_is(chrp))
 		return 0;
 
 	/* No RTAS */
 	event_scan = rtas_token("event-scan");
 	if (event_scan == RTAS_UNKNOWN_SERVICE) {
-		printk(KERN_DEBUG "rtasd: no event-scan on system\n");
+		printk(KERN_INFO "rtasd: No event-scan on system\n");
 		return -ENODEV;
 	}
 
@@ -483,7 +499,7 @@
 		return -ENOMEM;
 	}
 
-	entry = proc_create("ppc64/rtas/error_log", S_IRUSR, NULL,
+	entry = proc_create("powerpc/rtas/error_log", S_IRUSR, NULL,
 			    &proc_rtas_log_operations);
 	if (!entry)
 		printk(KERN_ERR "Failed to create error_log proc entry\n");
@@ -492,11 +508,16 @@
 
 	return 0;
 }
+__initcall(rtas_init);
 
 static int __init surveillance_setup(char *str)
 {
 	int i;
 
+	/* We only do surveillance on pseries */
+	if (!machine_is(pseries))
+		return 0;
+
 	if (get_option(&str,&i)) {
 		if (i >= 0 && i <= 255)
 			surveillance_timeout = i;
@@ -504,6 +525,7 @@
 
 	return 1;
 }
+__setup("surveillance=", surveillance_setup);
 
 static int __init rtasmsgs_setup(char *str)
 {
@@ -514,6 +536,4 @@
 
 	return 1;
 }
-__initcall(rtas_init);
-__setup("surveillance=", surveillance_setup);
 __setup("rtasmsgs=", rtasmsgs_setup);
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index df2c9e9..6568406 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -356,11 +356,6 @@
 	 */
 	initialize_cache_info();
 
-	/*
-	 * Initialize irq remapping subsystem
-	 */
-	irq_early_init();
-
 #ifdef CONFIG_PPC_RTAS
 	/*
 	 * Initialize RTAS if available
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 9b86a74..97196ee 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -218,6 +218,9 @@
 
 static void stop_this_cpu(void *dummy)
 {
+	/* Remove this CPU */
+	set_cpu_online(smp_processor_id(), false);
+
 	local_irq_disable();
 	while (1)
 		;
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 956ab33..e235e52 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -461,6 +461,25 @@
 
 	cacheinfo_cpu_offline(cpu);
 }
+
+#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
+ssize_t arch_cpu_probe(const char *buf, size_t count)
+{
+	if (ppc_md.cpu_probe)
+		return ppc_md.cpu_probe(buf, count);
+
+	return -EINVAL;
+}
+
+ssize_t arch_cpu_release(const char *buf, size_t count)
+{
+	if (ppc_md.cpu_release)
+		return ppc_md.cpu_release(buf, count);
+
+	return -EINVAL;
+}
+#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */
+
 #endif /* CONFIG_HOTPLUG_CPU */
 
 static int __cpuinit sysfs_cpu_notify(struct notifier_block *self,
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 674800b..9ba2cc8 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -269,6 +269,7 @@
 	per_cpu(cputime_scaled_last_delta, smp_processor_id()) = deltascaled;
 	local_irq_restore(flags);
 }
+EXPORT_SYMBOL_GPL(account_system_vtime);
 
 /*
  * Transfer the user and system times accumulated in the paca
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 9d1f935..804f0f3 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -198,28 +198,6 @@
 	info.si_code = code;
 	info.si_addr = (void __user *) addr;
 	force_sig_info(signr, &info, current);
-
-	/*
-	 * Init gets no signals that it doesn't have a handler for.
-	 * That's all very well, but if it has caused a synchronous
-	 * exception and we ignore the resulting signal, it will just
-	 * generate the same exception over and over again and we get
-	 * nowhere.  Better to kill it and let the kernel panic.
-	 */
-	if (is_global_init(current)) {
-		__sighandler_t handler;
-
-		spin_lock_irq(&current->sighand->siglock);
-		handler = current->sighand->action[signr-1].sa.sa_handler;
-		spin_unlock_irq(&current->sighand->siglock);
-		if (handler == SIG_DFL) {
-			/* init has generated a synchronous exception
-			   and it doesn't have a handler for the signal */
-			printk(KERN_CRIT "init has generated signal %d "
-			       "but has no handler for it\n", signr);
-			do_exit(signr);
-		}
-	}
 }
 
 #ifdef CONFIG_PPC64
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index 67b6916..fe46048 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -58,7 +58,7 @@
 	 * all 1's
 	 */
 	mfspr	r4,SPRN_VRSAVE
-	cmpdi	0,r4,0
+	cmpwi	0,r4,0
 	bne+	1f
 	li	r4,-1
 	mtspr	SPRN_VRSAVE,r4
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index c299268..07703f7 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -21,6 +21,23 @@
 	select PREEMPT_NOTIFIERS
 	select ANON_INODES
 
+config KVM_BOOK3S_64_HANDLER
+	bool
+
+config KVM_BOOK3S_64
+	tristate "KVM support for PowerPC book3s_64 processors"
+	depends on EXPERIMENTAL && PPC64
+	select KVM
+	select KVM_BOOK3S_64_HANDLER
+	---help---
+	  Support running unmodified book3s_64 and book3s_32 guest kernels
+	  in virtual machines on book3s_64 host processors.
+
+	  This module provides access to the hardware capabilities through
+	  a character device node named /dev/kvm.
+
+	  If unsure, say N.
+
 config KVM_440
 	bool "KVM support for PowerPC 440 processors"
 	depends on EXPERIMENTAL && 44x
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 37655fe..56484d6 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -12,26 +12,45 @@
 CFLAGS_e500_tlb.o := -I.
 CFLAGS_emulate.o  := -I.
 
-kvm-objs := $(common-objs-y) powerpc.o emulate.o
+common-objs-y += powerpc.o emulate.o
 obj-$(CONFIG_KVM_EXIT_TIMING) += timing.o
-obj-$(CONFIG_KVM) += kvm.o
+obj-$(CONFIG_KVM_BOOK3S_64_HANDLER) += book3s_64_exports.o
 
 AFLAGS_booke_interrupts.o := -I$(obj)
 
 kvm-440-objs := \
+	$(common-objs-y) \
 	booke.o \
 	booke_emulate.o \
 	booke_interrupts.o \
 	44x.o \
 	44x_tlb.o \
 	44x_emulate.o
-obj-$(CONFIG_KVM_440) += kvm-440.o
+kvm-objs-$(CONFIG_KVM_440) := $(kvm-440-objs)
 
 kvm-e500-objs := \
+	$(common-objs-y) \
 	booke.o \
 	booke_emulate.o \
 	booke_interrupts.o \
 	e500.o \
 	e500_tlb.o \
 	e500_emulate.o
-obj-$(CONFIG_KVM_E500) += kvm-e500.o
+kvm-objs-$(CONFIG_KVM_E500) := $(kvm-e500-objs)
+
+kvm-book3s_64-objs := \
+	$(common-objs-y) \
+	book3s.o \
+	book3s_64_emulate.o \
+	book3s_64_interrupts.o \
+	book3s_64_mmu_host.o \
+	book3s_64_mmu.o \
+	book3s_32_mmu.o
+kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-objs)
+
+kvm-objs := $(kvm-objs-m) $(kvm-objs-y)
+
+obj-$(CONFIG_KVM_440) += kvm.o
+obj-$(CONFIG_KVM_E500) += kvm.o
+obj-$(CONFIG_KVM_BOOK3S_64) += kvm.o
+
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
new file mode 100644
index 0000000..3e294bd
--- /dev/null
+++ b/arch/powerpc/kvm/book3s.c
@@ -0,0 +1,974 @@
+/*
+ * Copyright (C) 2009. SUSE Linux Products GmbH. All rights reserved.
+ *
+ * Authors:
+ *    Alexander Graf <agraf@suse.de>
+ *    Kevin Wolf <mail@kevin-wolf.de>
+ *
+ * Description:
+ * This file is derived from arch/powerpc/kvm/44x.c,
+ * by Hollis Blanchard <hollisb@us.ibm.com>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/err.h>
+
+#include <asm/reg.h>
+#include <asm/cputable.h>
+#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include <asm/mmu_context.h>
+#include <linux/sched.h>
+#include <linux/vmalloc.h>
+
+#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
+
+/* #define EXIT_DEBUG */
+/* #define EXIT_DEBUG_SIMPLE */
+
+/* Without AGGRESSIVE_DEC we only fire off a DEC interrupt when DEC turns 0.
+ * When set, we retrigger a DEC interrupt after that if DEC <= 0.
+ * PPC32 Linux runs faster without AGGRESSIVE_DEC, PPC64 Linux requires it. */
+
+/* #define AGGRESSIVE_DEC */
+
+struct kvm_stats_debugfs_item debugfs_entries[] = {
+	{ "exits",       VCPU_STAT(sum_exits) },
+	{ "mmio",        VCPU_STAT(mmio_exits) },
+	{ "sig",         VCPU_STAT(signal_exits) },
+	{ "sysc",        VCPU_STAT(syscall_exits) },
+	{ "inst_emu",    VCPU_STAT(emulated_inst_exits) },
+	{ "dec",         VCPU_STAT(dec_exits) },
+	{ "ext_intr",    VCPU_STAT(ext_intr_exits) },
+	{ "queue_intr",  VCPU_STAT(queue_intr) },
+	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
+	{ "pf_storage",  VCPU_STAT(pf_storage) },
+	{ "sp_storage",  VCPU_STAT(sp_storage) },
+	{ "pf_instruc",  VCPU_STAT(pf_instruc) },
+	{ "sp_instruc",  VCPU_STAT(sp_instruc) },
+	{ "ld",          VCPU_STAT(ld) },
+	{ "ld_slow",     VCPU_STAT(ld_slow) },
+	{ "st",          VCPU_STAT(st) },
+	{ "st_slow",     VCPU_STAT(st_slow) },
+	{ NULL }
+};
+
+void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu)
+{
+}
+
+void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
+{
+}
+
+void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+	memcpy(get_paca()->kvm_slb, to_book3s(vcpu)->slb_shadow, sizeof(get_paca()->kvm_slb));
+	get_paca()->kvm_slb_max = to_book3s(vcpu)->slb_shadow_max;
+}
+
+void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
+{
+	memcpy(to_book3s(vcpu)->slb_shadow, get_paca()->kvm_slb, sizeof(get_paca()->kvm_slb));
+	to_book3s(vcpu)->slb_shadow_max = get_paca()->kvm_slb_max;
+}
+
+#if defined(AGGRESSIVE_DEC) || defined(EXIT_DEBUG)
+static u32 kvmppc_get_dec(struct kvm_vcpu *vcpu)
+{
+	u64 jd = mftb() - vcpu->arch.dec_jiffies;
+	return vcpu->arch.dec - jd;
+}
+#endif
+
+void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
+{
+	ulong old_msr = vcpu->arch.msr;
+
+#ifdef EXIT_DEBUG
+	printk(KERN_INFO "KVM: Set MSR to 0x%llx\n", msr);
+#endif
+	msr &= to_book3s(vcpu)->msr_mask;
+	vcpu->arch.msr = msr;
+	vcpu->arch.shadow_msr = msr | MSR_USER32;
+	vcpu->arch.shadow_msr &= ( MSR_VEC | MSR_VSX | MSR_FP | MSR_FE0 |
+				   MSR_USER64 | MSR_SE | MSR_BE | MSR_DE |
+				   MSR_FE1);
+
+	if (msr & (MSR_WE|MSR_POW)) {
+		if (!vcpu->arch.pending_exceptions) {
+			kvm_vcpu_block(vcpu);
+			vcpu->stat.halt_wakeup++;
+		}
+	}
+
+	if (((vcpu->arch.msr & (MSR_IR|MSR_DR)) != (old_msr & (MSR_IR|MSR_DR))) ||
+	    (vcpu->arch.msr & MSR_PR) != (old_msr & MSR_PR)) {
+		kvmppc_mmu_flush_segments(vcpu);
+		kvmppc_mmu_map_segment(vcpu, vcpu->arch.pc);
+	}
+}
+
+void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags)
+{
+	vcpu->arch.srr0 = vcpu->arch.pc;
+	vcpu->arch.srr1 = vcpu->arch.msr | flags;
+	vcpu->arch.pc = to_book3s(vcpu)->hior + vec;
+	vcpu->arch.mmu.reset_msr(vcpu);
+}
+
+void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec)
+{
+	unsigned int prio;
+
+	vcpu->stat.queue_intr++;
+	switch (vec) {
+	case 0x100: prio = BOOK3S_IRQPRIO_SYSTEM_RESET;		break;
+	case 0x200: prio = BOOK3S_IRQPRIO_MACHINE_CHECK;	break;
+	case 0x300: prio = BOOK3S_IRQPRIO_DATA_STORAGE;		break;
+	case 0x380: prio = BOOK3S_IRQPRIO_DATA_SEGMENT;		break;
+	case 0x400: prio = BOOK3S_IRQPRIO_INST_STORAGE;		break;
+	case 0x480: prio = BOOK3S_IRQPRIO_INST_SEGMENT;		break;
+	case 0x500: prio = BOOK3S_IRQPRIO_EXTERNAL;		break;
+	case 0x600: prio = BOOK3S_IRQPRIO_ALIGNMENT;		break;
+	case 0x700: prio = BOOK3S_IRQPRIO_PROGRAM;		break;
+	case 0x800: prio = BOOK3S_IRQPRIO_FP_UNAVAIL;		break;
+	case 0x900: prio = BOOK3S_IRQPRIO_DECREMENTER;		break;
+	case 0xc00: prio = BOOK3S_IRQPRIO_SYSCALL;		break;
+	case 0xd00: prio = BOOK3S_IRQPRIO_DEBUG;		break;
+	case 0xf20: prio = BOOK3S_IRQPRIO_ALTIVEC;		break;
+	case 0xf40: prio = BOOK3S_IRQPRIO_VSX;			break;
+	default:    prio = BOOK3S_IRQPRIO_MAX;			break;
+	}
+
+	set_bit(prio, &vcpu->arch.pending_exceptions);
+#ifdef EXIT_DEBUG
+	printk(KERN_INFO "Queueing interrupt %x\n", vec);
+#endif
+}
+
+
+void kvmppc_core_queue_program(struct kvm_vcpu *vcpu)
+{
+	kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_PROGRAM);
+}
+
+void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu)
+{
+	kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DECREMENTER);
+}
+
+int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu)
+{
+	return test_bit(BOOK3S_INTERRUPT_DECREMENTER >> 7, &vcpu->arch.pending_exceptions);
+}
+
+void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
+                                struct kvm_interrupt *irq)
+{
+	kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL);
+}
+
+int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
+{
+	int deliver = 1;
+	int vec = 0;
+
+	switch (priority) {
+	case BOOK3S_IRQPRIO_DECREMENTER:
+		deliver = vcpu->arch.msr & MSR_EE;
+		vec = BOOK3S_INTERRUPT_DECREMENTER;
+		break;
+	case BOOK3S_IRQPRIO_EXTERNAL:
+		deliver = vcpu->arch.msr & MSR_EE;
+		vec = BOOK3S_INTERRUPT_EXTERNAL;
+		break;
+	case BOOK3S_IRQPRIO_SYSTEM_RESET:
+		vec = BOOK3S_INTERRUPT_SYSTEM_RESET;
+		break;
+	case BOOK3S_IRQPRIO_MACHINE_CHECK:
+		vec = BOOK3S_INTERRUPT_MACHINE_CHECK;
+		break;
+	case BOOK3S_IRQPRIO_DATA_STORAGE:
+		vec = BOOK3S_INTERRUPT_DATA_STORAGE;
+		break;
+	case BOOK3S_IRQPRIO_INST_STORAGE:
+		vec = BOOK3S_INTERRUPT_INST_STORAGE;
+		break;
+	case BOOK3S_IRQPRIO_DATA_SEGMENT:
+		vec = BOOK3S_INTERRUPT_DATA_SEGMENT;
+		break;
+	case BOOK3S_IRQPRIO_INST_SEGMENT:
+		vec = BOOK3S_INTERRUPT_INST_SEGMENT;
+		break;
+	case BOOK3S_IRQPRIO_ALIGNMENT:
+		vec = BOOK3S_INTERRUPT_ALIGNMENT;
+		break;
+	case BOOK3S_IRQPRIO_PROGRAM:
+		vec = BOOK3S_INTERRUPT_PROGRAM;
+		break;
+	case BOOK3S_IRQPRIO_VSX:
+		vec = BOOK3S_INTERRUPT_VSX;
+		break;
+	case BOOK3S_IRQPRIO_ALTIVEC:
+		vec = BOOK3S_INTERRUPT_ALTIVEC;
+		break;
+	case BOOK3S_IRQPRIO_FP_UNAVAIL:
+		vec = BOOK3S_INTERRUPT_FP_UNAVAIL;
+		break;
+	case BOOK3S_IRQPRIO_SYSCALL:
+		vec = BOOK3S_INTERRUPT_SYSCALL;
+		break;
+	case BOOK3S_IRQPRIO_DEBUG:
+		vec = BOOK3S_INTERRUPT_TRACE;
+		break;
+	case BOOK3S_IRQPRIO_PERFORMANCE_MONITOR:
+		vec = BOOK3S_INTERRUPT_PERFMON;
+		break;
+	default:
+		deliver = 0;
+		printk(KERN_ERR "KVM: Unknown interrupt: 0x%x\n", priority);
+		break;
+	}
+
+#if 0
+	printk(KERN_INFO "Deliver interrupt 0x%x? %x\n", vec, deliver);
+#endif
+
+	if (deliver)
+		kvmppc_inject_interrupt(vcpu, vec, 0ULL);
+
+	return deliver;
+}
+
+void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
+{
+	unsigned long *pending = &vcpu->arch.pending_exceptions;
+	unsigned int priority;
+
+	/* XXX be more clever here - no need to mftb() on every entry */
+	/* Issue DEC again if it's still active */
+#ifdef AGGRESSIVE_DEC
+	if (vcpu->arch.msr & MSR_EE)
+		if (kvmppc_get_dec(vcpu) & 0x80000000)
+			kvmppc_core_queue_dec(vcpu);
+#endif
+
+#ifdef EXIT_DEBUG
+	if (vcpu->arch.pending_exceptions)
+		printk(KERN_EMERG "KVM: Check pending: %lx\n", vcpu->arch.pending_exceptions);
+#endif
+	priority = __ffs(*pending);
+	while (priority <= (sizeof(unsigned int) * 8)) {
+		if (kvmppc_book3s_irqprio_deliver(vcpu, priority)) {
+			clear_bit(priority, &vcpu->arch.pending_exceptions);
+			break;
+		}
+
+		priority = find_next_bit(pending,
+					 BITS_PER_BYTE * sizeof(*pending),
+					 priority + 1);
+	}
+}
+
+void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
+{
+	vcpu->arch.hflags &= ~BOOK3S_HFLAG_SLB;
+	vcpu->arch.pvr = pvr;
+	if ((pvr >= 0x330000) && (pvr < 0x70330000)) {
+		kvmppc_mmu_book3s_64_init(vcpu);
+		to_book3s(vcpu)->hior = 0xfff00000;
+		to_book3s(vcpu)->msr_mask = 0xffffffffffffffffULL;
+	} else {
+		kvmppc_mmu_book3s_32_init(vcpu);
+		to_book3s(vcpu)->hior = 0;
+		to_book3s(vcpu)->msr_mask = 0xffffffffULL;
+	}
+
+	/* If we are in hypervisor level on 970, we can tell the CPU to
+	 * treat DCBZ as 32 bytes store */
+	vcpu->arch.hflags &= ~BOOK3S_HFLAG_DCBZ32;
+	if (vcpu->arch.mmu.is_dcbz32(vcpu) && (mfmsr() & MSR_HV) &&
+	    !strcmp(cur_cpu_spec->platform, "ppc970"))
+		vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32;
+
+}
+
+/* Book3s_32 CPUs always have 32 bytes cache line size, which Linux assumes. To
+ * make Book3s_32 Linux work on Book3s_64, we have to make sure we trap dcbz to
+ * emulate 32 bytes dcbz length.
+ *
+ * The Book3s_64 inventors also realized this case and implemented a special bit
+ * in the HID5 register, which is a hypervisor ressource. Thus we can't use it.
+ *
+ * My approach here is to patch the dcbz instruction on executing pages.
+ */
+static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte)
+{
+	bool touched = false;
+	hva_t hpage;
+	u32 *page;
+	int i;
+
+	hpage = gfn_to_hva(vcpu->kvm, pte->raddr >> PAGE_SHIFT);
+	if (kvm_is_error_hva(hpage))
+		return;
+
+	hpage |= pte->raddr & ~PAGE_MASK;
+	hpage &= ~0xFFFULL;
+
+	page = vmalloc(HW_PAGE_SIZE);
+
+	if (copy_from_user(page, (void __user *)hpage, HW_PAGE_SIZE))
+		goto out;
+
+	for (i=0; i < HW_PAGE_SIZE / 4; i++)
+		if ((page[i] & 0xff0007ff) == INS_DCBZ) {
+			page[i] &= 0xfffffff7; // reserved instruction, so we trap
+			touched = true;
+		}
+
+	if (touched)
+		copy_to_user((void __user *)hpage, page, HW_PAGE_SIZE);
+
+out:
+	vfree(page);
+}
+
+static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data,
+			 struct kvmppc_pte *pte)
+{
+	int relocated = (vcpu->arch.msr & (data ? MSR_DR : MSR_IR));
+	int r;
+
+	if (relocated) {
+		r = vcpu->arch.mmu.xlate(vcpu, eaddr, pte, data);
+	} else {
+		pte->eaddr = eaddr;
+		pte->raddr = eaddr & 0xffffffff;
+		pte->vpage = eaddr >> 12;
+		switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) {
+		case 0:
+			pte->vpage |= VSID_REAL;
+		case MSR_DR:
+			pte->vpage |= VSID_REAL_DR;
+		case MSR_IR:
+			pte->vpage |= VSID_REAL_IR;
+		}
+		pte->may_read = true;
+		pte->may_write = true;
+		pte->may_execute = true;
+		r = 0;
+	}
+
+	return r;
+}
+
+static hva_t kvmppc_bad_hva(void)
+{
+	return PAGE_OFFSET;
+}
+
+static hva_t kvmppc_pte_to_hva(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte,
+			       bool read)
+{
+	hva_t hpage;
+
+	if (read && !pte->may_read)
+		goto err;
+
+	if (!read && !pte->may_write)
+		goto err;
+
+	hpage = gfn_to_hva(vcpu->kvm, pte->raddr >> PAGE_SHIFT);
+	if (kvm_is_error_hva(hpage))
+		goto err;
+
+	return hpage | (pte->raddr & ~PAGE_MASK);
+err:
+	return kvmppc_bad_hva();
+}
+
+int kvmppc_st(struct kvm_vcpu *vcpu, ulong eaddr, int size, void *ptr)
+{
+	struct kvmppc_pte pte;
+	hva_t hva = eaddr;
+
+	vcpu->stat.st++;
+
+	if (kvmppc_xlate(vcpu, eaddr, false, &pte))
+		goto err;
+
+	hva = kvmppc_pte_to_hva(vcpu, &pte, false);
+	if (kvm_is_error_hva(hva))
+		goto err;
+
+	if (copy_to_user((void __user *)hva, ptr, size)) {
+		printk(KERN_INFO "kvmppc_st at 0x%lx failed\n", hva);
+		goto err;
+	}
+
+	return 0;
+
+err:
+	return -ENOENT;
+}
+
+int kvmppc_ld(struct kvm_vcpu *vcpu, ulong eaddr, int size, void *ptr,
+		      bool data)
+{
+	struct kvmppc_pte pte;
+	hva_t hva = eaddr;
+
+	vcpu->stat.ld++;
+
+	if (kvmppc_xlate(vcpu, eaddr, data, &pte))
+		goto err;
+
+	hva = kvmppc_pte_to_hva(vcpu, &pte, true);
+	if (kvm_is_error_hva(hva))
+		goto err;
+
+	if (copy_from_user(ptr, (void __user *)hva, size)) {
+		printk(KERN_INFO "kvmppc_ld at 0x%lx failed\n", hva);
+		goto err;
+	}
+
+	return 0;
+
+err:
+	return -ENOENT;
+}
+
+static int kvmppc_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+	return kvm_is_visible_gfn(vcpu->kvm, gfn);
+}
+
+int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
+			    ulong eaddr, int vec)
+{
+	bool data = (vec == BOOK3S_INTERRUPT_DATA_STORAGE);
+	int r = RESUME_GUEST;
+	int relocated;
+	int page_found = 0;
+	struct kvmppc_pte pte;
+	bool is_mmio = false;
+
+	if ( vec == BOOK3S_INTERRUPT_DATA_STORAGE ) {
+		relocated = (vcpu->arch.msr & MSR_DR);
+	} else {
+		relocated = (vcpu->arch.msr & MSR_IR);
+	}
+
+	/* Resolve real address if translation turned on */
+	if (relocated) {
+		page_found = vcpu->arch.mmu.xlate(vcpu, eaddr, &pte, data);
+	} else {
+		pte.may_execute = true;
+		pte.may_read = true;
+		pte.may_write = true;
+		pte.raddr = eaddr & 0xffffffff;
+		pte.eaddr = eaddr;
+		pte.vpage = eaddr >> 12;
+		switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) {
+		case 0:
+			pte.vpage |= VSID_REAL;
+		case MSR_DR:
+			pte.vpage |= VSID_REAL_DR;
+		case MSR_IR:
+			pte.vpage |= VSID_REAL_IR;
+		}
+	}
+
+	if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
+	   (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) {
+		/*
+		 * If we do the dcbz hack, we have to NX on every execution,
+		 * so we can patch the executing code. This renders our guest
+		 * NX-less.
+		 */
+		pte.may_execute = !data;
+	}
+
+	if (page_found == -ENOENT) {
+		/* Page not found in guest PTE entries */
+		vcpu->arch.dear = vcpu->arch.fault_dear;
+		to_book3s(vcpu)->dsisr = vcpu->arch.fault_dsisr;
+		vcpu->arch.msr |= (vcpu->arch.shadow_msr & 0x00000000f8000000ULL);
+		kvmppc_book3s_queue_irqprio(vcpu, vec);
+	} else if (page_found == -EPERM) {
+		/* Storage protection */
+		vcpu->arch.dear = vcpu->arch.fault_dear;
+		to_book3s(vcpu)->dsisr = vcpu->arch.fault_dsisr & ~DSISR_NOHPTE;
+		to_book3s(vcpu)->dsisr |= DSISR_PROTFAULT;
+		vcpu->arch.msr |= (vcpu->arch.shadow_msr & 0x00000000f8000000ULL);
+		kvmppc_book3s_queue_irqprio(vcpu, vec);
+	} else if (page_found == -EINVAL) {
+		/* Page not found in guest SLB */
+		vcpu->arch.dear = vcpu->arch.fault_dear;
+		kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80);
+	} else if (!is_mmio &&
+		   kvmppc_visible_gfn(vcpu, pte.raddr >> PAGE_SHIFT)) {
+		/* The guest's PTE is not mapped yet. Map on the host */
+		kvmppc_mmu_map_page(vcpu, &pte);
+		if (data)
+			vcpu->stat.sp_storage++;
+		else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
+			(!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32)))
+			kvmppc_patch_dcbz(vcpu, &pte);
+	} else {
+		/* MMIO */
+		vcpu->stat.mmio_exits++;
+		vcpu->arch.paddr_accessed = pte.raddr;
+		r = kvmppc_emulate_mmio(run, vcpu);
+		if ( r == RESUME_HOST_NV )
+			r = RESUME_HOST;
+		if ( r == RESUME_GUEST_NV )
+			r = RESUME_GUEST;
+	}
+
+	return r;
+}
+
+int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
+                       unsigned int exit_nr)
+{
+	int r = RESUME_HOST;
+
+	vcpu->stat.sum_exits++;
+
+	run->exit_reason = KVM_EXIT_UNKNOWN;
+	run->ready_for_interrupt_injection = 1;
+#ifdef EXIT_DEBUG
+	printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | dar=0x%lx | dec=0x%x | msr=0x%lx\n",
+		exit_nr, vcpu->arch.pc, vcpu->arch.fault_dear,
+		kvmppc_get_dec(vcpu), vcpu->arch.msr);
+#elif defined (EXIT_DEBUG_SIMPLE)
+	if ((exit_nr != 0x900) && (exit_nr != 0x500))
+		printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | dar=0x%lx | msr=0x%lx\n",
+			exit_nr, vcpu->arch.pc, vcpu->arch.fault_dear,
+			vcpu->arch.msr);
+#endif
+	kvm_resched(vcpu);
+	switch (exit_nr) {
+	case BOOK3S_INTERRUPT_INST_STORAGE:
+		vcpu->stat.pf_instruc++;
+		/* only care about PTEG not found errors, but leave NX alone */
+		if (vcpu->arch.shadow_msr & 0x40000000) {
+			r = kvmppc_handle_pagefault(run, vcpu, vcpu->arch.pc, exit_nr);
+			vcpu->stat.sp_instruc++;
+		} else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
+			  (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) {
+			/*
+			 * XXX If we do the dcbz hack we use the NX bit to flush&patch the page,
+			 *     so we can't use the NX bit inside the guest. Let's cross our fingers,
+			 *     that no guest that needs the dcbz hack does NX.
+			 */
+			kvmppc_mmu_pte_flush(vcpu, vcpu->arch.pc, ~0xFFFULL);
+		} else {
+			vcpu->arch.msr |= (vcpu->arch.shadow_msr & 0x58000000);
+			kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
+			kvmppc_mmu_pte_flush(vcpu, vcpu->arch.pc, ~0xFFFULL);
+			r = RESUME_GUEST;
+		}
+		break;
+	case BOOK3S_INTERRUPT_DATA_STORAGE:
+		vcpu->stat.pf_storage++;
+		/* The only case we need to handle is missing shadow PTEs */
+		if (vcpu->arch.fault_dsisr & DSISR_NOHPTE) {
+			r = kvmppc_handle_pagefault(run, vcpu, vcpu->arch.fault_dear, exit_nr);
+		} else {
+			vcpu->arch.dear = vcpu->arch.fault_dear;
+			to_book3s(vcpu)->dsisr = vcpu->arch.fault_dsisr;
+			kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
+			kvmppc_mmu_pte_flush(vcpu, vcpu->arch.dear, ~0xFFFULL);
+			r = RESUME_GUEST;
+		}
+		break;
+	case BOOK3S_INTERRUPT_DATA_SEGMENT:
+		if (kvmppc_mmu_map_segment(vcpu, vcpu->arch.fault_dear) < 0) {
+			vcpu->arch.dear = vcpu->arch.fault_dear;
+			kvmppc_book3s_queue_irqprio(vcpu,
+				BOOK3S_INTERRUPT_DATA_SEGMENT);
+		}
+		r = RESUME_GUEST;
+		break;
+	case BOOK3S_INTERRUPT_INST_SEGMENT:
+		if (kvmppc_mmu_map_segment(vcpu, vcpu->arch.pc) < 0) {
+			kvmppc_book3s_queue_irqprio(vcpu,
+				BOOK3S_INTERRUPT_INST_SEGMENT);
+		}
+		r = RESUME_GUEST;
+		break;
+	/* We're good on these - the host merely wanted to get our attention */
+	case BOOK3S_INTERRUPT_DECREMENTER:
+		vcpu->stat.dec_exits++;
+		r = RESUME_GUEST;
+		break;
+	case BOOK3S_INTERRUPT_EXTERNAL:
+		vcpu->stat.ext_intr_exits++;
+		r = RESUME_GUEST;
+		break;
+	case BOOK3S_INTERRUPT_PROGRAM:
+	{
+		enum emulation_result er;
+
+		if (vcpu->arch.msr & MSR_PR) {
+#ifdef EXIT_DEBUG
+			printk(KERN_INFO "Userspace triggered 0x700 exception at 0x%lx (0x%x)\n", vcpu->arch.pc, vcpu->arch.last_inst);
+#endif
+			if ((vcpu->arch.last_inst & 0xff0007ff) !=
+			    (INS_DCBZ & 0xfffffff7)) {
+				kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
+				r = RESUME_GUEST;
+				break;
+			}
+		}
+
+		vcpu->stat.emulated_inst_exits++;
+		er = kvmppc_emulate_instruction(run, vcpu);
+		switch (er) {
+		case EMULATE_DONE:
+			r = RESUME_GUEST;
+			break;
+		case EMULATE_FAIL:
+			printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
+			       __func__, vcpu->arch.pc, vcpu->arch.last_inst);
+			kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
+			r = RESUME_GUEST;
+			break;
+		default:
+			BUG();
+		}
+		break;
+	}
+	case BOOK3S_INTERRUPT_SYSCALL:
+#ifdef EXIT_DEBUG
+		printk(KERN_INFO "Syscall Nr %d\n", (int)vcpu->arch.gpr[0]);
+#endif
+		vcpu->stat.syscall_exits++;
+		kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
+		r = RESUME_GUEST;
+		break;
+	case BOOK3S_INTERRUPT_MACHINE_CHECK:
+	case BOOK3S_INTERRUPT_FP_UNAVAIL:
+	case BOOK3S_INTERRUPT_TRACE:
+	case BOOK3S_INTERRUPT_ALTIVEC:
+	case BOOK3S_INTERRUPT_VSX:
+		kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
+		r = RESUME_GUEST;
+		break;
+	default:
+		/* Ugh - bork here! What did we get? */
+		printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n", exit_nr, vcpu->arch.pc, vcpu->arch.shadow_msr);
+		r = RESUME_HOST;
+		BUG();
+		break;
+	}
+
+
+	if (!(r & RESUME_HOST)) {
+		/* To avoid clobbering exit_reason, only check for signals if
+		 * we aren't already exiting to userspace for some other
+		 * reason. */
+		if (signal_pending(current)) {
+#ifdef EXIT_DEBUG
+			printk(KERN_EMERG "KVM: Going back to host\n");
+#endif
+			vcpu->stat.signal_exits++;
+			run->exit_reason = KVM_EXIT_INTR;
+			r = -EINTR;
+		} else {
+			/* In case an interrupt came in that was triggered
+			 * from userspace (like DEC), we need to check what
+			 * to inject now! */
+			kvmppc_core_deliver_interrupts(vcpu);
+		}
+	}
+
+#ifdef EXIT_DEBUG
+	printk(KERN_EMERG "KVM exit: vcpu=0x%p pc=0x%lx r=0x%x\n", vcpu, vcpu->arch.pc, r);
+#endif
+
+	return r;
+}
+
+int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
+{
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+	int i;
+
+	regs->pc = vcpu->arch.pc;
+	regs->cr = vcpu->arch.cr;
+	regs->ctr = vcpu->arch.ctr;
+	regs->lr = vcpu->arch.lr;
+	regs->xer = vcpu->arch.xer;
+	regs->msr = vcpu->arch.msr;
+	regs->srr0 = vcpu->arch.srr0;
+	regs->srr1 = vcpu->arch.srr1;
+	regs->pid = vcpu->arch.pid;
+	regs->sprg0 = vcpu->arch.sprg0;
+	regs->sprg1 = vcpu->arch.sprg1;
+	regs->sprg2 = vcpu->arch.sprg2;
+	regs->sprg3 = vcpu->arch.sprg3;
+	regs->sprg5 = vcpu->arch.sprg4;
+	regs->sprg6 = vcpu->arch.sprg5;
+	regs->sprg7 = vcpu->arch.sprg6;
+
+	for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
+		regs->gpr[i] = vcpu->arch.gpr[i];
+
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+	int i;
+
+	vcpu->arch.pc = regs->pc;
+	vcpu->arch.cr = regs->cr;
+	vcpu->arch.ctr = regs->ctr;
+	vcpu->arch.lr = regs->lr;
+	vcpu->arch.xer = regs->xer;
+	kvmppc_set_msr(vcpu, regs->msr);
+	vcpu->arch.srr0 = regs->srr0;
+	vcpu->arch.srr1 = regs->srr1;
+	vcpu->arch.sprg0 = regs->sprg0;
+	vcpu->arch.sprg1 = regs->sprg1;
+	vcpu->arch.sprg2 = regs->sprg2;
+	vcpu->arch.sprg3 = regs->sprg3;
+	vcpu->arch.sprg5 = regs->sprg4;
+	vcpu->arch.sprg6 = regs->sprg5;
+	vcpu->arch.sprg7 = regs->sprg6;
+
+	for (i = 0; i < ARRAY_SIZE(vcpu->arch.gpr); i++)
+		vcpu->arch.gpr[i] = regs->gpr[i];
+
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
+                                  struct kvm_sregs *sregs)
+{
+	struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
+	int i;
+
+	sregs->pvr = vcpu->arch.pvr;
+
+	sregs->u.s.sdr1 = to_book3s(vcpu)->sdr1;
+	if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) {
+		for (i = 0; i < 64; i++) {
+			sregs->u.s.ppc64.slb[i].slbe = vcpu3s->slb[i].orige | i;
+			sregs->u.s.ppc64.slb[i].slbv = vcpu3s->slb[i].origv;
+		}
+	} else {
+		for (i = 0; i < 16; i++) {
+			sregs->u.s.ppc32.sr[i] = vcpu3s->sr[i].raw;
+			sregs->u.s.ppc32.sr[i] = vcpu3s->sr[i].raw;
+		}
+		for (i = 0; i < 8; i++) {
+			sregs->u.s.ppc32.ibat[i] = vcpu3s->ibat[i].raw;
+			sregs->u.s.ppc32.dbat[i] = vcpu3s->dbat[i].raw;
+		}
+	}
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
+                                  struct kvm_sregs *sregs)
+{
+	struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
+	int i;
+
+	kvmppc_set_pvr(vcpu, sregs->pvr);
+
+	vcpu3s->sdr1 = sregs->u.s.sdr1;
+	if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) {
+		for (i = 0; i < 64; i++) {
+			vcpu->arch.mmu.slbmte(vcpu, sregs->u.s.ppc64.slb[i].slbv,
+						    sregs->u.s.ppc64.slb[i].slbe);
+		}
+	} else {
+		for (i = 0; i < 16; i++) {
+			vcpu->arch.mmu.mtsrin(vcpu, i, sregs->u.s.ppc32.sr[i]);
+		}
+		for (i = 0; i < 8; i++) {
+			kvmppc_set_bat(vcpu, &(vcpu3s->ibat[i]), false,
+				       (u32)sregs->u.s.ppc32.ibat[i]);
+			kvmppc_set_bat(vcpu, &(vcpu3s->ibat[i]), true,
+				       (u32)(sregs->u.s.ppc32.ibat[i] >> 32));
+			kvmppc_set_bat(vcpu, &(vcpu3s->dbat[i]), false,
+				       (u32)sregs->u.s.ppc32.dbat[i]);
+			kvmppc_set_bat(vcpu, &(vcpu3s->dbat[i]), true,
+				       (u32)(sregs->u.s.ppc32.dbat[i] >> 32));
+		}
+	}
+
+	/* Flush the MMU after messing with the segments */
+	kvmppc_mmu_pte_flush(vcpu, 0, 0);
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+	return -ENOTSUPP;
+}
+
+int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+	return -ENOTSUPP;
+}
+
+int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
+                                  struct kvm_translation *tr)
+{
+	return 0;
+}
+
+/*
+ * Get (and clear) the dirty memory log for a memory slot.
+ */
+int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
+				      struct kvm_dirty_log *log)
+{
+	struct kvm_memory_slot *memslot;
+	struct kvm_vcpu *vcpu;
+	ulong ga, ga_end;
+	int is_dirty = 0;
+	int r, n;
+
+	down_write(&kvm->slots_lock);
+
+	r = kvm_get_dirty_log(kvm, log, &is_dirty);
+	if (r)
+		goto out;
+
+	/* If nothing is dirty, don't bother messing with page tables. */
+	if (is_dirty) {
+		memslot = &kvm->memslots[log->slot];
+
+		ga = memslot->base_gfn << PAGE_SHIFT;
+		ga_end = ga + (memslot->npages << PAGE_SHIFT);
+
+		kvm_for_each_vcpu(n, vcpu, kvm)
+			kvmppc_mmu_pte_pflush(vcpu, ga, ga_end);
+
+		n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
+		memset(memslot->dirty_bitmap, 0, n);
+	}
+
+	r = 0;
+out:
+	up_write(&kvm->slots_lock);
+	return r;
+}
+
+int kvmppc_core_check_processor_compat(void)
+{
+	return 0;
+}
+
+struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
+{
+	struct kvmppc_vcpu_book3s *vcpu_book3s;
+	struct kvm_vcpu *vcpu;
+	int err;
+
+	vcpu_book3s = (struct kvmppc_vcpu_book3s *)__get_free_pages( GFP_KERNEL | __GFP_ZERO,
+			get_order(sizeof(struct kvmppc_vcpu_book3s)));
+	if (!vcpu_book3s) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	vcpu = &vcpu_book3s->vcpu;
+	err = kvm_vcpu_init(vcpu, kvm, id);
+	if (err)
+		goto free_vcpu;
+
+	vcpu->arch.host_retip = kvm_return_point;
+	vcpu->arch.host_msr = mfmsr();
+	/* default to book3s_64 (970fx) */
+	vcpu->arch.pvr = 0x3C0301;
+	kvmppc_set_pvr(vcpu, vcpu->arch.pvr);
+	vcpu_book3s->slb_nr = 64;
+
+	/* remember where some real-mode handlers are */
+	vcpu->arch.trampoline_lowmem = kvmppc_trampoline_lowmem;
+	vcpu->arch.trampoline_enter = kvmppc_trampoline_enter;
+	vcpu->arch.highmem_handler = (ulong)kvmppc_handler_highmem;
+
+	vcpu->arch.shadow_msr = MSR_USER64;
+
+	err = __init_new_context();
+	if (err < 0)
+		goto free_vcpu;
+	vcpu_book3s->context_id = err;
+
+	vcpu_book3s->vsid_max = ((vcpu_book3s->context_id + 1) << USER_ESID_BITS) - 1;
+	vcpu_book3s->vsid_first = vcpu_book3s->context_id << USER_ESID_BITS;
+	vcpu_book3s->vsid_next = vcpu_book3s->vsid_first;
+
+	return vcpu;
+
+free_vcpu:
+	free_pages((long)vcpu_book3s, get_order(sizeof(struct kvmppc_vcpu_book3s)));
+out:
+	return ERR_PTR(err);
+}
+
+void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
+
+	__destroy_context(vcpu_book3s->context_id);
+	kvm_vcpu_uninit(vcpu);
+	free_pages((long)vcpu_book3s, get_order(sizeof(struct kvmppc_vcpu_book3s)));
+}
+
+extern int __kvmppc_vcpu_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
+int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+{
+	int ret;
+
+	/* No need to go into the guest when all we do is going out */
+	if (signal_pending(current)) {
+		kvm_run->exit_reason = KVM_EXIT_INTR;
+		return -EINTR;
+	}
+
+	/* XXX we get called with irq disabled - change that! */
+	local_irq_enable();
+
+	ret = __kvmppc_vcpu_entry(kvm_run, vcpu);
+
+	local_irq_disable();
+
+	return ret;
+}
+
+static int kvmppc_book3s_init(void)
+{
+	return kvm_init(NULL, sizeof(struct kvmppc_vcpu_book3s), THIS_MODULE);
+}
+
+static void kvmppc_book3s_exit(void)
+{
+	kvm_exit();
+}
+
+module_init(kvmppc_book3s_init);
+module_exit(kvmppc_book3s_exit);
diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c
new file mode 100644
index 0000000..faf99f2
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_32_mmu.c
@@ -0,0 +1,372 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright SUSE Linux Products GmbH 2009
+ *
+ * Authors: Alexander Graf <agraf@suse.de>
+ */
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/highmem.h>
+
+#include <asm/tlbflush.h>
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+
+/* #define DEBUG_MMU */
+/* #define DEBUG_MMU_PTE */
+/* #define DEBUG_MMU_PTE_IP 0xfff14c40 */
+
+#ifdef DEBUG_MMU
+#define dprintk(X...) printk(KERN_INFO X)
+#else
+#define dprintk(X...) do { } while(0)
+#endif
+
+#ifdef DEBUG_PTE
+#define dprintk_pte(X...) printk(KERN_INFO X)
+#else
+#define dprintk_pte(X...) do { } while(0)
+#endif
+
+#define PTEG_FLAG_ACCESSED	0x00000100
+#define PTEG_FLAG_DIRTY		0x00000080
+
+static inline bool check_debug_ip(struct kvm_vcpu *vcpu)
+{
+#ifdef DEBUG_MMU_PTE_IP
+	return vcpu->arch.pc == DEBUG_MMU_PTE_IP;
+#else
+	return true;
+#endif
+}
+
+static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr,
+					  struct kvmppc_pte *pte, bool data);
+
+static struct kvmppc_sr *find_sr(struct kvmppc_vcpu_book3s *vcpu_book3s, gva_t eaddr)
+{
+	return &vcpu_book3s->sr[(eaddr >> 28) & 0xf];
+}
+
+static u64 kvmppc_mmu_book3s_32_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr,
+					 bool data)
+{
+	struct kvmppc_sr *sre = find_sr(to_book3s(vcpu), eaddr);
+	struct kvmppc_pte pte;
+
+	if (!kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, &pte, data))
+		return pte.vpage;
+
+	return (((u64)eaddr >> 12) & 0xffff) | (((u64)sre->vsid) << 16);
+}
+
+static void kvmppc_mmu_book3s_32_reset_msr(struct kvm_vcpu *vcpu)
+{
+	kvmppc_set_msr(vcpu, 0);
+}
+
+static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvmppc_vcpu_book3s *vcpu_book3s,
+				      struct kvmppc_sr *sre, gva_t eaddr,
+				      bool primary)
+{
+	u32 page, hash, pteg, htabmask;
+	hva_t r;
+
+	page = (eaddr & 0x0FFFFFFF) >> 12;
+	htabmask = ((vcpu_book3s->sdr1 & 0x1FF) << 16) | 0xFFC0;
+
+	hash = ((sre->vsid ^ page) << 6);
+	if (!primary)
+		hash = ~hash;
+	hash &= htabmask;
+
+	pteg = (vcpu_book3s->sdr1 & 0xffff0000) | hash;
+
+	dprintk("MMU: pc=0x%lx eaddr=0x%lx sdr1=0x%llx pteg=0x%x vsid=0x%x\n",
+		vcpu_book3s->vcpu.arch.pc, eaddr, vcpu_book3s->sdr1, pteg,
+		sre->vsid);
+
+	r = gfn_to_hva(vcpu_book3s->vcpu.kvm, pteg >> PAGE_SHIFT);
+	if (kvm_is_error_hva(r))
+		return r;
+	return r | (pteg & ~PAGE_MASK);
+}
+
+static u32 kvmppc_mmu_book3s_32_get_ptem(struct kvmppc_sr *sre, gva_t eaddr,
+				    bool primary)
+{
+	return ((eaddr & 0x0fffffff) >> 22) | (sre->vsid << 7) |
+	       (primary ? 0 : 0x40) | 0x80000000;
+}
+
+static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr,
+					  struct kvmppc_pte *pte, bool data)
+{
+	struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
+	struct kvmppc_bat *bat;
+	int i;
+
+	for (i = 0; i < 8; i++) {
+		if (data)
+			bat = &vcpu_book3s->dbat[i];
+		else
+			bat = &vcpu_book3s->ibat[i];
+
+		if (vcpu->arch.msr & MSR_PR) {
+			if (!bat->vp)
+				continue;
+		} else {
+			if (!bat->vs)
+				continue;
+		}
+
+		if (check_debug_ip(vcpu))
+		{
+			dprintk_pte("%cBAT %02d: 0x%lx - 0x%x (0x%x)\n",
+				    data ? 'd' : 'i', i, eaddr, bat->bepi,
+				    bat->bepi_mask);
+		}
+		if ((eaddr & bat->bepi_mask) == bat->bepi) {
+			pte->raddr = bat->brpn | (eaddr & ~bat->bepi_mask);
+			pte->vpage = (eaddr >> 12) | VSID_BAT;
+			pte->may_read = bat->pp;
+			pte->may_write = bat->pp > 1;
+			pte->may_execute = true;
+			if (!pte->may_read) {
+				printk(KERN_INFO "BAT is not readable!\n");
+				continue;
+			}
+			if (!pte->may_write) {
+				/* let's treat r/o BATs as not-readable for now */
+				dprintk_pte("BAT is read-only!\n");
+				continue;
+			}
+
+			return 0;
+		}
+	}
+
+	return -ENOENT;
+}
+
+static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr,
+				     struct kvmppc_pte *pte, bool data,
+				     bool primary)
+{
+	struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
+	struct kvmppc_sr *sre;
+	hva_t ptegp;
+	u32 pteg[16];
+	u64 ptem = 0;
+	int i;
+	int found = 0;
+
+	sre = find_sr(vcpu_book3s, eaddr);
+
+	dprintk_pte("SR 0x%lx: vsid=0x%x, raw=0x%x\n", eaddr >> 28,
+		    sre->vsid, sre->raw);
+
+	pte->vpage = kvmppc_mmu_book3s_32_ea_to_vp(vcpu, eaddr, data);
+
+	ptegp = kvmppc_mmu_book3s_32_get_pteg(vcpu_book3s, sre, eaddr, primary);
+	if (kvm_is_error_hva(ptegp)) {
+		printk(KERN_INFO "KVM: Invalid PTEG!\n");
+		goto no_page_found;
+	}
+
+	ptem = kvmppc_mmu_book3s_32_get_ptem(sre, eaddr, primary);
+
+	if(copy_from_user(pteg, (void __user *)ptegp, sizeof(pteg))) {
+		printk(KERN_ERR "KVM: Can't copy data from 0x%lx!\n", ptegp);
+		goto no_page_found;
+	}
+
+	for (i=0; i<16; i+=2) {
+		if (ptem == pteg[i]) {
+			u8 pp;
+
+			pte->raddr = (pteg[i+1] & ~(0xFFFULL)) | (eaddr & 0xFFF);
+			pp = pteg[i+1] & 3;
+
+			if ((sre->Kp &&  (vcpu->arch.msr & MSR_PR)) ||
+			    (sre->Ks && !(vcpu->arch.msr & MSR_PR)))
+				pp |= 4;
+
+			pte->may_write = false;
+			pte->may_read = false;
+			pte->may_execute = true;
+			switch (pp) {
+				case 0:
+				case 1:
+				case 2:
+				case 6:
+					pte->may_write = true;
+				case 3:
+				case 5:
+				case 7:
+					pte->may_read = true;
+					break;
+			}
+
+			if ( !pte->may_read )
+				continue;
+
+			dprintk_pte("MMU: Found PTE -> %x %x - %x\n",
+				    pteg[i], pteg[i+1], pp);
+			found = 1;
+			break;
+		}
+	}
+
+	/* Update PTE C and A bits, so the guest's swapper knows we used the
+	   page */
+	if (found) {
+		u32 oldpte = pteg[i+1];
+
+		if (pte->may_read)
+			pteg[i+1] |= PTEG_FLAG_ACCESSED;
+		if (pte->may_write)
+			pteg[i+1] |= PTEG_FLAG_DIRTY;
+		else
+			dprintk_pte("KVM: Mapping read-only page!\n");
+
+		/* Write back into the PTEG */
+		if (pteg[i+1] != oldpte)
+			copy_to_user((void __user *)ptegp, pteg, sizeof(pteg));
+
+		return 0;
+	}
+
+no_page_found:
+
+	if (check_debug_ip(vcpu)) {
+		dprintk_pte("KVM MMU: No PTE found (sdr1=0x%llx ptegp=0x%lx)\n",
+			    to_book3s(vcpu)->sdr1, ptegp);
+		for (i=0; i<16; i+=2) {
+			dprintk_pte("   %02d: 0x%x - 0x%x (0x%llx)\n",
+				    i, pteg[i], pteg[i+1], ptem);
+		}
+	}
+
+	return -ENOENT;
+}
+
+static int kvmppc_mmu_book3s_32_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
+				      struct kvmppc_pte *pte, bool data)
+{
+	int r;
+
+	pte->eaddr = eaddr;
+	r = kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, pte, data);
+	if (r < 0)
+	       r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte, data, true);
+	if (r < 0)
+	       r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte, data, false);
+
+	return r;
+}
+
+
+static u32 kvmppc_mmu_book3s_32_mfsrin(struct kvm_vcpu *vcpu, u32 srnum)
+{
+	return to_book3s(vcpu)->sr[srnum].raw;
+}
+
+static void kvmppc_mmu_book3s_32_mtsrin(struct kvm_vcpu *vcpu, u32 srnum,
+					ulong value)
+{
+	struct kvmppc_sr *sre;
+
+	sre = &to_book3s(vcpu)->sr[srnum];
+
+	/* Flush any left-over shadows from the previous SR */
+
+	/* XXX Not necessary? */
+	/* kvmppc_mmu_pte_flush(vcpu, ((u64)sre->vsid) << 28, 0xf0000000ULL); */
+
+	/* And then put in the new SR */
+	sre->raw = value;
+	sre->vsid = (value & 0x0fffffff);
+	sre->Ks = (value & 0x40000000) ? true : false;
+	sre->Kp = (value & 0x20000000) ? true : false;
+	sre->nx = (value & 0x10000000) ? true : false;
+
+	/* Map the new segment */
+	kvmppc_mmu_map_segment(vcpu, srnum << SID_SHIFT);
+}
+
+static void kvmppc_mmu_book3s_32_tlbie(struct kvm_vcpu *vcpu, ulong ea, bool large)
+{
+	kvmppc_mmu_pte_flush(vcpu, ea, ~0xFFFULL);
+}
+
+static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, u64 esid,
+					     u64 *vsid)
+{
+	/* In case we only have one of MSR_IR or MSR_DR set, let's put
+	   that in the real-mode context (and hope RM doesn't access
+	   high memory) */
+	switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) {
+	case 0:
+		*vsid = (VSID_REAL >> 16) | esid;
+		break;
+	case MSR_IR:
+		*vsid = (VSID_REAL_IR >> 16) | esid;
+		break;
+	case MSR_DR:
+		*vsid = (VSID_REAL_DR >> 16) | esid;
+		break;
+	case MSR_DR|MSR_IR:
+	{
+		ulong ea;
+		ea = esid << SID_SHIFT;
+		*vsid = find_sr(to_book3s(vcpu), ea)->vsid;
+		break;
+	}
+	default:
+		BUG();
+	}
+
+	return 0;
+}
+
+static bool kvmppc_mmu_book3s_32_is_dcbz32(struct kvm_vcpu *vcpu)
+{
+	return true;
+}
+
+
+void kvmppc_mmu_book3s_32_init(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_mmu *mmu = &vcpu->arch.mmu;
+
+	mmu->mtsrin = kvmppc_mmu_book3s_32_mtsrin;
+	mmu->mfsrin = kvmppc_mmu_book3s_32_mfsrin;
+	mmu->xlate = kvmppc_mmu_book3s_32_xlate;
+	mmu->reset_msr = kvmppc_mmu_book3s_32_reset_msr;
+	mmu->tlbie = kvmppc_mmu_book3s_32_tlbie;
+	mmu->esid_to_vsid = kvmppc_mmu_book3s_32_esid_to_vsid;
+	mmu->ea_to_vp = kvmppc_mmu_book3s_32_ea_to_vp;
+	mmu->is_dcbz32 = kvmppc_mmu_book3s_32_is_dcbz32;
+
+	mmu->slbmte = NULL;
+	mmu->slbmfee = NULL;
+	mmu->slbmfev = NULL;
+	mmu->slbie = NULL;
+	mmu->slbia = NULL;
+}
diff --git a/arch/powerpc/kvm/book3s_64_emulate.c b/arch/powerpc/kvm/book3s_64_emulate.c
new file mode 100644
index 0000000..1027eac
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_64_emulate.c
@@ -0,0 +1,345 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright SUSE Linux Products GmbH 2009
+ *
+ * Authors: Alexander Graf <agraf@suse.de>
+ */
+
+#include <asm/kvm_ppc.h>
+#include <asm/disassemble.h>
+#include <asm/kvm_book3s.h>
+#include <asm/reg.h>
+
+#define OP_19_XOP_RFID		18
+#define OP_19_XOP_RFI		50
+
+#define OP_31_XOP_MFMSR		83
+#define OP_31_XOP_MTMSR		146
+#define OP_31_XOP_MTMSRD	178
+#define OP_31_XOP_MTSRIN	242
+#define OP_31_XOP_TLBIEL	274
+#define OP_31_XOP_TLBIE		306
+#define OP_31_XOP_SLBMTE	402
+#define OP_31_XOP_SLBIE		434
+#define OP_31_XOP_SLBIA		498
+#define OP_31_XOP_MFSRIN	659
+#define OP_31_XOP_SLBMFEV	851
+#define OP_31_XOP_EIOIO		854
+#define OP_31_XOP_SLBMFEE	915
+
+/* DCBZ is actually 1014, but we patch it to 1010 so we get a trap */
+#define OP_31_XOP_DCBZ		1010
+
+int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
+                           unsigned int inst, int *advance)
+{
+	int emulated = EMULATE_DONE;
+
+	switch (get_op(inst)) {
+	case 19:
+		switch (get_xop(inst)) {
+		case OP_19_XOP_RFID:
+		case OP_19_XOP_RFI:
+			vcpu->arch.pc = vcpu->arch.srr0;
+			kvmppc_set_msr(vcpu, vcpu->arch.srr1);
+			*advance = 0;
+			break;
+
+		default:
+			emulated = EMULATE_FAIL;
+			break;
+		}
+		break;
+	case 31:
+		switch (get_xop(inst)) {
+		case OP_31_XOP_MFMSR:
+			vcpu->arch.gpr[get_rt(inst)] = vcpu->arch.msr;
+			break;
+		case OP_31_XOP_MTMSRD:
+		{
+			ulong rs = vcpu->arch.gpr[get_rs(inst)];
+			if (inst & 0x10000) {
+				vcpu->arch.msr &= ~(MSR_RI | MSR_EE);
+				vcpu->arch.msr |= rs & (MSR_RI | MSR_EE);
+			} else
+				kvmppc_set_msr(vcpu, rs);
+			break;
+		}
+		case OP_31_XOP_MTMSR:
+			kvmppc_set_msr(vcpu, vcpu->arch.gpr[get_rs(inst)]);
+			break;
+		case OP_31_XOP_MFSRIN:
+		{
+			int srnum;
+
+			srnum = (vcpu->arch.gpr[get_rb(inst)] >> 28) & 0xf;
+			if (vcpu->arch.mmu.mfsrin) {
+				u32 sr;
+				sr = vcpu->arch.mmu.mfsrin(vcpu, srnum);
+				vcpu->arch.gpr[get_rt(inst)] = sr;
+			}
+			break;
+		}
+		case OP_31_XOP_MTSRIN:
+			vcpu->arch.mmu.mtsrin(vcpu,
+				(vcpu->arch.gpr[get_rb(inst)] >> 28) & 0xf,
+				vcpu->arch.gpr[get_rs(inst)]);
+			break;
+		case OP_31_XOP_TLBIE:
+		case OP_31_XOP_TLBIEL:
+		{
+			bool large = (inst & 0x00200000) ? true : false;
+			ulong addr = vcpu->arch.gpr[get_rb(inst)];
+			vcpu->arch.mmu.tlbie(vcpu, addr, large);
+			break;
+		}
+		case OP_31_XOP_EIOIO:
+			break;
+		case OP_31_XOP_SLBMTE:
+			if (!vcpu->arch.mmu.slbmte)
+				return EMULATE_FAIL;
+
+			vcpu->arch.mmu.slbmte(vcpu, vcpu->arch.gpr[get_rs(inst)],
+						vcpu->arch.gpr[get_rb(inst)]);
+			break;
+		case OP_31_XOP_SLBIE:
+			if (!vcpu->arch.mmu.slbie)
+				return EMULATE_FAIL;
+
+			vcpu->arch.mmu.slbie(vcpu, vcpu->arch.gpr[get_rb(inst)]);
+			break;
+		case OP_31_XOP_SLBIA:
+			if (!vcpu->arch.mmu.slbia)
+				return EMULATE_FAIL;
+
+			vcpu->arch.mmu.slbia(vcpu);
+			break;
+		case OP_31_XOP_SLBMFEE:
+			if (!vcpu->arch.mmu.slbmfee) {
+				emulated = EMULATE_FAIL;
+			} else {
+				ulong t, rb;
+
+				rb = vcpu->arch.gpr[get_rb(inst)];
+				t = vcpu->arch.mmu.slbmfee(vcpu, rb);
+				vcpu->arch.gpr[get_rt(inst)] = t;
+			}
+			break;
+		case OP_31_XOP_SLBMFEV:
+			if (!vcpu->arch.mmu.slbmfev) {
+				emulated = EMULATE_FAIL;
+			} else {
+				ulong t, rb;
+
+				rb = vcpu->arch.gpr[get_rb(inst)];
+				t = vcpu->arch.mmu.slbmfev(vcpu, rb);
+				vcpu->arch.gpr[get_rt(inst)] = t;
+			}
+			break;
+		case OP_31_XOP_DCBZ:
+		{
+			ulong rb =  vcpu->arch.gpr[get_rb(inst)];
+			ulong ra = 0;
+			ulong addr;
+			u32 zeros[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
+
+			if (get_ra(inst))
+				ra = vcpu->arch.gpr[get_ra(inst)];
+
+			addr = (ra + rb) & ~31ULL;
+			if (!(vcpu->arch.msr & MSR_SF))
+				addr &= 0xffffffff;
+
+			if (kvmppc_st(vcpu, addr, 32, zeros)) {
+				vcpu->arch.dear = addr;
+				vcpu->arch.fault_dear = addr;
+				to_book3s(vcpu)->dsisr = DSISR_PROTFAULT |
+						      DSISR_ISSTORE;
+				kvmppc_book3s_queue_irqprio(vcpu,
+					BOOK3S_INTERRUPT_DATA_STORAGE);
+				kvmppc_mmu_pte_flush(vcpu, addr, ~0xFFFULL);
+			}
+
+			break;
+		}
+		default:
+			emulated = EMULATE_FAIL;
+		}
+		break;
+	default:
+		emulated = EMULATE_FAIL;
+	}
+
+	return emulated;
+}
+
+void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat, bool upper,
+                    u32 val)
+{
+	if (upper) {
+		/* Upper BAT */
+		u32 bl = (val >> 2) & 0x7ff;
+		bat->bepi_mask = (~bl << 17);
+		bat->bepi = val & 0xfffe0000;
+		bat->vs = (val & 2) ? 1 : 0;
+		bat->vp = (val & 1) ? 1 : 0;
+		bat->raw = (bat->raw & 0xffffffff00000000ULL) | val;
+	} else {
+		/* Lower BAT */
+		bat->brpn = val & 0xfffe0000;
+		bat->wimg = (val >> 3) & 0xf;
+		bat->pp = val & 3;
+		bat->raw = (bat->raw & 0x00000000ffffffffULL) | ((u64)val << 32);
+	}
+}
+
+static void kvmppc_write_bat(struct kvm_vcpu *vcpu, int sprn, u32 val)
+{
+	struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
+	struct kvmppc_bat *bat;
+
+	switch (sprn) {
+	case SPRN_IBAT0U ... SPRN_IBAT3L:
+		bat = &vcpu_book3s->ibat[(sprn - SPRN_IBAT0U) / 2];
+		break;
+	case SPRN_IBAT4U ... SPRN_IBAT7L:
+		bat = &vcpu_book3s->ibat[(sprn - SPRN_IBAT4U) / 2];
+		break;
+	case SPRN_DBAT0U ... SPRN_DBAT3L:
+		bat = &vcpu_book3s->dbat[(sprn - SPRN_DBAT0U) / 2];
+		break;
+	case SPRN_DBAT4U ... SPRN_DBAT7L:
+		bat = &vcpu_book3s->dbat[(sprn - SPRN_DBAT4U) / 2];
+		break;
+	default:
+		BUG();
+	}
+
+	kvmppc_set_bat(vcpu, bat, !(sprn % 2), val);
+}
+
+int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
+{
+	int emulated = EMULATE_DONE;
+
+	switch (sprn) {
+	case SPRN_SDR1:
+		to_book3s(vcpu)->sdr1 = vcpu->arch.gpr[rs];
+		break;
+	case SPRN_DSISR:
+		to_book3s(vcpu)->dsisr = vcpu->arch.gpr[rs];
+		break;
+	case SPRN_DAR:
+		vcpu->arch.dear = vcpu->arch.gpr[rs];
+		break;
+	case SPRN_HIOR:
+		to_book3s(vcpu)->hior = vcpu->arch.gpr[rs];
+		break;
+	case SPRN_IBAT0U ... SPRN_IBAT3L:
+	case SPRN_IBAT4U ... SPRN_IBAT7L:
+	case SPRN_DBAT0U ... SPRN_DBAT3L:
+	case SPRN_DBAT4U ... SPRN_DBAT7L:
+		kvmppc_write_bat(vcpu, sprn, (u32)vcpu->arch.gpr[rs]);
+		/* BAT writes happen so rarely that we're ok to flush
+		 * everything here */
+		kvmppc_mmu_pte_flush(vcpu, 0, 0);
+		break;
+	case SPRN_HID0:
+		to_book3s(vcpu)->hid[0] = vcpu->arch.gpr[rs];
+		break;
+	case SPRN_HID1:
+		to_book3s(vcpu)->hid[1] = vcpu->arch.gpr[rs];
+		break;
+	case SPRN_HID2:
+		to_book3s(vcpu)->hid[2] = vcpu->arch.gpr[rs];
+		break;
+	case SPRN_HID4:
+		to_book3s(vcpu)->hid[4] = vcpu->arch.gpr[rs];
+		break;
+	case SPRN_HID5:
+		to_book3s(vcpu)->hid[5] = vcpu->arch.gpr[rs];
+		/* guest HID5 set can change is_dcbz32 */
+		if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
+		    (mfmsr() & MSR_HV))
+			vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32;
+		break;
+	case SPRN_ICTC:
+	case SPRN_THRM1:
+	case SPRN_THRM2:
+	case SPRN_THRM3:
+	case SPRN_CTRLF:
+	case SPRN_CTRLT:
+		break;
+	default:
+		printk(KERN_INFO "KVM: invalid SPR write: %d\n", sprn);
+#ifndef DEBUG_SPR
+		emulated = EMULATE_FAIL;
+#endif
+		break;
+	}
+
+	return emulated;
+}
+
+int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
+{
+	int emulated = EMULATE_DONE;
+
+	switch (sprn) {
+	case SPRN_SDR1:
+		vcpu->arch.gpr[rt] = to_book3s(vcpu)->sdr1;
+		break;
+	case SPRN_DSISR:
+		vcpu->arch.gpr[rt] = to_book3s(vcpu)->dsisr;
+		break;
+	case SPRN_DAR:
+		vcpu->arch.gpr[rt] = vcpu->arch.dear;
+		break;
+	case SPRN_HIOR:
+		vcpu->arch.gpr[rt] = to_book3s(vcpu)->hior;
+		break;
+	case SPRN_HID0:
+		vcpu->arch.gpr[rt] = to_book3s(vcpu)->hid[0];
+		break;
+	case SPRN_HID1:
+		vcpu->arch.gpr[rt] = to_book3s(vcpu)->hid[1];
+		break;
+	case SPRN_HID2:
+		vcpu->arch.gpr[rt] = to_book3s(vcpu)->hid[2];
+		break;
+	case SPRN_HID4:
+		vcpu->arch.gpr[rt] = to_book3s(vcpu)->hid[4];
+		break;
+	case SPRN_HID5:
+		vcpu->arch.gpr[rt] = to_book3s(vcpu)->hid[5];
+		break;
+	case SPRN_THRM1:
+	case SPRN_THRM2:
+	case SPRN_THRM3:
+	case SPRN_CTRLF:
+	case SPRN_CTRLT:
+		vcpu->arch.gpr[rt] = 0;
+		break;
+	default:
+		printk(KERN_INFO "KVM: invalid SPR read: %d\n", sprn);
+#ifndef DEBUG_SPR
+		emulated = EMULATE_FAIL;
+#endif
+		break;
+	}
+
+	return emulated;
+}
+
diff --git a/arch/powerpc/kvm/book3s_64_exports.c b/arch/powerpc/kvm/book3s_64_exports.c
new file mode 100644
index 0000000..5b2db38
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_64_exports.c
@@ -0,0 +1,24 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright SUSE Linux Products GmbH 2009
+ *
+ * Authors: Alexander Graf <agraf@suse.de>
+ */
+
+#include <linux/module.h>
+#include <asm/kvm_book3s.h>
+
+EXPORT_SYMBOL_GPL(kvmppc_trampoline_enter);
+EXPORT_SYMBOL_GPL(kvmppc_trampoline_lowmem);
diff --git a/arch/powerpc/kvm/book3s_64_interrupts.S b/arch/powerpc/kvm/book3s_64_interrupts.S
new file mode 100644
index 0000000..7b55d80
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_64_interrupts.S
@@ -0,0 +1,392 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright SUSE Linux Products GmbH 2009
+ *
+ * Authors: Alexander Graf <agraf@suse.de>
+ */
+
+#include <asm/ppc_asm.h>
+#include <asm/kvm_asm.h>
+#include <asm/reg.h>
+#include <asm/page.h>
+#include <asm/asm-offsets.h>
+#include <asm/exception-64s.h>
+
+#define KVMPPC_HANDLE_EXIT .kvmppc_handle_exit
+#define ULONG_SIZE 8
+#define VCPU_GPR(n)     (VCPU_GPRS + (n * ULONG_SIZE))
+
+.macro mfpaca tmp_reg, src_reg, offset, vcpu_reg
+	ld	\tmp_reg, (PACA_EXMC+\offset)(r13)
+	std	\tmp_reg, VCPU_GPR(\src_reg)(\vcpu_reg)
+.endm
+
+.macro DISABLE_INTERRUPTS
+       mfmsr   r0
+       rldicl  r0,r0,48,1
+       rotldi  r0,r0,16
+       mtmsrd  r0,1
+.endm
+
+/*****************************************************************************
+ *                                                                           *
+ *     Guest entry / exit code that is in kernel module memory (highmem)     *
+ *                                                                           *
+ ****************************************************************************/
+
+/* Registers:
+ *  r3: kvm_run pointer
+ *  r4: vcpu pointer
+ */
+_GLOBAL(__kvmppc_vcpu_entry)
+
+kvm_start_entry:
+	/* Write correct stack frame */
+	mflr    r0
+	std     r0,16(r1)
+
+	/* Save host state to the stack */
+	stdu	r1, -SWITCH_FRAME_SIZE(r1)
+
+	/* Save r3 (kvm_run) and r4 (vcpu) */
+	SAVE_2GPRS(3, r1)
+
+	/* Save non-volatile registers (r14 - r31) */
+	SAVE_NVGPRS(r1)
+
+	/* Save LR */
+	mflr	r14
+	std	r14, _LINK(r1)
+
+/* XXX optimize non-volatile loading away */
+kvm_start_lightweight:
+
+	DISABLE_INTERRUPTS
+
+	/* Save R1/R2 in the PACA */
+	std	r1, PACAR1(r13)
+	std	r2, (PACA_EXMC+EX_SRR0)(r13)
+	ld	r3, VCPU_HIGHMEM_HANDLER(r4)
+	std	r3, PACASAVEDMSR(r13)
+
+	/* Load non-volatile guest state from the vcpu */
+	ld	r14, VCPU_GPR(r14)(r4)
+	ld	r15, VCPU_GPR(r15)(r4)
+	ld	r16, VCPU_GPR(r16)(r4)
+	ld	r17, VCPU_GPR(r17)(r4)
+	ld	r18, VCPU_GPR(r18)(r4)
+	ld	r19, VCPU_GPR(r19)(r4)
+	ld	r20, VCPU_GPR(r20)(r4)
+	ld	r21, VCPU_GPR(r21)(r4)
+	ld	r22, VCPU_GPR(r22)(r4)
+	ld	r23, VCPU_GPR(r23)(r4)
+	ld	r24, VCPU_GPR(r24)(r4)
+	ld	r25, VCPU_GPR(r25)(r4)
+	ld	r26, VCPU_GPR(r26)(r4)
+	ld	r27, VCPU_GPR(r27)(r4)
+	ld	r28, VCPU_GPR(r28)(r4)
+	ld	r29, VCPU_GPR(r29)(r4)
+	ld	r30, VCPU_GPR(r30)(r4)
+	ld	r31, VCPU_GPR(r31)(r4)
+
+	ld	r9, VCPU_PC(r4)			/* r9 = vcpu->arch.pc */
+	ld	r10, VCPU_SHADOW_MSR(r4)	/* r10 = vcpu->arch.shadow_msr */
+
+	ld	r3, VCPU_TRAMPOLINE_ENTER(r4)
+	mtsrr0	r3
+
+	LOAD_REG_IMMEDIATE(r3, MSR_KERNEL & ~(MSR_IR | MSR_DR))
+	mtsrr1	r3
+
+	/* Load guest state in the respective registers */
+	lwz	r3, VCPU_CR(r4)		/* r3 = vcpu->arch.cr */
+	stw	r3, (PACA_EXMC + EX_CCR)(r13)
+
+	ld	r3, VCPU_CTR(r4)	/* r3 = vcpu->arch.ctr */
+	mtctr	r3			/* CTR = r3 */
+
+	ld	r3, VCPU_LR(r4)		/* r3 = vcpu->arch.lr */
+	mtlr	r3			/* LR = r3 */
+
+	ld	r3, VCPU_XER(r4)	/* r3 = vcpu->arch.xer */
+	std	r3, (PACA_EXMC + EX_R3)(r13)
+
+	/* Some guests may need to have dcbz set to 32 byte length.
+	 *
+	 * Usually we ensure that by patching the guest's instructions
+	 * to trap on dcbz and emulate it in the hypervisor.
+	 *
+	 * If we can, we should tell the CPU to use 32 byte dcbz though,
+	 * because that's a lot faster.
+	 */
+
+	ld	r3, VCPU_HFLAGS(r4)
+	rldicl.	r3, r3, 0, 63		/* CR = ((r3 & 1) == 0) */
+	beq	no_dcbz32_on
+
+	mfspr   r3,SPRN_HID5
+	ori     r3, r3, 0x80		/* XXX HID5_dcbz32 = 0x80 */
+	mtspr   SPRN_HID5,r3
+
+no_dcbz32_on:
+	/*	Load guest GPRs */
+
+	ld	r3, VCPU_GPR(r9)(r4)
+	std	r3, (PACA_EXMC + EX_R9)(r13)
+	ld	r3, VCPU_GPR(r10)(r4)
+	std	r3, (PACA_EXMC + EX_R10)(r13)
+	ld	r3, VCPU_GPR(r11)(r4)
+	std	r3, (PACA_EXMC + EX_R11)(r13)
+	ld	r3, VCPU_GPR(r12)(r4)
+	std	r3, (PACA_EXMC + EX_R12)(r13)
+	ld	r3, VCPU_GPR(r13)(r4)
+	std	r3, (PACA_EXMC + EX_R13)(r13)
+
+	ld	r0, VCPU_GPR(r0)(r4)
+	ld	r1, VCPU_GPR(r1)(r4)
+	ld	r2, VCPU_GPR(r2)(r4)
+	ld	r3, VCPU_GPR(r3)(r4)
+	ld	r5, VCPU_GPR(r5)(r4)
+	ld	r6, VCPU_GPR(r6)(r4)
+	ld	r7, VCPU_GPR(r7)(r4)
+	ld	r8, VCPU_GPR(r8)(r4)
+	ld	r4, VCPU_GPR(r4)(r4)
+
+	/* This sets the Magic value for the trampoline */
+
+	li	r11, 1
+	stb	r11, PACA_KVM_IN_GUEST(r13)
+
+	/* Jump to SLB patching handlder and into our guest */
+	RFI
+
+/*
+ * This is the handler in module memory. It gets jumped at from the
+ * lowmem trampoline code, so it's basically the guest exit code.
+ *
+ */
+
+.global kvmppc_handler_highmem
+kvmppc_handler_highmem:
+
+	/*
+	 * Register usage at this point:
+	 *
+	 * R00   = guest R13
+	 * R01   = host R1
+	 * R02   = host R2
+	 * R10   = guest PC
+	 * R11   = guest MSR
+	 * R12   = exit handler id
+	 * R13   = PACA
+	 * PACA.exmc.R9    = guest R1
+	 * PACA.exmc.R10   = guest R10
+	 * PACA.exmc.R11   = guest R11
+	 * PACA.exmc.R12   = guest R12
+	 * PACA.exmc.R13   = guest R2
+	 * PACA.exmc.DAR   = guest DAR
+	 * PACA.exmc.DSISR = guest DSISR
+	 * PACA.exmc.LR    = guest instruction
+	 * PACA.exmc.CCR   = guest CR
+	 * PACA.exmc.SRR0  = guest R0
+	 *
+	 */
+
+	std	r3, (PACA_EXMC+EX_R3)(r13)
+
+	/* save the exit id in R3 */
+	mr	r3, r12
+
+	/* R12 = vcpu */
+	ld	r12, GPR4(r1)
+
+	/* Now save the guest state */
+
+	std	r0, VCPU_GPR(r13)(r12)
+	std	r4, VCPU_GPR(r4)(r12)
+	std	r5, VCPU_GPR(r5)(r12)
+	std	r6, VCPU_GPR(r6)(r12)
+	std	r7, VCPU_GPR(r7)(r12)
+	std	r8, VCPU_GPR(r8)(r12)
+	std	r9, VCPU_GPR(r9)(r12)
+
+	/* get registers from PACA */
+	mfpaca	r5, r0, EX_SRR0, r12
+	mfpaca	r5, r3, EX_R3, r12
+	mfpaca	r5, r1, EX_R9, r12
+	mfpaca	r5, r10, EX_R10, r12
+	mfpaca	r5, r11, EX_R11, r12
+	mfpaca	r5, r12, EX_R12, r12
+	mfpaca	r5, r2, EX_R13, r12
+
+	lwz	r5, (PACA_EXMC+EX_LR)(r13)
+	stw	r5, VCPU_LAST_INST(r12)
+
+	lwz	r5, (PACA_EXMC+EX_CCR)(r13)
+	stw	r5, VCPU_CR(r12)
+
+	ld	r5, VCPU_HFLAGS(r12)
+	rldicl.	r5, r5, 0, 63		/* CR = ((r5 & 1) == 0) */
+	beq	no_dcbz32_off
+
+	mfspr   r5,SPRN_HID5
+	rldimi  r5,r5,6,56
+	mtspr   SPRN_HID5,r5
+
+no_dcbz32_off:
+
+	/* XXX maybe skip on lightweight? */
+	std	r14, VCPU_GPR(r14)(r12)
+	std	r15, VCPU_GPR(r15)(r12)
+	std	r16, VCPU_GPR(r16)(r12)
+	std	r17, VCPU_GPR(r17)(r12)
+	std	r18, VCPU_GPR(r18)(r12)
+	std	r19, VCPU_GPR(r19)(r12)
+	std	r20, VCPU_GPR(r20)(r12)
+	std	r21, VCPU_GPR(r21)(r12)
+	std	r22, VCPU_GPR(r22)(r12)
+	std	r23, VCPU_GPR(r23)(r12)
+	std	r24, VCPU_GPR(r24)(r12)
+	std	r25, VCPU_GPR(r25)(r12)
+	std	r26, VCPU_GPR(r26)(r12)
+	std	r27, VCPU_GPR(r27)(r12)
+	std	r28, VCPU_GPR(r28)(r12)
+	std	r29, VCPU_GPR(r29)(r12)
+	std	r30, VCPU_GPR(r30)(r12)
+	std	r31, VCPU_GPR(r31)(r12)
+
+	/* Restore non-volatile host registers (r14 - r31) */
+	REST_NVGPRS(r1)
+
+	/* Save guest PC (R10) */
+	std	r10, VCPU_PC(r12)
+
+	/* Save guest msr (R11) */
+	std	r11, VCPU_SHADOW_MSR(r12)
+
+	/* Save guest CTR (in R12) */
+	mfctr	r5
+	std	r5, VCPU_CTR(r12)
+
+	/* Save guest LR */
+	mflr	r5
+	std	r5, VCPU_LR(r12)
+
+	/* Save guest XER */
+	mfxer	r5
+	std	r5, VCPU_XER(r12)
+
+	/* Save guest DAR */
+	ld	r5, (PACA_EXMC+EX_DAR)(r13)
+	std	r5, VCPU_FAULT_DEAR(r12)
+
+	/* Save guest DSISR */
+	lwz	r5, (PACA_EXMC+EX_DSISR)(r13)
+	std	r5, VCPU_FAULT_DSISR(r12)
+
+	/* Restore host msr -> SRR1 */
+	ld	r7, VCPU_HOST_MSR(r12)
+	mtsrr1	r7
+
+	/* Restore host IP -> SRR0 */
+	ld	r6, VCPU_HOST_RETIP(r12)
+	mtsrr0	r6
+
+	/*
+	 * For some interrupts, we need to call the real Linux
+	 * handler, so it can do work for us. This has to happen
+	 * as if the interrupt arrived from the kernel though,
+	 * so let's fake it here where most state is restored.
+	 *
+	 * Call Linux for hardware interrupts/decrementer
+	 * r3 = address of interrupt handler (exit reason)
+	 */
+
+	cmpwi	r3, BOOK3S_INTERRUPT_EXTERNAL
+	beq	call_linux_handler
+	cmpwi	r3, BOOK3S_INTERRUPT_DECREMENTER
+	beq	call_linux_handler
+
+	/* Back to Interruptable Mode! (goto kvm_return_point) */
+	RFI
+
+call_linux_handler:
+
+	/*
+	 * If we land here we need to jump back to the handler we
+	 * came from.
+	 *
+	 * We have a page that we can access from real mode, so let's
+	 * jump back to that and use it as a trampoline to get back into the
+	 * interrupt handler!
+	 *
+	 * R3 still contains the exit code,
+	 * R6 VCPU_HOST_RETIP and
+	 * R7 VCPU_HOST_MSR
+	 */
+
+	mtlr	r3
+
+	ld	r5, VCPU_TRAMPOLINE_LOWMEM(r12)
+	mtsrr0	r5
+	LOAD_REG_IMMEDIATE(r5, MSR_KERNEL & ~(MSR_IR | MSR_DR))
+	mtsrr1	r5
+
+	RFI
+
+.global kvm_return_point
+kvm_return_point:
+
+	/* Jump back to lightweight entry if we're supposed to */
+	/* go back into the guest */
+	mr	r5, r3
+	/* Restore r3 (kvm_run) and r4 (vcpu) */
+	REST_2GPRS(3, r1)
+	bl	KVMPPC_HANDLE_EXIT
+
+#if 0 /* XXX get lightweight exits back */
+	cmpwi	r3, RESUME_GUEST
+	bne	kvm_exit_heavyweight
+
+	/* put VCPU and KVM_RUN back into place and roll again! */
+	REST_2GPRS(3, r1)
+	b	kvm_start_lightweight
+
+kvm_exit_heavyweight:
+	/* Restore non-volatile host registers */
+	ld	r14, _LINK(r1)
+	mtlr	r14
+	REST_NVGPRS(r1)
+
+	addi    r1, r1, SWITCH_FRAME_SIZE
+#else
+	ld	r4, _LINK(r1)
+	mtlr	r4
+
+	cmpwi	r3, RESUME_GUEST
+	bne	kvm_exit_heavyweight
+
+	REST_2GPRS(3, r1)
+
+	addi    r1, r1, SWITCH_FRAME_SIZE
+
+	b	kvm_start_entry
+
+kvm_exit_heavyweight:
+
+	addi    r1, r1, SWITCH_FRAME_SIZE
+#endif
+
+	blr
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
new file mode 100644
index 0000000..5598f88
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -0,0 +1,478 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright SUSE Linux Products GmbH 2009
+ *
+ * Authors: Alexander Graf <agraf@suse.de>
+ */
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/highmem.h>
+
+#include <asm/tlbflush.h>
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+
+/* #define DEBUG_MMU */
+
+#ifdef DEBUG_MMU
+#define dprintk(X...) printk(KERN_INFO X)
+#else
+#define dprintk(X...) do { } while(0)
+#endif
+
+static void kvmppc_mmu_book3s_64_reset_msr(struct kvm_vcpu *vcpu)
+{
+	kvmppc_set_msr(vcpu, MSR_SF);
+}
+
+static struct kvmppc_slb *kvmppc_mmu_book3s_64_find_slbe(
+				struct kvmppc_vcpu_book3s *vcpu_book3s,
+				gva_t eaddr)
+{
+	int i;
+	u64 esid = GET_ESID(eaddr);
+	u64 esid_1t = GET_ESID_1T(eaddr);
+
+	for (i = 0; i < vcpu_book3s->slb_nr; i++) {
+		u64 cmp_esid = esid;
+
+		if (!vcpu_book3s->slb[i].valid)
+			continue;
+
+		if (vcpu_book3s->slb[i].large)
+			cmp_esid = esid_1t;
+
+		if (vcpu_book3s->slb[i].esid == cmp_esid)
+			return &vcpu_book3s->slb[i];
+	}
+
+	dprintk("KVM: No SLB entry found for 0x%lx [%llx | %llx]\n",
+		eaddr, esid, esid_1t);
+	for (i = 0; i < vcpu_book3s->slb_nr; i++) {
+	    if (vcpu_book3s->slb[i].vsid)
+		dprintk("  %d: %c%c %llx %llx\n", i,
+			vcpu_book3s->slb[i].valid ? 'v' : ' ',
+			vcpu_book3s->slb[i].large ? 'l' : ' ',
+			vcpu_book3s->slb[i].esid,
+			vcpu_book3s->slb[i].vsid);
+	}
+
+	return NULL;
+}
+
+static u64 kvmppc_mmu_book3s_64_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr,
+					 bool data)
+{
+	struct kvmppc_slb *slb;
+
+	slb = kvmppc_mmu_book3s_64_find_slbe(to_book3s(vcpu), eaddr);
+	if (!slb)
+		return 0;
+
+	if (slb->large)
+		return (((u64)eaddr >> 12) & 0xfffffff) |
+		       (((u64)slb->vsid) << 28);
+
+	return (((u64)eaddr >> 12) & 0xffff) | (((u64)slb->vsid) << 16);
+}
+
+static int kvmppc_mmu_book3s_64_get_pagesize(struct kvmppc_slb *slbe)
+{
+	return slbe->large ? 24 : 12;
+}
+
+static u32 kvmppc_mmu_book3s_64_get_page(struct kvmppc_slb *slbe, gva_t eaddr)
+{
+	int p = kvmppc_mmu_book3s_64_get_pagesize(slbe);
+	return ((eaddr & 0xfffffff) >> p);
+}
+
+static hva_t kvmppc_mmu_book3s_64_get_pteg(
+				struct kvmppc_vcpu_book3s *vcpu_book3s,
+				struct kvmppc_slb *slbe, gva_t eaddr,
+				bool second)
+{
+	u64 hash, pteg, htabsize;
+	u32 page;
+	hva_t r;
+
+	page = kvmppc_mmu_book3s_64_get_page(slbe, eaddr);
+	htabsize = ((1 << ((vcpu_book3s->sdr1 & 0x1f) + 11)) - 1);
+
+	hash = slbe->vsid ^ page;
+	if (second)
+		hash = ~hash;
+	hash &= ((1ULL << 39ULL) - 1ULL);
+	hash &= htabsize;
+	hash <<= 7ULL;
+
+	pteg = vcpu_book3s->sdr1 & 0xfffffffffffc0000ULL;
+	pteg |= hash;
+
+	dprintk("MMU: page=0x%x sdr1=0x%llx pteg=0x%llx vsid=0x%llx\n",
+		page, vcpu_book3s->sdr1, pteg, slbe->vsid);
+
+	r = gfn_to_hva(vcpu_book3s->vcpu.kvm, pteg >> PAGE_SHIFT);
+	if (kvm_is_error_hva(r))
+		return r;
+	return r | (pteg & ~PAGE_MASK);
+}
+
+static u64 kvmppc_mmu_book3s_64_get_avpn(struct kvmppc_slb *slbe, gva_t eaddr)
+{
+	int p = kvmppc_mmu_book3s_64_get_pagesize(slbe);
+	u64 avpn;
+
+	avpn = kvmppc_mmu_book3s_64_get_page(slbe, eaddr);
+	avpn |= slbe->vsid << (28 - p);
+
+	if (p < 24)
+		avpn >>= ((80 - p) - 56) - 8;
+	else
+		avpn <<= 8;
+
+	return avpn;
+}
+
+static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
+				struct kvmppc_pte *gpte, bool data)
+{
+	struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
+	struct kvmppc_slb *slbe;
+	hva_t ptegp;
+	u64 pteg[16];
+	u64 avpn = 0;
+	int i;
+	u8 key = 0;
+	bool found = false;
+	bool perm_err = false;
+	int second = 0;
+
+	slbe = kvmppc_mmu_book3s_64_find_slbe(vcpu_book3s, eaddr);
+	if (!slbe)
+		goto no_seg_found;
+
+do_second:
+	ptegp = kvmppc_mmu_book3s_64_get_pteg(vcpu_book3s, slbe, eaddr, second);
+	if (kvm_is_error_hva(ptegp))
+		goto no_page_found;
+
+	avpn = kvmppc_mmu_book3s_64_get_avpn(slbe, eaddr);
+
+	if(copy_from_user(pteg, (void __user *)ptegp, sizeof(pteg))) {
+		printk(KERN_ERR "KVM can't copy data from 0x%lx!\n", ptegp);
+		goto no_page_found;
+	}
+
+	if ((vcpu->arch.msr & MSR_PR) && slbe->Kp)
+		key = 4;
+	else if (!(vcpu->arch.msr & MSR_PR) && slbe->Ks)
+		key = 4;
+
+	for (i=0; i<16; i+=2) {
+		u64 v = pteg[i];
+		u64 r = pteg[i+1];
+
+		/* Valid check */
+		if (!(v & HPTE_V_VALID))
+			continue;
+		/* Hash check */
+		if ((v & HPTE_V_SECONDARY) != second)
+			continue;
+
+		/* AVPN compare */
+		if (HPTE_V_AVPN_VAL(avpn) == HPTE_V_AVPN_VAL(v)) {
+			u8 pp = (r & HPTE_R_PP) | key;
+			int eaddr_mask = 0xFFF;
+
+			gpte->eaddr = eaddr;
+			gpte->vpage = kvmppc_mmu_book3s_64_ea_to_vp(vcpu,
+								    eaddr,
+								    data);
+			if (slbe->large)
+				eaddr_mask = 0xFFFFFF;
+			gpte->raddr = (r & HPTE_R_RPN) | (eaddr & eaddr_mask);
+			gpte->may_execute = ((r & HPTE_R_N) ? false : true);
+			gpte->may_read = false;
+			gpte->may_write = false;
+
+			switch (pp) {
+			case 0:
+			case 1:
+			case 2:
+			case 6:
+				gpte->may_write = true;
+				/* fall through */
+			case 3:
+			case 5:
+			case 7:
+				gpte->may_read = true;
+				break;
+			}
+
+			if (!gpte->may_read) {
+				perm_err = true;
+				continue;
+			}
+
+			dprintk("KVM MMU: Translated 0x%lx [0x%llx] -> 0x%llx "
+				"-> 0x%llx\n",
+				eaddr, avpn, gpte->vpage, gpte->raddr);
+			found = true;
+			break;
+		}
+	}
+
+	/* Update PTE R and C bits, so the guest's swapper knows we used the
+	 * page */
+	if (found) {
+		u32 oldr = pteg[i+1];
+
+		if (gpte->may_read) {
+			/* Set the accessed flag */
+			pteg[i+1] |= HPTE_R_R;
+		}
+		if (gpte->may_write) {
+			/* Set the dirty flag */
+			pteg[i+1] |= HPTE_R_C;
+		} else {
+			dprintk("KVM: Mapping read-only page!\n");
+		}
+
+		/* Write back into the PTEG */
+		if (pteg[i+1] != oldr)
+			copy_to_user((void __user *)ptegp, pteg, sizeof(pteg));
+
+		return 0;
+	} else {
+		dprintk("KVM MMU: No PTE found (ea=0x%lx sdr1=0x%llx "
+			"ptegp=0x%lx)\n",
+			eaddr, to_book3s(vcpu)->sdr1, ptegp);
+		for (i = 0; i < 16; i += 2)
+			dprintk("   %02d: 0x%llx - 0x%llx (0x%llx)\n",
+				i, pteg[i], pteg[i+1], avpn);
+
+		if (!second) {
+			second = HPTE_V_SECONDARY;
+			goto do_second;
+		}
+	}
+
+
+no_page_found:
+
+
+	if (perm_err)
+		return -EPERM;
+
+	return -ENOENT;
+
+no_seg_found:
+
+	dprintk("KVM MMU: Trigger segment fault\n");
+	return -EINVAL;
+}
+
+static void kvmppc_mmu_book3s_64_slbmte(struct kvm_vcpu *vcpu, u64 rs, u64 rb)
+{
+	struct kvmppc_vcpu_book3s *vcpu_book3s;
+	u64 esid, esid_1t;
+	int slb_nr;
+	struct kvmppc_slb *slbe;
+
+	dprintk("KVM MMU: slbmte(0x%llx, 0x%llx)\n", rs, rb);
+
+	vcpu_book3s = to_book3s(vcpu);
+
+	esid = GET_ESID(rb);
+	esid_1t = GET_ESID_1T(rb);
+	slb_nr = rb & 0xfff;
+
+	if (slb_nr > vcpu_book3s->slb_nr)
+		return;
+
+	slbe = &vcpu_book3s->slb[slb_nr];
+
+	slbe->large = (rs & SLB_VSID_L) ? 1 : 0;
+	slbe->esid  = slbe->large ? esid_1t : esid;
+	slbe->vsid  = rs >> 12;
+	slbe->valid = (rb & SLB_ESID_V) ? 1 : 0;
+	slbe->Ks    = (rs & SLB_VSID_KS) ? 1 : 0;
+	slbe->Kp    = (rs & SLB_VSID_KP) ? 1 : 0;
+	slbe->nx    = (rs & SLB_VSID_N) ? 1 : 0;
+	slbe->class = (rs & SLB_VSID_C) ? 1 : 0;
+
+	slbe->orige = rb & (ESID_MASK | SLB_ESID_V);
+	slbe->origv = rs;
+
+	/* Map the new segment */
+	kvmppc_mmu_map_segment(vcpu, esid << SID_SHIFT);
+}
+
+static u64 kvmppc_mmu_book3s_64_slbmfee(struct kvm_vcpu *vcpu, u64 slb_nr)
+{
+	struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
+	struct kvmppc_slb *slbe;
+
+	if (slb_nr > vcpu_book3s->slb_nr)
+		return 0;
+
+	slbe = &vcpu_book3s->slb[slb_nr];
+
+	return slbe->orige;
+}
+
+static u64 kvmppc_mmu_book3s_64_slbmfev(struct kvm_vcpu *vcpu, u64 slb_nr)
+{
+	struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
+	struct kvmppc_slb *slbe;
+
+	if (slb_nr > vcpu_book3s->slb_nr)
+		return 0;
+
+	slbe = &vcpu_book3s->slb[slb_nr];
+
+	return slbe->origv;
+}
+
+static void kvmppc_mmu_book3s_64_slbie(struct kvm_vcpu *vcpu, u64 ea)
+{
+	struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
+	struct kvmppc_slb *slbe;
+
+	dprintk("KVM MMU: slbie(0x%llx)\n", ea);
+
+	slbe = kvmppc_mmu_book3s_64_find_slbe(vcpu_book3s, ea);
+
+	if (!slbe)
+		return;
+
+	dprintk("KVM MMU: slbie(0x%llx, 0x%llx)\n", ea, slbe->esid);
+
+	slbe->valid = false;
+
+	kvmppc_mmu_map_segment(vcpu, ea);
+}
+
+static void kvmppc_mmu_book3s_64_slbia(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
+	int i;
+
+	dprintk("KVM MMU: slbia()\n");
+
+	for (i = 1; i < vcpu_book3s->slb_nr; i++)
+		vcpu_book3s->slb[i].valid = false;
+
+	if (vcpu->arch.msr & MSR_IR) {
+		kvmppc_mmu_flush_segments(vcpu);
+		kvmppc_mmu_map_segment(vcpu, vcpu->arch.pc);
+	}
+}
+
+static void kvmppc_mmu_book3s_64_mtsrin(struct kvm_vcpu *vcpu, u32 srnum,
+					ulong value)
+{
+	u64 rb = 0, rs = 0;
+
+	/* ESID = srnum */
+	rb |= (srnum & 0xf) << 28;
+	/* Set the valid bit */
+	rb |= 1 << 27;
+	/* Index = ESID */
+	rb |= srnum;
+
+	/* VSID = VSID */
+	rs |= (value & 0xfffffff) << 12;
+	/* flags = flags */
+	rs |= ((value >> 27) & 0xf) << 9;
+
+	kvmppc_mmu_book3s_64_slbmte(vcpu, rs, rb);
+}
+
+static void kvmppc_mmu_book3s_64_tlbie(struct kvm_vcpu *vcpu, ulong va,
+				       bool large)
+{
+	u64 mask = 0xFFFFFFFFFULL;
+
+	dprintk("KVM MMU: tlbie(0x%lx)\n", va);
+
+	if (large)
+		mask = 0xFFFFFF000ULL;
+	kvmppc_mmu_pte_vflush(vcpu, va >> 12, mask);
+}
+
+static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, u64 esid,
+					     u64 *vsid)
+{
+	switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) {
+	case 0:
+		*vsid = (VSID_REAL >> 16) | esid;
+		break;
+	case MSR_IR:
+		*vsid = (VSID_REAL_IR >> 16) | esid;
+		break;
+	case MSR_DR:
+		*vsid = (VSID_REAL_DR >> 16) | esid;
+		break;
+	case MSR_DR|MSR_IR:
+	{
+		ulong ea;
+		struct kvmppc_slb *slb;
+		ea = esid << SID_SHIFT;
+		slb = kvmppc_mmu_book3s_64_find_slbe(to_book3s(vcpu), ea);
+		if (slb)
+			*vsid = slb->vsid;
+		else
+			return -ENOENT;
+
+		break;
+	}
+	default:
+		BUG();
+		break;
+	}
+
+	return 0;
+}
+
+static bool kvmppc_mmu_book3s_64_is_dcbz32(struct kvm_vcpu *vcpu)
+{
+	return (to_book3s(vcpu)->hid[5] & 0x80);
+}
+
+void kvmppc_mmu_book3s_64_init(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_mmu *mmu = &vcpu->arch.mmu;
+
+	mmu->mfsrin = NULL;
+	mmu->mtsrin = kvmppc_mmu_book3s_64_mtsrin;
+	mmu->slbmte = kvmppc_mmu_book3s_64_slbmte;
+	mmu->slbmfee = kvmppc_mmu_book3s_64_slbmfee;
+	mmu->slbmfev = kvmppc_mmu_book3s_64_slbmfev;
+	mmu->slbie = kvmppc_mmu_book3s_64_slbie;
+	mmu->slbia = kvmppc_mmu_book3s_64_slbia;
+	mmu->xlate = kvmppc_mmu_book3s_64_xlate;
+	mmu->reset_msr = kvmppc_mmu_book3s_64_reset_msr;
+	mmu->tlbie = kvmppc_mmu_book3s_64_tlbie;
+	mmu->esid_to_vsid = kvmppc_mmu_book3s_64_esid_to_vsid;
+	mmu->ea_to_vp = kvmppc_mmu_book3s_64_ea_to_vp;
+	mmu->is_dcbz32 = kvmppc_mmu_book3s_64_is_dcbz32;
+
+	vcpu->arch.hflags |= BOOK3S_HFLAG_SLB;
+}
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c
new file mode 100644
index 0000000..f2899b2
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -0,0 +1,408 @@
+/*
+ * Copyright (C) 2009 SUSE Linux Products GmbH. All rights reserved.
+ *
+ * Authors:
+ *     Alexander Graf <agraf@suse.de>
+ *     Kevin Wolf <mail@kevin-wolf.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include <asm/mmu-hash64.h>
+#include <asm/machdep.h>
+#include <asm/mmu_context.h>
+#include <asm/hw_irq.h>
+
+#define PTE_SIZE 12
+#define VSID_ALL 0
+
+/* #define DEBUG_MMU */
+/* #define DEBUG_SLB */
+
+#ifdef DEBUG_MMU
+#define dprintk_mmu(a, ...) printk(KERN_INFO a, __VA_ARGS__)
+#else
+#define dprintk_mmu(a, ...) do { } while(0)
+#endif
+
+#ifdef DEBUG_SLB
+#define dprintk_slb(a, ...) printk(KERN_INFO a, __VA_ARGS__)
+#else
+#define dprintk_slb(a, ...) do { } while(0)
+#endif
+
+static void invalidate_pte(struct hpte_cache *pte)
+{
+	dprintk_mmu("KVM: Flushing SPT %d: 0x%llx (0x%llx) -> 0x%llx\n",
+		    i, pte->pte.eaddr, pte->pte.vpage, pte->host_va);
+
+	ppc_md.hpte_invalidate(pte->slot, pte->host_va,
+			       MMU_PAGE_4K, MMU_SEGSIZE_256M,
+			       false);
+	pte->host_va = 0;
+	kvm_release_pfn_dirty(pte->pfn);
+}
+
+void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, u64 guest_ea, u64 ea_mask)
+{
+	int i;
+
+	dprintk_mmu("KVM: Flushing %d Shadow PTEs: 0x%llx & 0x%llx\n",
+		    vcpu->arch.hpte_cache_offset, guest_ea, ea_mask);
+	BUG_ON(vcpu->arch.hpte_cache_offset > HPTEG_CACHE_NUM);
+
+	guest_ea &= ea_mask;
+	for (i = 0; i < vcpu->arch.hpte_cache_offset; i++) {
+		struct hpte_cache *pte;
+
+		pte = &vcpu->arch.hpte_cache[i];
+		if (!pte->host_va)
+			continue;
+
+		if ((pte->pte.eaddr & ea_mask) == guest_ea) {
+			invalidate_pte(pte);
+		}
+	}
+
+	/* Doing a complete flush -> start from scratch */
+	if (!ea_mask)
+		vcpu->arch.hpte_cache_offset = 0;
+}
+
+void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 guest_vp, u64 vp_mask)
+{
+	int i;
+
+	dprintk_mmu("KVM: Flushing %d Shadow vPTEs: 0x%llx & 0x%llx\n",
+		    vcpu->arch.hpte_cache_offset, guest_vp, vp_mask);
+	BUG_ON(vcpu->arch.hpte_cache_offset > HPTEG_CACHE_NUM);
+
+	guest_vp &= vp_mask;
+	for (i = 0; i < vcpu->arch.hpte_cache_offset; i++) {
+		struct hpte_cache *pte;
+
+		pte = &vcpu->arch.hpte_cache[i];
+		if (!pte->host_va)
+			continue;
+
+		if ((pte->pte.vpage & vp_mask) == guest_vp) {
+			invalidate_pte(pte);
+		}
+	}
+}
+
+void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, u64 pa_start, u64 pa_end)
+{
+	int i;
+
+	dprintk_mmu("KVM: Flushing %d Shadow pPTEs: 0x%llx & 0x%llx\n",
+		    vcpu->arch.hpte_cache_offset, guest_pa, pa_mask);
+	BUG_ON(vcpu->arch.hpte_cache_offset > HPTEG_CACHE_NUM);
+
+	for (i = 0; i < vcpu->arch.hpte_cache_offset; i++) {
+		struct hpte_cache *pte;
+
+		pte = &vcpu->arch.hpte_cache[i];
+		if (!pte->host_va)
+			continue;
+
+		if ((pte->pte.raddr >= pa_start) &&
+		    (pte->pte.raddr < pa_end)) {
+			invalidate_pte(pte);
+		}
+	}
+}
+
+struct kvmppc_pte *kvmppc_mmu_find_pte(struct kvm_vcpu *vcpu, u64 ea, bool data)
+{
+	int i;
+	u64 guest_vp;
+
+	guest_vp = vcpu->arch.mmu.ea_to_vp(vcpu, ea, false);
+	for (i=0; i<vcpu->arch.hpte_cache_offset; i++) {
+		struct hpte_cache *pte;
+
+		pte = &vcpu->arch.hpte_cache[i];
+		if (!pte->host_va)
+			continue;
+
+		if (pte->pte.vpage == guest_vp)
+			return &pte->pte;
+	}
+
+	return NULL;
+}
+
+static int kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu)
+{
+	if (vcpu->arch.hpte_cache_offset == HPTEG_CACHE_NUM)
+		kvmppc_mmu_pte_flush(vcpu, 0, 0);
+
+	return vcpu->arch.hpte_cache_offset++;
+}
+
+/* We keep 512 gvsid->hvsid entries, mapping the guest ones to the array using
+ * a hash, so we don't waste cycles on looping */
+static u16 kvmppc_sid_hash(struct kvm_vcpu *vcpu, u64 gvsid)
+{
+	return (u16)(((gvsid >> (SID_MAP_BITS * 7)) & SID_MAP_MASK) ^
+		     ((gvsid >> (SID_MAP_BITS * 6)) & SID_MAP_MASK) ^
+		     ((gvsid >> (SID_MAP_BITS * 5)) & SID_MAP_MASK) ^
+		     ((gvsid >> (SID_MAP_BITS * 4)) & SID_MAP_MASK) ^
+		     ((gvsid >> (SID_MAP_BITS * 3)) & SID_MAP_MASK) ^
+		     ((gvsid >> (SID_MAP_BITS * 2)) & SID_MAP_MASK) ^
+		     ((gvsid >> (SID_MAP_BITS * 1)) & SID_MAP_MASK) ^
+		     ((gvsid >> (SID_MAP_BITS * 0)) & SID_MAP_MASK));
+}
+
+
+static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid)
+{
+	struct kvmppc_sid_map *map;
+	u16 sid_map_mask;
+
+	if (vcpu->arch.msr & MSR_PR)
+		gvsid |= VSID_PR;
+
+	sid_map_mask = kvmppc_sid_hash(vcpu, gvsid);
+	map = &to_book3s(vcpu)->sid_map[sid_map_mask];
+	if (map->guest_vsid == gvsid) {
+		dprintk_slb("SLB: Searching 0x%llx -> 0x%llx\n",
+			    gvsid, map->host_vsid);
+		return map;
+	}
+
+	map = &to_book3s(vcpu)->sid_map[SID_MAP_MASK - sid_map_mask];
+	if (map->guest_vsid == gvsid) {
+		dprintk_slb("SLB: Searching 0x%llx -> 0x%llx\n",
+			    gvsid, map->host_vsid);
+		return map;
+	}
+
+	dprintk_slb("SLB: Searching 0x%llx -> not found\n", gvsid);
+	return NULL;
+}
+
+int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
+{
+	pfn_t hpaddr;
+	ulong hash, hpteg, va;
+	u64 vsid;
+	int ret;
+	int rflags = 0x192;
+	int vflags = 0;
+	int attempt = 0;
+	struct kvmppc_sid_map *map;
+
+	/* Get host physical address for gpa */
+	hpaddr = gfn_to_pfn(vcpu->kvm, orig_pte->raddr >> PAGE_SHIFT);
+	if (kvm_is_error_hva(hpaddr)) {
+		printk(KERN_INFO "Couldn't get guest page for gfn %llx!\n", orig_pte->eaddr);
+		return -EINVAL;
+	}
+	hpaddr <<= PAGE_SHIFT;
+#if PAGE_SHIFT == 12
+#elif PAGE_SHIFT == 16
+	hpaddr |= orig_pte->raddr & 0xf000;
+#else
+#error Unknown page size
+#endif
+
+	/* and write the mapping ea -> hpa into the pt */
+	vcpu->arch.mmu.esid_to_vsid(vcpu, orig_pte->eaddr >> SID_SHIFT, &vsid);
+	map = find_sid_vsid(vcpu, vsid);
+	if (!map) {
+		kvmppc_mmu_map_segment(vcpu, orig_pte->eaddr);
+		map = find_sid_vsid(vcpu, vsid);
+	}
+	BUG_ON(!map);
+
+	vsid = map->host_vsid;
+	va = hpt_va(orig_pte->eaddr, vsid, MMU_SEGSIZE_256M);
+
+	if (!orig_pte->may_write)
+		rflags |= HPTE_R_PP;
+	else
+		mark_page_dirty(vcpu->kvm, orig_pte->raddr >> PAGE_SHIFT);
+
+	if (!orig_pte->may_execute)
+		rflags |= HPTE_R_N;
+
+	hash = hpt_hash(va, PTE_SIZE, MMU_SEGSIZE_256M);
+
+map_again:
+	hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
+
+	/* In case we tried normal mapping already, let's nuke old entries */
+	if (attempt > 1)
+		if (ppc_md.hpte_remove(hpteg) < 0)
+			return -1;
+
+	ret = ppc_md.hpte_insert(hpteg, va, hpaddr, rflags, vflags, MMU_PAGE_4K, MMU_SEGSIZE_256M);
+
+	if (ret < 0) {
+		/* If we couldn't map a primary PTE, try a secondary */
+#ifdef USE_SECONDARY
+		hash = ~hash;
+		attempt++;
+		if (attempt % 2)
+			vflags = HPTE_V_SECONDARY;
+		else
+			vflags = 0;
+#else
+		attempt = 2;
+#endif
+		goto map_again;
+	} else {
+		int hpte_id = kvmppc_mmu_hpte_cache_next(vcpu);
+		struct hpte_cache *pte = &vcpu->arch.hpte_cache[hpte_id];
+
+		dprintk_mmu("KVM: %c%c Map 0x%llx: [%lx] 0x%lx (0x%llx) -> %lx\n",
+			    ((rflags & HPTE_R_PP) == 3) ? '-' : 'w',
+			    (rflags & HPTE_R_N) ? '-' : 'x',
+			    orig_pte->eaddr, hpteg, va, orig_pte->vpage, hpaddr);
+
+		pte->slot = hpteg + (ret & 7);
+		pte->host_va = va;
+		pte->pte = *orig_pte;
+		pte->pfn = hpaddr >> PAGE_SHIFT;
+	}
+
+	return 0;
+}
+
+static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid)
+{
+	struct kvmppc_sid_map *map;
+	struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
+	u16 sid_map_mask;
+	static int backwards_map = 0;
+
+	if (vcpu->arch.msr & MSR_PR)
+		gvsid |= VSID_PR;
+
+	/* We might get collisions that trap in preceding order, so let's
+	   map them differently */
+
+	sid_map_mask = kvmppc_sid_hash(vcpu, gvsid);
+	if (backwards_map)
+		sid_map_mask = SID_MAP_MASK - sid_map_mask;
+
+	map = &to_book3s(vcpu)->sid_map[sid_map_mask];
+
+	/* Make sure we're taking the other map next time */
+	backwards_map = !backwards_map;
+
+	/* Uh-oh ... out of mappings. Let's flush! */
+	if (vcpu_book3s->vsid_next == vcpu_book3s->vsid_max) {
+		vcpu_book3s->vsid_next = vcpu_book3s->vsid_first;
+		memset(vcpu_book3s->sid_map, 0,
+		       sizeof(struct kvmppc_sid_map) * SID_MAP_NUM);
+		kvmppc_mmu_pte_flush(vcpu, 0, 0);
+		kvmppc_mmu_flush_segments(vcpu);
+	}
+	map->host_vsid = vcpu_book3s->vsid_next++;
+
+	map->guest_vsid = gvsid;
+	map->valid = true;
+
+	return map;
+}
+
+static int kvmppc_mmu_next_segment(struct kvm_vcpu *vcpu, ulong esid)
+{
+	int i;
+	int max_slb_size = 64;
+	int found_inval = -1;
+	int r;
+
+	if (!get_paca()->kvm_slb_max)
+		get_paca()->kvm_slb_max = 1;
+
+	/* Are we overwriting? */
+	for (i = 1; i < get_paca()->kvm_slb_max; i++) {
+		if (!(get_paca()->kvm_slb[i].esid & SLB_ESID_V))
+			found_inval = i;
+		else if ((get_paca()->kvm_slb[i].esid & ESID_MASK) == esid)
+			return i;
+	}
+
+	/* Found a spare entry that was invalidated before */
+	if (found_inval > 0)
+		return found_inval;
+
+	/* No spare invalid entry, so create one */
+
+	if (mmu_slb_size < 64)
+		max_slb_size = mmu_slb_size;
+
+	/* Overflowing -> purge */
+	if ((get_paca()->kvm_slb_max) == max_slb_size)
+		kvmppc_mmu_flush_segments(vcpu);
+
+	r = get_paca()->kvm_slb_max;
+	get_paca()->kvm_slb_max++;
+
+	return r;
+}
+
+int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr)
+{
+	u64 esid = eaddr >> SID_SHIFT;
+	u64 slb_esid = (eaddr & ESID_MASK) | SLB_ESID_V;
+	u64 slb_vsid = SLB_VSID_USER;
+	u64 gvsid;
+	int slb_index;
+	struct kvmppc_sid_map *map;
+
+	slb_index = kvmppc_mmu_next_segment(vcpu, eaddr & ESID_MASK);
+
+	if (vcpu->arch.mmu.esid_to_vsid(vcpu, esid, &gvsid)) {
+		/* Invalidate an entry */
+		get_paca()->kvm_slb[slb_index].esid = 0;
+		return -ENOENT;
+	}
+
+	map = find_sid_vsid(vcpu, gvsid);
+	if (!map)
+		map = create_sid_map(vcpu, gvsid);
+
+	map->guest_esid = esid;
+
+	slb_vsid |= (map->host_vsid << 12);
+	slb_vsid &= ~SLB_VSID_KP;
+	slb_esid |= slb_index;
+
+	get_paca()->kvm_slb[slb_index].esid = slb_esid;
+	get_paca()->kvm_slb[slb_index].vsid = slb_vsid;
+
+	dprintk_slb("slbmte %#llx, %#llx\n", slb_vsid, slb_esid);
+
+	return 0;
+}
+
+void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu)
+{
+	get_paca()->kvm_slb_max = 1;
+	get_paca()->kvm_slb[0].esid = 0;
+}
+
+void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
+{
+	kvmppc_mmu_pte_flush(vcpu, 0, 0);
+}
diff --git a/arch/powerpc/kvm/book3s_64_rmhandlers.S b/arch/powerpc/kvm/book3s_64_rmhandlers.S
new file mode 100644
index 0000000..fb7dd2e
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_64_rmhandlers.S
@@ -0,0 +1,131 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright SUSE Linux Products GmbH 2009
+ *
+ * Authors: Alexander Graf <agraf@suse.de>
+ */
+
+#include <asm/ppc_asm.h>
+#include <asm/kvm_asm.h>
+#include <asm/reg.h>
+#include <asm/page.h>
+#include <asm/asm-offsets.h>
+#include <asm/exception-64s.h>
+
+/*****************************************************************************
+ *                                                                           *
+ *        Real Mode handlers that need to be in low physical memory          *
+ *                                                                           *
+ ****************************************************************************/
+
+
+.macro INTERRUPT_TRAMPOLINE intno
+
+.global kvmppc_trampoline_\intno
+kvmppc_trampoline_\intno:
+
+	mtspr	SPRN_SPRG_SCRATCH0, r13		/* Save r13 */
+
+	/*
+	 * First thing to do is to find out if we're coming
+	 * from a KVM guest or a Linux process.
+	 *
+	 * To distinguish, we check a magic byte in the PACA
+	 */
+	mfspr	r13, SPRN_SPRG_PACA		/* r13 = PACA */
+	std	r12, (PACA_EXMC + EX_R12)(r13)
+	mfcr	r12
+	stw	r12, (PACA_EXMC + EX_CCR)(r13)
+	lbz	r12, PACA_KVM_IN_GUEST(r13)
+	cmpwi	r12, 0
+	bne	..kvmppc_handler_hasmagic_\intno
+	/* No KVM guest? Then jump back to the Linux handler! */
+	lwz	r12, (PACA_EXMC + EX_CCR)(r13)
+	mtcr	r12
+	ld	r12, (PACA_EXMC + EX_R12)(r13)
+	mfspr	r13, SPRN_SPRG_SCRATCH0		/* r13 = original r13 */
+	b	kvmppc_resume_\intno		/* Get back original handler */
+
+	/* Now we know we're handling a KVM guest */
+..kvmppc_handler_hasmagic_\intno:
+	/* Unset guest state */
+	li	r12, 0
+	stb	r12, PACA_KVM_IN_GUEST(r13)
+
+	std	r1, (PACA_EXMC+EX_R9)(r13)
+	std	r10, (PACA_EXMC+EX_R10)(r13)
+	std	r11, (PACA_EXMC+EX_R11)(r13)
+	std	r2, (PACA_EXMC+EX_R13)(r13)
+
+	mfsrr0	r10
+	mfsrr1	r11
+
+	/* Restore R1/R2 so we can handle faults */
+	ld	r1, PACAR1(r13)
+	ld	r2, (PACA_EXMC+EX_SRR0)(r13)
+
+	/* Let's store which interrupt we're handling */
+	li	r12, \intno
+
+	/* Jump into the SLB exit code that goes to the highmem handler */
+	b	kvmppc_handler_trampoline_exit
+
+.endm
+
+INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_SYSTEM_RESET
+INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_MACHINE_CHECK
+INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_DATA_STORAGE
+INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_DATA_SEGMENT
+INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_INST_STORAGE
+INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_INST_SEGMENT
+INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_EXTERNAL
+INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_ALIGNMENT
+INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_PROGRAM
+INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_FP_UNAVAIL
+INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_DECREMENTER
+INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_SYSCALL
+INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_TRACE
+INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_PERFMON
+INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_ALTIVEC
+INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_VSX
+
+/*
+ * This trampoline brings us back to a real mode handler
+ *
+ * Input Registers:
+ *
+ * R6 = SRR0
+ * R7 = SRR1
+ * LR = real-mode IP
+ *
+ */
+.global kvmppc_handler_lowmem_trampoline
+kvmppc_handler_lowmem_trampoline:
+
+	mtsrr0	r6
+	mtsrr1	r7
+	blr
+kvmppc_handler_lowmem_trampoline_end:
+
+.global kvmppc_trampoline_lowmem
+kvmppc_trampoline_lowmem:
+	.long kvmppc_handler_lowmem_trampoline - _stext
+
+.global kvmppc_trampoline_enter
+kvmppc_trampoline_enter:
+	.long kvmppc_handler_trampoline_enter - _stext
+
+#include "book3s_64_slb.S"
+
diff --git a/arch/powerpc/kvm/book3s_64_slb.S b/arch/powerpc/kvm/book3s_64_slb.S
new file mode 100644
index 0000000..ecd237a
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_64_slb.S
@@ -0,0 +1,262 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright SUSE Linux Products GmbH 2009
+ *
+ * Authors: Alexander Graf <agraf@suse.de>
+ */
+
+#define SHADOW_SLB_ESID(num)	(SLBSHADOW_SAVEAREA + (num * 0x10))
+#define SHADOW_SLB_VSID(num)	(SLBSHADOW_SAVEAREA + (num * 0x10) + 0x8)
+#define UNBOLT_SLB_ENTRY(num) \
+	ld	r9, SHADOW_SLB_ESID(num)(r12); \
+	/* Invalid? Skip. */; \
+	rldicl. r0, r9, 37, 63; \
+	beq	slb_entry_skip_ ## num; \
+	xoris	r9, r9, SLB_ESID_V@h; \
+	std	r9, SHADOW_SLB_ESID(num)(r12); \
+  slb_entry_skip_ ## num:
+
+#define REBOLT_SLB_ENTRY(num) \
+	ld	r10, SHADOW_SLB_ESID(num)(r11); \
+	cmpdi	r10, 0; \
+	beq	slb_exit_skip_1; \
+	oris	r10, r10, SLB_ESID_V@h; \
+	ld	r9, SHADOW_SLB_VSID(num)(r11); \
+	slbmte	r9, r10; \
+	std	r10, SHADOW_SLB_ESID(num)(r11); \
+slb_exit_skip_ ## num:
+
+/******************************************************************************
+ *                                                                            *
+ *                               Entry code                                   *
+ *                                                                            *
+ *****************************************************************************/
+
+.global kvmppc_handler_trampoline_enter
+kvmppc_handler_trampoline_enter:
+
+	/* Required state:
+	 *
+	 * MSR = ~IR|DR
+	 * R13 = PACA
+	 * R9 = guest IP
+	 * R10 = guest MSR
+	 * R11 = free
+	 * R12 = free
+	 * PACA[PACA_EXMC + EX_R9] = guest R9
+	 * PACA[PACA_EXMC + EX_R10] = guest R10
+	 * PACA[PACA_EXMC + EX_R11] = guest R11
+	 * PACA[PACA_EXMC + EX_R12] = guest R12
+	 * PACA[PACA_EXMC + EX_R13] = guest R13
+	 * PACA[PACA_EXMC + EX_CCR] = guest CR
+	 * PACA[PACA_EXMC + EX_R3] = guest XER
+	 */
+
+	mtsrr0	r9
+	mtsrr1	r10
+
+	mtspr	SPRN_SPRG_SCRATCH0, r0
+
+	/* Remove LPAR shadow entries */
+
+#if SLB_NUM_BOLTED == 3
+
+	ld	r12, PACA_SLBSHADOWPTR(r13)
+
+	/* Save off the first entry so we can slbie it later */
+	ld	r10, SHADOW_SLB_ESID(0)(r12)
+	ld	r11, SHADOW_SLB_VSID(0)(r12)
+
+	/* Remove bolted entries */
+	UNBOLT_SLB_ENTRY(0)
+	UNBOLT_SLB_ENTRY(1)
+	UNBOLT_SLB_ENTRY(2)
+	
+#else
+#error unknown number of bolted entries
+#endif
+
+	/* Flush SLB */
+
+	slbia
+
+	/* r0 = esid & ESID_MASK */
+	rldicr  r10, r10, 0, 35
+	/* r0 |= CLASS_BIT(VSID) */
+	rldic   r12, r11, 56 - 36, 36
+	or      r10, r10, r12
+	slbie	r10
+
+	isync
+
+	/* Fill SLB with our shadow */
+
+	lbz	r12, PACA_KVM_SLB_MAX(r13)
+	mulli	r12, r12, 16
+	addi	r12, r12, PACA_KVM_SLB
+	add	r12, r12, r13
+
+	/* for (r11 = kvm_slb; r11 < kvm_slb + kvm_slb_size; r11+=slb_entry) */
+	li	r11, PACA_KVM_SLB
+	add	r11, r11, r13
+
+slb_loop_enter:
+
+	ld	r10, 0(r11)
+
+	rldicl. r0, r10, 37, 63
+	beq	slb_loop_enter_skip
+
+	ld	r9, 8(r11)
+	slbmte	r9, r10
+
+slb_loop_enter_skip:
+	addi	r11, r11, 16
+	cmpd	cr0, r11, r12
+	blt	slb_loop_enter
+
+slb_do_enter:
+
+	/* Enter guest */
+
+	mfspr	r0, SPRN_SPRG_SCRATCH0
+
+	ld	r9, (PACA_EXMC+EX_R9)(r13)
+	ld	r10, (PACA_EXMC+EX_R10)(r13)
+	ld	r12, (PACA_EXMC+EX_R12)(r13)
+
+	lwz	r11, (PACA_EXMC+EX_CCR)(r13)
+	mtcr	r11
+
+	ld	r11, (PACA_EXMC+EX_R3)(r13)
+	mtxer	r11
+
+	ld	r11, (PACA_EXMC+EX_R11)(r13)
+	ld	r13, (PACA_EXMC+EX_R13)(r13)
+
+	RFI
+kvmppc_handler_trampoline_enter_end:
+
+
+
+/******************************************************************************
+ *                                                                            *
+ *                               Exit code                                    *
+ *                                                                            *
+ *****************************************************************************/
+
+.global kvmppc_handler_trampoline_exit
+kvmppc_handler_trampoline_exit:
+
+	/* Register usage at this point:
+	 *
+	 * SPRG_SCRATCH0 = guest R13
+	 * R01           = host R1
+	 * R02           = host R2
+	 * R10           = guest PC
+	 * R11           = guest MSR
+	 * R12           = exit handler id
+	 * R13           = PACA
+	 * PACA.exmc.CCR  = guest CR
+	 * PACA.exmc.R9  = guest R1
+	 * PACA.exmc.R10 = guest R10
+	 * PACA.exmc.R11 = guest R11
+	 * PACA.exmc.R12 = guest R12
+	 * PACA.exmc.R13 = guest R2
+	 *
+	 */
+
+	/* Save registers */
+
+	std	r0, (PACA_EXMC+EX_SRR0)(r13)
+	std	r9, (PACA_EXMC+EX_R3)(r13)
+	std	r10, (PACA_EXMC+EX_LR)(r13)
+	std	r11, (PACA_EXMC+EX_DAR)(r13)
+
+	/*
+	 * In order for us to easily get the last instruction,
+	 * we got the #vmexit at, we exploit the fact that the
+	 * virtual layout is still the same here, so we can just
+	 * ld from the guest's PC address
+	 */
+
+	/* We only load the last instruction when it's safe */
+	cmpwi	r12, BOOK3S_INTERRUPT_DATA_STORAGE
+	beq	ld_last_inst
+	cmpwi	r12, BOOK3S_INTERRUPT_PROGRAM
+	beq	ld_last_inst
+
+	b	no_ld_last_inst
+
+ld_last_inst:
+	/* Save off the guest instruction we're at */
+	/*    1) enable paging for data */
+	mfmsr	r9
+	ori	r11, r9, MSR_DR			/* Enable paging for data */
+	mtmsr	r11
+	/*    2) fetch the instruction */
+	lwz	r0, 0(r10)
+	/*    3) disable paging again */
+	mtmsr	r9
+
+no_ld_last_inst:
+
+	/* Restore bolted entries from the shadow and fix it along the way */
+
+	/* We don't store anything in entry 0, so we don't need to take care of it */
+	slbia
+	isync
+
+#if SLB_NUM_BOLTED == 3
+
+	ld	r11, PACA_SLBSHADOWPTR(r13)
+
+	REBOLT_SLB_ENTRY(0)
+	REBOLT_SLB_ENTRY(1)
+	REBOLT_SLB_ENTRY(2)
+	
+#else
+#error unknown number of bolted entries
+#endif
+
+slb_do_exit:
+
+	/* Restore registers */
+
+	ld	r11, (PACA_EXMC+EX_DAR)(r13)
+	ld	r10, (PACA_EXMC+EX_LR)(r13)
+	ld	r9, (PACA_EXMC+EX_R3)(r13)
+
+	/* Save last inst */
+	stw	r0, (PACA_EXMC+EX_LR)(r13)
+
+	/* Save DAR and DSISR before going to paged mode */
+	mfdar	r0
+	std	r0, (PACA_EXMC+EX_DAR)(r13)
+	mfdsisr	r0
+	stw	r0, (PACA_EXMC+EX_DSISR)(r13)
+
+	/* RFI into the highmem handler */
+	mfmsr	r0
+	ori	r0, r0, MSR_IR|MSR_DR|MSR_RI	/* Enable paging */
+	mtsrr1	r0
+	ld	r0, PACASAVEDMSR(r13)		/* Highmem handler address */
+	mtsrr0	r0
+
+	mfspr	r0, SPRN_SPRG_SCRATCH0
+
+	RFI
+kvmppc_handler_trampoline_exit_end:
+
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index e7bf4d0..06f5a9e 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -520,6 +520,11 @@
 	return kvmppc_core_vcpu_translate(vcpu, tr);
 }
 
+int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
+{
+	return -ENOTSUPP;
+}
+
 int __init kvmppc_booke_init(void)
 {
 	unsigned long ivor[16];
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index 7737146..4a9ac66 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -18,7 +18,7 @@
  */
 
 #include <linux/jiffies.h>
-#include <linux/timer.h>
+#include <linux/hrtimer.h>
 #include <linux/types.h>
 #include <linux/string.h>
 #include <linux/kvm_host.h>
@@ -32,6 +32,7 @@
 #include "trace.h"
 
 #define OP_TRAP 3
+#define OP_TRAP_64 2
 
 #define OP_31_XOP_LWZX      23
 #define OP_31_XOP_LBZX      87
@@ -64,19 +65,45 @@
 #define OP_STH  44
 #define OP_STHU 45
 
+#ifdef CONFIG_PPC64
+static int kvmppc_dec_enabled(struct kvm_vcpu *vcpu)
+{
+	return 1;
+}
+#else
+static int kvmppc_dec_enabled(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.tcr & TCR_DIE;
+}
+#endif
+
 void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
 {
-	if (vcpu->arch.tcr & TCR_DIE) {
+	unsigned long dec_nsec;
+
+	pr_debug("mtDEC: %x\n", vcpu->arch.dec);
+#ifdef CONFIG_PPC64
+	/* POWER4+ triggers a dec interrupt if the value is < 0 */
+	if (vcpu->arch.dec & 0x80000000) {
+		hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
+		kvmppc_core_queue_dec(vcpu);
+		return;
+	}
+#endif
+	if (kvmppc_dec_enabled(vcpu)) {
 		/* The decrementer ticks at the same rate as the timebase, so
 		 * that's how we convert the guest DEC value to the number of
 		 * host ticks. */
-		unsigned long nr_jiffies;
 
-		nr_jiffies = vcpu->arch.dec / tb_ticks_per_jiffy;
-		mod_timer(&vcpu->arch.dec_timer,
-		          get_jiffies_64() + nr_jiffies);
+		hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
+		dec_nsec = vcpu->arch.dec;
+		dec_nsec *= 1000;
+		dec_nsec /= tb_ticks_per_usec;
+		hrtimer_start(&vcpu->arch.dec_timer, ktime_set(0, dec_nsec),
+			      HRTIMER_MODE_REL);
+		vcpu->arch.dec_jiffies = get_tb();
 	} else {
-		del_timer(&vcpu->arch.dec_timer);
+		hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
 	}
 }
 
@@ -111,9 +138,15 @@
 	/* this default type might be overwritten by subcategories */
 	kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS);
 
+	pr_debug(KERN_INFO "Emulating opcode %d / %d\n", get_op(inst), get_xop(inst));
+
 	switch (get_op(inst)) {
 	case OP_TRAP:
+#ifdef CONFIG_PPC64
+	case OP_TRAP_64:
+#else
 		vcpu->arch.esr |= ESR_PTR;
+#endif
 		kvmppc_core_queue_program(vcpu);
 		advance = 0;
 		break;
@@ -188,17 +221,19 @@
 			case SPRN_SRR1:
 				vcpu->arch.gpr[rt] = vcpu->arch.srr1; break;
 			case SPRN_PVR:
-				vcpu->arch.gpr[rt] = mfspr(SPRN_PVR); break;
+				vcpu->arch.gpr[rt] = vcpu->arch.pvr; break;
 			case SPRN_PIR:
-				vcpu->arch.gpr[rt] = mfspr(SPRN_PIR); break;
+				vcpu->arch.gpr[rt] = vcpu->vcpu_id; break;
+			case SPRN_MSSSR0:
+				vcpu->arch.gpr[rt] = 0; break;
 
 			/* Note: mftb and TBRL/TBWL are user-accessible, so
 			 * the guest can always access the real TB anyways.
 			 * In fact, we probably will never see these traps. */
 			case SPRN_TBWL:
-				vcpu->arch.gpr[rt] = mftbl(); break;
+				vcpu->arch.gpr[rt] = get_tb() >> 32; break;
 			case SPRN_TBWU:
-				vcpu->arch.gpr[rt] = mftbu(); break;
+				vcpu->arch.gpr[rt] = get_tb(); break;
 
 			case SPRN_SPRG0:
 				vcpu->arch.gpr[rt] = vcpu->arch.sprg0; break;
@@ -211,6 +246,13 @@
 			/* Note: SPRG4-7 are user-readable, so we don't get
 			 * a trap. */
 
+			case SPRN_DEC:
+			{
+				u64 jd = get_tb() - vcpu->arch.dec_jiffies;
+				vcpu->arch.gpr[rt] = vcpu->arch.dec - jd;
+				pr_debug(KERN_INFO "mfDEC: %x - %llx = %lx\n", vcpu->arch.dec, jd, vcpu->arch.gpr[rt]);
+				break;
+			}
 			default:
 				emulated = kvmppc_core_emulate_mfspr(vcpu, sprn, rt);
 				if (emulated == EMULATE_FAIL) {
@@ -260,6 +302,8 @@
 			case SPRN_TBWL: break;
 			case SPRN_TBWU: break;
 
+			case SPRN_MSSSR0: break;
+
 			case SPRN_DEC:
 				vcpu->arch.dec = vcpu->arch.gpr[rs];
 				kvmppc_emulate_dec(vcpu);
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 5902bbc..f06cf93 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -23,6 +23,7 @@
 #include <linux/kvm_host.h>
 #include <linux/module.h>
 #include <linux/vmalloc.h>
+#include <linux/hrtimer.h>
 #include <linux/fs.h>
 #include <asm/cputable.h>
 #include <asm/uaccess.h>
@@ -144,6 +145,9 @@
 	int r;
 
 	switch (ext) {
+	case KVM_CAP_PPC_SEGSTATE:
+		r = 1;
+		break;
 	case KVM_CAP_COALESCED_MMIO:
 		r = KVM_COALESCED_MMIO_PAGE_OFFSET;
 		break;
@@ -209,10 +213,25 @@
 	}
 }
 
+/*
+ * low level hrtimer wake routine. Because this runs in hardirq context
+ * we schedule a tasklet to do the real work.
+ */
+enum hrtimer_restart kvmppc_decrementer_wakeup(struct hrtimer *timer)
+{
+	struct kvm_vcpu *vcpu;
+
+	vcpu = container_of(timer, struct kvm_vcpu, arch.dec_timer);
+	tasklet_schedule(&vcpu->arch.tasklet);
+
+	return HRTIMER_NORESTART;
+}
+
 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 {
-	setup_timer(&vcpu->arch.dec_timer, kvmppc_decrementer_func,
-	            (unsigned long)vcpu);
+	hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
+	tasklet_init(&vcpu->arch.tasklet, kvmppc_decrementer_func, (ulong)vcpu);
+	vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup;
 
 	return 0;
 }
@@ -410,11 +429,6 @@
 	return r;
 }
 
-int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
-{
-	return -ENOTSUPP;
-}
-
 long kvm_arch_vm_ioctl(struct file *filp,
                        unsigned int ioctl, unsigned long arg)
 {
diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c
index 2aa371e..7037855 100644
--- a/arch/powerpc/kvm/timing.c
+++ b/arch/powerpc/kvm/timing.c
@@ -23,6 +23,7 @@
 #include <linux/seq_file.h>
 #include <linux/debugfs.h>
 #include <linux/uaccess.h>
+#include <linux/module.h>
 
 #include <asm/time.h>
 #include <asm-generic/div64.h>
diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h
index 67f219d..a8e8400 100644
--- a/arch/powerpc/kvm/trace.h
+++ b/arch/powerpc/kvm/trace.h
@@ -12,8 +12,8 @@
  * Tracepoint for guest mode entry.
  */
 TRACE_EVENT(kvm_ppc_instr,
-	TP_PROTO(unsigned int inst, unsigned long pc, unsigned int emulate),
-	TP_ARGS(inst, pc, emulate),
+	TP_PROTO(unsigned int inst, unsigned long _pc, unsigned int emulate),
+	TP_ARGS(inst, _pc, emulate),
 
 	TP_STRUCT__entry(
 		__field(	unsigned int,	inst		)
@@ -23,7 +23,7 @@
 
 	TP_fast_assign(
 		__entry->inst		= inst;
-		__entry->pc		= pc;
+		__entry->pc		= _pc;
 		__entry->emulate	= emulate;
 	),
 
diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S
index c657de5..74a7f41 100644
--- a/arch/powerpc/lib/copy_32.S
+++ b/arch/powerpc/lib/copy_32.S
@@ -98,20 +98,7 @@
 	bdnz	4b
 3:	mtctr	r9
 	li	r7,4
-#if !defined(CONFIG_8xx)
 10:	dcbz	r7,r6
-#else
-10:	stw	r4, 4(r6)
-	stw	r4, 8(r6)
-	stw	r4, 12(r6)
-	stw	r4, 16(r6)
-#if CACHE_LINE_SIZE >= 32
-	stw	r4, 20(r6)
-	stw	r4, 24(r6)
-	stw	r4, 28(r6)
-	stw	r4, 32(r6)
-#endif /* CACHE_LINE_SIZE */
-#endif
 	addi	r6,r6,CACHELINE_BYTES
 	bdnz	10b
 	clrlwi	r5,r8,32-LG_CACHELINE_BYTES
@@ -200,9 +187,7 @@
 	mtctr	r0
 	beq	63f
 53:
-#if !defined(CONFIG_8xx)
 	dcbz	r11,r6
-#endif
 	COPY_16_BYTES
 #if L1_CACHE_BYTES >= 32
 	COPY_16_BYTES
@@ -356,14 +341,6 @@
 	li	r11,4
 	beq	63f
 
-#ifdef CONFIG_8xx
-	/* Don't use prefetch on 8xx */
-	mtctr	r0
-	li	r0,0
-53:	COPY_16_BYTES_WITHEX(0)
-	bdnz	53b
-
-#else /* not CONFIG_8xx */
 	/* Here we decide how far ahead to prefetch the source */
 	li	r3,4
 	cmpwi	r0,1
@@ -416,7 +393,6 @@
 	li	r3,4
 	li	r7,0
 	bne	114b
-#endif /* CONFIG_8xx */
 
 63:	srwi.	r0,r5,2
 	mtctr	r0
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 6fb8fc8..ce68708 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -28,7 +28,10 @@
 obj-$(CONFIG_FSL_BOOKE)		+= fsl_booke_mmu.o
 obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o
 obj-$(CONFIG_PPC_MM_SLICES)	+= slice.o
-obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
+ifeq ($(CONFIG_HUGETLB_PAGE),y)
+obj-y				+= hugetlbpage.o
+obj-$(CONFIG_PPC_STD_MMU_64)	+= hugetlbpage-hash64.o
+endif
 obj-$(CONFIG_PPC_SUBPAGE_PROT)	+= subpage-prot.o
 obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o
 obj-$(CONFIG_HIGHMEM)		+= highmem.o
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index e7dae82..26fb6b9 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -40,7 +40,7 @@
 #include <asm/uaccess.h>
 #include <asm/tlbflush.h>
 #include <asm/siginfo.h>
-
+#include <mm/mmu_decl.h>
 
 #ifdef CONFIG_KPROBES
 static inline int notify_page_fault(struct pt_regs *regs)
@@ -246,6 +246,12 @@
 		goto bad_area;
 #endif /* CONFIG_6xx */
 #if defined(CONFIG_8xx)
+	/* 8xx sometimes need to load a invalid/non-present TLBs.
+	 * These must be invalidated separately as linux mm don't.
+	 */
+	if (error_code & 0x40000000) /* no translation? */
+		_tlbil_va(address, 0, 0, 0);
+
         /* The MPC8xx seems to always set 0x80000000, which is
          * "undefined".  Of those that can be set, this is the only
          * one which seems bad.
diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c
index dc93e95..fcfcb6e 100644
--- a/arch/powerpc/mm/fsl_booke_mmu.c
+++ b/arch/powerpc/mm/fsl_booke_mmu.c
@@ -54,26 +54,35 @@
 
 #include "mmu_decl.h"
 
-extern void loadcam_entry(unsigned int index);
 unsigned int tlbcam_index;
-static unsigned long cam[CONFIG_LOWMEM_CAM_NUM];
 
-#define NUM_TLBCAMS	(16)
+#define NUM_TLBCAMS	(64)
 
 #if defined(CONFIG_LOWMEM_CAM_NUM_BOOL) && (CONFIG_LOWMEM_CAM_NUM >= NUM_TLBCAMS)
 #error "LOWMEM_CAM_NUM must be less than NUM_TLBCAMS"
 #endif
 
-struct tlbcam TLBCAM[NUM_TLBCAMS];
+struct tlbcam {
+	u32	MAS0;
+	u32	MAS1;
+	unsigned long	MAS2;
+	u32	MAS3;
+	u32	MAS7;
+} TLBCAM[NUM_TLBCAMS];
 
 struct tlbcamrange {
-   	unsigned long start;
+	unsigned long start;
 	unsigned long limit;
 	phys_addr_t phys;
 } tlbcam_addrs[NUM_TLBCAMS];
 
 extern unsigned int tlbcam_index;
 
+unsigned long tlbcam_sz(int idx)
+{
+	return tlbcam_addrs[idx].limit - tlbcam_addrs[idx].start + 1;
+}
+
 /*
  * Return PA for this VA if it is mapped by a CAM, or 0
  */
@@ -94,23 +103,36 @@
 	int b;
 	for (b = 0; b < tlbcam_index; ++b)
 		if (pa >= tlbcam_addrs[b].phys
-	    	    && pa < (tlbcam_addrs[b].limit-tlbcam_addrs[b].start)
+			&& pa < (tlbcam_addrs[b].limit-tlbcam_addrs[b].start)
 		              +tlbcam_addrs[b].phys)
 			return tlbcam_addrs[b].start+(pa-tlbcam_addrs[b].phys);
 	return 0;
 }
 
+void loadcam_entry(int idx)
+{
+	mtspr(SPRN_MAS0, TLBCAM[idx].MAS0);
+	mtspr(SPRN_MAS1, TLBCAM[idx].MAS1);
+	mtspr(SPRN_MAS2, TLBCAM[idx].MAS2);
+	mtspr(SPRN_MAS3, TLBCAM[idx].MAS3);
+
+	if (cur_cpu_spec->cpu_features & MMU_FTR_BIG_PHYS)
+		mtspr(SPRN_MAS7, TLBCAM[idx].MAS7);
+
+	asm volatile("isync;tlbwe;isync" : : : "memory");
+}
+
 /*
  * Set up one of the I/D BAT (block address translation) register pairs.
  * The parameters are not checked; in particular size must be a power
  * of 4 between 4k and 256M.
  */
-void settlbcam(int index, unsigned long virt, phys_addr_t phys,
-		unsigned int size, int flags, unsigned int pid)
+static void settlbcam(int index, unsigned long virt, phys_addr_t phys,
+		unsigned long size, unsigned long flags, unsigned int pid)
 {
 	unsigned int tsize, lz;
 
-	asm ("cntlzw %0,%1" : "=r" (lz) : "r" (size));
+	asm (PPC_CNTLZL "%0,%1" : "=r" (lz) : "r" (size));
 	tsize = 21 - lz;
 
 #ifdef CONFIG_SMP
@@ -128,8 +150,10 @@
 	TLBCAM[index].MAS2 |= (flags & _PAGE_GUARDED) ? MAS2_G : 0;
 	TLBCAM[index].MAS2 |= (flags & _PAGE_ENDIAN) ? MAS2_E : 0;
 
-	TLBCAM[index].MAS3 = (phys & PAGE_MASK) | MAS3_SX | MAS3_SR;
+	TLBCAM[index].MAS3 = (phys & MAS3_RPN) | MAS3_SX | MAS3_SR;
 	TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_SW : 0);
+	if (cur_cpu_spec->cpu_features & MMU_FTR_BIG_PHYS)
+		TLBCAM[index].MAS7 = (u64)phys >> 32;
 
 #ifndef CONFIG_KGDB /* want user access for breakpoints */
 	if (flags & _PAGE_USER) {
@@ -148,27 +172,44 @@
 	loadcam_entry(index);
 }
 
-void invalidate_tlbcam_entry(int index)
+unsigned long map_mem_in_cams(unsigned long ram, int max_cam_idx)
 {
-	TLBCAM[index].MAS0 = MAS0_TLBSEL(1) | MAS0_ESEL(index);
-	TLBCAM[index].MAS1 = ~MAS1_VALID;
+	int i;
+	unsigned long virt = PAGE_OFFSET;
+	phys_addr_t phys = memstart_addr;
+	unsigned long amount_mapped = 0;
+	unsigned long max_cam = (mfspr(SPRN_TLB1CFG) >> 16) & 0xf;
 
-	loadcam_entry(index);
+	/* Convert (4^max) kB to (2^max) bytes */
+	max_cam = max_cam * 2 + 10;
+
+	/* Calculate CAM values */
+	for (i = 0; ram && i < max_cam_idx; i++) {
+		unsigned int camsize = __ilog2(ram) & ~1U;
+		unsigned int align = __ffs(virt | phys) & ~1U;
+		unsigned long cam_sz;
+
+		if (camsize > align)
+			camsize = align;
+		if (camsize > max_cam)
+			camsize = max_cam;
+
+		cam_sz = 1UL << camsize;
+		settlbcam(i, virt, phys, cam_sz, PAGE_KERNEL_X, 0);
+
+		ram -= cam_sz;
+		amount_mapped += cam_sz;
+		virt += cam_sz;
+		phys += cam_sz;
+	}
+	tlbcam_index = i;
+
+	return amount_mapped;
 }
 
 unsigned long __init mmu_mapin_ram(void)
 {
-	unsigned long virt = PAGE_OFFSET;
-	phys_addr_t phys = memstart_addr;
-
-	while (tlbcam_index < ARRAY_SIZE(cam) && cam[tlbcam_index]) {
-		settlbcam(tlbcam_index, virt, phys, cam[tlbcam_index], PAGE_KERNEL_X, 0);
-		virt += cam[tlbcam_index];
-		phys += cam[tlbcam_index];
-		tlbcam_index++;
-	}
-
-	return virt - PAGE_OFFSET;
+	return tlbcam_addrs[tlbcam_index - 1].limit - PAGE_OFFSET + 1;
 }
 
 /*
@@ -179,46 +220,21 @@
 	flush_instruction_cache();
 }
 
-void __init
-adjust_total_lowmem(void)
+void __init adjust_total_lowmem(void)
 {
-	phys_addr_t ram;
-	unsigned int max_cam = (mfspr(SPRN_TLB1CFG) >> 16) & 0xff;
-	char buf[ARRAY_SIZE(cam) * 5 + 1], *p = buf;
+	unsigned long ram;
 	int i;
-	unsigned long virt = PAGE_OFFSET & 0xffffffffUL;
-	unsigned long phys = memstart_addr & 0xffffffffUL;
-
-	/* Convert (4^max) kB to (2^max) bytes */
-	max_cam = max_cam * 2 + 10;
 
 	/* adjust lowmem size to __max_low_memory */
 	ram = min((phys_addr_t)__max_low_memory, (phys_addr_t)total_lowmem);
 
-	/* Calculate CAM values */
-	__max_low_memory = 0;
-	for (i = 0; ram && i < ARRAY_SIZE(cam); i++) {
-		unsigned int camsize = __ilog2(ram) & ~1U;
-		unsigned int align = __ffs(virt | phys) & ~1U;
+	__max_low_memory = map_mem_in_cams(ram, CONFIG_LOWMEM_CAM_NUM);
 
-		if (camsize > align)
-			camsize = align;
-		if (camsize > max_cam)
-			camsize = max_cam;
-
-		cam[i] = 1UL << camsize;
-		ram -= cam[i];
-		__max_low_memory += cam[i];
-		virt += cam[i];
-		phys += cam[i];
-
-		p += sprintf(p, "%lu/", cam[i] >> 20);
-	}
-	for (; i < ARRAY_SIZE(cam); i++)
-		p += sprintf(p, "0/");
-	p[-1] = '\0';
-
-	pr_info("Memory CAM mapping: %s Mb, residual: %dMb\n", buf,
+	pr_info("Memory CAM mapping: ");
+	for (i = 0; i < tlbcam_index - 1; i++)
+		pr_cont("%lu/", tlbcam_sz(i) >> 20);
+	pr_cont("%lu Mb, residual: %dMb\n", tlbcam_sz(tlbcam_index - 1) >> 20,
 	        (unsigned int)((total_lowmem - __max_low_memory) >> 20));
+
 	__initial_memory_limit_addr = memstart_addr + __max_low_memory;
 }
diff --git a/arch/powerpc/mm/gup.c b/arch/powerpc/mm/gup.c
index bc122a1..d7efdbf 100644
--- a/arch/powerpc/mm/gup.c
+++ b/arch/powerpc/mm/gup.c
@@ -55,57 +55,6 @@
 	return 1;
 }
 
-#ifdef CONFIG_HUGETLB_PAGE
-static noinline int gup_huge_pte(pte_t *ptep, struct hstate *hstate,
-				 unsigned long *addr, unsigned long end,
-				 int write, struct page **pages, int *nr)
-{
-	unsigned long mask;
-	unsigned long pte_end;
-	struct page *head, *page;
-	pte_t pte;
-	int refs;
-
-	pte_end = (*addr + huge_page_size(hstate)) & huge_page_mask(hstate);
-	if (pte_end < end)
-		end = pte_end;
-
-	pte = *ptep;
-	mask = _PAGE_PRESENT|_PAGE_USER;
-	if (write)
-		mask |= _PAGE_RW;
-	if ((pte_val(pte) & mask) != mask)
-		return 0;
-	/* hugepages are never "special" */
-	VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
-
-	refs = 0;
-	head = pte_page(pte);
-	page = head + ((*addr & ~huge_page_mask(hstate)) >> PAGE_SHIFT);
-	do {
-		VM_BUG_ON(compound_head(page) != head);
-		pages[*nr] = page;
-		(*nr)++;
-		page++;
-		refs++;
-	} while (*addr += PAGE_SIZE, *addr != end);
-
-	if (!page_cache_add_speculative(head, refs)) {
-		*nr -= refs;
-		return 0;
-	}
-	if (unlikely(pte_val(pte) != pte_val(*ptep))) {
-		/* Could be optimized better */
-		while (*nr) {
-			put_page(page);
-			(*nr)--;
-		}
-	}
-
-	return 1;
-}
-#endif /* CONFIG_HUGETLB_PAGE */
-
 static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
 		int write, struct page **pages, int *nr)
 {
@@ -119,7 +68,11 @@
 		next = pmd_addr_end(addr, end);
 		if (pmd_none(pmd))
 			return 0;
-		if (!gup_pte_range(pmd, addr, next, write, pages, nr))
+		if (is_hugepd(pmdp)) {
+			if (!gup_hugepd((hugepd_t *)pmdp, PMD_SHIFT,
+					addr, next, write, pages, nr))
+				return 0;
+		} else if (!gup_pte_range(pmd, addr, next, write, pages, nr))
 			return 0;
 	} while (pmdp++, addr = next, addr != end);
 
@@ -139,7 +92,11 @@
 		next = pud_addr_end(addr, end);
 		if (pud_none(pud))
 			return 0;
-		if (!gup_pmd_range(pud, addr, next, write, pages, nr))
+		if (is_hugepd(pudp)) {
+			if (!gup_hugepd((hugepd_t *)pudp, PUD_SHIFT,
+					addr, next, write, pages, nr))
+				return 0;
+		} else if (!gup_pmd_range(pud, addr, next, write, pages, nr))
 			return 0;
 	} while (pudp++, addr = next, addr != end);
 
@@ -154,10 +111,6 @@
 	unsigned long next;
 	pgd_t *pgdp;
 	int nr = 0;
-#ifdef CONFIG_PPC64
-	unsigned int shift;
-	int psize;
-#endif
 
 	pr_devel("%s(%lx,%x,%s)\n", __func__, start, nr_pages, write ? "write" : "read");
 
@@ -172,25 +125,6 @@
 
 	pr_devel("  aligned: %lx .. %lx\n", start, end);
 
-#ifdef CONFIG_HUGETLB_PAGE
-	/* We bail out on slice boundary crossing when hugetlb is
-	 * enabled in order to not have to deal with two different
-	 * page table formats
-	 */
-	if (addr < SLICE_LOW_TOP) {
-		if (end > SLICE_LOW_TOP)
-			goto slow_irqon;
-
-		if (unlikely(GET_LOW_SLICE_INDEX(addr) !=
-			     GET_LOW_SLICE_INDEX(end - 1)))
-			goto slow_irqon;
-	} else {
-		if (unlikely(GET_HIGH_SLICE_INDEX(addr) !=
-			     GET_HIGH_SLICE_INDEX(end - 1)))
-			goto slow_irqon;
-	}
-#endif /* CONFIG_HUGETLB_PAGE */
-
 	/*
 	 * XXX: batch / limit 'nr', to avoid large irq off latency
 	 * needs some instrumenting to determine the common sizes used by
@@ -210,54 +144,23 @@
 	 */
 	local_irq_disable();
 
-#ifdef CONFIG_PPC64
-	/* Those bits are related to hugetlbfs implementation and only exist
-	 * on 64-bit for now
-	 */
-	psize = get_slice_psize(mm, addr);
-	shift = mmu_psize_defs[psize].shift;
-#endif /* CONFIG_PPC64 */
+	pgdp = pgd_offset(mm, addr);
+	do {
+		pgd_t pgd = *pgdp;
 
-#ifdef CONFIG_HUGETLB_PAGE
-	if (unlikely(mmu_huge_psizes[psize])) {
-		pte_t *ptep;
-		unsigned long a = addr;
-		unsigned long sz = ((1UL) << shift);
-		struct hstate *hstate = size_to_hstate(sz);
-
-		BUG_ON(!hstate);
-		/*
-		 * XXX: could be optimized to avoid hstate
-		 * lookup entirely (just use shift)
-		 */
-
-		do {
-			VM_BUG_ON(shift != mmu_psize_defs[get_slice_psize(mm, a)].shift);
-			ptep = huge_pte_offset(mm, a);
-			pr_devel(" %016lx: huge ptep %p\n", a, ptep);
-			if (!ptep || !gup_huge_pte(ptep, hstate, &a, end, write, pages,
-						   &nr))
+		pr_devel("  %016lx: normal pgd %p\n", addr,
+			 (void *)pgd_val(pgd));
+		next = pgd_addr_end(addr, end);
+		if (pgd_none(pgd))
+			goto slow;
+		if (is_hugepd(pgdp)) {
+			if (!gup_hugepd((hugepd_t *)pgdp, PGDIR_SHIFT,
+					addr, next, write, pages, &nr))
 				goto slow;
-		} while (a != end);
-	} else
-#endif /* CONFIG_HUGETLB_PAGE */
-	{
-		pgdp = pgd_offset(mm, addr);
-		do {
-			pgd_t pgd = *pgdp;
+		} else if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
+			goto slow;
+	} while (pgdp++, addr = next, addr != end);
 
-#ifdef CONFIG_PPC64
-			VM_BUG_ON(shift != mmu_psize_defs[get_slice_psize(mm, addr)].shift);
-#endif
-			pr_devel("  %016lx: normal pgd %p\n", addr,
-				 (void *)pgd_val(pgd));
-			next = pgd_addr_end(addr, end);
-			if (pgd_none(pgd))
-				goto slow;
-			if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
-				goto slow;
-		} while (pgdp++, addr = next, addr != end);
-	}
 	local_irq_enable();
 
 	VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT);
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 1ade7eb..50f867d 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -92,6 +92,7 @@
 struct hash_pte *htab_address;
 unsigned long htab_size_bytes;
 unsigned long htab_hash_mask;
+EXPORT_SYMBOL_GPL(htab_hash_mask);
 int mmu_linear_psize = MMU_PAGE_4K;
 int mmu_virtual_psize = MMU_PAGE_4K;
 int mmu_vmalloc_psize = MMU_PAGE_4K;
@@ -102,6 +103,7 @@
 int mmu_kernel_ssize = MMU_SEGSIZE_256M;
 int mmu_highuser_ssize = MMU_SEGSIZE_256M;
 u16 mmu_slb_size = 64;
+EXPORT_SYMBOL_GPL(mmu_slb_size);
 #ifdef CONFIG_HUGETLB_PAGE
 unsigned int HPAGE_SHIFT;
 #endif
@@ -481,16 +483,6 @@
 #ifdef CONFIG_HUGETLB_PAGE
 	/* Reserve 16G huge page memory sections for huge pages */
 	of_scan_flat_dt(htab_dt_scan_hugepage_blocks, NULL);
-
-/* Set default large page size. Currently, we pick 16M or 1M depending
-	 * on what is available
-	 */
-	if (mmu_psize_defs[MMU_PAGE_16M].shift)
-		HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_16M].shift;
-	/* With 4k/4level pagetables, we can't (for now) cope with a
-	 * huge page size < PMD_SIZE */
-	else if (mmu_psize_defs[MMU_PAGE_1M].shift)
-		HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_1M].shift;
 #endif /* CONFIG_HUGETLB_PAGE */
 }
 
@@ -785,7 +777,7 @@
 	/* page is dirty */
 	if (!test_bit(PG_arch_1, &page->flags) && !PageReserved(page)) {
 		if (trap == 0x400) {
-			__flush_dcache_icache(page_address(page));
+			flush_dcache_icache_page(page);
 			set_bit(PG_arch_1, &page->flags);
 		} else
 			pp |= HPTE_R_N;
@@ -843,9 +835,9 @@
  * Result is 0: full permissions, _PAGE_RW: read-only,
  * _PAGE_USER or _PAGE_USER|_PAGE_RW: no access.
  */
-static int subpage_protection(pgd_t *pgdir, unsigned long ea)
+static int subpage_protection(struct mm_struct *mm, unsigned long ea)
 {
-	struct subpage_prot_table *spt = pgd_subpage_prot(pgdir);
+	struct subpage_prot_table *spt = &mm->context.spt;
 	u32 spp = 0;
 	u32 **sbpm, *sbpp;
 
@@ -873,7 +865,7 @@
 }
 
 #else /* CONFIG_PPC_SUBPAGE_PROT */
-static inline int subpage_protection(pgd_t *pgdir, unsigned long ea)
+static inline int subpage_protection(struct mm_struct *mm, unsigned long ea)
 {
 	return 0;
 }
@@ -891,6 +883,7 @@
 	unsigned long vsid;
 	struct mm_struct *mm;
 	pte_t *ptep;
+	unsigned hugeshift;
 	const struct cpumask *tmp;
 	int rc, user_region = 0, local = 0;
 	int psize, ssize;
@@ -943,30 +936,31 @@
 	if (user_region && cpumask_equal(mm_cpumask(mm), tmp))
 		local = 1;
 
-#ifdef CONFIG_HUGETLB_PAGE
-	/* Handle hugepage regions */
-	if (HPAGE_SHIFT && mmu_huge_psizes[psize]) {
-		DBG_LOW(" -> huge page !\n");
-		return hash_huge_page(mm, access, ea, vsid, local, trap);
-	}
-#endif /* CONFIG_HUGETLB_PAGE */
-
 #ifndef CONFIG_PPC_64K_PAGES
-	/* If we use 4K pages and our psize is not 4K, then we are hitting
-	 * a special driver mapping, we need to align the address before
-	 * we fetch the PTE
+	/* If we use 4K pages and our psize is not 4K, then we might
+	 * be hitting a special driver mapping, and need to align the
+	 * address before we fetch the PTE.
+	 *
+	 * It could also be a hugepage mapping, in which case this is
+	 * not necessary, but it's not harmful, either.
 	 */
 	if (psize != MMU_PAGE_4K)
 		ea &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
 #endif /* CONFIG_PPC_64K_PAGES */
 
 	/* Get PTE and page size from page tables */
-	ptep = find_linux_pte(pgdir, ea);
+	ptep = find_linux_pte_or_hugepte(pgdir, ea, &hugeshift);
 	if (ptep == NULL || !pte_present(*ptep)) {
 		DBG_LOW(" no PTE !\n");
 		return 1;
 	}
 
+#ifdef CONFIG_HUGETLB_PAGE
+	if (hugeshift)
+		return __hash_page_huge(ea, access, vsid, ptep, trap, local,
+					ssize, hugeshift, psize);
+#endif /* CONFIG_HUGETLB_PAGE */
+
 #ifndef CONFIG_PPC_64K_PAGES
 	DBG_LOW(" i-pte: %016lx\n", pte_val(*ptep));
 #else
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
new file mode 100644
index 0000000..1995398
--- /dev/null
+++ b/arch/powerpc/mm/hugetlbpage-hash64.c
@@ -0,0 +1,139 @@
+/*
+ * PPC64 Huge TLB Page Support for hash based MMUs (POWER4 and later)
+ *
+ * Copyright (C) 2003 David Gibson, IBM Corporation.
+ *
+ * Based on the IA-32 version:
+ * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
+ */
+
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/cacheflush.h>
+#include <asm/machdep.h>
+
+int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
+		     pte_t *ptep, unsigned long trap, int local, int ssize,
+		     unsigned int shift, unsigned int mmu_psize)
+{
+	unsigned long old_pte, new_pte;
+	unsigned long va, rflags, pa, sz;
+	long slot;
+	int err = 1;
+
+	BUG_ON(shift != mmu_psize_defs[mmu_psize].shift);
+
+	/* Search the Linux page table for a match with va */
+	va = hpt_va(ea, vsid, ssize);
+
+	/*
+	 * Check the user's access rights to the page.  If access should be
+	 * prevented then send the problem up to do_page_fault.
+	 */
+	if (unlikely(access & ~pte_val(*ptep)))
+		goto out;
+	/*
+	 * At this point, we have a pte (old_pte) which can be used to build
+	 * or update an HPTE. There are 2 cases:
+	 *
+	 * 1. There is a valid (present) pte with no associated HPTE (this is
+	 *	the most common case)
+	 * 2. There is a valid (present) pte with an associated HPTE. The
+	 *	current values of the pp bits in the HPTE prevent access
+	 *	because we are doing software DIRTY bit management and the
+	 *	page is currently not DIRTY.
+	 */
+
+
+	do {
+		old_pte = pte_val(*ptep);
+		if (old_pte & _PAGE_BUSY)
+			goto out;
+		new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED;
+	} while(old_pte != __cmpxchg_u64((unsigned long *)ptep,
+					 old_pte, new_pte));
+
+	rflags = 0x2 | (!(new_pte & _PAGE_RW));
+	/* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */
+	rflags |= ((new_pte & _PAGE_EXEC) ? 0 : HPTE_R_N);
+	sz = ((1UL) << shift);
+	if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
+		/* No CPU has hugepages but lacks no execute, so we
+		 * don't need to worry about that case */
+		rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap);
+
+	/* Check if pte already has an hpte (case 2) */
+	if (unlikely(old_pte & _PAGE_HASHPTE)) {
+		/* There MIGHT be an HPTE for this pte */
+		unsigned long hash, slot;
+
+		hash = hpt_hash(va, shift, ssize);
+		if (old_pte & _PAGE_F_SECOND)
+			hash = ~hash;
+		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+		slot += (old_pte & _PAGE_F_GIX) >> 12;
+
+		if (ppc_md.hpte_updatepp(slot, rflags, va, mmu_psize,
+					 ssize, local) == -1)
+			old_pte &= ~_PAGE_HPTEFLAGS;
+	}
+
+	if (likely(!(old_pte & _PAGE_HASHPTE))) {
+		unsigned long hash = hpt_hash(va, shift, ssize);
+		unsigned long hpte_group;
+
+		pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;
+
+repeat:
+		hpte_group = ((hash & htab_hash_mask) *
+			      HPTES_PER_GROUP) & ~0x7UL;
+
+		/* clear HPTE slot informations in new PTE */
+#ifdef CONFIG_PPC_64K_PAGES
+		new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HPTE_SUB0;
+#else
+		new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE;
+#endif
+		/* Add in WIMG bits */
+		rflags |= (new_pte & (_PAGE_WRITETHRU | _PAGE_NO_CACHE |
+				      _PAGE_COHERENT | _PAGE_GUARDED));
+
+		/* Insert into the hash table, primary slot */
+		slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, 0,
+					  mmu_psize, ssize);
+
+		/* Primary is full, try the secondary */
+		if (unlikely(slot == -1)) {
+			hpte_group = ((~hash & htab_hash_mask) *
+				      HPTES_PER_GROUP) & ~0x7UL;
+			slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags,
+						  HPTE_V_SECONDARY,
+						  mmu_psize, ssize);
+			if (slot == -1) {
+				if (mftb() & 0x1)
+					hpte_group = ((hash & htab_hash_mask) *
+						      HPTES_PER_GROUP)&~0x7UL;
+
+				ppc_md.hpte_remove(hpte_group);
+				goto repeat;
+                        }
+		}
+
+		if (unlikely(slot == -2))
+			panic("hash_huge_page: pte_insert failed\n");
+
+		new_pte |= (slot << 12) & (_PAGE_F_SECOND | _PAGE_F_GIX);
+	}
+
+	/*
+	 * No need to use ldarx/stdcx here
+	 */
+	*ptep = __pte(new_pte & ~_PAGE_BUSY);
+
+	err = 0;
+
+ out:
+	return err;
+}
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 90df6ff..123f707 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -7,29 +7,17 @@
  * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
  */
 
-#include <linux/init.h>
-#include <linux/fs.h>
 #include <linux/mm.h>
+#include <linux/io.h>
 #include <linux/hugetlb.h>
-#include <linux/pagemap.h>
-#include <linux/slab.h>
-#include <linux/err.h>
-#include <linux/sysctl.h>
-#include <asm/mman.h>
+#include <asm/pgtable.h>
 #include <asm/pgalloc.h>
 #include <asm/tlb.h>
-#include <asm/tlbflush.h>
-#include <asm/mmu_context.h>
-#include <asm/machdep.h>
-#include <asm/cputable.h>
-#include <asm/spu.h>
 
 #define PAGE_SHIFT_64K	16
 #define PAGE_SHIFT_16M	24
 #define PAGE_SHIFT_16G	34
 
-#define NUM_LOW_AREAS	(0x100000000UL >> SID_SHIFT)
-#define NUM_HIGH_AREAS	(PGTABLE_RANGE >> HTLB_AREA_SHIFT)
 #define MAX_NUMBER_GPAGES	1024
 
 /* Tracks the 16G pages after the device tree is scanned and before the
@@ -37,53 +25,17 @@
 static unsigned long gpage_freearray[MAX_NUMBER_GPAGES];
 static unsigned nr_gpages;
 
-/* Array of valid huge page sizes - non-zero value(hugepte_shift) is
- * stored for the huge page sizes that are valid.
- */
-unsigned int mmu_huge_psizes[MMU_PAGE_COUNT] = { }; /* initialize all to 0 */
-
-#define hugepte_shift			mmu_huge_psizes
-#define PTRS_PER_HUGEPTE(psize)		(1 << hugepte_shift[psize])
-#define HUGEPTE_TABLE_SIZE(psize)	(sizeof(pte_t) << hugepte_shift[psize])
-
-#define HUGEPD_SHIFT(psize)		(mmu_psize_to_shift(psize) \
-						+ hugepte_shift[psize])
-#define HUGEPD_SIZE(psize)		(1UL << HUGEPD_SHIFT(psize))
-#define HUGEPD_MASK(psize)		(~(HUGEPD_SIZE(psize)-1))
-
-/* Subtract one from array size because we don't need a cache for 4K since
- * is not a huge page size */
-#define HUGE_PGTABLE_INDEX(psize)	(HUGEPTE_CACHE_NUM + psize - 1)
-#define HUGEPTE_CACHE_NAME(psize)	(huge_pgtable_cache_name[psize])
-
-static const char *huge_pgtable_cache_name[MMU_PAGE_COUNT] = {
-	[MMU_PAGE_64K]	= "hugepte_cache_64K",
-	[MMU_PAGE_1M]	= "hugepte_cache_1M",
-	[MMU_PAGE_16M]	= "hugepte_cache_16M",
-	[MMU_PAGE_16G]	= "hugepte_cache_16G",
-};
-
 /* Flag to mark huge PD pointers.  This means pmd_bad() and pud_bad()
  * will choke on pointers to hugepte tables, which is handy for
  * catching screwups early. */
-#define HUGEPD_OK	0x1
-
-typedef struct { unsigned long pd; } hugepd_t;
-
-#define hugepd_none(hpd)	((hpd).pd == 0)
 
 static inline int shift_to_mmu_psize(unsigned int shift)
 {
-	switch (shift) {
-#ifndef CONFIG_PPC_64K_PAGES
-	case PAGE_SHIFT_64K:
-	    return MMU_PAGE_64K;
-#endif
-	case PAGE_SHIFT_16M:
-	    return MMU_PAGE_16M;
-	case PAGE_SHIFT_16G:
-	    return MMU_PAGE_16G;
-	}
+	int psize;
+
+	for (psize = 0; psize < MMU_PAGE_COUNT; ++psize)
+		if (mmu_psize_defs[psize].shift == shift)
+			return psize;
 	return -1;
 }
 
@@ -94,71 +46,126 @@
 	BUG();
 }
 
+#define hugepd_none(hpd)	((hpd).pd == 0)
+
 static inline pte_t *hugepd_page(hugepd_t hpd)
 {
-	BUG_ON(!(hpd.pd & HUGEPD_OK));
-	return (pte_t *)(hpd.pd & ~HUGEPD_OK);
+	BUG_ON(!hugepd_ok(hpd));
+	return (pte_t *)((hpd.pd & ~HUGEPD_SHIFT_MASK) | 0xc000000000000000);
 }
 
-static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr,
-				    struct hstate *hstate)
+static inline unsigned int hugepd_shift(hugepd_t hpd)
 {
-	unsigned int shift = huge_page_shift(hstate);
-	int psize = shift_to_mmu_psize(shift);
-	unsigned long idx = ((addr >> shift) & (PTRS_PER_HUGEPTE(psize)-1));
+	return hpd.pd & HUGEPD_SHIFT_MASK;
+}
+
+static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr, unsigned pdshift)
+{
+	unsigned long idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(*hpdp);
 	pte_t *dir = hugepd_page(*hpdp);
 
 	return dir + idx;
 }
 
-static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
-			   unsigned long address, unsigned int psize)
+pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift)
 {
-	pte_t *new = kmem_cache_zalloc(pgtable_cache[HUGE_PGTABLE_INDEX(psize)],
-				      GFP_KERNEL|__GFP_REPEAT);
+	pgd_t *pg;
+	pud_t *pu;
+	pmd_t *pm;
+	hugepd_t *hpdp = NULL;
+	unsigned pdshift = PGDIR_SHIFT;
+
+	if (shift)
+		*shift = 0;
+
+	pg = pgdir + pgd_index(ea);
+	if (is_hugepd(pg)) {
+		hpdp = (hugepd_t *)pg;
+	} else if (!pgd_none(*pg)) {
+		pdshift = PUD_SHIFT;
+		pu = pud_offset(pg, ea);
+		if (is_hugepd(pu))
+			hpdp = (hugepd_t *)pu;
+		else if (!pud_none(*pu)) {
+			pdshift = PMD_SHIFT;
+			pm = pmd_offset(pu, ea);
+			if (is_hugepd(pm))
+				hpdp = (hugepd_t *)pm;
+			else if (!pmd_none(*pm)) {
+				return pte_offset_map(pm, ea);
+			}
+		}
+	}
+
+	if (!hpdp)
+		return NULL;
+
+	if (shift)
+		*shift = hugepd_shift(*hpdp);
+	return hugepte_offset(hpdp, ea, pdshift);
+}
+
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+{
+	return find_linux_pte_or_hugepte(mm->pgd, addr, NULL);
+}
+
+static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
+			   unsigned long address, unsigned pdshift, unsigned pshift)
+{
+	pte_t *new = kmem_cache_zalloc(PGT_CACHE(pdshift - pshift),
+				       GFP_KERNEL|__GFP_REPEAT);
+
+	BUG_ON(pshift > HUGEPD_SHIFT_MASK);
+	BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK);
 
 	if (! new)
 		return -ENOMEM;
 
 	spin_lock(&mm->page_table_lock);
 	if (!hugepd_none(*hpdp))
-		kmem_cache_free(pgtable_cache[HUGE_PGTABLE_INDEX(psize)], new);
+		kmem_cache_free(PGT_CACHE(pdshift - pshift), new);
 	else
-		hpdp->pd = (unsigned long)new | HUGEPD_OK;
+		hpdp->pd = ((unsigned long)new & ~0x8000000000000000) | pshift;
 	spin_unlock(&mm->page_table_lock);
 	return 0;
 }
 
+pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz)
+{
+	pgd_t *pg;
+	pud_t *pu;
+	pmd_t *pm;
+	hugepd_t *hpdp = NULL;
+	unsigned pshift = __ffs(sz);
+	unsigned pdshift = PGDIR_SHIFT;
 
-static pud_t *hpud_offset(pgd_t *pgd, unsigned long addr, struct hstate *hstate)
-{
-	if (huge_page_shift(hstate) < PUD_SHIFT)
-		return pud_offset(pgd, addr);
-	else
-		return (pud_t *) pgd;
-}
-static pud_t *hpud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long addr,
-			 struct hstate *hstate)
-{
-	if (huge_page_shift(hstate) < PUD_SHIFT)
-		return pud_alloc(mm, pgd, addr);
-	else
-		return (pud_t *) pgd;
-}
-static pmd_t *hpmd_offset(pud_t *pud, unsigned long addr, struct hstate *hstate)
-{
-	if (huge_page_shift(hstate) < PMD_SHIFT)
-		return pmd_offset(pud, addr);
-	else
-		return (pmd_t *) pud;
-}
-static pmd_t *hpmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long addr,
-			 struct hstate *hstate)
-{
-	if (huge_page_shift(hstate) < PMD_SHIFT)
-		return pmd_alloc(mm, pud, addr);
-	else
-		return (pmd_t *) pud;
+	addr &= ~(sz-1);
+
+	pg = pgd_offset(mm, addr);
+	if (pshift >= PUD_SHIFT) {
+		hpdp = (hugepd_t *)pg;
+	} else {
+		pdshift = PUD_SHIFT;
+		pu = pud_alloc(mm, pg, addr);
+		if (pshift >= PMD_SHIFT) {
+			hpdp = (hugepd_t *)pu;
+		} else {
+			pdshift = PMD_SHIFT;
+			pm = pmd_alloc(mm, pu, addr);
+			hpdp = (hugepd_t *)pm;
+		}
+	}
+
+	if (!hpdp)
+		return NULL;
+
+	BUG_ON(!hugepd_none(*hpdp) && !hugepd_ok(*hpdp));
+
+	if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift))
+		return NULL;
+
+	return hugepte_offset(hpdp, addr, pdshift);
 }
 
 /* Build list of addresses of gigantic pages.  This function is used in early
@@ -192,94 +199,38 @@
 	return 1;
 }
 
-
-/* Modelled after find_linux_pte() */
-pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
-{
-	pgd_t *pg;
-	pud_t *pu;
-	pmd_t *pm;
-
-	unsigned int psize;
-	unsigned int shift;
-	unsigned long sz;
-	struct hstate *hstate;
-	psize = get_slice_psize(mm, addr);
-	shift = mmu_psize_to_shift(psize);
-	sz = ((1UL) << shift);
-	hstate = size_to_hstate(sz);
-
-	addr &= hstate->mask;
-
-	pg = pgd_offset(mm, addr);
-	if (!pgd_none(*pg)) {
-		pu = hpud_offset(pg, addr, hstate);
-		if (!pud_none(*pu)) {
-			pm = hpmd_offset(pu, addr, hstate);
-			if (!pmd_none(*pm))
-				return hugepte_offset((hugepd_t *)pm, addr,
-						      hstate);
-		}
-	}
-
-	return NULL;
-}
-
-pte_t *huge_pte_alloc(struct mm_struct *mm,
-			unsigned long addr, unsigned long sz)
-{
-	pgd_t *pg;
-	pud_t *pu;
-	pmd_t *pm;
-	hugepd_t *hpdp = NULL;
-	struct hstate *hstate;
-	unsigned int psize;
-	hstate = size_to_hstate(sz);
-
-	psize = get_slice_psize(mm, addr);
-	BUG_ON(!mmu_huge_psizes[psize]);
-
-	addr &= hstate->mask;
-
-	pg = pgd_offset(mm, addr);
-	pu = hpud_alloc(mm, pg, addr, hstate);
-
-	if (pu) {
-		pm = hpmd_alloc(mm, pu, addr, hstate);
-		if (pm)
-			hpdp = (hugepd_t *)pm;
-	}
-
-	if (! hpdp)
-		return NULL;
-
-	if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, psize))
-		return NULL;
-
-	return hugepte_offset(hpdp, addr, hstate);
-}
-
 int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
 {
 	return 0;
 }
 
-static void free_hugepte_range(struct mmu_gather *tlb, hugepd_t *hpdp,
-			       unsigned int psize)
+static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshift,
+			      unsigned long start, unsigned long end,
+			      unsigned long floor, unsigned long ceiling)
 {
 	pte_t *hugepte = hugepd_page(*hpdp);
+	unsigned shift = hugepd_shift(*hpdp);
+	unsigned long pdmask = ~((1UL << pdshift) - 1);
+
+	start &= pdmask;
+	if (start < floor)
+		return;
+	if (ceiling) {
+		ceiling &= pdmask;
+		if (! ceiling)
+			return;
+	}
+	if (end - 1 > ceiling - 1)
+		return;
 
 	hpdp->pd = 0;
 	tlb->need_flush = 1;
-	pgtable_free_tlb(tlb, pgtable_free_cache(hugepte,
-						 HUGEPTE_CACHE_NUM+psize-1,
-						 PGF_CACHENUM_MASK));
+	pgtable_free_tlb(tlb, hugepte, pdshift - shift);
 }
 
 static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
 				   unsigned long addr, unsigned long end,
-				   unsigned long floor, unsigned long ceiling,
-				   unsigned int psize)
+				   unsigned long floor, unsigned long ceiling)
 {
 	pmd_t *pmd;
 	unsigned long next;
@@ -291,7 +242,8 @@
 		next = pmd_addr_end(addr, end);
 		if (pmd_none(*pmd))
 			continue;
-		free_hugepte_range(tlb, (hugepd_t *)pmd, psize);
+		free_hugepd_range(tlb, (hugepd_t *)pmd, PMD_SHIFT,
+				  addr, next, floor, ceiling);
 	} while (pmd++, addr = next, addr != end);
 
 	start &= PUD_MASK;
@@ -317,23 +269,19 @@
 	pud_t *pud;
 	unsigned long next;
 	unsigned long start;
-	unsigned int shift;
-	unsigned int psize = get_slice_psize(tlb->mm, addr);
-	shift = mmu_psize_to_shift(psize);
 
 	start = addr;
 	pud = pud_offset(pgd, addr);
 	do {
 		next = pud_addr_end(addr, end);
-		if (shift < PMD_SHIFT) {
+		if (!is_hugepd(pud)) {
 			if (pud_none_or_clear_bad(pud))
 				continue;
 			hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
-					       ceiling, psize);
+					       ceiling);
 		} else {
-			if (pud_none(*pud))
-				continue;
-			free_hugepte_range(tlb, (hugepd_t *)pud, psize);
+			free_hugepd_range(tlb, (hugepd_t *)pud, PUD_SHIFT,
+					  addr, next, floor, ceiling);
 		}
 	} while (pud++, addr = next, addr != end);
 
@@ -364,121 +312,56 @@
 {
 	pgd_t *pgd;
 	unsigned long next;
-	unsigned long start;
 
 	/*
-	 * Comments below take from the normal free_pgd_range().  They
-	 * apply here too.  The tests against HUGEPD_MASK below are
-	 * essential, because we *don't* test for this at the bottom
-	 * level.  Without them we'll attempt to free a hugepte table
-	 * when we unmap just part of it, even if there are other
-	 * active mappings using it.
+	 * Because there are a number of different possible pagetable
+	 * layouts for hugepage ranges, we limit knowledge of how
+	 * things should be laid out to the allocation path
+	 * (huge_pte_alloc(), above).  Everything else works out the
+	 * structure as it goes from information in the hugepd
+	 * pointers.  That means that we can't here use the
+	 * optimization used in the normal page free_pgd_range(), of
+	 * checking whether we're actually covering a large enough
+	 * range to have to do anything at the top level of the walk
+	 * instead of at the bottom.
 	 *
-	 * The next few lines have given us lots of grief...
-	 *
-	 * Why are we testing HUGEPD* at this top level?  Because
-	 * often there will be no work to do at all, and we'd prefer
-	 * not to go all the way down to the bottom just to discover
-	 * that.
-	 *
-	 * Why all these "- 1"s?  Because 0 represents both the bottom
-	 * of the address space and the top of it (using -1 for the
-	 * top wouldn't help much: the masks would do the wrong thing).
-	 * The rule is that addr 0 and floor 0 refer to the bottom of
-	 * the address space, but end 0 and ceiling 0 refer to the top
-	 * Comparisons need to use "end - 1" and "ceiling - 1" (though
-	 * that end 0 case should be mythical).
-	 *
-	 * Wherever addr is brought up or ceiling brought down, we
-	 * must be careful to reject "the opposite 0" before it
-	 * confuses the subsequent tests.  But what about where end is
-	 * brought down by HUGEPD_SIZE below? no, end can't go down to
-	 * 0 there.
-	 *
-	 * Whereas we round start (addr) and ceiling down, by different
-	 * masks at different levels, in order to test whether a table
-	 * now has no other vmas using it, so can be freed, we don't
-	 * bother to round floor or end up - the tests don't need that.
+	 * To make sense of this, you should probably go read the big
+	 * block comment at the top of the normal free_pgd_range(),
+	 * too.
 	 */
-	unsigned int psize = get_slice_psize(tlb->mm, addr);
 
-	addr &= HUGEPD_MASK(psize);
-	if (addr < floor) {
-		addr += HUGEPD_SIZE(psize);
-		if (!addr)
-			return;
-	}
-	if (ceiling) {
-		ceiling &= HUGEPD_MASK(psize);
-		if (!ceiling)
-			return;
-	}
-	if (end - 1 > ceiling - 1)
-		end -= HUGEPD_SIZE(psize);
-	if (addr > end - 1)
-		return;
-
-	start = addr;
 	pgd = pgd_offset(tlb->mm, addr);
 	do {
-		psize = get_slice_psize(tlb->mm, addr);
-		BUG_ON(!mmu_huge_psizes[psize]);
 		next = pgd_addr_end(addr, end);
-		if (mmu_psize_to_shift(psize) < PUD_SHIFT) {
+		if (!is_hugepd(pgd)) {
 			if (pgd_none_or_clear_bad(pgd))
 				continue;
 			hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling);
 		} else {
-			if (pgd_none(*pgd))
-				continue;
-			free_hugepte_range(tlb, (hugepd_t *)pgd, psize);
+			free_hugepd_range(tlb, (hugepd_t *)pgd, PGDIR_SHIFT,
+					  addr, next, floor, ceiling);
 		}
 	} while (pgd++, addr = next, addr != end);
 }
 
-void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
-		     pte_t *ptep, pte_t pte)
-{
-	if (pte_present(*ptep)) {
-		/* We open-code pte_clear because we need to pass the right
-		 * argument to hpte_need_flush (huge / !huge). Might not be
-		 * necessary anymore if we make hpte_need_flush() get the
-		 * page size from the slices
-		 */
-		unsigned int psize = get_slice_psize(mm, addr);
-		unsigned int shift = mmu_psize_to_shift(psize);
-		unsigned long sz = ((1UL) << shift);
-		struct hstate *hstate = size_to_hstate(sz);
-		pte_update(mm, addr & hstate->mask, ptep, ~0UL, 1);
-	}
-	*ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
-}
-
-pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
-			      pte_t *ptep)
-{
-	unsigned long old = pte_update(mm, addr, ptep, ~0UL, 1);
-	return __pte(old);
-}
-
 struct page *
 follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
 {
 	pte_t *ptep;
 	struct page *page;
-	unsigned int mmu_psize = get_slice_psize(mm, address);
+	unsigned shift;
+	unsigned long mask;
+
+	ptep = find_linux_pte_or_hugepte(mm->pgd, address, &shift);
 
 	/* Verify it is a huge page else bail. */
-	if (!mmu_huge_psizes[mmu_psize])
+	if (!ptep || !shift)
 		return ERR_PTR(-EINVAL);
 
-	ptep = huge_pte_offset(mm, address);
+	mask = (1UL << shift) - 1;
 	page = pte_page(*ptep);
-	if (page) {
-		unsigned int shift = mmu_psize_to_shift(mmu_psize);
-		unsigned long sz = ((1UL) << shift);
-		page += (address % sz) / PAGE_SIZE;
-	}
+	if (page)
+		page += (address & mask) / PAGE_SIZE;
 
 	return page;
 }
@@ -501,6 +384,82 @@
 	return NULL;
 }
 
+static noinline int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
+		       unsigned long end, int write, struct page **pages, int *nr)
+{
+	unsigned long mask;
+	unsigned long pte_end;
+	struct page *head, *page;
+	pte_t pte;
+	int refs;
+
+	pte_end = (addr + sz) & ~(sz-1);
+	if (pte_end < end)
+		end = pte_end;
+
+	pte = *ptep;
+	mask = _PAGE_PRESENT | _PAGE_USER;
+	if (write)
+		mask |= _PAGE_RW;
+
+	if ((pte_val(pte) & mask) != mask)
+		return 0;
+
+	/* hugepages are never "special" */
+	VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
+
+	refs = 0;
+	head = pte_page(pte);
+
+	page = head + ((addr & (sz-1)) >> PAGE_SHIFT);
+	do {
+		VM_BUG_ON(compound_head(page) != head);
+		pages[*nr] = page;
+		(*nr)++;
+		page++;
+		refs++;
+	} while (addr += PAGE_SIZE, addr != end);
+
+	if (!page_cache_add_speculative(head, refs)) {
+		*nr -= refs;
+		return 0;
+	}
+
+	if (unlikely(pte_val(pte) != pte_val(*ptep))) {
+		/* Could be optimized better */
+		while (*nr) {
+			put_page(page);
+			(*nr)--;
+		}
+	}
+
+	return 1;
+}
+
+static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end,
+				      unsigned long sz)
+{
+	unsigned long __boundary = (addr + sz) & ~(sz-1);
+	return (__boundary - 1 < end - 1) ? __boundary : end;
+}
+
+int gup_hugepd(hugepd_t *hugepd, unsigned pdshift,
+	       unsigned long addr, unsigned long end,
+	       int write, struct page **pages, int *nr)
+{
+	pte_t *ptep;
+	unsigned long sz = 1UL << hugepd_shift(*hugepd);
+	unsigned long next;
+
+	ptep = hugepte_offset(hugepd, addr, pdshift);
+	do {
+		next = hugepte_addr_end(addr, end, sz);
+		if (!gup_hugepte(ptep, sz, addr, end, write, pages, nr))
+			return 0;
+	} while (ptep++, addr = next, addr != end);
+
+	return 1;
+}
 
 unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 					unsigned long len, unsigned long pgoff,
@@ -509,8 +468,6 @@
 	struct hstate *hstate = hstate_file(file);
 	int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate));
 
-	if (!mmu_huge_psizes[mmu_psize])
-		return -EINVAL;
 	return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1, 0);
 }
 
@@ -521,229 +478,46 @@
 	return 1UL << mmu_psize_to_shift(psize);
 }
 
-/*
- * Called by asm hashtable.S for doing lazy icache flush
- */
-static unsigned int hash_huge_page_do_lazy_icache(unsigned long rflags,
-					pte_t pte, int trap, unsigned long sz)
+static int __init add_huge_page_size(unsigned long long size)
 {
-	struct page *page;
-	int i;
+	int shift = __ffs(size);
+	int mmu_psize;
 
-	if (!pfn_valid(pte_pfn(pte)))
-		return rflags;
-
-	page = pte_page(pte);
-
-	/* page is dirty */
-	if (!test_bit(PG_arch_1, &page->flags) && !PageReserved(page)) {
-		if (trap == 0x400) {
-			for (i = 0; i < (sz / PAGE_SIZE); i++)
-				__flush_dcache_icache(page_address(page+i));
-			set_bit(PG_arch_1, &page->flags);
-		} else {
-			rflags |= HPTE_R_N;
-		}
-	}
-	return rflags;
-}
-
-int hash_huge_page(struct mm_struct *mm, unsigned long access,
-		   unsigned long ea, unsigned long vsid, int local,
-		   unsigned long trap)
-{
-	pte_t *ptep;
-	unsigned long old_pte, new_pte;
-	unsigned long va, rflags, pa, sz;
-	long slot;
-	int err = 1;
-	int ssize = user_segment_size(ea);
-	unsigned int mmu_psize;
-	int shift;
-	mmu_psize = get_slice_psize(mm, ea);
-
-	if (!mmu_huge_psizes[mmu_psize])
-		goto out;
-	ptep = huge_pte_offset(mm, ea);
-
-	/* Search the Linux page table for a match with va */
-	va = hpt_va(ea, vsid, ssize);
-
-	/*
-	 * If no pte found or not present, send the problem up to
-	 * do_page_fault
-	 */
-	if (unlikely(!ptep || pte_none(*ptep)))
-		goto out;
-
-	/* 
-	 * Check the user's access rights to the page.  If access should be
-	 * prevented then send the problem up to do_page_fault.
-	 */
-	if (unlikely(access & ~pte_val(*ptep)))
-		goto out;
-	/*
-	 * At this point, we have a pte (old_pte) which can be used to build
-	 * or update an HPTE. There are 2 cases:
-	 *
-	 * 1. There is a valid (present) pte with no associated HPTE (this is 
-	 *	the most common case)
-	 * 2. There is a valid (present) pte with an associated HPTE. The
-	 *	current values of the pp bits in the HPTE prevent access
-	 *	because we are doing software DIRTY bit management and the
-	 *	page is currently not DIRTY. 
-	 */
-
-
-	do {
-		old_pte = pte_val(*ptep);
-		if (old_pte & _PAGE_BUSY)
-			goto out;
-		new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED;
-	} while(old_pte != __cmpxchg_u64((unsigned long *)ptep,
-					 old_pte, new_pte));
-
-	rflags = 0x2 | (!(new_pte & _PAGE_RW));
- 	/* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */
-	rflags |= ((new_pte & _PAGE_EXEC) ? 0 : HPTE_R_N);
-	shift = mmu_psize_to_shift(mmu_psize);
-	sz = ((1UL) << shift);
-	if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
-		/* No CPU has hugepages but lacks no execute, so we
-		 * don't need to worry about that case */
-		rflags = hash_huge_page_do_lazy_icache(rflags, __pte(old_pte),
-						       trap, sz);
-
-	/* Check if pte already has an hpte (case 2) */
-	if (unlikely(old_pte & _PAGE_HASHPTE)) {
-		/* There MIGHT be an HPTE for this pte */
-		unsigned long hash, slot;
-
-		hash = hpt_hash(va, shift, ssize);
-		if (old_pte & _PAGE_F_SECOND)
-			hash = ~hash;
-		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
-		slot += (old_pte & _PAGE_F_GIX) >> 12;
-
-		if (ppc_md.hpte_updatepp(slot, rflags, va, mmu_psize,
-					 ssize, local) == -1)
-			old_pte &= ~_PAGE_HPTEFLAGS;
-	}
-
-	if (likely(!(old_pte & _PAGE_HASHPTE))) {
-		unsigned long hash = hpt_hash(va, shift, ssize);
-		unsigned long hpte_group;
-
-		pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;
-
-repeat:
-		hpte_group = ((hash & htab_hash_mask) *
-			      HPTES_PER_GROUP) & ~0x7UL;
-
-		/* clear HPTE slot informations in new PTE */
-#ifdef CONFIG_PPC_64K_PAGES
-		new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HPTE_SUB0;
-#else
-		new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE;
-#endif
-		/* Add in WIMG bits */
-		rflags |= (new_pte & (_PAGE_WRITETHRU | _PAGE_NO_CACHE |
-				      _PAGE_COHERENT | _PAGE_GUARDED));
-
-		/* Insert into the hash table, primary slot */
-		slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, 0,
-					  mmu_psize, ssize);
-
-		/* Primary is full, try the secondary */
-		if (unlikely(slot == -1)) {
-			hpte_group = ((~hash & htab_hash_mask) *
-				      HPTES_PER_GROUP) & ~0x7UL; 
-			slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags,
-						  HPTE_V_SECONDARY,
-						  mmu_psize, ssize);
-			if (slot == -1) {
-				if (mftb() & 0x1)
-					hpte_group = ((hash & htab_hash_mask) *
-						      HPTES_PER_GROUP)&~0x7UL;
-
-				ppc_md.hpte_remove(hpte_group);
-				goto repeat;
-                        }
-		}
-
-		if (unlikely(slot == -2))
-			panic("hash_huge_page: pte_insert failed\n");
-
-		new_pte |= (slot << 12) & (_PAGE_F_SECOND | _PAGE_F_GIX);
-	}
-
-	/*
-	 * No need to use ldarx/stdcx here
-	 */
-	*ptep = __pte(new_pte & ~_PAGE_BUSY);
-
-	err = 0;
-
- out:
-	return err;
-}
-
-static void __init set_huge_psize(int psize)
-{
 	/* Check that it is a page size supported by the hardware and
-	 * that it fits within pagetable limits. */
-	if (mmu_psize_defs[psize].shift &&
-		mmu_psize_defs[psize].shift < SID_SHIFT_1T &&
-		(mmu_psize_defs[psize].shift > MIN_HUGEPTE_SHIFT ||
-		 mmu_psize_defs[psize].shift == PAGE_SHIFT_64K ||
-		 mmu_psize_defs[psize].shift == PAGE_SHIFT_16G)) {
-		/* Return if huge page size has already been setup or is the
-		 * same as the base page size. */
-		if (mmu_huge_psizes[psize] ||
-		   mmu_psize_defs[psize].shift == PAGE_SHIFT)
-			return;
-		if (WARN_ON(HUGEPTE_CACHE_NAME(psize) == NULL))
-			return;
-		hugetlb_add_hstate(mmu_psize_defs[psize].shift - PAGE_SHIFT);
+	 * that it fits within pagetable and slice limits. */
+	if (!is_power_of_2(size)
+	    || (shift > SLICE_HIGH_SHIFT) || (shift <= PAGE_SHIFT))
+		return -EINVAL;
 
-		switch (mmu_psize_defs[psize].shift) {
-		case PAGE_SHIFT_64K:
-		    /* We only allow 64k hpages with 4k base page,
-		     * which was checked above, and always put them
-		     * at the PMD */
-		    hugepte_shift[psize] = PMD_SHIFT;
-		    break;
-		case PAGE_SHIFT_16M:
-		    /* 16M pages can be at two different levels
-		     * of pagestables based on base page size */
-		    if (PAGE_SHIFT == PAGE_SHIFT_64K)
-			    hugepte_shift[psize] = PMD_SHIFT;
-		    else /* 4k base page */
-			    hugepte_shift[psize] = PUD_SHIFT;
-		    break;
-		case PAGE_SHIFT_16G:
-		    /* 16G pages are always at PGD level */
-		    hugepte_shift[psize] = PGDIR_SHIFT;
-		    break;
-		}
-		hugepte_shift[psize] -= mmu_psize_defs[psize].shift;
-	} else
-		hugepte_shift[psize] = 0;
+	if ((mmu_psize = shift_to_mmu_psize(shift)) < 0)
+		return -EINVAL;
+
+#ifdef CONFIG_SPU_FS_64K_LS
+	/* Disable support for 64K huge pages when 64K SPU local store
+	 * support is enabled as the current implementation conflicts.
+	 */
+	if (shift == PAGE_SHIFT_64K)
+		return -EINVAL;
+#endif /* CONFIG_SPU_FS_64K_LS */
+
+	BUG_ON(mmu_psize_defs[mmu_psize].shift != shift);
+
+	/* Return if huge page size has already been setup */
+	if (size_to_hstate(size))
+		return 0;
+
+	hugetlb_add_hstate(shift - PAGE_SHIFT);
+
+	return 0;
 }
 
 static int __init hugepage_setup_sz(char *str)
 {
 	unsigned long long size;
-	int mmu_psize;
-	int shift;
 
 	size = memparse(str, &str);
 
-	shift = __ffs(size);
-	mmu_psize = shift_to_mmu_psize(shift);
-	if (mmu_psize >= 0 && mmu_psize_defs[mmu_psize].shift)
-		set_huge_psize(mmu_psize);
-	else
+	if (add_huge_page_size(size) != 0)
 		printk(KERN_WARNING "Invalid huge page size specified(%llu)\n", size);
 
 	return 1;
@@ -752,41 +526,55 @@
 
 static int __init hugetlbpage_init(void)
 {
-	unsigned int psize;
+	int psize;
 
 	if (!cpu_has_feature(CPU_FTR_16M_PAGE))
 		return -ENODEV;
 
-	/* Add supported huge page sizes.  Need to change HUGE_MAX_HSTATE
-	 * and adjust PTE_NONCACHE_NUM if the number of supported huge page
-	 * sizes changes.
-	 */
-	set_huge_psize(MMU_PAGE_16M);
-	set_huge_psize(MMU_PAGE_16G);
-
-	/* Temporarily disable support for 64K huge pages when 64K SPU local
-	 * store support is enabled as the current implementation conflicts.
-	 */
-#ifndef CONFIG_SPU_FS_64K_LS
-	set_huge_psize(MMU_PAGE_64K);
-#endif
-
 	for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
-		if (mmu_huge_psizes[psize]) {
-			pgtable_cache[HUGE_PGTABLE_INDEX(psize)] =
-				kmem_cache_create(
-					HUGEPTE_CACHE_NAME(psize),
-					HUGEPTE_TABLE_SIZE(psize),
-					HUGEPTE_TABLE_SIZE(psize),
-					0,
-					NULL);
-			if (!pgtable_cache[HUGE_PGTABLE_INDEX(psize)])
-				panic("hugetlbpage_init(): could not create %s"\
-				      "\n", HUGEPTE_CACHE_NAME(psize));
-		}
+		unsigned shift;
+		unsigned pdshift;
+
+		if (!mmu_psize_defs[psize].shift)
+			continue;
+
+		shift = mmu_psize_to_shift(psize);
+
+		if (add_huge_page_size(1ULL << shift) < 0)
+			continue;
+
+		if (shift < PMD_SHIFT)
+			pdshift = PMD_SHIFT;
+		else if (shift < PUD_SHIFT)
+			pdshift = PUD_SHIFT;
+		else
+			pdshift = PGDIR_SHIFT;
+
+		pgtable_cache_add(pdshift - shift, NULL);
+		if (!PGT_CACHE(pdshift - shift))
+			panic("hugetlbpage_init(): could not create "
+			      "pgtable cache for %d bit pagesize\n", shift);
 	}
 
+	/* Set default large page size. Currently, we pick 16M or 1M
+	 * depending on what is available
+	 */
+	if (mmu_psize_defs[MMU_PAGE_16M].shift)
+		HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_16M].shift;
+	else if (mmu_psize_defs[MMU_PAGE_1M].shift)
+		HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_1M].shift;
+
 	return 0;
 }
 
 module_init(hugetlbpage_init);
+
+void flush_dcache_icache_hugepage(struct page *page)
+{
+	int i;
+
+	BUG_ON(!PageCompound(page));
+
+	for (i = 0; i < (1UL << compound_order(page)); i++)
+		__flush_dcache_icache(page_address(page+i));
+}
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 335c578..776f28d 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -41,6 +41,7 @@
 #include <linux/module.h>
 #include <linux/poison.h>
 #include <linux/lmb.h>
+#include <linux/hugetlb.h>
 
 #include <asm/pgalloc.h>
 #include <asm/page.h>
@@ -119,30 +120,63 @@
 	memset(addr, 0, PMD_TABLE_SIZE);
 }
 
-static const unsigned int pgtable_cache_size[2] = {
-	PGD_TABLE_SIZE, PMD_TABLE_SIZE
-};
-static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = {
-#ifdef CONFIG_PPC_64K_PAGES
-	"pgd_cache", "pmd_cache",
-#else
-	"pgd_cache", "pud_pmd_cache",
-#endif /* CONFIG_PPC_64K_PAGES */
-};
+struct kmem_cache *pgtable_cache[MAX_PGTABLE_INDEX_SIZE];
 
-#ifdef CONFIG_HUGETLB_PAGE
-/* Hugepages need an extra cache per hugepagesize, initialized in
- * hugetlbpage.c.  We can't put into the tables above, because HPAGE_SHIFT
- * is not compile time constant. */
-struct kmem_cache *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)+MMU_PAGE_COUNT];
-#else
-struct kmem_cache *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)];
-#endif
+/*
+ * Create a kmem_cache() for pagetables.  This is not used for PTE
+ * pages - they're linked to struct page, come from the normal free
+ * pages pool and have a different entry size (see real_pte_t) to
+ * everything else.  Caches created by this function are used for all
+ * the higher level pagetables, and for hugepage pagetables.
+ */
+void pgtable_cache_add(unsigned shift, void (*ctor)(void *))
+{
+	char *name;
+	unsigned long table_size = sizeof(void *) << shift;
+	unsigned long align = table_size;
+
+	/* When batching pgtable pointers for RCU freeing, we store
+	 * the index size in the low bits.  Table alignment must be
+	 * big enough to fit it.
+	 *
+	 * Likewise, hugeapge pagetable pointers contain a (different)
+	 * shift value in the low bits.  All tables must be aligned so
+	 * as to leave enough 0 bits in the address to contain it. */
+	unsigned long minalign = max(MAX_PGTABLE_INDEX_SIZE + 1,
+				     HUGEPD_SHIFT_MASK + 1);
+	struct kmem_cache *new;
+
+	/* It would be nice if this was a BUILD_BUG_ON(), but at the
+	 * moment, gcc doesn't seem to recognize is_power_of_2 as a
+	 * constant expression, so so much for that. */
+	BUG_ON(!is_power_of_2(minalign));
+	BUG_ON((shift < 1) || (shift > MAX_PGTABLE_INDEX_SIZE));
+
+	if (PGT_CACHE(shift))
+		return; /* Already have a cache of this size */
+
+	align = max_t(unsigned long, align, minalign);
+	name = kasprintf(GFP_KERNEL, "pgtable-2^%d", shift);
+	new = kmem_cache_create(name, table_size, align, 0, ctor);
+	PGT_CACHE(shift) = new;
+
+	pr_debug("Allocated pgtable cache for order %d\n", shift);
+}
+
 
 void pgtable_cache_init(void)
 {
-	pgtable_cache[0] = kmem_cache_create(pgtable_cache_name[0], PGD_TABLE_SIZE, PGD_TABLE_SIZE, SLAB_PANIC, pgd_ctor);
-	pgtable_cache[1] = kmem_cache_create(pgtable_cache_name[1], PMD_TABLE_SIZE, PMD_TABLE_SIZE, SLAB_PANIC, pmd_ctor);
+	pgtable_cache_add(PGD_INDEX_SIZE, pgd_ctor);
+	pgtable_cache_add(PMD_INDEX_SIZE, pmd_ctor);
+	if (!PGT_CACHE(PGD_INDEX_SIZE) || !PGT_CACHE(PMD_INDEX_SIZE))
+		panic("Couldn't allocate pgtable caches");
+
+	/* In all current configs, when the PUD index exists it's the
+	 * same size as either the pgd or pmd index.  Verify that the
+	 * initialization above has also created a PUD cache.  This
+	 * will need re-examiniation if we add new possibilities for
+	 * the pagetable layout. */
+	BUG_ON(PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE));
 }
 
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 5973631..b9b1525 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -32,6 +32,7 @@
 #include <linux/pagemap.h>
 #include <linux/suspend.h>
 #include <linux/lmb.h>
+#include <linux/hugetlb.h>
 
 #include <asm/pgalloc.h>
 #include <asm/prom.h>
@@ -417,18 +418,26 @@
 
 void flush_dcache_icache_page(struct page *page)
 {
+#ifdef CONFIG_HUGETLB_PAGE
+	if (PageCompound(page)) {
+		flush_dcache_icache_hugepage(page);
+		return;
+	}
+#endif
 #ifdef CONFIG_BOOKE
-	void *start = kmap_atomic(page, KM_PPC_SYNC_ICACHE);
-	__flush_dcache_icache(start);
-	kunmap_atomic(start, KM_PPC_SYNC_ICACHE);
+	{
+		void *start = kmap_atomic(page, KM_PPC_SYNC_ICACHE);
+		__flush_dcache_icache(start);
+		kunmap_atomic(start, KM_PPC_SYNC_ICACHE);
+	}
 #elif defined(CONFIG_8xx) || defined(CONFIG_PPC64)
 	/* On 8xx there is no need to kmap since highmem is not supported */
 	__flush_dcache_icache(page_address(page)); 
 #else
 	__flush_dcache_icache_phys(page_to_pfn(page) << PAGE_SHIFT);
 #endif
-
 }
+
 void clear_user_page(void *page, unsigned long vaddr, struct page *pg)
 {
 	clear_page(page);
diff --git a/arch/powerpc/mm/mmu_context_hash64.c b/arch/powerpc/mm/mmu_context_hash64.c
index dbeb86a..b910d37 100644
--- a/arch/powerpc/mm/mmu_context_hash64.c
+++ b/arch/powerpc/mm/mmu_context_hash64.c
@@ -18,6 +18,7 @@
 #include <linux/mm.h>
 #include <linux/spinlock.h>
 #include <linux/idr.h>
+#include <linux/module.h>
 
 #include <asm/mmu_context.h>
 
@@ -32,7 +33,7 @@
 #define NO_CONTEXT	0
 #define MAX_CONTEXT	((1UL << 19) - 1)
 
-int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
+int __init_new_context(void)
 {
 	int index;
 	int err;
@@ -57,22 +58,41 @@
 		return -ENOMEM;
 	}
 
+	return index;
+}
+EXPORT_SYMBOL_GPL(__init_new_context);
+
+int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
+{
+	int index;
+
+	index = __init_new_context();
+	if (index < 0)
+		return index;
+
 	/* The old code would re-promote on fork, we don't do that
 	 * when using slices as it could cause problem promoting slices
 	 * that have been forced down to 4K
 	 */
 	if (slice_mm_new_context(mm))
 		slice_set_user_psize(mm, mmu_virtual_psize);
+	subpage_prot_init_new_context(mm);
 	mm->context.id = index;
 
 	return 0;
 }
 
-void destroy_context(struct mm_struct *mm)
+void __destroy_context(int context_id)
 {
 	spin_lock(&mmu_context_lock);
-	idr_remove(&mmu_context_idr, mm->context.id);
+	idr_remove(&mmu_context_idr, context_id);
 	spin_unlock(&mmu_context_lock);
+}
+EXPORT_SYMBOL_GPL(__destroy_context);
 
+void destroy_context(struct mm_struct *mm)
+{
+	__destroy_context(mm->context.id);
+	subpage_prot_free(mm);
 	mm->context.id = NO_CONTEXT;
 }
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index d2e5321..e27a990 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -98,21 +98,10 @@
 
 #ifdef CONFIG_PPC32
 
-struct tlbcam {
-	u32	MAS0;
-	u32	MAS1;
-	u32	MAS2;
-	u32	MAS3;
-	u32	MAS7;
-};
-
 extern void mapin_ram(void);
 extern int map_page(unsigned long va, phys_addr_t pa, int flags);
 extern void setbat(int index, unsigned long virt, phys_addr_t phys,
 		   unsigned int size, int flags);
-extern void settlbcam(int index, unsigned long virt, phys_addr_t phys,
-		      unsigned int size, int flags, unsigned int pid);
-extern void invalidate_tlbcam_entry(int index);
 
 extern int __map_without_bats;
 extern unsigned long ioremap_base;
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 5304093..99df697 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -49,12 +49,12 @@
 {
 	struct rcu_head	rcu;
 	unsigned int	index;
-	pgtable_free_t	tables[0];
+	unsigned long	tables[0];
 };
 
 #define PTE_FREELIST_SIZE \
 	((PAGE_SIZE - sizeof(struct pte_freelist_batch)) \
-	  / sizeof(pgtable_free_t))
+	  / sizeof(unsigned long))
 
 static void pte_free_smp_sync(void *arg)
 {
@@ -64,13 +64,13 @@
 /* This is only called when we are critically out of memory
  * (and fail to get a page in pte_free_tlb).
  */
-static void pgtable_free_now(pgtable_free_t pgf)
+static void pgtable_free_now(void *table, unsigned shift)
 {
 	pte_freelist_forced_free++;
 
 	smp_call_function(pte_free_smp_sync, NULL, 1);
 
-	pgtable_free(pgf);
+	pgtable_free(table, shift);
 }
 
 static void pte_free_rcu_callback(struct rcu_head *head)
@@ -79,8 +79,12 @@
 		container_of(head, struct pte_freelist_batch, rcu);
 	unsigned int i;
 
-	for (i = 0; i < batch->index; i++)
-		pgtable_free(batch->tables[i]);
+	for (i = 0; i < batch->index; i++) {
+		void *table = (void *)(batch->tables[i] & ~MAX_PGTABLE_INDEX_SIZE);
+		unsigned shift = batch->tables[i] & MAX_PGTABLE_INDEX_SIZE;
+
+		pgtable_free(table, shift);
+	}
 
 	free_page((unsigned long)batch);
 }
@@ -91,25 +95,28 @@
 	call_rcu(&batch->rcu, pte_free_rcu_callback);
 }
 
-void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf)
+void pgtable_free_tlb(struct mmu_gather *tlb, void *table, unsigned shift)
 {
 	/* This is safe since tlb_gather_mmu has disabled preemption */
 	struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur);
+	unsigned long pgf;
 
 	if (atomic_read(&tlb->mm->mm_users) < 2 ||
 	    cpumask_equal(mm_cpumask(tlb->mm), cpumask_of(smp_processor_id()))){
-		pgtable_free(pgf);
+		pgtable_free(table, shift);
 		return;
 	}
 
 	if (*batchp == NULL) {
 		*batchp = (struct pte_freelist_batch *)__get_free_page(GFP_ATOMIC);
 		if (*batchp == NULL) {
-			pgtable_free_now(pgf);
+			pgtable_free_now(table, shift);
 			return;
 		}
 		(*batchp)->index = 0;
 	}
+	BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
+	pgf = (unsigned long)table | shift;
 	(*batchp)->tables[(*batchp)->index++] = pgf;
 	if ((*batchp)->index == PTE_FREELIST_SIZE) {
 		pte_free_submit(*batchp);
diff --git a/arch/powerpc/mm/subpage-prot.c b/arch/powerpc/mm/subpage-prot.c
index 4cafc0c..a040b81 100644
--- a/arch/powerpc/mm/subpage-prot.c
+++ b/arch/powerpc/mm/subpage-prot.c
@@ -24,9 +24,9 @@
  * Also makes sure that the subpage_prot_table structure is
  * reinitialized for the next user.
  */
-void subpage_prot_free(pgd_t *pgd)
+void subpage_prot_free(struct mm_struct *mm)
 {
-	struct subpage_prot_table *spt = pgd_subpage_prot(pgd);
+	struct subpage_prot_table *spt = &mm->context.spt;
 	unsigned long i, j, addr;
 	u32 **p;
 
@@ -51,6 +51,13 @@
 	spt->maxaddr = 0;
 }
 
+void subpage_prot_init_new_context(struct mm_struct *mm)
+{
+	struct subpage_prot_table *spt = &mm->context.spt;
+
+	memset(spt, 0, sizeof(*spt));
+}
+
 static void hpte_flush_range(struct mm_struct *mm, unsigned long addr,
 			     int npages)
 {
@@ -87,7 +94,7 @@
 static void subpage_prot_clear(unsigned long addr, unsigned long len)
 {
 	struct mm_struct *mm = current->mm;
-	struct subpage_prot_table *spt = pgd_subpage_prot(mm->pgd);
+	struct subpage_prot_table *spt = &mm->context.spt;
 	u32 **spm, *spp;
 	int i, nw;
 	unsigned long next, limit;
@@ -136,7 +143,7 @@
 long sys_subpage_prot(unsigned long addr, unsigned long len, u32 __user *map)
 {
 	struct mm_struct *mm = current->mm;
-	struct subpage_prot_table *spt = pgd_subpage_prot(mm->pgd);
+	struct subpage_prot_table *spt = &mm->context.spt;
 	u32 **spm, *spp;
 	int i, nw;
 	unsigned long next, limit;
diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c
index 2b2f35f..282d930 100644
--- a/arch/powerpc/mm/tlb_hash64.c
+++ b/arch/powerpc/mm/tlb_hash64.c
@@ -53,11 +53,6 @@
 
 	i = batch->index;
 
-	/* We mask the address for the base page size. Huge pages will
-	 * have applied their own masking already
-	 */
-	addr &= PAGE_MASK;
-
 	/* Get page size (maybe move back to caller).
 	 *
 	 * NOTE: when using special 64K mappings in 4K environment like
@@ -75,6 +70,9 @@
 	} else
 		psize = pte_pagesize_index(mm, addr, pte);
 
+	/* Mask the address for the correct page size */
+	addr &= ~((1UL << mmu_psize_defs[psize].shift) - 1);
+
 	/* Build full vaddr */
 	if (!is_kernel_addr(addr)) {
 		ssize = user_segment_size(addr);
diff --git a/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c b/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c
index a6ce805..da9b20a 100644
--- a/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c
+++ b/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c
@@ -79,7 +79,7 @@
 }
 
 static struct irq_chip cpld_pic = {
-	.typename = " CPLD PIC ",
+	.name = " CPLD PIC ",
 	.mask = cpld_mask_irq,
 	.ack = cpld_mask_irq,
 	.unmask = cpld_unmask_irq,
@@ -132,7 +132,7 @@
 cpld_pic_host_map(struct irq_host *h, unsigned int virq,
 			     irq_hw_number_t hw)
 {
-	get_irq_desc(virq)->status |= IRQ_LEVEL;
+	irq_to_desc(virq)->status |= IRQ_LEVEL;
 	set_irq_chip_and_handler(virq, &cpld_pic, handle_level_irq);
 	return 0;
 }
diff --git a/arch/powerpc/platforms/52xx/Kconfig b/arch/powerpc/platforms/52xx/Kconfig
index 8b8e956..47ea1be 100644
--- a/arch/powerpc/platforms/52xx/Kconfig
+++ b/arch/powerpc/platforms/52xx/Kconfig
@@ -62,3 +62,8 @@
 	select GENERIC_GPIO
 	help
 	  Enable gpiolib support for mpc5200 based boards
+
+config PPC_MPC5200_LPBFIFO
+	tristate "MPC5200 LocalPlus bus FIFO driver"
+	depends on PPC_MPC52xx
+	select PPC_BESTCOMM_GEN_BD
diff --git a/arch/powerpc/platforms/52xx/Makefile b/arch/powerpc/platforms/52xx/Makefile
index bfd4f52..2bc8cd0 100644
--- a/arch/powerpc/platforms/52xx/Makefile
+++ b/arch/powerpc/platforms/52xx/Makefile
@@ -15,3 +15,4 @@
 endif
 
 obj-$(CONFIG_PPC_MPC5200_GPIO)	+= mpc52xx_gpio.o
+obj-$(CONFIG_PPC_MPC5200_LPBFIFO)	+= mpc52xx_lpbfifo.o
diff --git a/arch/powerpc/platforms/52xx/media5200.c b/arch/powerpc/platforms/52xx/media5200.c
index 68e4f16..cc0c854 100644
--- a/arch/powerpc/platforms/52xx/media5200.c
+++ b/arch/powerpc/platforms/52xx/media5200.c
@@ -74,7 +74,7 @@
 }
 
 static struct irq_chip media5200_irq_chip = {
-	.typename = "Media5200 FPGA",
+	.name = "Media5200 FPGA",
 	.unmask = media5200_irq_unmask,
 	.mask = media5200_irq_mask,
 	.mask_ack = media5200_irq_mask,
@@ -114,7 +114,7 @@
 static int media5200_irq_map(struct irq_host *h, unsigned int virq,
 			     irq_hw_number_t hw)
 {
-	struct irq_desc *desc = get_irq_desc(virq);
+	struct irq_desc *desc = irq_to_desc(virq);
 
 	pr_debug("%s: h=%p, virq=%i, hwirq=%i\n", __func__, h, virq, (int)hw);
 	set_irq_chip_data(virq, &media5200_irq);
@@ -127,7 +127,7 @@
 }
 
 static int media5200_irq_xlate(struct irq_host *h, struct device_node *ct,
-				 u32 *intspec, unsigned int intsize,
+				 const u32 *intspec, unsigned int intsize,
 				 irq_hw_number_t *out_hwirq,
 				 unsigned int *out_flags)
 {
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
index bfbcd41..6f8ebe1 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
@@ -16,8 +16,14 @@
  * output signals or measure input signals.
  *
  * This driver supports the GPIO and IRQ controller functions of the GPT
- * device.  Timer functions are not yet supported, nor is the watchdog
- * timer.
+ * device.  Timer functions are not yet supported.
+ *
+ * The timer gpt0 can be used as watchdog (wdt).  If the wdt mode is used,
+ * this prevents the use of any gpt0 gpt function (i.e. they will fail with
+ * -EBUSY).  Thus, the safety wdt function always has precedence over the gpt
+ * function.  If the kernel has been compiled with CONFIG_WATCHDOG_NOWAYOUT,
+ * this means that gpt0 is locked in wdt mode until the next reboot - this
+ * may be a requirement in safety applications.
  *
  * To use the GPIO function, the following two properties must be added
  * to the device tree node for the gpt device (typically in the .dts file
@@ -46,17 +52,24 @@
  * the output mode.  This driver does not change the output mode setting.
  */
 
+#include <linux/device.h>
 #include <linux/irq.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
 #include <linux/of.h>
 #include <linux/of_platform.h>
 #include <linux/of_gpio.h>
 #include <linux/kernel.h>
+#include <linux/watchdog.h>
+#include <linux/miscdevice.h>
+#include <linux/uaccess.h>
+#include <asm/div64.h>
 #include <asm/mpc52xx.h>
 
 MODULE_DESCRIPTION("Freescale MPC52xx gpt driver");
-MODULE_AUTHOR("Sascha Hauer, Grant Likely");
+MODULE_AUTHOR("Sascha Hauer, Grant Likely, Albrecht Dreß");
 MODULE_LICENSE("GPL");
 
 /**
@@ -66,18 +79,27 @@
  * @lock: spinlock to coordinate between different functions.
  * @of_gc: of_gpio_chip instance structure; used when GPIO is enabled
  * @irqhost: Pointer to irq_host instance; used when IRQ mode is supported
+ * @wdt_mode: only relevant for gpt0: bit 0 (MPC52xx_GPT_CAN_WDT) indicates
+ *   if the gpt may be used as wdt, bit 1 (MPC52xx_GPT_IS_WDT) indicates
+ *   if the timer is actively used as wdt which blocks gpt functions
  */
 struct mpc52xx_gpt_priv {
+	struct list_head list;		/* List of all GPT devices */
 	struct device *dev;
 	struct mpc52xx_gpt __iomem *regs;
 	spinlock_t lock;
 	struct irq_host *irqhost;
+	u32 ipb_freq;
+	u8 wdt_mode;
 
 #if defined(CONFIG_GPIOLIB)
 	struct of_gpio_chip of_gc;
 #endif
 };
 
+LIST_HEAD(mpc52xx_gpt_list);
+DEFINE_MUTEX(mpc52xx_gpt_list_mutex);
+
 #define MPC52xx_GPT_MODE_MS_MASK	(0x07)
 #define MPC52xx_GPT_MODE_MS_IC		(0x01)
 #define MPC52xx_GPT_MODE_MS_OC		(0x02)
@@ -88,15 +110,25 @@
 #define MPC52xx_GPT_MODE_GPIO_OUT_LOW	(0x20)
 #define MPC52xx_GPT_MODE_GPIO_OUT_HIGH	(0x30)
 
+#define MPC52xx_GPT_MODE_COUNTER_ENABLE	(0x1000)
+#define MPC52xx_GPT_MODE_CONTINUOUS	(0x0400)
+#define MPC52xx_GPT_MODE_OPEN_DRAIN	(0x0200)
 #define MPC52xx_GPT_MODE_IRQ_EN		(0x0100)
+#define MPC52xx_GPT_MODE_WDT_EN		(0x8000)
 
 #define MPC52xx_GPT_MODE_ICT_MASK	(0x030000)
 #define MPC52xx_GPT_MODE_ICT_RISING	(0x010000)
 #define MPC52xx_GPT_MODE_ICT_FALLING	(0x020000)
 #define MPC52xx_GPT_MODE_ICT_TOGGLE	(0x030000)
 
+#define MPC52xx_GPT_MODE_WDT_PING	(0xa5)
+
 #define MPC52xx_GPT_STATUS_IRQMASK	(0x000f)
 
+#define MPC52xx_GPT_CAN_WDT		(1 << 0)
+#define MPC52xx_GPT_IS_WDT		(1 << 1)
+
+
 /* ---------------------------------------------------------------------
  * Cascaded interrupt controller hooks
  */
@@ -149,7 +181,7 @@
 }
 
 static struct irq_chip mpc52xx_gpt_irq_chip = {
-	.typename = "MPC52xx GPT",
+	.name = "MPC52xx GPT",
 	.unmask = mpc52xx_gpt_irq_unmask,
 	.mask = mpc52xx_gpt_irq_mask,
 	.ack = mpc52xx_gpt_irq_ack,
@@ -182,7 +214,7 @@
 }
 
 static int mpc52xx_gpt_irq_xlate(struct irq_host *h, struct device_node *ct,
-				 u32 *intspec, unsigned int intsize,
+				 const u32 *intspec, unsigned int intsize,
 				 irq_hw_number_t *out_hwirq,
 				 unsigned int *out_flags)
 {
@@ -190,7 +222,7 @@
 
 	dev_dbg(gpt->dev, "%s: flags=%i\n", __func__, intspec[0]);
 
-	if ((intsize < 1) || (intspec[0] < 1) || (intspec[0] > 3)) {
+	if ((intsize < 1) || (intspec[0] > 3)) {
 		dev_err(gpt->dev, "bad irq specifier in %s\n", ct->full_name);
 		return -EINVAL;
 	}
@@ -211,13 +243,11 @@
 {
 	int cascade_virq;
 	unsigned long flags;
-
-	/* Only setup cascaded IRQ if device tree claims the GPT is
-	 * an interrupt controller */
-	if (!of_find_property(node, "interrupt-controller", NULL))
-		return;
+	u32 mode;
 
 	cascade_virq = irq_of_parse_and_map(node, 0);
+	if (!cascade_virq)
+		return;
 
 	gpt->irqhost = irq_alloc_host(node, IRQ_HOST_MAP_LINEAR, 1,
 				      &mpc52xx_gpt_irq_ops, -1);
@@ -227,14 +257,16 @@
 	}
 
 	gpt->irqhost->host_data = gpt;
-
 	set_irq_data(cascade_virq, gpt);
 	set_irq_chained_handler(cascade_virq, mpc52xx_gpt_irq_cascade);
 
-	/* Set to Input Capture mode */
+	/* If the GPT is currently disabled, then change it to be in Input
+	 * Capture mode.  If the mode is non-zero, then the pin could be
+	 * already in use for something. */
 	spin_lock_irqsave(&gpt->lock, flags);
-	clrsetbits_be32(&gpt->regs->mode, MPC52xx_GPT_MODE_MS_MASK,
-			MPC52xx_GPT_MODE_MS_IC);
+	mode = in_be32(&gpt->regs->mode);
+	if ((mode & MPC52xx_GPT_MODE_MS_MASK) == 0)
+		out_be32(&gpt->regs->mode, mode | MPC52xx_GPT_MODE_MS_IC);
 	spin_unlock_irqrestore(&gpt->lock, flags);
 
 	dev_dbg(gpt->dev, "%s() complete. virq=%i\n", __func__, cascade_virq);
@@ -335,6 +367,354 @@
 mpc52xx_gpt_gpio_setup(struct mpc52xx_gpt_priv *p, struct device_node *np) { }
 #endif /* defined(CONFIG_GPIOLIB) */
 
+/***********************************************************************
+ * Timer API
+ */
+
+/**
+ * mpc52xx_gpt_from_irq - Return the GPT device associated with an IRQ number
+ * @irq: irq of timer.
+ */
+struct mpc52xx_gpt_priv *mpc52xx_gpt_from_irq(int irq)
+{
+	struct mpc52xx_gpt_priv *gpt;
+	struct list_head *pos;
+
+	/* Iterate over the list of timers looking for a matching device */
+	mutex_lock(&mpc52xx_gpt_list_mutex);
+	list_for_each(pos, &mpc52xx_gpt_list) {
+		gpt = container_of(pos, struct mpc52xx_gpt_priv, list);
+		if (gpt->irqhost && irq == irq_linear_revmap(gpt->irqhost, 0)) {
+			mutex_unlock(&mpc52xx_gpt_list_mutex);
+			return gpt;
+		}
+	}
+	mutex_unlock(&mpc52xx_gpt_list_mutex);
+
+	return NULL;
+}
+EXPORT_SYMBOL(mpc52xx_gpt_from_irq);
+
+static int mpc52xx_gpt_do_start(struct mpc52xx_gpt_priv *gpt, u64 period,
+				int continuous, int as_wdt)
+{
+	u32 clear, set;
+	u64 clocks;
+	u32 prescale;
+	unsigned long flags;
+
+	clear = MPC52xx_GPT_MODE_MS_MASK | MPC52xx_GPT_MODE_CONTINUOUS;
+	set = MPC52xx_GPT_MODE_MS_GPIO | MPC52xx_GPT_MODE_COUNTER_ENABLE;
+	if (as_wdt) {
+		clear |= MPC52xx_GPT_MODE_IRQ_EN;
+		set |= MPC52xx_GPT_MODE_WDT_EN;
+	} else if (continuous)
+		set |= MPC52xx_GPT_MODE_CONTINUOUS;
+
+	/* Determine the number of clocks in the requested period.  64 bit
+	 * arithmatic is done here to preserve the precision until the value
+	 * is scaled back down into the u32 range.  Period is in 'ns', bus
+	 * frequency is in Hz. */
+	clocks = period * (u64)gpt->ipb_freq;
+	do_div(clocks, 1000000000); /* Scale it down to ns range */
+
+	/* This device cannot handle a clock count greater than 32 bits */
+	if (clocks > 0xffffffff)
+		return -EINVAL;
+
+	/* Calculate the prescaler and count values from the clocks value.
+	 * 'clocks' is the number of clock ticks in the period.  The timer
+	 * has 16 bit precision and a 16 bit prescaler.  Prescaler is
+	 * calculated by integer dividing the clocks by 0x10000 (shifting
+	 * down 16 bits) to obtain the smallest possible divisor for clocks
+	 * to get a 16 bit count value.
+	 *
+	 * Note: the prescale register is '1' based, not '0' based.  ie. a
+	 * value of '1' means divide the clock by one.  0xffff divides the
+	 * clock by 0xffff.  '0x0000' does not divide by zero, but wraps
+	 * around and divides by 0x10000.  That is why prescale must be
+	 * a u32 variable, not a u16, for this calculation. */
+	prescale = (clocks >> 16) + 1;
+	do_div(clocks, prescale);
+	if (clocks > 0xffff) {
+		pr_err("calculation error; prescale:%x clocks:%llx\n",
+		       prescale, clocks);
+		return -EINVAL;
+	}
+
+	/* Set and enable the timer, reject an attempt to use a wdt as gpt */
+	spin_lock_irqsave(&gpt->lock, flags);
+	if (as_wdt)
+		gpt->wdt_mode |= MPC52xx_GPT_IS_WDT;
+	else if ((gpt->wdt_mode & MPC52xx_GPT_IS_WDT) != 0) {
+		spin_unlock_irqrestore(&gpt->lock, flags);
+		return -EBUSY;
+	}
+	out_be32(&gpt->regs->count, prescale << 16 | clocks);
+	clrsetbits_be32(&gpt->regs->mode, clear, set);
+	spin_unlock_irqrestore(&gpt->lock, flags);
+
+	return 0;
+}
+
+/**
+ * mpc52xx_gpt_start_timer - Set and enable the GPT timer
+ * @gpt: Pointer to gpt private data structure
+ * @period: period of timer in ns; max. ~130s @ 33MHz IPB clock
+ * @continuous: set to 1 to make timer continuous free running
+ *
+ * An interrupt will be generated every time the timer fires
+ */
+int mpc52xx_gpt_start_timer(struct mpc52xx_gpt_priv *gpt, u64 period,
+                            int continuous)
+{
+	return mpc52xx_gpt_do_start(gpt, period, continuous, 0);
+}
+EXPORT_SYMBOL(mpc52xx_gpt_start_timer);
+
+/**
+ * mpc52xx_gpt_stop_timer - Stop a gpt
+ * @gpt: Pointer to gpt private data structure
+ *
+ * Returns an error if attempting to stop a wdt
+ */
+int mpc52xx_gpt_stop_timer(struct mpc52xx_gpt_priv *gpt)
+{
+	unsigned long flags;
+
+	/* reject the operation if the timer is used as watchdog (gpt 0 only) */
+	spin_lock_irqsave(&gpt->lock, flags);
+	if ((gpt->wdt_mode & MPC52xx_GPT_IS_WDT) != 0) {
+		spin_unlock_irqrestore(&gpt->lock, flags);
+		return -EBUSY;
+	}
+
+	clrbits32(&gpt->regs->mode, MPC52xx_GPT_MODE_COUNTER_ENABLE);
+	spin_unlock_irqrestore(&gpt->lock, flags);
+	return 0;
+}
+EXPORT_SYMBOL(mpc52xx_gpt_stop_timer);
+
+/**
+ * mpc52xx_gpt_timer_period - Read the timer period
+ * @gpt: Pointer to gpt private data structure
+ *
+ * Returns the timer period in ns
+ */
+u64 mpc52xx_gpt_timer_period(struct mpc52xx_gpt_priv *gpt)
+{
+	u64 period;
+	u64 prescale;
+	unsigned long flags;
+
+	spin_lock_irqsave(&gpt->lock, flags);
+	period = in_be32(&gpt->regs->count);
+	spin_unlock_irqrestore(&gpt->lock, flags);
+
+	prescale = period >> 16;
+	period &= 0xffff;
+	if (prescale == 0)
+		prescale = 0x10000;
+	period = period * prescale * 1000000000ULL;
+	do_div(period, (u64)gpt->ipb_freq);
+	return period;
+}
+EXPORT_SYMBOL(mpc52xx_gpt_timer_period);
+
+#if defined(CONFIG_MPC5200_WDT)
+/***********************************************************************
+ * Watchdog API for gpt0
+ */
+
+#define WDT_IDENTITY	    "mpc52xx watchdog on GPT0"
+
+/* wdt_is_active stores wether or not the /dev/watchdog device is opened */
+static unsigned long wdt_is_active;
+
+/* wdt-capable gpt */
+static struct mpc52xx_gpt_priv *mpc52xx_gpt_wdt;
+
+/* low-level wdt functions */
+static inline void mpc52xx_gpt_wdt_ping(struct mpc52xx_gpt_priv *gpt_wdt)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&gpt_wdt->lock, flags);
+	out_8((u8 *) &gpt_wdt->regs->mode, MPC52xx_GPT_MODE_WDT_PING);
+	spin_unlock_irqrestore(&gpt_wdt->lock, flags);
+}
+
+/* wdt misc device api */
+static ssize_t mpc52xx_wdt_write(struct file *file, const char __user *data,
+				 size_t len, loff_t *ppos)
+{
+	struct mpc52xx_gpt_priv *gpt_wdt = file->private_data;
+	mpc52xx_gpt_wdt_ping(gpt_wdt);
+	return 0;
+}
+
+static struct watchdog_info mpc5200_wdt_info = {
+	.options	= WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING,
+	.identity	= WDT_IDENTITY,
+};
+
+static long mpc52xx_wdt_ioctl(struct file *file, unsigned int cmd,
+			      unsigned long arg)
+{
+	struct mpc52xx_gpt_priv *gpt_wdt = file->private_data;
+	int __user *data = (int __user *)arg;
+	int timeout;
+	u64 real_timeout;
+	int ret = 0;
+
+	switch (cmd) {
+	case WDIOC_GETSUPPORT:
+		ret = copy_to_user(data, &mpc5200_wdt_info,
+				   sizeof(mpc5200_wdt_info));
+		if (ret)
+			ret = -EFAULT;
+		break;
+
+	case WDIOC_GETSTATUS:
+	case WDIOC_GETBOOTSTATUS:
+		ret = put_user(0, data);
+		break;
+
+	case WDIOC_KEEPALIVE:
+		mpc52xx_gpt_wdt_ping(gpt_wdt);
+		break;
+
+	case WDIOC_SETTIMEOUT:
+		ret = get_user(timeout, data);
+		if (ret)
+			break;
+		real_timeout = (u64) timeout * 1000000000ULL;
+		ret = mpc52xx_gpt_do_start(gpt_wdt, real_timeout, 0, 1);
+		if (ret)
+			break;
+		/* fall through and return the timeout */
+
+	case WDIOC_GETTIMEOUT:
+		/* we need to round here as to avoid e.g. the following
+		 * situation:
+		 * - timeout requested is 1 second;
+		 * - real timeout @33MHz is 999997090ns
+		 * - the int divide by 10^9 will return 0.
+		 */
+		real_timeout =
+			mpc52xx_gpt_timer_period(gpt_wdt) + 500000000ULL;
+		do_div(real_timeout, 1000000000ULL);
+		timeout = (int) real_timeout;
+		ret = put_user(timeout, data);
+		break;
+
+	default:
+		ret = -ENOTTY;
+	}
+	return ret;
+}
+
+static int mpc52xx_wdt_open(struct inode *inode, struct file *file)
+{
+	int ret;
+
+	/* sanity check */
+	if (!mpc52xx_gpt_wdt)
+		return -ENODEV;
+
+	/* /dev/watchdog can only be opened once */
+	if (test_and_set_bit(0, &wdt_is_active))
+		return -EBUSY;
+
+	/* Set and activate the watchdog with 30 seconds timeout */
+	ret = mpc52xx_gpt_do_start(mpc52xx_gpt_wdt, 30ULL * 1000000000ULL,
+				   0, 1);
+	if (ret) {
+		clear_bit(0, &wdt_is_active);
+		return ret;
+	}
+
+	file->private_data = mpc52xx_gpt_wdt;
+	return nonseekable_open(inode, file);
+}
+
+static int mpc52xx_wdt_release(struct inode *inode, struct file *file)
+{
+	/* note: releasing the wdt in NOWAYOUT-mode does not stop it */
+#if !defined(CONFIG_WATCHDOG_NOWAYOUT)
+	struct mpc52xx_gpt_priv *gpt_wdt = file->private_data;
+	unsigned long flags;
+
+	spin_lock_irqsave(&gpt_wdt->lock, flags);
+	clrbits32(&gpt_wdt->regs->mode,
+		  MPC52xx_GPT_MODE_COUNTER_ENABLE | MPC52xx_GPT_MODE_WDT_EN);
+	gpt_wdt->wdt_mode &= ~MPC52xx_GPT_IS_WDT;
+	spin_unlock_irqrestore(&gpt_wdt->lock, flags);
+#endif
+	clear_bit(0, &wdt_is_active);
+	return 0;
+}
+
+
+static const struct file_operations mpc52xx_wdt_fops = {
+	.owner		= THIS_MODULE,
+	.llseek		= no_llseek,
+	.write		= mpc52xx_wdt_write,
+	.unlocked_ioctl = mpc52xx_wdt_ioctl,
+	.open		= mpc52xx_wdt_open,
+	.release	= mpc52xx_wdt_release,
+};
+
+static struct miscdevice mpc52xx_wdt_miscdev = {
+	.minor		= WATCHDOG_MINOR,
+	.name		= "watchdog",
+	.fops		= &mpc52xx_wdt_fops,
+};
+
+static int __devinit mpc52xx_gpt_wdt_init(void)
+{
+	int err;
+
+	/* try to register the watchdog misc device */
+	err = misc_register(&mpc52xx_wdt_miscdev);
+	if (err)
+		pr_err("%s: cannot register watchdog device\n", WDT_IDENTITY);
+	else
+		pr_info("%s: watchdog device registered\n", WDT_IDENTITY);
+	return err;
+}
+
+static int mpc52xx_gpt_wdt_setup(struct mpc52xx_gpt_priv *gpt,
+				 const u32 *period)
+{
+	u64 real_timeout;
+
+	/* remember the gpt for the wdt operation */
+	mpc52xx_gpt_wdt = gpt;
+
+	/* configure the wdt if the device tree contained a timeout */
+	if (!period || *period == 0)
+		return 0;
+
+	real_timeout = (u64) *period * 1000000000ULL;
+	if (mpc52xx_gpt_do_start(gpt, real_timeout, 0, 1))
+		dev_warn(gpt->dev, "starting as wdt failed\n");
+	else
+		dev_info(gpt->dev, "watchdog set to %us timeout\n", *period);
+	return 0;
+}
+
+#else
+
+static int __devinit mpc52xx_gpt_wdt_init(void)
+{
+	return 0;
+}
+
+#define mpc52xx_gpt_wdt_setup(x, y)		(0)
+
+#endif	/*  CONFIG_MPC5200_WDT	*/
+
 /* ---------------------------------------------------------------------
  * of_platform bus binding code
  */
@@ -349,6 +729,7 @@
 
 	spin_lock_init(&gpt->lock);
 	gpt->dev = &ofdev->dev;
+	gpt->ipb_freq = mpc5xxx_get_bus_frequency(ofdev->node);
 	gpt->regs = of_iomap(ofdev->node, 0);
 	if (!gpt->regs) {
 		kfree(gpt);
@@ -360,6 +741,26 @@
 	mpc52xx_gpt_gpio_setup(gpt, ofdev->node);
 	mpc52xx_gpt_irq_setup(gpt, ofdev->node);
 
+	mutex_lock(&mpc52xx_gpt_list_mutex);
+	list_add(&gpt->list, &mpc52xx_gpt_list);
+	mutex_unlock(&mpc52xx_gpt_list_mutex);
+
+	/* check if this device could be a watchdog */
+	if (of_get_property(ofdev->node, "fsl,has-wdt", NULL) ||
+	    of_get_property(ofdev->node, "has-wdt", NULL)) {
+		const u32 *on_boot_wdt;
+
+		gpt->wdt_mode = MPC52xx_GPT_CAN_WDT;
+		on_boot_wdt = of_get_property(ofdev->node, "fsl,wdt-on-boot",
+					      NULL);
+		if (on_boot_wdt) {
+			dev_info(gpt->dev, "used as watchdog\n");
+			gpt->wdt_mode |= MPC52xx_GPT_IS_WDT;
+		} else
+			dev_info(gpt->dev, "can function as watchdog\n");
+		mpc52xx_gpt_wdt_setup(gpt, on_boot_wdt);
+	}
+
 	return 0;
 }
 
@@ -394,3 +795,4 @@
 
 /* Make sure GPIOs and IRQs get set up before anyone tries to use them */
 subsys_initcall(mpc52xx_gpt_init);
+device_initcall(mpc52xx_gpt_wdt_init);
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c b/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c
new file mode 100644
index 0000000..929d017
--- /dev/null
+++ b/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c
@@ -0,0 +1,560 @@
+/*
+ * LocalPlus Bus FIFO driver for the Freescale MPC52xx.
+ *
+ * Copyright (C) 2009 Secret Lab Technologies Ltd.
+ *
+ * This file is released under the GPLv2
+ *
+ * Todo:
+ * - Add support for multiple requests to be queued.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/spinlock.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/mpc52xx.h>
+#include <asm/time.h>
+
+#include <sysdev/bestcomm/bestcomm.h>
+#include <sysdev/bestcomm/bestcomm_priv.h>
+#include <sysdev/bestcomm/gen_bd.h>
+
+MODULE_AUTHOR("Grant Likely <grant.likely@secretlab.ca>");
+MODULE_DESCRIPTION("MPC5200 LocalPlus FIFO device driver");
+MODULE_LICENSE("GPL");
+
+#define LPBFIFO_REG_PACKET_SIZE		(0x00)
+#define LPBFIFO_REG_START_ADDRESS	(0x04)
+#define LPBFIFO_REG_CONTROL		(0x08)
+#define LPBFIFO_REG_ENABLE		(0x0C)
+#define LPBFIFO_REG_BYTES_DONE_STATUS	(0x14)
+#define LPBFIFO_REG_FIFO_DATA		(0x40)
+#define LPBFIFO_REG_FIFO_STATUS		(0x44)
+#define LPBFIFO_REG_FIFO_CONTROL	(0x48)
+#define LPBFIFO_REG_FIFO_ALARM		(0x4C)
+
+struct mpc52xx_lpbfifo {
+	struct device *dev;
+	phys_addr_t regs_phys;
+	void __iomem *regs;
+	int irq;
+	spinlock_t lock;
+
+	struct bcom_task *bcom_tx_task;
+	struct bcom_task *bcom_rx_task;
+	struct bcom_task *bcom_cur_task;
+
+	/* Current state data */
+	struct mpc52xx_lpbfifo_request *req;
+	int dma_irqs_enabled;
+};
+
+/* The MPC5200 has only one fifo, so only need one instance structure */
+static struct mpc52xx_lpbfifo lpbfifo;
+
+/**
+ * mpc52xx_lpbfifo_kick - Trigger the next block of data to be transfered
+ */
+static void mpc52xx_lpbfifo_kick(struct mpc52xx_lpbfifo_request *req)
+{
+	size_t transfer_size = req->size - req->pos;
+	struct bcom_bd *bd;
+	void __iomem *reg;
+	u32 *data;
+	int i;
+	int bit_fields;
+	int dma = !(req->flags & MPC52XX_LPBFIFO_FLAG_NO_DMA);
+	int write = req->flags & MPC52XX_LPBFIFO_FLAG_WRITE;
+	int poll_dma = req->flags & MPC52XX_LPBFIFO_FLAG_POLL_DMA;
+
+	/* Set and clear the reset bits; is good practice in User Manual */
+	out_be32(lpbfifo.regs + LPBFIFO_REG_ENABLE, 0x01010000);
+
+	/* set master enable bit */
+	out_be32(lpbfifo.regs + LPBFIFO_REG_ENABLE, 0x00000001);
+	if (!dma) {
+		/* While the FIFO can be setup for transfer sizes as large as
+		 * 16M-1, the FIFO itself is only 512 bytes deep and it does
+		 * not generate interrupts for FIFO full events (only transfer
+		 * complete will raise an IRQ).  Therefore when not using
+		 * Bestcomm to drive the FIFO it needs to either be polled, or
+		 * transfers need to constrained to the size of the fifo.
+		 *
+		 * This driver restricts the size of the transfer
+		 */
+		if (transfer_size > 512)
+			transfer_size = 512;
+
+		/* Load the FIFO with data */
+		if (write) {
+			reg = lpbfifo.regs + LPBFIFO_REG_FIFO_DATA;
+			data = req->data + req->pos;
+			for (i = 0; i < transfer_size; i += 4)
+				out_be32(reg, *data++);
+		}
+
+		/* Unmask both error and completion irqs */
+		out_be32(lpbfifo.regs + LPBFIFO_REG_ENABLE, 0x00000301);
+	} else {
+		/* Choose the correct direction
+		 *
+		 * Configure the watermarks so DMA will always complete correctly.
+		 * It may be worth experimenting with the ALARM value to see if
+		 * there is a performance impacit.  However, if it is wrong there
+		 * is a risk of DMA not transferring the last chunk of data
+		 */
+		if (write) {
+			out_be32(lpbfifo.regs + LPBFIFO_REG_FIFO_ALARM, 0x1e4);
+			out_8(lpbfifo.regs + LPBFIFO_REG_FIFO_CONTROL, 7);
+			lpbfifo.bcom_cur_task = lpbfifo.bcom_tx_task;
+		} else {
+			out_be32(lpbfifo.regs + LPBFIFO_REG_FIFO_ALARM, 0x1ff);
+			out_8(lpbfifo.regs + LPBFIFO_REG_FIFO_CONTROL, 0);
+			lpbfifo.bcom_cur_task = lpbfifo.bcom_rx_task;
+
+			if (poll_dma) {
+				if (lpbfifo.dma_irqs_enabled) {
+					disable_irq(bcom_get_task_irq(lpbfifo.bcom_rx_task));
+					lpbfifo.dma_irqs_enabled = 0;
+				}
+			} else {
+				if (!lpbfifo.dma_irqs_enabled) {
+					enable_irq(bcom_get_task_irq(lpbfifo.bcom_rx_task));
+					lpbfifo.dma_irqs_enabled = 1;
+				}
+			}
+		}
+
+		bd = bcom_prepare_next_buffer(lpbfifo.bcom_cur_task);
+		bd->status = transfer_size;
+		if (!write) {
+			/*
+			 * In the DMA read case, the DMA doesn't complete,
+			 * possibly due to incorrect watermarks in the ALARM
+			 * and CONTROL regs. For now instead of trying to
+			 * determine the right watermarks that will make this
+			 * work, just increase the number of bytes the FIFO is
+			 * expecting.
+			 *
+			 * When submitting another operation, the FIFO will get
+			 * reset, so the condition of the FIFO waiting for a
+			 * non-existent 4 bytes will get cleared.
+			 */
+			transfer_size += 4; /* BLECH! */
+		}
+		bd->data[0] = req->data_phys + req->pos;
+		bcom_submit_next_buffer(lpbfifo.bcom_cur_task, NULL);
+
+		/* error irq & master enabled bit */
+		bit_fields = 0x00000201;
+
+		/* Unmask irqs */
+		if (write && (!poll_dma))
+			bit_fields |= 0x00000100; /* completion irq too */
+		out_be32(lpbfifo.regs + LPBFIFO_REG_ENABLE, bit_fields);
+	}
+
+	/* Set transfer size, width, chip select and READ mode */
+	out_be32(lpbfifo.regs + LPBFIFO_REG_START_ADDRESS,
+		 req->offset + req->pos);
+	out_be32(lpbfifo.regs + LPBFIFO_REG_PACKET_SIZE, transfer_size);
+
+	bit_fields = req->cs << 24 | 0x000008;
+	if (!write)
+		bit_fields |= 0x010000; /* read mode */
+	out_be32(lpbfifo.regs + LPBFIFO_REG_CONTROL, bit_fields);
+
+	/* Kick it off */
+	out_8(lpbfifo.regs + LPBFIFO_REG_PACKET_SIZE, 0x01);
+	if (dma)
+		bcom_enable(lpbfifo.bcom_cur_task);
+}
+
+/**
+ * mpc52xx_lpbfifo_irq - IRQ handler for LPB FIFO
+ *
+ * On transmit, the dma completion irq triggers before the fifo completion
+ * triggers.  Handle the dma completion here instead of the LPB FIFO Bestcomm
+ * task completion irq becuase everyting is not really done until the LPB FIFO
+ * completion irq triggers.
+ *
+ * In other words:
+ * For DMA, on receive, the "Fat Lady" is the bestcom completion irq. on
+ * transmit, the fifo completion irq is the "Fat Lady". The opera (or in this
+ * case the DMA/FIFO operation) is not finished until the "Fat Lady" sings.
+ *
+ * Reasons for entering this routine:
+ * 1) PIO mode rx and tx completion irq
+ * 2) DMA interrupt mode tx completion irq
+ * 3) DMA polled mode tx
+ *
+ * Exit conditions:
+ * 1) Transfer aborted
+ * 2) FIFO complete without DMA; more data to do
+ * 3) FIFO complete without DMA; all data transfered
+ * 4) FIFO complete using DMA
+ *
+ * Condition 1 can occur regardless of whether or not DMA is used.
+ * It requires executing the callback to report the error and exiting
+ * immediately.
+ *
+ * Condition 2 requires programming the FIFO with the next block of data
+ *
+ * Condition 3 requires executing the callback to report completion
+ *
+ * Condition 4 means the same as 3, except that we also retrieve the bcom
+ * buffer so DMA doesn't get clogged up.
+ *
+ * To make things trickier, the spinlock must be dropped before
+ * executing the callback, otherwise we could end up with a deadlock
+ * or nested spinlock condition.  The out path is non-trivial, so
+ * extra fiddling is done to make sure all paths lead to the same
+ * outbound code.
+ */
+static irqreturn_t mpc52xx_lpbfifo_irq(int irq, void *dev_id)
+{
+	struct mpc52xx_lpbfifo_request *req;
+	u32 status = in_8(lpbfifo.regs + LPBFIFO_REG_BYTES_DONE_STATUS);
+	void __iomem *reg;
+	u32 *data;
+	int count, i;
+	int do_callback = 0;
+	u32 ts;
+	unsigned long flags;
+	int dma, write, poll_dma;
+
+	spin_lock_irqsave(&lpbfifo.lock, flags);
+	ts = get_tbl();
+
+	req = lpbfifo.req;
+	if (!req) {
+		spin_unlock_irqrestore(&lpbfifo.lock, flags);
+		pr_err("bogus LPBFIFO IRQ\n");
+		return IRQ_HANDLED;
+	}
+
+	dma = !(req->flags & MPC52XX_LPBFIFO_FLAG_NO_DMA);
+	write = req->flags & MPC52XX_LPBFIFO_FLAG_WRITE;
+	poll_dma = req->flags & MPC52XX_LPBFIFO_FLAG_POLL_DMA;
+
+	if (dma && !write) {
+		spin_unlock_irqrestore(&lpbfifo.lock, flags);
+		pr_err("bogus LPBFIFO IRQ (dma and not writting)\n");
+		return IRQ_HANDLED;
+	}
+
+	if ((status & 0x01) == 0) {
+		goto out;
+	}
+
+	/* check abort bit */
+	if (status & 0x10) {
+		out_be32(lpbfifo.regs + LPBFIFO_REG_ENABLE, 0x01010000);
+		do_callback = 1;
+		goto out;
+	}
+
+	/* Read result from hardware */
+	count = in_be32(lpbfifo.regs + LPBFIFO_REG_BYTES_DONE_STATUS);
+	count &= 0x00ffffff;
+
+	if (!dma && !write) {
+		/* copy the data out of the FIFO */
+		reg = lpbfifo.regs + LPBFIFO_REG_FIFO_DATA;
+		data = req->data + req->pos;
+		for (i = 0; i < count; i += 4)
+			*data++ = in_be32(reg);
+	}
+
+	/* Update transfer position and count */
+	req->pos += count;
+
+	/* Decide what to do next */
+	if (req->size - req->pos)
+		mpc52xx_lpbfifo_kick(req); /* more work to do */
+	else
+		do_callback = 1;
+
+ out:
+	/* Clear the IRQ */
+	out_8(lpbfifo.regs + LPBFIFO_REG_BYTES_DONE_STATUS, 0x01);
+
+	if (dma && (status & 0x11)) {
+		/*
+		 * Count the DMA as complete only when the FIFO completion
+		 * status or abort bits are set.
+		 *
+		 * (status & 0x01) should always be the case except sometimes
+		 * when using polled DMA.
+		 *
+		 * (status & 0x10) {transfer aborted}: This case needs more
+		 * testing.
+		 */
+		bcom_retrieve_buffer(lpbfifo.bcom_cur_task, &status, NULL);
+	}
+	req->last_byte = ((u8 *)req->data)[req->size - 1];
+
+	/* When the do_callback flag is set; it means the transfer is finished
+	 * so set the FIFO as idle */
+	if (do_callback)
+		lpbfifo.req = NULL;
+
+	if (irq != 0) /* don't increment on polled case */
+		req->irq_count++;
+
+	req->irq_ticks += get_tbl() - ts;
+	spin_unlock_irqrestore(&lpbfifo.lock, flags);
+
+	/* Spinlock is released; it is now safe to call the callback */
+	if (do_callback && req->callback)
+		req->callback(req);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * mpc52xx_lpbfifo_bcom_irq - IRQ handler for LPB FIFO Bestcomm task
+ *
+ * Only used when receiving data.
+ */
+static irqreturn_t mpc52xx_lpbfifo_bcom_irq(int irq, void *dev_id)
+{
+	struct mpc52xx_lpbfifo_request *req;
+	unsigned long flags;
+	u32 status;
+	u32 ts;
+
+	spin_lock_irqsave(&lpbfifo.lock, flags);
+	ts = get_tbl();
+
+	req = lpbfifo.req;
+	if (!req || (req->flags & MPC52XX_LPBFIFO_FLAG_NO_DMA)) {
+		spin_unlock_irqrestore(&lpbfifo.lock, flags);
+		return IRQ_HANDLED;
+	}
+
+	if (irq != 0) /* don't increment on polled case */
+		req->irq_count++;
+
+	if (!bcom_buffer_done(lpbfifo.bcom_cur_task)) {
+		spin_unlock_irqrestore(&lpbfifo.lock, flags);
+
+		req->buffer_not_done_cnt++;
+		if ((req->buffer_not_done_cnt % 1000) == 0)
+			pr_err("transfer stalled\n");
+
+		return IRQ_HANDLED;
+	}
+
+	bcom_retrieve_buffer(lpbfifo.bcom_cur_task, &status, NULL);
+
+	req->last_byte = ((u8 *)req->data)[req->size - 1];
+
+	req->pos = status & 0x00ffffff;
+
+	/* Mark the FIFO as idle */
+	lpbfifo.req = NULL;
+
+	/* Release the lock before calling out to the callback. */
+	req->irq_ticks += get_tbl() - ts;
+	spin_unlock_irqrestore(&lpbfifo.lock, flags);
+
+	if (req->callback)
+		req->callback(req);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * mpc52xx_lpbfifo_bcom_poll - Poll for DMA completion
+ */
+void mpc52xx_lpbfifo_poll(void)
+{
+	struct mpc52xx_lpbfifo_request *req = lpbfifo.req;
+	int dma = !(req->flags & MPC52XX_LPBFIFO_FLAG_NO_DMA);
+	int write = req->flags & MPC52XX_LPBFIFO_FLAG_WRITE;
+
+	/*
+	 * For more information, see comments on the "Fat Lady" 
+	 */
+	if (dma && write)
+		mpc52xx_lpbfifo_irq(0, NULL);
+	else 
+		mpc52xx_lpbfifo_bcom_irq(0, NULL);
+}
+EXPORT_SYMBOL(mpc52xx_lpbfifo_poll);
+
+/**
+ * mpc52xx_lpbfifo_submit - Submit an LPB FIFO transfer request.
+ * @req: Pointer to request structure
+ */
+int mpc52xx_lpbfifo_submit(struct mpc52xx_lpbfifo_request *req)
+{
+	unsigned long flags;
+
+	if (!lpbfifo.regs)
+		return -ENODEV;
+
+	spin_lock_irqsave(&lpbfifo.lock, flags);
+
+	/* If the req pointer is already set, then a transfer is in progress */
+	if (lpbfifo.req) {
+		spin_unlock_irqrestore(&lpbfifo.lock, flags);
+		return -EBUSY;
+	}
+
+	/* Setup the transfer */
+	lpbfifo.req = req;
+	req->irq_count = 0;
+	req->irq_ticks = 0;
+	req->buffer_not_done_cnt = 0;
+	req->pos = 0;
+
+	mpc52xx_lpbfifo_kick(req);
+	spin_unlock_irqrestore(&lpbfifo.lock, flags);
+	return 0;
+}
+EXPORT_SYMBOL(mpc52xx_lpbfifo_submit);
+
+void mpc52xx_lpbfifo_abort(struct mpc52xx_lpbfifo_request *req)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&lpbfifo.lock, flags);
+	if (lpbfifo.req == req) {
+		/* Put it into reset and clear the state */
+		bcom_gen_bd_rx_reset(lpbfifo.bcom_rx_task);
+		bcom_gen_bd_tx_reset(lpbfifo.bcom_tx_task);
+		out_be32(lpbfifo.regs + LPBFIFO_REG_ENABLE, 0x01010000);
+		lpbfifo.req = NULL;
+	}
+	spin_unlock_irqrestore(&lpbfifo.lock, flags);
+}
+EXPORT_SYMBOL(mpc52xx_lpbfifo_abort);
+
+static int __devinit
+mpc52xx_lpbfifo_probe(struct of_device *op, const struct of_device_id *match)
+{
+	struct resource res;
+	int rc = -ENOMEM;
+
+	if (lpbfifo.dev != NULL)
+		return -ENOSPC;
+
+	lpbfifo.irq = irq_of_parse_and_map(op->node, 0);
+	if (!lpbfifo.irq)
+		return -ENODEV;
+
+	if (of_address_to_resource(op->node, 0, &res))
+		return -ENODEV;
+	lpbfifo.regs_phys = res.start;
+	lpbfifo.regs = of_iomap(op->node, 0);
+	if (!lpbfifo.regs)
+		return -ENOMEM;
+
+	spin_lock_init(&lpbfifo.lock);
+
+	/* Put FIFO into reset */
+	out_be32(lpbfifo.regs + LPBFIFO_REG_ENABLE, 0x01010000);
+
+	/* Register the interrupt handler */
+	rc = request_irq(lpbfifo.irq, mpc52xx_lpbfifo_irq, 0,
+			 "mpc52xx-lpbfifo", &lpbfifo);
+	if (rc)
+		goto err_irq;
+
+	/* Request the Bestcomm receive (fifo --> memory) task and IRQ */
+	lpbfifo.bcom_rx_task =
+		bcom_gen_bd_rx_init(2, res.start + LPBFIFO_REG_FIFO_DATA,
+				    BCOM_INITIATOR_SCLPC, BCOM_IPR_SCLPC,
+				    16*1024*1024);
+	if (!lpbfifo.bcom_rx_task)
+		goto err_bcom_rx;
+
+	rc = request_irq(bcom_get_task_irq(lpbfifo.bcom_rx_task),
+			 mpc52xx_lpbfifo_bcom_irq, 0,
+			 "mpc52xx-lpbfifo-rx", &lpbfifo);
+	if (rc)
+		goto err_bcom_rx_irq;
+
+	/* Request the Bestcomm transmit (memory --> fifo) task and IRQ */
+	lpbfifo.bcom_tx_task =
+		bcom_gen_bd_tx_init(2, res.start + LPBFIFO_REG_FIFO_DATA,
+				    BCOM_INITIATOR_SCLPC, BCOM_IPR_SCLPC);
+	if (!lpbfifo.bcom_tx_task)
+		goto err_bcom_tx;
+
+	lpbfifo.dev = &op->dev;
+	return 0;
+
+ err_bcom_tx:
+	free_irq(bcom_get_task_irq(lpbfifo.bcom_rx_task), &lpbfifo);
+ err_bcom_rx_irq:
+	bcom_gen_bd_rx_release(lpbfifo.bcom_rx_task);
+ err_bcom_rx:
+ err_irq:
+	iounmap(lpbfifo.regs);
+	lpbfifo.regs = NULL;
+
+	dev_err(&op->dev, "mpc52xx_lpbfifo_probe() failed\n");
+	return -ENODEV;
+}
+
+
+static int __devexit mpc52xx_lpbfifo_remove(struct of_device *op)
+{
+	if (lpbfifo.dev != &op->dev)
+		return 0;
+
+	/* Put FIFO in reset */
+	out_be32(lpbfifo.regs + LPBFIFO_REG_ENABLE, 0x01010000);
+
+	/* Release the bestcomm transmit task */
+	free_irq(bcom_get_task_irq(lpbfifo.bcom_tx_task), &lpbfifo);
+	bcom_gen_bd_tx_release(lpbfifo.bcom_tx_task);
+	
+	/* Release the bestcomm receive task */
+	free_irq(bcom_get_task_irq(lpbfifo.bcom_rx_task), &lpbfifo);
+	bcom_gen_bd_rx_release(lpbfifo.bcom_rx_task);
+
+	free_irq(lpbfifo.irq, &lpbfifo);
+	iounmap(lpbfifo.regs);
+	lpbfifo.regs = NULL;
+	lpbfifo.dev = NULL;
+
+	return 0;
+}
+
+static struct of_device_id mpc52xx_lpbfifo_match[] __devinitconst = {
+	{ .compatible = "fsl,mpc5200-lpbfifo", },
+	{},
+};
+
+static struct of_platform_driver mpc52xx_lpbfifo_driver = {
+	.owner = THIS_MODULE,
+	.name = "mpc52xx-lpbfifo",
+	.match_table = mpc52xx_lpbfifo_match,
+	.probe = mpc52xx_lpbfifo_probe,
+	.remove = __devexit_p(mpc52xx_lpbfifo_remove),
+};
+
+/***********************************************************************
+ * Module init/exit
+ */
+static int __init mpc52xx_lpbfifo_init(void)
+{
+	pr_debug("Registering LocalPlus bus FIFO driver\n");
+	return of_register_platform_driver(&mpc52xx_lpbfifo_driver);
+}
+module_init(mpc52xx_lpbfifo_init);
+
+static void __exit mpc52xx_lpbfifo_exit(void)
+{
+	pr_debug("Unregistering LocalPlus bus FIFO driver\n");
+	of_unregister_platform_driver(&mpc52xx_lpbfifo_driver);
+}
+module_exit(mpc52xx_lpbfifo_exit);
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_pic.c b/arch/powerpc/platforms/52xx/mpc52xx_pic.c
index 480f806..4bf4bf7 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_pic.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_pic.c
@@ -220,7 +220,7 @@
 }
 
 static struct irq_chip mpc52xx_extirq_irqchip = {
-	.typename = "MPC52xx External",
+	.name = "MPC52xx External",
 	.mask = mpc52xx_extirq_mask,
 	.unmask = mpc52xx_extirq_unmask,
 	.ack = mpc52xx_extirq_ack,
@@ -258,7 +258,7 @@
 }
 
 static struct irq_chip mpc52xx_main_irqchip = {
-	.typename = "MPC52xx Main",
+	.name = "MPC52xx Main",
 	.mask = mpc52xx_main_mask,
 	.mask_ack = mpc52xx_main_mask,
 	.unmask = mpc52xx_main_unmask,
@@ -291,7 +291,7 @@
 }
 
 static struct irq_chip mpc52xx_periph_irqchip = {
-	.typename = "MPC52xx Peripherals",
+	.name = "MPC52xx Peripherals",
 	.mask = mpc52xx_periph_mask,
 	.mask_ack = mpc52xx_periph_mask,
 	.unmask = mpc52xx_periph_unmask,
@@ -335,7 +335,7 @@
 }
 
 static struct irq_chip mpc52xx_sdma_irqchip = {
-	.typename = "MPC52xx SDMA",
+	.name = "MPC52xx SDMA",
 	.mask = mpc52xx_sdma_mask,
 	.unmask = mpc52xx_sdma_unmask,
 	.ack = mpc52xx_sdma_ack,
@@ -355,7 +355,7 @@
  * mpc52xx_irqhost_xlate - translate virq# from device tree interrupts property
  */
 static int mpc52xx_irqhost_xlate(struct irq_host *h, struct device_node *ct,
-				 u32 *intspec, unsigned int intsize,
+				 const u32 *intspec, unsigned int intsize,
 				 irq_hw_number_t *out_hwirq,
 				 unsigned int *out_flags)
 {
diff --git a/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c b/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c
index 7ee979f..9d962d7 100644
--- a/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c
+++ b/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c
@@ -69,7 +69,6 @@
 }
 
 static struct irq_chip pq2ads_pci_ic = {
-	.typename = "PQ2 ADS PCI",
 	.name = "PQ2 ADS PCI",
 	.end = pq2ads_pci_unmask_irq,
 	.mask = pq2ads_pci_mask_irq,
@@ -107,7 +106,7 @@
 static int pci_pic_host_map(struct irq_host *h, unsigned int virq,
 			    irq_hw_number_t hw)
 {
-	get_irq_desc(virq)->status |= IRQ_LEVEL;
+	irq_to_desc(virq)->status |= IRQ_LEVEL;
 	set_irq_chip_data(virq, h->host_data);
 	set_irq_chip_and_handler(virq, &pq2ads_pci_ic, handle_level_irq);
 	return 0;
diff --git a/arch/powerpc/platforms/83xx/mpc832x_rdb.c b/arch/powerpc/platforms/83xx/mpc832x_rdb.c
index 567ded7..17f9974 100644
--- a/arch/powerpc/platforms/83xx/mpc832x_rdb.c
+++ b/arch/powerpc/platforms/83xx/mpc832x_rdb.c
@@ -74,7 +74,7 @@
 
 		prop = of_get_property(np, "mode", NULL);
 		if (prop && !strcmp(prop, "cpu-qe"))
-			pdata.qe_mode = 1;
+			pdata.flags = SPI_QE_CPU_MODE;
 
 		for (j = 0; j < num_board_infos; j++) {
 			if (board_infos[j].bus_num == pdata.bus_num)
diff --git a/arch/powerpc/platforms/83xx/suspend.c b/arch/powerpc/platforms/83xx/suspend.c
index 08e65fc..d306f07 100644
--- a/arch/powerpc/platforms/83xx/suspend.c
+++ b/arch/powerpc/platforms/83xx/suspend.c
@@ -96,6 +96,7 @@
 {
 	return deep_sleeping;
 }
+EXPORT_SYMBOL(fsl_deep_sleep);
 
 static int mpc83xx_change_state(void)
 {
diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig
index d3a975e..d951218 100644
--- a/arch/powerpc/platforms/85xx/Kconfig
+++ b/arch/powerpc/platforms/85xx/Kconfig
@@ -1,6 +1,7 @@
-menuconfig MPC85xx
-	bool "Machine Type"
-	depends on PPC_85xx
+menuconfig FSL_SOC_BOOKE
+	bool "Freescale Book-E Machine Type"
+	depends on PPC_85xx || PPC_BOOK3E
+	select FSL_SOC
 	select PPC_UDBG_16550
 	select MPIC
 	select PPC_PCI_CHOICE
@@ -8,7 +9,7 @@
 	select SERIAL_8250_SHARE_IRQ if SERIAL_8250
 	default y
 
-if MPC85xx
+if FSL_SOC_BOOKE
 
 config MPC8540_ADS
 	bool "Freescale MPC8540 ADS"
@@ -144,7 +145,19 @@
 	help
 	  This option enables support for the Wind River SBC8560 board
 
-endif # MPC85xx
+config P4080_DS
+	bool "Freescale P4080 DS"
+	select DEFAULT_UIMAGE
+	select PPC_FSL_BOOK3E
+	select PPC_E500MC
+	select PHYS_64BIT
+	select SWIOTLB
+	select MPC8xxx_GPIO
+	select HAS_RAPIDIO
+	help
+	  This option enables support for the P4080 DS board
+
+endif # FSL_SOC_BOOKE
 
 config TQM85xx
 	bool
diff --git a/arch/powerpc/platforms/85xx/Makefile b/arch/powerpc/platforms/85xx/Makefile
index 9098aea..387c128 100644
--- a/arch/powerpc/platforms/85xx/Makefile
+++ b/arch/powerpc/platforms/85xx/Makefile
@@ -10,6 +10,7 @@
 obj-$(CONFIG_MPC85xx_DS)  += mpc85xx_ds.o
 obj-$(CONFIG_MPC85xx_MDS) += mpc85xx_mds.o
 obj-$(CONFIG_MPC85xx_RDB) += mpc85xx_rdb.o
+obj-$(CONFIG_P4080_DS)    += p4080_ds.o corenet_ds.o
 obj-$(CONFIG_STX_GP3)	  += stx_gp3.o
 obj-$(CONFIG_TQM85xx)	  += tqm85xx.o
 obj-$(CONFIG_SBC8560)     += sbc8560.o
diff --git a/arch/powerpc/platforms/85xx/corenet_ds.c b/arch/powerpc/platforms/85xx/corenet_ds.c
new file mode 100644
index 0000000..534c2ec
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/corenet_ds.c
@@ -0,0 +1,125 @@
+/*
+ * Corenet based SoC DS Setup
+ *
+ * Maintained by Kumar Gala (see MAINTAINERS for contact information)
+ *
+ * Copyright 2009 Freescale Semiconductor Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/kdev_t.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/lmb.h>
+
+#include <asm/system.h>
+#include <asm/time.h>
+#include <asm/machdep.h>
+#include <asm/pci-bridge.h>
+#include <mm/mmu_decl.h>
+#include <asm/prom.h>
+#include <asm/udbg.h>
+#include <asm/mpic.h>
+
+#include <linux/of_platform.h>
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+
+void __init corenet_ds_pic_init(void)
+{
+	struct mpic *mpic;
+	struct resource r;
+	struct device_node *np = NULL;
+	unsigned int flags = MPIC_PRIMARY | MPIC_BIG_ENDIAN |
+				MPIC_BROKEN_FRR_NIRQS | MPIC_SINGLE_DEST_CPU;
+
+	np = of_find_node_by_type(np, "open-pic");
+
+	if (np == NULL) {
+		printk(KERN_ERR "Could not find open-pic node\n");
+		return;
+	}
+
+	if (of_address_to_resource(np, 0, &r)) {
+		printk(KERN_ERR "Failed to map mpic register space\n");
+		of_node_put(np);
+		return;
+	}
+
+	if (ppc_md.get_irq == mpic_get_coreint_irq)
+		flags |= MPIC_ENABLE_COREINT;
+
+	mpic = mpic_alloc(np, r.start, flags, 0, 256, " OpenPIC  ");
+	BUG_ON(mpic == NULL);
+
+	mpic_init(mpic);
+}
+
+#ifdef CONFIG_PCI
+static int primary_phb_addr;
+#endif
+
+/*
+ * Setup the architecture
+ */
+#ifdef CONFIG_SMP
+void __init mpc85xx_smp_init(void);
+#endif
+
+void __init corenet_ds_setup_arch(void)
+{
+#ifdef CONFIG_PCI
+	struct device_node *np;
+	struct pci_controller *hose;
+#endif
+	dma_addr_t max = 0xffffffff;
+
+#ifdef CONFIG_SMP
+	mpc85xx_smp_init();
+#endif
+
+#ifdef CONFIG_PCI
+	for_each_compatible_node(np, "pci", "fsl,p4080-pcie") {
+		struct resource rsrc;
+		of_address_to_resource(np, 0, &rsrc);
+		if ((rsrc.start & 0xfffff) == primary_phb_addr)
+			fsl_add_bridge(np, 1);
+		else
+			fsl_add_bridge(np, 0);
+
+		hose = pci_find_hose_for_OF_device(np);
+		max = min(max, hose->dma_window_base_cur +
+				hose->dma_window_size);
+	}
+#endif
+
+#ifdef CONFIG_SWIOTLB
+	if (lmb_end_of_DRAM() > max) {
+		ppc_swiotlb_enable = 1;
+		set_pci_dma_ops(&swiotlb_dma_ops);
+		ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_swiotlb;
+	}
+#endif
+	pr_info("%s board from Freescale Semiconductor\n", ppc_md.name);
+}
+
+static const struct of_device_id of_device_ids[] __devinitconst = {
+	{
+		.compatible	= "simple-bus"
+	},
+	{
+		.compatible	= "fsl,rapidio-delta",
+	},
+	{}
+};
+
+int __init corenet_ds_publish_devices(void)
+{
+	return of_platform_bus_probe(NULL, of_device_ids, NULL);
+}
diff --git a/arch/powerpc/platforms/85xx/corenet_ds.h b/arch/powerpc/platforms/85xx/corenet_ds.h
new file mode 100644
index 0000000..ddd700b
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/corenet_ds.h
@@ -0,0 +1,19 @@
+/*
+ * Corenet based SoC DS Setup
+ *
+ * Copyright 2009 Freescale Semiconductor Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#ifndef CORENET_DS_H
+#define CORENET_DS_H
+
+extern void __init corenet_ds_pic_init(void);
+extern void __init corenet_ds_setup_arch(void);
+extern int __init corenet_ds_publish_devices(void);
+
+#endif
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_mds.c b/arch/powerpc/platforms/85xx/mpc85xx_mds.c
index 3909d57..c5028a2 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_mds.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_mds.c
@@ -301,6 +301,7 @@
 	{ .compatible = "fsl,qe", },
 	{ .compatible = "gianfar", },
 	{ .compatible = "fsl,rapidio-delta", },
+	{ .compatible = "fsl,mpc8548-guts", },
 	{},
 };
 
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_rdb.c b/arch/powerpc/platforms/85xx/mpc85xx_rdb.c
index c8468de..088f30b 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_rdb.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_rdb.c
@@ -44,6 +44,7 @@
 	struct mpic *mpic;
 	struct resource r;
 	struct device_node *np;
+	unsigned long root = of_get_flat_dt_root();
 
 	np = of_find_node_by_type(NULL, "open-pic");
 	if (np == NULL) {
@@ -57,11 +58,18 @@
 		return;
 	}
 
-	mpic = mpic_alloc(np, r.start,
+	if (of_flat_dt_is_compatible(root, "fsl,85XXRDB-CAMP")) {
+		mpic = mpic_alloc(np, r.start,
+			MPIC_PRIMARY |
+			MPIC_BIG_ENDIAN | MPIC_BROKEN_FRR_NIRQS,
+			0, 256, " OpenPIC  ");
+	} else {
+		mpic = mpic_alloc(np, r.start,
 		  MPIC_PRIMARY | MPIC_WANTS_RESET |
 		  MPIC_BIG_ENDIAN | MPIC_BROKEN_FRR_NIRQS |
 		  MPIC_SINGLE_DEST_CPU,
 		  0, 256, " OpenPIC  ");
+	}
 
 	BUG_ON(mpic == NULL);
 	of_node_put(np);
@@ -113,6 +121,7 @@
 	return of_platform_bus_probe(NULL, mpc85xxrdb_ids, NULL);
 }
 machine_device_initcall(p2020_rdb, mpc85xxrdb_publish_devices);
+machine_device_initcall(p1020_rdb, mpc85xxrdb_publish_devices);
 
 /*
  * Called very early, device-tree isn't unflattened
@@ -126,6 +135,15 @@
 	return 0;
 }
 
+static int __init p1020_rdb_probe(void)
+{
+	unsigned long root = of_get_flat_dt_root();
+
+	if (of_flat_dt_is_compatible(root, "fsl,P1020RDB"))
+		return 1;
+	return 0;
+}
+
 define_machine(p2020_rdb) {
 	.name			= "P2020 RDB",
 	.probe			= p2020_rdb_probe,
@@ -139,3 +157,17 @@
 	.calibrate_decr		= generic_calibrate_decr,
 	.progress		= udbg_progress,
 };
+
+define_machine(p1020_rdb) {
+	.name			= "P1020 RDB",
+	.probe			= p1020_rdb_probe,
+	.setup_arch		= mpc85xx_rdb_setup_arch,
+	.init_IRQ		= mpc85xx_rdb_pic_init,
+#ifdef CONFIG_PCI
+	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+#endif
+	.get_irq		= mpic_get_irq,
+	.restart		= fsl_rstcr_restart,
+	.calibrate_decr		= generic_calibrate_decr,
+	.progress		= udbg_progress,
+};
diff --git a/arch/powerpc/platforms/85xx/p4080_ds.c b/arch/powerpc/platforms/85xx/p4080_ds.c
new file mode 100644
index 0000000..8417046
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/p4080_ds.c
@@ -0,0 +1,74 @@
+/*
+ * P4080 DS Setup
+ *
+ * Maintained by Kumar Gala (see MAINTAINERS for contact information)
+ *
+ * Copyright 2009 Freescale Semiconductor Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/kdev_t.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+
+#include <asm/system.h>
+#include <asm/time.h>
+#include <asm/machdep.h>
+#include <asm/pci-bridge.h>
+#include <mm/mmu_decl.h>
+#include <asm/prom.h>
+#include <asm/udbg.h>
+#include <asm/mpic.h>
+
+#include <linux/of_platform.h>
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+
+#include "corenet_ds.h"
+
+#ifdef CONFIG_PCI
+static int primary_phb_addr;
+#endif
+
+/*
+ * Called very early, device-tree isn't unflattened
+ */
+static int __init p4080_ds_probe(void)
+{
+	unsigned long root = of_get_flat_dt_root();
+
+	if (of_flat_dt_is_compatible(root, "fsl,P4080DS")) {
+#ifdef CONFIG_PCI
+		/* treat PCIe1 as primary,
+		 * shouldn't matter as we have no ISA on the board
+		 */
+		primary_phb_addr = 0x0000;
+#endif
+		return 1;
+	} else {
+		return 0;
+	}
+}
+
+define_machine(p4080_ds) {
+	.name			= "P4080 DS",
+	.probe			= p4080_ds_probe,
+	.setup_arch		= corenet_ds_setup_arch,
+	.init_IRQ		= corenet_ds_pic_init,
+#ifdef CONFIG_PCI
+	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+#endif
+	.get_irq		= mpic_get_coreint_irq,
+	.restart		= fsl_rstcr_restart,
+	.calibrate_decr		= generic_calibrate_decr,
+	.progress		= udbg_progress,
+};
+
+machine_device_initcall(p4080_ds, corenet_ds_publish_devices);
+machine_arch_initcall(p4080_ds, swiotlb_setup_bus_notifier);
diff --git a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c
index 60edf63..e5da5f6 100644
--- a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c
+++ b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c
@@ -232,7 +232,7 @@
 }
 
 static struct irq_chip socrates_fpga_pic_chip = {
-	.typename       = " FPGA-PIC ",
+	.name		= " FPGA-PIC ",
 	.ack		= socrates_fpga_pic_ack,
 	.mask           = socrates_fpga_pic_mask,
 	.mask_ack       = socrates_fpga_pic_mask_ack,
@@ -245,7 +245,7 @@
 		irq_hw_number_t hwirq)
 {
 	/* All interrupts are LEVEL sensitive */
-	get_irq_desc(virq)->status |= IRQ_LEVEL;
+	irq_to_desc(virq)->status |= IRQ_LEVEL;
 	set_irq_chip_and_handler(virq, &socrates_fpga_pic_chip,
 			handle_fasteoi_irq);
 
@@ -253,7 +253,7 @@
 }
 
 static int socrates_fpga_pic_host_xlate(struct irq_host *h,
-		struct device_node *ct,	u32 *intspec, unsigned int intsize,
+		struct device_node *ct,	const u32 *intspec, unsigned int intsize,
 		irq_hw_number_t *out_hwirq, unsigned int *out_flags)
 {
 	struct socrates_fpga_irq_info *fpga_irq = &fpga_irqs[intspec[0]];
diff --git a/arch/powerpc/platforms/86xx/Kconfig b/arch/powerpc/platforms/86xx/Kconfig
index 9c7b64a..2bbfd53 100644
--- a/arch/powerpc/platforms/86xx/Kconfig
+++ b/arch/powerpc/platforms/86xx/Kconfig
@@ -35,6 +35,7 @@
 config GEF_PPC9A
 	bool "GE Fanuc PPC9A"
 	select DEFAULT_UIMAGE
+	select MMIO_NVRAM
 	select GENERIC_GPIO
 	select ARCH_REQUIRE_GPIOLIB
 	help
@@ -43,6 +44,7 @@
 config GEF_SBC310
 	bool "GE Fanuc SBC310"
 	select DEFAULT_UIMAGE
+	select MMIO_NVRAM
 	select GENERIC_GPIO
 	select ARCH_REQUIRE_GPIOLIB
 	help
@@ -51,6 +53,7 @@
 config GEF_SBC610
 	bool "GE Fanuc SBC610"
 	select DEFAULT_UIMAGE
+	select MMIO_NVRAM
 	select GENERIC_GPIO
 	select ARCH_REQUIRE_GPIOLIB
 	select HAS_RAPIDIO
diff --git a/arch/powerpc/platforms/86xx/gef_pic.c b/arch/powerpc/platforms/86xx/gef_pic.c
index 50d0a2b..0110a87 100644
--- a/arch/powerpc/platforms/86xx/gef_pic.c
+++ b/arch/powerpc/platforms/86xx/gef_pic.c
@@ -149,7 +149,7 @@
 }
 
 static struct irq_chip gef_pic_chip = {
-	.typename	= "gefp",
+	.name		= "gefp",
 	.mask		= gef_pic_mask,
 	.mask_ack	= gef_pic_mask_ack,
 	.unmask		= gef_pic_unmask,
@@ -163,14 +163,14 @@
 			  irq_hw_number_t hwirq)
 {
 	/* All interrupts are LEVEL sensitive */
-	get_irq_desc(virq)->status |= IRQ_LEVEL;
+	irq_to_desc(virq)->status |= IRQ_LEVEL;
 	set_irq_chip_and_handler(virq, &gef_pic_chip, handle_level_irq);
 
 	return 0;
 }
 
 static int gef_pic_host_xlate(struct irq_host *h, struct device_node *ct,
-			    u32 *intspec, unsigned int intsize,
+			    const u32 *intspec, unsigned int intsize,
 			    irq_hw_number_t *out_hwirq, unsigned int *out_flags)
 {
 
diff --git a/arch/powerpc/platforms/86xx/gef_ppc9a.c b/arch/powerpc/platforms/86xx/gef_ppc9a.c
index 287f7bd..a792e5d 100644
--- a/arch/powerpc/platforms/86xx/gef_ppc9a.c
+++ b/arch/powerpc/platforms/86xx/gef_ppc9a.c
@@ -33,6 +33,7 @@
 #include <asm/udbg.h>
 
 #include <asm/mpic.h>
+#include <asm/nvram.h>
 
 #include <sysdev/fsl_pci.h>
 #include <sysdev/fsl_soc.h>
@@ -95,6 +96,10 @@
 			printk(KERN_WARNING "Unable to map board registers\n");
 		of_node_put(regs);
 	}
+
+#if defined(CONFIG_MMIO_NVRAM)
+	mmio_nvram_init();
+#endif
 }
 
 /* Return the PCB revision */
diff --git a/arch/powerpc/platforms/86xx/gef_sbc310.c b/arch/powerpc/platforms/86xx/gef_sbc310.c
index 90754e7..6a1a613 100644
--- a/arch/powerpc/platforms/86xx/gef_sbc310.c
+++ b/arch/powerpc/platforms/86xx/gef_sbc310.c
@@ -33,6 +33,7 @@
 #include <asm/udbg.h>
 
 #include <asm/mpic.h>
+#include <asm/nvram.h>
 
 #include <sysdev/fsl_pci.h>
 #include <sysdev/fsl_soc.h>
@@ -95,6 +96,10 @@
 			printk(KERN_WARNING "Unable to map board registers\n");
 		of_node_put(regs);
 	}
+
+#if defined(CONFIG_MMIO_NVRAM)
+	mmio_nvram_init();
+#endif
 }
 
 /* Return the PCB revision */
diff --git a/arch/powerpc/platforms/86xx/gef_sbc610.c b/arch/powerpc/platforms/86xx/gef_sbc610.c
index 72b31a6..e10688a 100644
--- a/arch/powerpc/platforms/86xx/gef_sbc610.c
+++ b/arch/powerpc/platforms/86xx/gef_sbc610.c
@@ -33,6 +33,7 @@
 #include <asm/udbg.h>
 
 #include <asm/mpic.h>
+#include <asm/nvram.h>
 
 #include <sysdev/fsl_pci.h>
 #include <sysdev/fsl_soc.h>
@@ -95,6 +96,10 @@
 			printk(KERN_WARNING "Unable to map board registers\n");
 		of_node_put(regs);
 	}
+
+#if defined(CONFIG_MMIO_NVRAM)
+	mmio_nvram_init();
+#endif
 }
 
 /* Return the PCB revision */
diff --git a/arch/powerpc/platforms/86xx/mpc8610_hpcd.c b/arch/powerpc/platforms/86xx/mpc8610_hpcd.c
index 627908a..5abe137 100644
--- a/arch/powerpc/platforms/86xx/mpc8610_hpcd.c
+++ b/arch/powerpc/platforms/86xx/mpc8610_hpcd.c
@@ -19,6 +19,7 @@
 #include <linux/stddef.h>
 #include <linux/kernel.h>
 #include <linux/pci.h>
+#include <linux/interrupt.h>
 #include <linux/kdev_t.h>
 #include <linux/delay.h>
 #include <linux/seq_file.h>
@@ -41,10 +42,46 @@
 
 #include "mpc86xx.h"
 
+static struct device_node *pixis_node;
 static unsigned char *pixis_bdcfg0, *pixis_arch;
 
+#ifdef CONFIG_SUSPEND
+static irqreturn_t mpc8610_sw9_irq(int irq, void *data)
+{
+	pr_debug("%s: PIXIS' event (sw9/wakeup) IRQ handled\n", __func__);
+	return IRQ_HANDLED;
+}
+
+static void __init mpc8610_suspend_init(void)
+{
+	int irq;
+	int ret;
+
+	if (!pixis_node)
+		return;
+
+	irq = irq_of_parse_and_map(pixis_node, 0);
+	if (!irq) {
+		pr_err("%s: can't map pixis event IRQ.\n", __func__);
+		return;
+	}
+
+	ret = request_irq(irq, mpc8610_sw9_irq, 0, "sw9/wakeup", NULL);
+	if (ret) {
+		pr_err("%s: can't request pixis event IRQ: %d\n",
+		       __func__, ret);
+		irq_dispose_mapping(irq);
+	}
+
+	enable_irq_wake(irq);
+}
+#else
+static inline void mpc8610_suspend_init(void) { }
+#endif /* CONFIG_SUSPEND */
+
 static struct of_device_id __initdata mpc8610_ids[] = {
 	{ .compatible = "fsl,mpc8610-immr", },
+	{ .compatible = "fsl,mpc8610-guts", },
 	{ .compatible = "simple-bus", },
 	{ .compatible = "gianfar", },
 	{}
@@ -55,6 +92,9 @@
 	/* Firstly, register PIXIS GPIOs. */
 	simple_gpiochip_init("fsl,fpga-pixis-gpio-bank");
 
+	/* Enable wakeup on PIXIS' event IRQ. */
+	mpc8610_suspend_init();
+
 	/* Without this call, the SSI device driver won't get probed. */
 	of_platform_bus_probe(NULL, mpc8610_ids, NULL);
 
@@ -250,10 +290,10 @@
 	diu_ops.set_sysfs_monitor_port	= mpc8610hpcd_set_sysfs_monitor_port;
 #endif
 
-	np = of_find_compatible_node(NULL, NULL, "fsl,fpga-pixis");
-	if (np) {
-		of_address_to_resource(np, 0, &r);
-		of_node_put(np);
+	pixis_node = of_find_compatible_node(NULL, NULL, "fsl,fpga-pixis");
+	if (pixis_node) {
+		of_address_to_resource(pixis_node, 0, &r);
+		of_node_put(pixis_node);
 		pixis = ioremap(r.start, 32);
 		if (!pixis) {
 			printk(KERN_ERR "Err: can't map FPGA cfg register!\n");
diff --git a/arch/powerpc/platforms/8xx/m8xx_setup.c b/arch/powerpc/platforms/8xx/m8xx_setup.c
index 385acfc..242954c 100644
--- a/arch/powerpc/platforms/8xx/m8xx_setup.c
+++ b/arch/powerpc/platforms/8xx/m8xx_setup.c
@@ -222,7 +222,7 @@
 	int cascade_irq;
 
 	if ((cascade_irq = cpm_get_irq()) >= 0) {
-		struct irq_desc *cdesc = irq_desc + cascade_irq;
+		struct irq_desc *cdesc = irq_to_desc(cascade_irq);
 
 		generic_handle_irq(cascade_irq);
 		cdesc->chip->eoi(cascade_irq);
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index 04a8061..d1663db 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -86,6 +86,11 @@
 	depends on PPC_RTAS
 	default n
 
+config PPC_RTAS_DAEMON
+	bool
+	depends on PPC_RTAS
+	default n
+
 config RTAS_PROC
 	bool "Proc interface to RTAS"
 	depends on PPC_RTAS
@@ -255,7 +260,7 @@
 
 config CPM2
 	bool "Enable support for the CPM2 (Communications Processor Module)"
-	depends on MPC85xx || 8260
+	depends on (FSL_SOC_BOOKE && PPC32) || 8260
 	select CPM
 	select PPC_LIB_RHEAP
 	select PPC_PCI_CHOICE
@@ -300,7 +305,7 @@
 
 config MPC8xxx_GPIO
 	bool "MPC8xxx GPIO support"
-	depends on PPC_MPC831x || PPC_MPC834x || PPC_MPC837x || PPC_85xx || PPC_86xx
+	depends on PPC_MPC831x || PPC_MPC834x || PPC_MPC837x || FSL_SOC_BOOKE || PPC_86xx
 	select GENERIC_GPIO
 	select ARCH_REQUIRE_GPIOLIB
 	help
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index e382cae..2eab27a 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -28,8 +28,6 @@
 config PPC_85xx
 	bool "Freescale 85xx"
 	select E500
-	select FSL_SOC
-	select MPC85xx
 
 config PPC_8xx
 	bool "Freescale 8xx"
@@ -138,6 +136,14 @@
 	bool
 	default y if PPC64
 
+config FSL_EMB_PERFMON
+	bool "Freescale Embedded Perfmon"
+	depends on E500 || PPC_83xx
+	help
+	  This is the Performance Monitor support found on the e500 core
+	  and some e300 cores (c3 and c4).  Select this only if your
+	  core supports the Embedded Performance Monitor APU
+
 config 4xx
 	bool
 	depends on 40x || 44x
@@ -153,13 +159,6 @@
 	depends on E200 || E500
 	default y
 
-config FSL_EMB_PERFMON
-	bool "Freescale Embedded Perfmon"
-	depends on E500 || PPC_83xx
-	help
-	  This is the Performance Monitor support found on the e500 core
-	  and some e300 cores (c3 and c4).  Select this only if your
-	  core supports the Embedded Performance Monitor APU
 
 config PTE_64BIT
 	bool
diff --git a/arch/powerpc/platforms/Makefile b/arch/powerpc/platforms/Makefile
index a6812ee..fdb9f0b 100644
--- a/arch/powerpc/platforms/Makefile
+++ b/arch/powerpc/platforms/Makefile
@@ -12,7 +12,7 @@
 obj-$(CONFIG_PPC_8xx)		+= 8xx/
 obj-$(CONFIG_PPC_82xx)		+= 82xx/
 obj-$(CONFIG_PPC_83xx)		+= 83xx/
-obj-$(CONFIG_PPC_85xx)		+= 85xx/
+obj-$(CONFIG_FSL_SOC_BOOKE)	+= 85xx/
 obj-$(CONFIG_PPC_86xx)		+= 86xx/
 obj-$(CONFIG_PPC_PSERIES)	+= pseries/
 obj-$(CONFIG_PPC_ISERIES)	+= iseries/
diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c
index a86c34b..96fe896 100644
--- a/arch/powerpc/platforms/cell/axon_msi.c
+++ b/arch/powerpc/platforms/cell/axon_msi.c
@@ -312,7 +312,7 @@
 	.mask		= mask_msi_irq,
 	.unmask		= unmask_msi_irq,
 	.shutdown	= unmask_msi_irq,
-	.typename	= "AXON-MSI",
+	.name		= "AXON-MSI",
 };
 
 static int msic_host_map(struct irq_host *h, unsigned int virq,
diff --git a/arch/powerpc/platforms/cell/beat_interrupt.c b/arch/powerpc/platforms/cell/beat_interrupt.c
index 7225484..36052a9 100644
--- a/arch/powerpc/platforms/cell/beat_interrupt.c
+++ b/arch/powerpc/platforms/cell/beat_interrupt.c
@@ -110,7 +110,7 @@
 }
 
 static struct irq_chip beatic_pic = {
-	.typename = " CELL-BEAT ",
+	.name = " CELL-BEAT ",
 	.unmask = beatic_unmask_irq,
 	.mask = beatic_mask_irq,
 	.eoi = beatic_end_irq,
@@ -136,7 +136,7 @@
 static int beatic_pic_host_map(struct irq_host *h, unsigned int virq,
 			       irq_hw_number_t hw)
 {
-	struct irq_desc *desc = get_irq_desc(virq);
+	struct irq_desc *desc = irq_to_desc(virq);
 	int64_t	err;
 
 	err = beat_construct_and_connect_irq_plug(virq, hw);
@@ -166,11 +166,11 @@
  * Note: We have only 1 entry to translate.
  */
 static int beatic_pic_host_xlate(struct irq_host *h, struct device_node *ct,
-				 u32 *intspec, unsigned int intsize,
+				 const u32 *intspec, unsigned int intsize,
 				 irq_hw_number_t *out_hwirq,
 				 unsigned int *out_flags)
 {
-	u64 *intspec2 = (u64 *)intspec;
+	const u64 *intspec2 = (const u64 *)intspec;
 
 	*out_hwirq = *intspec2;
 	*out_flags |= IRQ_TYPE_LEVEL_LOW;
diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c
index 882e470..f9dbf76 100644
--- a/arch/powerpc/platforms/cell/interrupt.c
+++ b/arch/powerpc/platforms/cell/interrupt.c
@@ -88,7 +88,7 @@
 }
 
 static struct irq_chip iic_chip = {
-	.typename = " CELL-IIC ",
+	.name = " CELL-IIC ",
 	.mask = iic_mask,
 	.unmask = iic_unmask,
 	.eoi = iic_eoi,
@@ -133,7 +133,7 @@
 
 
 static struct irq_chip iic_ioexc_chip = {
-	.typename = " CELL-IOEX",
+	.name = " CELL-IOEX",
 	.mask = iic_mask,
 	.unmask = iic_unmask,
 	.eoi = iic_ioexc_eoi,
@@ -297,7 +297,7 @@
 }
 
 static int iic_host_xlate(struct irq_host *h, struct device_node *ct,
-			   u32 *intspec, unsigned int intsize,
+			   const u32 *intspec, unsigned int intsize,
 			   irq_hw_number_t *out_hwirq, unsigned int *out_flags)
 
 {
diff --git a/arch/powerpc/platforms/cell/spider-pic.c b/arch/powerpc/platforms/cell/spider-pic.c
index 4e56556..01244f2 100644
--- a/arch/powerpc/platforms/cell/spider-pic.c
+++ b/arch/powerpc/platforms/cell/spider-pic.c
@@ -102,7 +102,7 @@
 
 	/* Reset edge detection logic if necessary
 	 */
-	if (get_irq_desc(virq)->status & IRQ_LEVEL)
+	if (irq_to_desc(virq)->status & IRQ_LEVEL)
 		return;
 
 	/* Only interrupts 47 to 50 can be set to edge */
@@ -119,7 +119,7 @@
 	struct spider_pic *pic = spider_virq_to_pic(virq);
 	unsigned int hw = irq_map[virq].hwirq;
 	void __iomem *cfg = spider_get_irq_config(pic, hw);
-	struct irq_desc *desc = get_irq_desc(virq);
+	struct irq_desc *desc = irq_to_desc(virq);
 	u32 old_mask;
 	u32 ic;
 
@@ -168,7 +168,7 @@
 }
 
 static struct irq_chip spider_pic = {
-	.typename = " SPIDER   ",
+	.name = " SPIDER   ",
 	.unmask = spider_unmask_irq,
 	.mask = spider_mask_irq,
 	.ack = spider_ack_irq,
@@ -187,7 +187,7 @@
 }
 
 static int spider_host_xlate(struct irq_host *h, struct device_node *ct,
-			   u32 *intspec, unsigned int intsize,
+			   const u32 *intspec, unsigned int intsize,
 			   irq_hw_number_t *out_hwirq, unsigned int *out_flags)
 
 {
diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c
index 884e8bc..64a4c2d 100644
--- a/arch/powerpc/platforms/cell/spufs/file.c
+++ b/arch/powerpc/platforms/cell/spufs/file.c
@@ -2494,7 +2494,7 @@
 	struct spu_context *ctx = SPUFS_I(inode)->i_ctx;
 	int error = 0, cnt = 0;
 
-	if (!buf || len < 0)
+	if (!buf)
 		return -EINVAL;
 
 	error = spu_acquire(ctx);
diff --git a/arch/powerpc/platforms/chrp/Kconfig b/arch/powerpc/platforms/chrp/Kconfig
index 37d438b..bc0b0ef 100644
--- a/arch/powerpc/platforms/chrp/Kconfig
+++ b/arch/powerpc/platforms/chrp/Kconfig
@@ -5,6 +5,8 @@
 	select PPC_I8259
 	select PPC_INDIRECT_PCI
 	select PPC_RTAS
+	select PPC_RTAS_DAEMON
+	select RTAS_ERROR_LOGGING
 	select PPC_MPC106
 	select PPC_UDBG_16550
 	select PPC_NATIVE
diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c
index cd4ad9a..52f3df3 100644
--- a/arch/powerpc/platforms/chrp/setup.c
+++ b/arch/powerpc/platforms/chrp/setup.c
@@ -364,19 +364,6 @@
 	if (ppc_md.progress) ppc_md.progress("Linux/PPC "UTS_RELEASE"\n", 0x0);
 }
 
-void
-chrp_event_scan(unsigned long unused)
-{
-	unsigned char log[1024];
-	int ret = 0;
-
-	/* XXX: we should loop until the hardware says no more error logs -- Cort */
-	rtas_call(rtas_token("event-scan"), 4, 1, &ret, 0xffffffff, 0,
-		  __pa(log), 1024);
-	mod_timer(&__get_cpu_var(heartbeat_timer),
-		  jiffies + event_scan_interval);
-}
-
 static void chrp_8259_cascade(unsigned int irq, struct irq_desc *desc)
 {
 	unsigned int cascade_irq = i8259_irq();
@@ -568,9 +555,6 @@
 void __init
 chrp_init2(void)
 {
-	struct device_node *device;
-	const unsigned int *p = NULL;
-
 #ifdef CONFIG_NVRAM
 	chrp_nvram_init();
 #endif
@@ -582,40 +566,6 @@
 	request_region(0x80,0x10,"dma page reg");
 	request_region(0xc0,0x20,"dma2");
 
-	/* Get the event scan rate for the rtas so we know how
-	 * often it expects a heartbeat. -- Cort
-	 */
-	device = of_find_node_by_name(NULL, "rtas");
-	if (device)
-		p = of_get_property(device, "rtas-event-scan-rate", NULL);
-	if (p && *p) {
-		/*
-		 * Arrange to call chrp_event_scan at least *p times
-		 * per minute.  We use 59 rather than 60 here so that
-		 * the rate will be slightly higher than the minimum.
-		 * This all assumes we don't do hotplug CPU on any
-		 * machine that needs the event scans done.
-		 */
-		unsigned long interval, offset;
-		int cpu, ncpus;
-		struct timer_list *timer;
-
-		interval = HZ * 59 / *p;
-		offset = HZ;
-		ncpus = num_online_cpus();
-		event_scan_interval = ncpus * interval;
-		for (cpu = 0; cpu < ncpus; ++cpu) {
-			timer = &per_cpu(heartbeat_timer, cpu);
-			setup_timer(timer, chrp_event_scan, 0);
-			timer->expires = jiffies + offset;
-			add_timer_on(timer, cpu);
-			offset += interval;
-		}
-		printk("RTAS Event Scan Rate: %u (%lu jiffies)\n",
-		       *p, interval);
-	}
-	of_node_put(device);
-
 	if (ppc_md.progress)
 		ppc_md.progress("  Have fun!    ", 0x7777);
 }
diff --git a/arch/powerpc/platforms/iseries/htab.c b/arch/powerpc/platforms/iseries/htab.c
index f99c6c4..3ae66ab 100644
--- a/arch/powerpc/platforms/iseries/htab.c
+++ b/arch/powerpc/platforms/iseries/htab.c
@@ -19,8 +19,7 @@
 
 #include "call_hpt.h"
 
-static spinlock_t iSeries_hlocks[64] __cacheline_aligned_in_smp =
-	{ [0 ... 63] = SPIN_LOCK_UNLOCKED};
+static spinlock_t iSeries_hlocks[64] __cacheline_aligned_in_smp;
 
 /*
  * Very primitive algorithm for picking up a lock
@@ -245,6 +244,11 @@
 
 void __init hpte_init_iSeries(void)
 {
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(iSeries_hlocks); i++)
+		spin_lock_init(&iSeries_hlocks[i]);
+
 	ppc_md.hpte_invalidate	= iSeries_hpte_invalidate;
 	ppc_md.hpte_updatepp	= iSeries_hpte_updatepp;
 	ppc_md.hpte_updateboltedpp = iSeries_hpte_updateboltedpp;
diff --git a/arch/powerpc/platforms/iseries/irq.c b/arch/powerpc/platforms/iseries/irq.c
index 94f4447..0776225 100644
--- a/arch/powerpc/platforms/iseries/irq.c
+++ b/arch/powerpc/platforms/iseries/irq.c
@@ -214,7 +214,7 @@
 	unsigned long flags;
 
 	for_each_irq (irq) {
-		struct irq_desc *desc = get_irq_desc(irq);
+		struct irq_desc *desc = irq_to_desc(irq);
 
 		if (desc && desc->chip && desc->chip->startup) {
 			spin_lock_irqsave(&desc->lock, flags);
@@ -273,7 +273,7 @@
 }
 
 static struct irq_chip iseries_pic = {
-	.typename	= "iSeries irq controller",
+	.name	= "iSeries irq controller",
 	.startup	= iseries_startup_IRQ,
 	.shutdown	= iseries_shutdown_IRQ,
 	.unmask		= iseries_enable_IRQ,
diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c
index d212006..09e8272 100644
--- a/arch/powerpc/platforms/powermac/pic.c
+++ b/arch/powerpc/platforms/powermac/pic.c
@@ -152,12 +152,12 @@
         unsigned long bit = 1UL << (src & 0x1f);
         int i = src >> 5;
 
-  	spin_lock_irqsave(&pmac_pic_lock, flags);
-	if ((irq_desc[virq].status & IRQ_LEVEL) == 0)
+	spin_lock_irqsave(&pmac_pic_lock, flags);
+	if ((irq_to_desc(virq)->status & IRQ_LEVEL) == 0)
 		out_le32(&pmac_irq_hw[i]->ack, bit);
         __set_bit(src, ppc_cached_irq_mask);
         __pmac_set_irq_mask(src, 0);
-  	spin_unlock_irqrestore(&pmac_pic_lock, flags);
+	spin_unlock_irqrestore(&pmac_pic_lock, flags);
 
 	return 0;
 }
@@ -195,7 +195,7 @@
 }
 
 static struct irq_chip pmac_pic = {
-	.typename	= " PMAC-PIC ",
+	.name		= " PMAC-PIC ",
 	.startup	= pmac_startup_irq,
 	.mask		= pmac_mask_irq,
 	.ack		= pmac_ack_irq,
@@ -285,7 +285,7 @@
 static int pmac_pic_host_map(struct irq_host *h, unsigned int virq,
 			     irq_hw_number_t hw)
 {
-	struct irq_desc *desc = get_irq_desc(virq);
+	struct irq_desc *desc = irq_to_desc(virq);
 	int level;
 
 	if (hw >= max_irqs)
@@ -303,7 +303,7 @@
 }
 
 static int pmac_pic_host_xlate(struct irq_host *h, struct device_node *ct,
-			       u32 *intspec, unsigned int intsize,
+			       const u32 *intspec, unsigned int intsize,
 			       irq_hw_number_t *out_hwirq,
 			       unsigned int *out_flags)
 
diff --git a/arch/powerpc/platforms/ps3/interrupt.c b/arch/powerpc/platforms/ps3/interrupt.c
index 8ec5ccf..59d9712 100644
--- a/arch/powerpc/platforms/ps3/interrupt.c
+++ b/arch/powerpc/platforms/ps3/interrupt.c
@@ -152,7 +152,7 @@
  */
 
 static struct irq_chip ps3_irq_chip = {
-	.typename = "ps3",
+	.name = "ps3",
 	.mask = ps3_chip_mask,
 	.unmask = ps3_chip_unmask,
 	.eoi = ps3_chip_eoi,
diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c
index 189a25b..e81b028 100644
--- a/arch/powerpc/platforms/ps3/mm.c
+++ b/arch/powerpc/platforms/ps3/mm.c
@@ -34,7 +34,7 @@
 #if defined(DEBUG)
 #define DBG udbg_printf
 #else
-#define DBG pr_debug
+#define DBG pr_devel
 #endif
 
 enum {
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index f0e6f28..27554c8 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -4,6 +4,7 @@
 	select MPIC
 	select PPC_I8259
 	select PPC_RTAS
+	select PPC_RTAS_DAEMON
 	select RTAS_ERROR_LOGGING
 	select PPC_UDBG_16550
 	select PPC_NATIVE
@@ -59,7 +60,7 @@
 
 config CMM
 	tristate "Collaborative memory management"
-	depends on PPC_SMLPAR && !CRASH_DUMP
+	depends on PPC_SMLPAR
 	default y
 	help
 	  Select this option, if you want to enable the kernel interface
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index 790c0b8..0ff5174 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -7,8 +7,8 @@
 endif
 
 obj-y			:= lpar.o hvCall.o nvram.o reconfig.o \
-			   setup.o iommu.o ras.o rtasd.o \
-			   firmware.o power.o
+			   setup.o iommu.o ras.o \
+			   firmware.o power.o dlpar.o
 obj-$(CONFIG_SMP)	+= smp.o
 obj-$(CONFIG_XICS)	+= xics.o
 obj-$(CONFIG_SCANLOG)	+= scanlog.o
diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c
index 6567439..bcdcf0c 100644
--- a/arch/powerpc/platforms/pseries/cmm.c
+++ b/arch/powerpc/platforms/pseries/cmm.c
@@ -229,8 +229,9 @@
 {
 	int rc;
 	struct hvcall_mpp_data mpp_data;
-	unsigned long active_pages_target;
-	signed long page_loan_request;
+	signed long active_pages_target, page_loan_request, target;
+	signed long total_pages = totalram_pages + loaned_pages;
+	signed long min_mem_pages = (min_mem_mb * 1024 * 1024) / PAGE_SIZE;
 
 	rc = h_get_mpp(&mpp_data);
 
@@ -238,17 +239,25 @@
 		return;
 
 	page_loan_request = div_s64((s64)mpp_data.loan_request, PAGE_SIZE);
-	loaned_pages_target = page_loan_request + loaned_pages;
-	if (loaned_pages_target > oom_freed_pages)
-		loaned_pages_target -= oom_freed_pages;
+	target = page_loan_request + (signed long)loaned_pages;
+
+	if (target < 0 || total_pages < min_mem_pages)
+		target = 0;
+
+	if (target > oom_freed_pages)
+		target -= oom_freed_pages;
 	else
-		loaned_pages_target = 0;
+		target = 0;
 
-	active_pages_target = totalram_pages + loaned_pages - loaned_pages_target;
+	active_pages_target = total_pages - target;
 
-	if ((min_mem_mb * 1024 * 1024) > (active_pages_target * PAGE_SIZE))
-		loaned_pages_target = totalram_pages + loaned_pages -
-			((min_mem_mb * 1024 * 1024) / PAGE_SIZE);
+	if (min_mem_pages > active_pages_target)
+		target = total_pages - min_mem_pages;
+
+	if (target < 0)
+		target = 0;
+
+	loaned_pages_target = target;
 
 	cmm_dbg("delta = %ld, loaned = %lu, target = %lu, oom = %lu, totalram = %lu\n",
 		page_loan_request, loaned_pages, loaned_pages_target,
diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
new file mode 100644
index 0000000..12df9e8
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -0,0 +1,558 @@
+/*
+ * Support for dynamic reconfiguration for PCI, Memory, and CPU
+ * Hotplug and Dynamic Logical Partitioning on RPA platforms.
+ *
+ * Copyright (C) 2009 Nathan Fontenot
+ * Copyright (C) 2009 IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/kref.h>
+#include <linux/notifier.h>
+#include <linux/proc_fs.h>
+#include <linux/spinlock.h>
+#include <linux/cpu.h>
+#include "offline_states.h"
+
+#include <asm/prom.h>
+#include <asm/machdep.h>
+#include <asm/uaccess.h>
+#include <asm/rtas.h>
+#include <asm/pSeries_reconfig.h>
+
+struct cc_workarea {
+	u32	drc_index;
+	u32	zero;
+	u32	name_offset;
+	u32	prop_length;
+	u32	prop_offset;
+};
+
+static void dlpar_free_cc_property(struct property *prop)
+{
+	kfree(prop->name);
+	kfree(prop->value);
+	kfree(prop);
+}
+
+static struct property *dlpar_parse_cc_property(struct cc_workarea *ccwa)
+{
+	struct property *prop;
+	char *name;
+	char *value;
+
+	prop = kzalloc(sizeof(*prop), GFP_KERNEL);
+	if (!prop)
+		return NULL;
+
+	name = (char *)ccwa + ccwa->name_offset;
+	prop->name = kstrdup(name, GFP_KERNEL);
+
+	prop->length = ccwa->prop_length;
+	value = (char *)ccwa + ccwa->prop_offset;
+	prop->value = kzalloc(prop->length, GFP_KERNEL);
+	if (!prop->value) {
+		dlpar_free_cc_property(prop);
+		return NULL;
+	}
+
+	memcpy(prop->value, value, prop->length);
+	return prop;
+}
+
+static struct device_node *dlpar_parse_cc_node(struct cc_workarea *ccwa)
+{
+	struct device_node *dn;
+	char *name;
+
+	dn = kzalloc(sizeof(*dn), GFP_KERNEL);
+	if (!dn)
+		return NULL;
+
+	/* The configure connector reported name does not contain a
+	 * preceeding '/', so we allocate a buffer large enough to
+	 * prepend this to the full_name.
+	 */
+	name = (char *)ccwa + ccwa->name_offset;
+	dn->full_name = kmalloc(strlen(name) + 2, GFP_KERNEL);
+	if (!dn->full_name) {
+		kfree(dn);
+		return NULL;
+	}
+
+	sprintf(dn->full_name, "/%s", name);
+	return dn;
+}
+
+static void dlpar_free_one_cc_node(struct device_node *dn)
+{
+	struct property *prop;
+
+	while (dn->properties) {
+		prop = dn->properties;
+		dn->properties = prop->next;
+		dlpar_free_cc_property(prop);
+	}
+
+	kfree(dn->full_name);
+	kfree(dn);
+}
+
+static void dlpar_free_cc_nodes(struct device_node *dn)
+{
+	if (dn->child)
+		dlpar_free_cc_nodes(dn->child);
+
+	if (dn->sibling)
+		dlpar_free_cc_nodes(dn->sibling);
+
+	dlpar_free_one_cc_node(dn);
+}
+
+#define NEXT_SIBLING    1
+#define NEXT_CHILD      2
+#define NEXT_PROPERTY   3
+#define PREV_PARENT     4
+#define MORE_MEMORY     5
+#define CALL_AGAIN	-2
+#define ERR_CFG_USE     -9003
+
+struct device_node *dlpar_configure_connector(u32 drc_index)
+{
+	struct device_node *dn;
+	struct device_node *first_dn = NULL;
+	struct device_node *last_dn = NULL;
+	struct property *property;
+	struct property *last_property = NULL;
+	struct cc_workarea *ccwa;
+	int cc_token;
+	int rc;
+
+	cc_token = rtas_token("ibm,configure-connector");
+	if (cc_token == RTAS_UNKNOWN_SERVICE)
+		return NULL;
+
+	spin_lock(&rtas_data_buf_lock);
+	ccwa = (struct cc_workarea *)&rtas_data_buf[0];
+	ccwa->drc_index = drc_index;
+	ccwa->zero = 0;
+
+	rc = rtas_call(cc_token, 2, 1, NULL, rtas_data_buf, NULL);
+	while (rc) {
+		switch (rc) {
+		case NEXT_SIBLING:
+			dn = dlpar_parse_cc_node(ccwa);
+			if (!dn)
+				goto cc_error;
+
+			dn->parent = last_dn->parent;
+			last_dn->sibling = dn;
+			last_dn = dn;
+			break;
+
+		case NEXT_CHILD:
+			dn = dlpar_parse_cc_node(ccwa);
+			if (!dn)
+				goto cc_error;
+
+			if (!first_dn)
+				first_dn = dn;
+			else {
+				dn->parent = last_dn;
+				if (last_dn)
+					last_dn->child = dn;
+			}
+
+			last_dn = dn;
+			break;
+
+		case NEXT_PROPERTY:
+			property = dlpar_parse_cc_property(ccwa);
+			if (!property)
+				goto cc_error;
+
+			if (!last_dn->properties)
+				last_dn->properties = property;
+			else
+				last_property->next = property;
+
+			last_property = property;
+			break;
+
+		case PREV_PARENT:
+			last_dn = last_dn->parent;
+			break;
+
+		case CALL_AGAIN:
+			break;
+
+		case MORE_MEMORY:
+		case ERR_CFG_USE:
+		default:
+			printk(KERN_ERR "Unexpected Error (%d) "
+			       "returned from configure-connector\n", rc);
+			goto cc_error;
+		}
+
+		rc = rtas_call(cc_token, 2, 1, NULL, rtas_data_buf, NULL);
+	}
+
+	spin_unlock(&rtas_data_buf_lock);
+	return first_dn;
+
+cc_error:
+	if (first_dn)
+		dlpar_free_cc_nodes(first_dn);
+	spin_unlock(&rtas_data_buf_lock);
+	return NULL;
+}
+
+static struct device_node *derive_parent(const char *path)
+{
+	struct device_node *parent;
+	char *last_slash;
+
+	last_slash = strrchr(path, '/');
+	if (last_slash == path) {
+		parent = of_find_node_by_path("/");
+	} else {
+		char *parent_path;
+		int parent_path_len = last_slash - path + 1;
+		parent_path = kmalloc(parent_path_len, GFP_KERNEL);
+		if (!parent_path)
+			return NULL;
+
+		strlcpy(parent_path, path, parent_path_len);
+		parent = of_find_node_by_path(parent_path);
+		kfree(parent_path);
+	}
+
+	return parent;
+}
+
+int dlpar_attach_node(struct device_node *dn)
+{
+	struct proc_dir_entry *ent;
+	int rc;
+
+	of_node_set_flag(dn, OF_DYNAMIC);
+	kref_init(&dn->kref);
+	dn->parent = derive_parent(dn->full_name);
+	if (!dn->parent)
+		return -ENOMEM;
+
+	rc = blocking_notifier_call_chain(&pSeries_reconfig_chain,
+					  PSERIES_RECONFIG_ADD, dn);
+	if (rc == NOTIFY_BAD) {
+		printk(KERN_ERR "Failed to add device node %s\n",
+		       dn->full_name);
+		return -ENOMEM; /* For now, safe to assume kmalloc failure */
+	}
+
+	of_attach_node(dn);
+
+#ifdef CONFIG_PROC_DEVICETREE
+	ent = proc_mkdir(strrchr(dn->full_name, '/') + 1, dn->parent->pde);
+	if (ent)
+		proc_device_tree_add_node(dn, ent);
+#endif
+
+	of_node_put(dn->parent);
+	return 0;
+}
+
+int dlpar_detach_node(struct device_node *dn)
+{
+	struct device_node *parent = dn->parent;
+	struct property *prop = dn->properties;
+
+#ifdef CONFIG_PROC_DEVICETREE
+	while (prop) {
+		remove_proc_entry(prop->name, dn->pde);
+		prop = prop->next;
+	}
+
+	if (dn->pde)
+		remove_proc_entry(dn->pde->name, parent->pde);
+#endif
+
+	blocking_notifier_call_chain(&pSeries_reconfig_chain,
+			    PSERIES_RECONFIG_REMOVE, dn);
+	of_detach_node(dn);
+	of_node_put(dn); /* Must decrement the refcount */
+
+	return 0;
+}
+
+#define DR_ENTITY_SENSE		9003
+#define DR_ENTITY_PRESENT	1
+#define DR_ENTITY_UNUSABLE	2
+#define ALLOCATION_STATE	9003
+#define ALLOC_UNUSABLE		0
+#define ALLOC_USABLE		1
+#define ISOLATION_STATE		9001
+#define ISOLATE			0
+#define UNISOLATE		1
+
+int dlpar_acquire_drc(u32 drc_index)
+{
+	int dr_status, rc;
+
+	rc = rtas_call(rtas_token("get-sensor-state"), 2, 2, &dr_status,
+		       DR_ENTITY_SENSE, drc_index);
+	if (rc || dr_status != DR_ENTITY_UNUSABLE)
+		return -1;
+
+	rc = rtas_set_indicator(ALLOCATION_STATE, drc_index, ALLOC_USABLE);
+	if (rc)
+		return rc;
+
+	rc = rtas_set_indicator(ISOLATION_STATE, drc_index, UNISOLATE);
+	if (rc) {
+		rtas_set_indicator(ALLOCATION_STATE, drc_index, ALLOC_UNUSABLE);
+		return rc;
+	}
+
+	return 0;
+}
+
+int dlpar_release_drc(u32 drc_index)
+{
+	int dr_status, rc;
+
+	rc = rtas_call(rtas_token("get-sensor-state"), 2, 2, &dr_status,
+		       DR_ENTITY_SENSE, drc_index);
+	if (rc || dr_status != DR_ENTITY_PRESENT)
+		return -1;
+
+	rc = rtas_set_indicator(ISOLATION_STATE, drc_index, ISOLATE);
+	if (rc)
+		return rc;
+
+	rc = rtas_set_indicator(ALLOCATION_STATE, drc_index, ALLOC_UNUSABLE);
+	if (rc) {
+		rtas_set_indicator(ISOLATION_STATE, drc_index, UNISOLATE);
+		return rc;
+	}
+
+	return 0;
+}
+
+#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
+
+static DEFINE_MUTEX(pseries_cpu_hotplug_mutex);
+
+void cpu_hotplug_driver_lock()
+{
+	mutex_lock(&pseries_cpu_hotplug_mutex);
+}
+
+void cpu_hotplug_driver_unlock()
+{
+	mutex_unlock(&pseries_cpu_hotplug_mutex);
+}
+
+static int dlpar_online_cpu(struct device_node *dn)
+{
+	int rc = 0;
+	unsigned int cpu;
+	int len, nthreads, i;
+	const u32 *intserv;
+
+	intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len);
+	if (!intserv)
+		return -EINVAL;
+
+	nthreads = len / sizeof(u32);
+
+	cpu_maps_update_begin();
+	for (i = 0; i < nthreads; i++) {
+		for_each_present_cpu(cpu) {
+			if (get_hard_smp_processor_id(cpu) != intserv[i])
+				continue;
+			BUG_ON(get_cpu_current_state(cpu)
+					!= CPU_STATE_OFFLINE);
+			cpu_maps_update_done();
+			rc = cpu_up(cpu);
+			if (rc)
+				goto out;
+			cpu_maps_update_begin();
+
+			break;
+		}
+		if (cpu == num_possible_cpus())
+			printk(KERN_WARNING "Could not find cpu to online "
+			       "with physical id 0x%x\n", intserv[i]);
+	}
+	cpu_maps_update_done();
+
+out:
+	return rc;
+
+}
+
+static ssize_t dlpar_cpu_probe(const char *buf, size_t count)
+{
+	struct device_node *dn;
+	unsigned long drc_index;
+	char *cpu_name;
+	int rc;
+
+	cpu_hotplug_driver_lock();
+	rc = strict_strtoul(buf, 0, &drc_index);
+	if (rc) {
+		rc = -EINVAL;
+		goto out;
+	}
+
+	dn = dlpar_configure_connector(drc_index);
+	if (!dn) {
+		rc = -EINVAL;
+		goto out;
+	}
+
+	/* configure-connector reports cpus as living in the base
+	 * directory of the device tree.  CPUs actually live in the
+	 * cpus directory so we need to fixup the full_name.
+	 */
+	cpu_name = kzalloc(strlen(dn->full_name) + strlen("/cpus") + 1,
+			   GFP_KERNEL);
+	if (!cpu_name) {
+		dlpar_free_cc_nodes(dn);
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	sprintf(cpu_name, "/cpus%s", dn->full_name);
+	kfree(dn->full_name);
+	dn->full_name = cpu_name;
+
+	rc = dlpar_acquire_drc(drc_index);
+	if (rc) {
+		dlpar_free_cc_nodes(dn);
+		rc = -EINVAL;
+		goto out;
+	}
+
+	rc = dlpar_attach_node(dn);
+	if (rc) {
+		dlpar_release_drc(drc_index);
+		dlpar_free_cc_nodes(dn);
+	}
+
+	rc = dlpar_online_cpu(dn);
+out:
+	cpu_hotplug_driver_unlock();
+
+	return rc ? rc : count;
+}
+
+static int dlpar_offline_cpu(struct device_node *dn)
+{
+	int rc = 0;
+	unsigned int cpu;
+	int len, nthreads, i;
+	const u32 *intserv;
+
+	intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len);
+	if (!intserv)
+		return -EINVAL;
+
+	nthreads = len / sizeof(u32);
+
+	cpu_maps_update_begin();
+	for (i = 0; i < nthreads; i++) {
+		for_each_present_cpu(cpu) {
+			if (get_hard_smp_processor_id(cpu) != intserv[i])
+				continue;
+
+			if (get_cpu_current_state(cpu) == CPU_STATE_OFFLINE)
+				break;
+
+			if (get_cpu_current_state(cpu) == CPU_STATE_ONLINE) {
+				cpu_maps_update_done();
+				rc = cpu_down(cpu);
+				if (rc)
+					goto out;
+				cpu_maps_update_begin();
+				break;
+
+			}
+
+			/*
+			 * The cpu is in CPU_STATE_INACTIVE.
+			 * Upgrade it's state to CPU_STATE_OFFLINE.
+			 */
+			set_preferred_offline_state(cpu, CPU_STATE_OFFLINE);
+			BUG_ON(plpar_hcall_norets(H_PROD, intserv[i])
+								!= H_SUCCESS);
+			__cpu_die(cpu);
+			break;
+		}
+		if (cpu == num_possible_cpus())
+			printk(KERN_WARNING "Could not find cpu to offline "
+			       "with physical id 0x%x\n", intserv[i]);
+	}
+	cpu_maps_update_done();
+
+out:
+	return rc;
+
+}
+
+static ssize_t dlpar_cpu_release(const char *buf, size_t count)
+{
+	struct device_node *dn;
+	const u32 *drc_index;
+	int rc;
+
+	dn = of_find_node_by_path(buf);
+	if (!dn)
+		return -EINVAL;
+
+	drc_index = of_get_property(dn, "ibm,my-drc-index", NULL);
+	if (!drc_index) {
+		of_node_put(dn);
+		return -EINVAL;
+	}
+
+	cpu_hotplug_driver_lock();
+	rc = dlpar_offline_cpu(dn);
+	if (rc) {
+		of_node_put(dn);
+		rc = -EINVAL;
+		goto out;
+	}
+
+	rc = dlpar_release_drc(*drc_index);
+	if (rc) {
+		of_node_put(dn);
+		goto out;
+	}
+
+	rc = dlpar_detach_node(dn);
+	if (rc) {
+		dlpar_acquire_drc(*drc_index);
+		goto out;
+	}
+
+	of_node_put(dn);
+out:
+	cpu_hotplug_driver_unlock();
+	return rc ? rc : count;
+}
+
+static int __init pseries_dlpar_init(void)
+{
+	ppc_md.cpu_probe = dlpar_cpu_probe;
+	ppc_md.cpu_release = dlpar_cpu_release;
+
+	return 0;
+}
+machine_device_initcall(pseries, pseries_dlpar_init);
+
+#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */
diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c
index 0e8db67..ef8e454 100644
--- a/arch/powerpc/platforms/pseries/eeh_driver.c
+++ b/arch/powerpc/platforms/pseries/eeh_driver.c
@@ -63,22 +63,6 @@
 }
 #endif
 
-/** 
- * irq_in_use - return true if this irq is being used 
- */
-static int irq_in_use(unsigned int irq)
-{
-	int rc = 0;
-	unsigned long flags;
-   struct irq_desc *desc = irq_desc + irq;
-
-	spin_lock_irqsave(&desc->lock, flags);
-	if (desc->action)
-		rc = 1;
-	spin_unlock_irqrestore(&desc->lock, flags);
-	return rc;
-}
-
 /**
  * eeh_disable_irq - disable interrupt for the recovering device
  */
@@ -93,7 +77,7 @@
 	if (dev->msi_enabled || dev->msix_enabled)
 		return;
 
-	if (!irq_in_use(dev->irq))
+	if (!irq_has_action(dev->irq))
 		return;
 
 	PCI_DN(dn)->eeh_mode |= EEH_MODE_IRQ_DISABLED;
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index ebff6d9..6ea4698 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -30,6 +30,7 @@
 #include <asm/pSeries_reconfig.h>
 #include "xics.h"
 #include "plpar_wrappers.h"
+#include "offline_states.h"
 
 /* This version can't take the spinlock, because it never returns */
 static struct rtas_args rtas_stop_self_args = {
@@ -39,6 +40,55 @@
 	.rets = &rtas_stop_self_args.args[0],
 };
 
+static DEFINE_PER_CPU(enum cpu_state_vals, preferred_offline_state) =
+							CPU_STATE_OFFLINE;
+static DEFINE_PER_CPU(enum cpu_state_vals, current_state) = CPU_STATE_OFFLINE;
+
+static enum cpu_state_vals default_offline_state = CPU_STATE_OFFLINE;
+
+static int cede_offline_enabled __read_mostly = 1;
+
+/*
+ * Enable/disable cede_offline when available.
+ */
+static int __init setup_cede_offline(char *str)
+{
+	if (!strcmp(str, "off"))
+		cede_offline_enabled = 0;
+	else if (!strcmp(str, "on"))
+		cede_offline_enabled = 1;
+	else
+		return 0;
+	return 1;
+}
+
+__setup("cede_offline=", setup_cede_offline);
+
+enum cpu_state_vals get_cpu_current_state(int cpu)
+{
+	return per_cpu(current_state, cpu);
+}
+
+void set_cpu_current_state(int cpu, enum cpu_state_vals state)
+{
+	per_cpu(current_state, cpu) = state;
+}
+
+enum cpu_state_vals get_preferred_offline_state(int cpu)
+{
+	return per_cpu(preferred_offline_state, cpu);
+}
+
+void set_preferred_offline_state(int cpu, enum cpu_state_vals state)
+{
+	per_cpu(preferred_offline_state, cpu) = state;
+}
+
+void set_default_offline_state(int cpu)
+{
+	per_cpu(preferred_offline_state, cpu) = default_offline_state;
+}
+
 static void rtas_stop_self(void)
 {
 	struct rtas_args *args = &rtas_stop_self_args;
@@ -56,11 +106,61 @@
 
 static void pseries_mach_cpu_die(void)
 {
+	unsigned int cpu = smp_processor_id();
+	unsigned int hwcpu = hard_smp_processor_id();
+	u8 cede_latency_hint = 0;
+
 	local_irq_disable();
 	idle_task_exit();
 	xics_teardown_cpu();
-	unregister_slb_shadow(hard_smp_processor_id(), __pa(get_slb_shadow()));
-	rtas_stop_self();
+
+	if (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) {
+		set_cpu_current_state(cpu, CPU_STATE_INACTIVE);
+		cede_latency_hint = 2;
+
+		get_lppaca()->idle = 1;
+		if (!get_lppaca()->shared_proc)
+			get_lppaca()->donate_dedicated_cpu = 1;
+
+		printk(KERN_INFO
+			"cpu %u (hwid %u) ceding for offline with hint %d\n",
+			cpu, hwcpu, cede_latency_hint);
+		while (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) {
+			extended_cede_processor(cede_latency_hint);
+			printk(KERN_INFO "cpu %u (hwid %u) returned from cede.\n",
+				cpu, hwcpu);
+			printk(KERN_INFO
+			"Decrementer value = %x Timebase value = %llx\n",
+			get_dec(), get_tb());
+		}
+
+		printk(KERN_INFO "cpu %u (hwid %u) got prodded to go online\n",
+			cpu, hwcpu);
+
+		if (!get_lppaca()->shared_proc)
+			get_lppaca()->donate_dedicated_cpu = 0;
+		get_lppaca()->idle = 0;
+	}
+
+	if (get_preferred_offline_state(cpu) == CPU_STATE_ONLINE) {
+		unregister_slb_shadow(hwcpu, __pa(get_slb_shadow()));
+
+		/*
+		 * NOTE: Calling start_secondary() here for now to
+		 * start new context.
+		 * However, need to do it cleanly by resetting the
+		 * stack pointer.
+		 */
+		start_secondary();
+
+	} else if (get_preferred_offline_state(cpu) == CPU_STATE_OFFLINE) {
+
+		set_cpu_current_state(cpu, CPU_STATE_OFFLINE);
+		unregister_slb_shadow(hard_smp_processor_id(),
+					__pa(get_slb_shadow()));
+		rtas_stop_self();
+	}
+
 	/* Should never get here... */
 	BUG();
 	for(;;);
@@ -106,18 +206,43 @@
 	return 0;
 }
 
+/*
+ * pseries_cpu_die: Wait for the cpu to die.
+ * @cpu: logical processor id of the CPU whose death we're awaiting.
+ *
+ * This function is called from the context of the thread which is performing
+ * the cpu-offline. Here we wait for long enough to allow the cpu in question
+ * to self-destroy so that the cpu-offline thread can send the CPU_DEAD
+ * notifications.
+ *
+ * OTOH, pseries_mach_cpu_die() is called by the @cpu when it wants to
+ * self-destruct.
+ */
 static void pseries_cpu_die(unsigned int cpu)
 {
 	int tries;
-	int cpu_status;
+	int cpu_status = 1;
 	unsigned int pcpu = get_hard_smp_processor_id(cpu);
 
-	for (tries = 0; tries < 25; tries++) {
-		cpu_status = query_cpu_stopped(pcpu);
-		if (cpu_status == 0 || cpu_status == -1)
-			break;
-		cpu_relax();
+	if (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) {
+		cpu_status = 1;
+		for (tries = 0; tries < 1000; tries++) {
+			if (get_cpu_current_state(cpu) == CPU_STATE_INACTIVE) {
+				cpu_status = 0;
+				break;
+			}
+			cpu_relax();
+		}
+	} else if (get_preferred_offline_state(cpu) == CPU_STATE_OFFLINE) {
+
+		for (tries = 0; tries < 25; tries++) {
+			cpu_status = query_cpu_stopped(pcpu);
+			if (cpu_status == 0 || cpu_status == -1)
+				break;
+			cpu_relax();
+		}
 	}
+
 	if (cpu_status != 0) {
 		printk("Querying DEAD? cpu %i (%i) shows %i\n",
 		       cpu, pcpu, cpu_status);
@@ -252,10 +377,41 @@
 	.notifier_call = pseries_smp_notifier,
 };
 
+#define MAX_CEDE_LATENCY_LEVELS		4
+#define	CEDE_LATENCY_PARAM_LENGTH	10
+#define CEDE_LATENCY_PARAM_MAX_LENGTH	\
+	(MAX_CEDE_LATENCY_LEVELS * CEDE_LATENCY_PARAM_LENGTH * sizeof(char))
+#define CEDE_LATENCY_TOKEN		45
+
+static char cede_parameters[CEDE_LATENCY_PARAM_MAX_LENGTH];
+
+static int parse_cede_parameters(void)
+{
+	int call_status;
+
+	memset(cede_parameters, 0, CEDE_LATENCY_PARAM_MAX_LENGTH);
+	call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
+				NULL,
+				CEDE_LATENCY_TOKEN,
+				__pa(cede_parameters),
+				CEDE_LATENCY_PARAM_MAX_LENGTH);
+
+	if (call_status != 0)
+		printk(KERN_INFO "CEDE_LATENCY: \
+			%s %s Error calling get-system-parameter(0x%x)\n",
+			__FILE__, __func__, call_status);
+	else
+		printk(KERN_INFO "CEDE_LATENCY: \
+			get-system-parameter successful.\n");
+
+	return call_status;
+}
+
 static int __init pseries_cpu_hotplug_init(void)
 {
 	struct device_node *np;
 	const char *typep;
+	int cpu;
 
 	for_each_node_by_name(np, "interrupt-controller") {
 		typep = of_get_property(np, "compatible", NULL);
@@ -283,8 +439,16 @@
 	smp_ops->cpu_die = pseries_cpu_die;
 
 	/* Processors can be added/removed only on LPAR */
-	if (firmware_has_feature(FW_FEATURE_LPAR))
+	if (firmware_has_feature(FW_FEATURE_LPAR)) {
 		pSeries_reconfig_notifier_register(&pseries_smp_nb);
+		cpu_maps_update_begin();
+		if (cede_offline_enabled && parse_cede_parameters() == 0) {
+			default_offline_state = CPU_STATE_INACTIVE;
+			for_each_online_cpu(cpu)
+				set_default_offline_state(cpu);
+		}
+		cpu_maps_update_done();
+	}
 
 	return 0;
 }
diff --git a/arch/powerpc/platforms/pseries/offline_states.h b/arch/powerpc/platforms/pseries/offline_states.h
new file mode 100644
index 0000000..22574e0
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/offline_states.h
@@ -0,0 +1,18 @@
+#ifndef _OFFLINE_STATES_H_
+#define _OFFLINE_STATES_H_
+
+/* Cpu offline states go here */
+enum cpu_state_vals {
+	CPU_STATE_OFFLINE,
+	CPU_STATE_INACTIVE,
+	CPU_STATE_ONLINE,
+	CPU_MAX_OFFLINE_STATES
+};
+
+extern enum cpu_state_vals get_cpu_current_state(int cpu);
+extern void set_cpu_current_state(int cpu, enum cpu_state_vals state);
+extern enum cpu_state_vals get_preferred_offline_state(int cpu);
+extern void set_preferred_offline_state(int cpu, enum cpu_state_vals state);
+extern void set_default_offline_state(int cpu);
+extern int start_secondary(void);
+#endif
diff --git a/arch/powerpc/platforms/pseries/plpar_wrappers.h b/arch/powerpc/platforms/pseries/plpar_wrappers.h
index a24a6b23..0603c91 100644
--- a/arch/powerpc/platforms/pseries/plpar_wrappers.h
+++ b/arch/powerpc/platforms/pseries/plpar_wrappers.h
@@ -9,11 +9,33 @@
 	return plpar_hcall_norets(H_POLL_PENDING);
 }
 
+static inline u8 get_cede_latency_hint(void)
+{
+	return get_lppaca()->gpr5_dword.fields.cede_latency_hint;
+}
+
+static inline void set_cede_latency_hint(u8 latency_hint)
+{
+	get_lppaca()->gpr5_dword.fields.cede_latency_hint = latency_hint;
+}
+
 static inline long cede_processor(void)
 {
 	return plpar_hcall_norets(H_CEDE);
 }
 
+static inline long extended_cede_processor(unsigned long latency_hint)
+{
+	long rc;
+	u8 old_latency_hint = get_cede_latency_hint();
+
+	set_cede_latency_hint(latency_hint);
+	rc = cede_processor();
+	set_cede_latency_hint(old_latency_hint);
+
+	return rc;
+}
+
 static inline long vpa_call(unsigned long flags, unsigned long cpu,
 		unsigned long vpa)
 {
diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c
index 2e2bbe1..a2305d2 100644
--- a/arch/powerpc/platforms/pseries/reconfig.c
+++ b/arch/powerpc/platforms/pseries/reconfig.c
@@ -96,7 +96,7 @@
 	return parent;
 }
 
-static BLOCKING_NOTIFIER_HEAD(pSeries_reconfig_chain);
+BLOCKING_NOTIFIER_HEAD(pSeries_reconfig_chain);
 
 int pSeries_reconfig_notifier_register(struct notifier_block *nb)
 {
@@ -184,7 +184,7 @@
 }
 
 /*
- * /proc/ppc64/ofdt - yucky binary interface for adding and removing
+ * /proc/powerpc/ofdt - yucky binary interface for adding and removing
  * OF device nodes.  Should be deprecated as soon as we get an
  * in-kernel wrapper for the RTAS ibm,configure-connector call.
  */
@@ -543,7 +543,7 @@
 	.write = ofdt_write
 };
 
-/* create /proc/ppc64/ofdt write-only by root */
+/* create /proc/powerpc/ofdt write-only by root */
 static int proc_ppc64_create_ofdt(void)
 {
 	struct proc_dir_entry *ent;
@@ -551,7 +551,7 @@
 	if (!machine_is(pseries))
 		return 0;
 
-	ent = proc_create("ppc64/ofdt", S_IWUSR, NULL, &ofdt_fops);
+	ent = proc_create("powerpc/ofdt", S_IWUSR, NULL, &ofdt_fops);
 	if (ent)
 		ent->size = 0;
 
diff --git a/arch/powerpc/platforms/pseries/scanlog.c b/arch/powerpc/platforms/pseries/scanlog.c
index 417eca7..1b45c45 100644
--- a/arch/powerpc/platforms/pseries/scanlog.c
+++ b/arch/powerpc/platforms/pseries/scanlog.c
@@ -13,7 +13,7 @@
  * of this data using this driver.  A dump exists if the device-tree
  * /chosen/ibm,scan-log-data property exists.
  *
- * This driver exports /proc/ppc64/scan-log-dump which can be read.
+ * This driver exports /proc/powerpc/scan-log-dump which can be read.
  * The driver supports only sequential reads.
  *
  * The driver looks at a write to the driver for the single word "reset".
@@ -186,7 +186,7 @@
 	if (!data)
 		goto err;
 
-	ent = proc_create_data("ppc64/rtas/scan-log-dump", S_IRUSR, NULL,
+	ent = proc_create_data("powerpc/rtas/scan-log-dump", S_IRUSR, NULL,
 			       &scanlog_fops, data);
 	if (!ent)
 		goto err;
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index 440000c..8868c01 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -48,6 +48,7 @@
 #include "plpar_wrappers.h"
 #include "pseries.h"
 #include "xics.h"
+#include "offline_states.h"
 
 
 /*
@@ -84,6 +85,9 @@
 	/* Fixup atomic count: it exited inside IRQ handler. */
 	task_thread_info(paca[lcpu].__current)->preempt_count	= 0;
 
+	if (get_cpu_current_state(lcpu) == CPU_STATE_INACTIVE)
+		goto out;
+
 	/* 
 	 * If the RTAS start-cpu token does not exist then presume the
 	 * cpu is already spinning.
@@ -98,6 +102,7 @@
 		return 0;
 	}
 
+out:
 	return 1;
 }
 
@@ -111,12 +116,16 @@
 		vpa_init(cpu);
 
 	cpu_clear(cpu, of_spin_map);
+	set_cpu_current_state(cpu, CPU_STATE_ONLINE);
+	set_default_offline_state(cpu);
 
 }
 #endif /* CONFIG_XICS */
 
 static void __devinit smp_pSeries_kick_cpu(int nr)
 {
+	long rc;
+	unsigned long hcpuid;
 	BUG_ON(nr < 0 || nr >= NR_CPUS);
 
 	if (!smp_startup_cpu(nr))
@@ -128,6 +137,16 @@
 	 * the processor will continue on to secondary_start
 	 */
 	paca[nr].cpu_start = 1;
+
+	set_preferred_offline_state(nr, CPU_STATE_ONLINE);
+
+	if (get_cpu_current_state(nr) == CPU_STATE_INACTIVE) {
+		hcpuid = get_hard_smp_processor_id(nr);
+		rc = plpar_hcall_norets(H_PROD, hcpuid);
+		if (rc != H_SUCCESS)
+			panic("Error: Prod to wake up processor %d Ret= %ld\n",
+				nr, rc);
+	}
 }
 
 static int smp_pSeries_cpu_bootable(unsigned int nr)
diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c
index b9bf0ee..7d01b58 100644
--- a/arch/powerpc/platforms/pseries/xics.c
+++ b/arch/powerpc/platforms/pseries/xics.c
@@ -20,6 +20,7 @@
 #include <linux/cpu.h>
 #include <linux/msi.h>
 #include <linux/of.h>
+#include <linux/percpu.h>
 
 #include <asm/firmware.h>
 #include <asm/io.h>
@@ -46,6 +47,12 @@
  */
 #define IPI_PRIORITY		4
 
+/* The least favored priority */
+#define LOWEST_PRIORITY		0xFF
+
+/* The number of priorities defined above */
+#define MAX_NUM_PRIORITIES	3
+
 static unsigned int default_server = 0xFF;
 static unsigned int default_distrib_server = 0;
 static unsigned int interrupt_server_size = 8;
@@ -56,6 +63,12 @@
 static int ibm_int_on;
 static int ibm_int_off;
 
+struct xics_cppr {
+	unsigned char stack[MAX_NUM_PRIORITIES];
+	int index;
+};
+
+static DEFINE_PER_CPU(struct xics_cppr, xics_cppr);
 
 /* Direct hardware low level accessors */
 
@@ -157,7 +170,7 @@
 	cpumask_t cpumask;
 	cpumask_t tmp = CPU_MASK_NONE;
 
-	cpumask_copy(&cpumask, irq_desc[virq].affinity);
+	cpumask_copy(&cpumask, irq_to_desc(virq)->affinity);
 	if (!distribute_irqs)
 		return default_server;
 
@@ -284,6 +297,19 @@
 	return xirr & 0x00ffffff;
 }
 
+static void push_cppr(unsigned int vec)
+{
+	struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
+
+	if (WARN_ON(os_cppr->index >= MAX_NUM_PRIORITIES - 1))
+		return;
+
+	if (vec == XICS_IPI)
+		os_cppr->stack[++os_cppr->index] = IPI_PRIORITY;
+	else
+		os_cppr->stack[++os_cppr->index] = DEFAULT_PRIORITY;
+}
+
 static unsigned int xics_get_irq_direct(void)
 {
 	unsigned int xirr = direct_xirr_info_get();
@@ -294,8 +320,10 @@
 		return NO_IRQ;
 
 	irq = irq_radix_revmap_lookup(xics_host, vec);
-	if (likely(irq != NO_IRQ))
+	if (likely(irq != NO_IRQ)) {
+		push_cppr(vec);
 		return irq;
+	}
 
 	/* We don't have a linux mapping, so have rtas mask it. */
 	xics_mask_unknown_vec(vec);
@@ -315,8 +343,10 @@
 		return NO_IRQ;
 
 	irq = irq_radix_revmap_lookup(xics_host, vec);
-	if (likely(irq != NO_IRQ))
+	if (likely(irq != NO_IRQ)) {
+		push_cppr(vec);
 		return irq;
+	}
 
 	/* We don't have a linux mapping, so have RTAS mask it. */
 	xics_mask_unknown_vec(vec);
@@ -326,12 +356,22 @@
 	return NO_IRQ;
 }
 
+static unsigned char pop_cppr(void)
+{
+	struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
+
+	if (WARN_ON(os_cppr->index < 1))
+		return LOWEST_PRIORITY;
+
+	return os_cppr->stack[--os_cppr->index];
+}
+
 static void xics_eoi_direct(unsigned int virq)
 {
 	unsigned int irq = (unsigned int)irq_map[virq].hwirq;
 
 	iosync();
-	direct_xirr_info_set((0xff << 24) | irq);
+	direct_xirr_info_set((pop_cppr() << 24) | irq);
 }
 
 static void xics_eoi_lpar(unsigned int virq)
@@ -339,7 +379,7 @@
 	unsigned int irq = (unsigned int)irq_map[virq].hwirq;
 
 	iosync();
-	lpar_xirr_info_set((0xff << 24) | irq);
+	lpar_xirr_info_set((pop_cppr() << 24) | irq);
 }
 
 static int xics_set_affinity(unsigned int virq, const struct cpumask *cpumask)
@@ -388,7 +428,7 @@
 }
 
 static struct irq_chip xics_pic_direct = {
-	.typename = " XICS     ",
+	.name = " XICS     ",
 	.startup = xics_startup,
 	.mask = xics_mask_irq,
 	.unmask = xics_unmask_irq,
@@ -397,7 +437,7 @@
 };
 
 static struct irq_chip xics_pic_lpar = {
-	.typename = " XICS     ",
+	.name = " XICS     ",
 	.startup = xics_startup,
 	.mask = xics_mask_irq,
 	.unmask = xics_unmask_irq,
@@ -428,13 +468,13 @@
 	/* Insert the interrupt mapping into the radix tree for fast lookup */
 	irq_radix_revmap_insert(xics_host, virq, hw);
 
-	get_irq_desc(virq)->status |= IRQ_LEVEL;
+	irq_to_desc(virq)->status |= IRQ_LEVEL;
 	set_irq_chip_and_handler(virq, xics_irq_chip, handle_fasteoi_irq);
 	return 0;
 }
 
 static int xics_host_xlate(struct irq_host *h, struct device_node *ct,
-			   u32 *intspec, unsigned int intsize,
+			   const u32 *intspec, unsigned int intsize,
 			   irq_hw_number_t *out_hwirq, unsigned int *out_flags)
 
 {
@@ -746,6 +786,12 @@
 
 static void xics_set_cpu_priority(unsigned char cppr)
 {
+	struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
+
+	BUG_ON(os_cppr->index != 0);
+
+	os_cppr->stack[os_cppr->index] = cppr;
+
 	if (firmware_has_feature(FW_FEATURE_LPAR))
 		lpar_cppr_info(cppr);
 	else
@@ -772,7 +818,7 @@
 
 void xics_setup_cpu(void)
 {
-	xics_set_cpu_priority(0xff);
+	xics_set_cpu_priority(LOWEST_PRIORITY);
 
 	xics_set_cpu_giq(default_distrib_server, 1);
 }
@@ -852,7 +898,7 @@
 		/* We need to get IPIs still. */
 		if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS)
 			continue;
-		desc = get_irq_desc(virq);
+		desc = irq_to_desc(virq);
 
 		/* We only need to migrate enabled IRQS */
 		if (desc == NULL || desc->chip == NULL
@@ -881,7 +927,7 @@
 		       virq, cpu);
 
 		/* Reset affinity to all cpus */
-		cpumask_setall(irq_desc[virq].affinity);
+		cpumask_setall(irq_to_desc(virq)->affinity);
 		desc->chip->set_affinity(virq, cpu_all_mask);
 unlock:
 		spin_unlock_irqrestore(&desc->lock, flags);
diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile
index 9d4b174..5642924 100644
--- a/arch/powerpc/sysdev/Makefile
+++ b/arch/powerpc/sysdev/Makefile
@@ -16,6 +16,7 @@
 obj-$(CONFIG_MMIO_NVRAM)	+= mmio_nvram.o
 obj-$(CONFIG_FSL_SOC)		+= fsl_soc.o
 obj-$(CONFIG_FSL_PCI)		+= fsl_pci.o $(fsl-msi-obj-y)
+obj-$(CONFIG_FSL_PMC)		+= fsl_pmc.o
 obj-$(CONFIG_FSL_LBC)		+= fsl_lbc.o
 obj-$(CONFIG_FSL_GTM)		+= fsl_gtm.o
 obj-$(CONFIG_MPC8xxx_GPIO)	+= mpc8xxx_gpio.o
diff --git a/arch/powerpc/sysdev/cpm1.c b/arch/powerpc/sysdev/cpm1.c
index 82424cd..a4b41db 100644
--- a/arch/powerpc/sysdev/cpm1.c
+++ b/arch/powerpc/sysdev/cpm1.c
@@ -77,7 +77,7 @@
 }
 
 static struct irq_chip cpm_pic = {
-	.typename = " CPM PIC ",
+	.name = " CPM PIC ",
 	.mask = cpm_mask_irq,
 	.unmask = cpm_unmask_irq,
 	.eoi = cpm_end_irq,
@@ -102,7 +102,7 @@
 {
 	pr_debug("cpm_pic_host_map(%d, 0x%lx)\n", virq, hw);
 
-	get_irq_desc(virq)->status |= IRQ_LEVEL;
+	irq_to_desc(virq)->status |= IRQ_LEVEL;
 	set_irq_chip_and_handler(virq, &cpm_pic, handle_fasteoi_irq);
 	return 0;
 }
diff --git a/arch/powerpc/sysdev/cpm2_pic.c b/arch/powerpc/sysdev/cpm2_pic.c
index 78f1f7c..971483f 100644
--- a/arch/powerpc/sysdev/cpm2_pic.c
+++ b/arch/powerpc/sysdev/cpm2_pic.c
@@ -115,11 +115,13 @@
 
 static void cpm2_end_irq(unsigned int virq)
 {
+	struct irq_desc *desc;
 	int	bit, word;
 	unsigned int irq_nr = virq_to_hw(virq);
 
-	if (!(irq_desc[irq_nr].status & (IRQ_DISABLED|IRQ_INPROGRESS))
-			&& irq_desc[irq_nr].action) {
+	desc = irq_to_desc(irq_nr);
+	if (!(desc->status & (IRQ_DISABLED|IRQ_INPROGRESS))
+			&& desc->action) {
 
 		bit = irq_to_siubit[irq_nr];
 		word = irq_to_siureg[irq_nr];
@@ -138,7 +140,7 @@
 static int cpm2_set_irq_type(unsigned int virq, unsigned int flow_type)
 {
 	unsigned int src = virq_to_hw(virq);
-	struct irq_desc *desc = get_irq_desc(virq);
+	struct irq_desc *desc = irq_to_desc(virq);
 	unsigned int vold, vnew, edibit;
 
 	if (flow_type == IRQ_TYPE_NONE)
@@ -182,7 +184,7 @@
 }
 
 static struct irq_chip cpm2_pic = {
-	.typename = " CPM2 SIU ",
+	.name = " CPM2 SIU ",
 	.mask = cpm2_mask_irq,
 	.unmask = cpm2_unmask_irq,
 	.ack = cpm2_ack,
@@ -210,13 +212,13 @@
 {
 	pr_debug("cpm2_pic_host_map(%d, 0x%lx)\n", virq, hw);
 
-	get_irq_desc(virq)->status |= IRQ_LEVEL;
+	irq_to_desc(virq)->status |= IRQ_LEVEL;
 	set_irq_chip_and_handler(virq, &cpm2_pic, handle_level_irq);
 	return 0;
 }
 
 static int cpm2_pic_host_xlate(struct irq_host *h, struct device_node *ct,
-			    u32 *intspec, unsigned int intsize,
+			    const u32 *intspec, unsigned int intsize,
 			    irq_hw_number_t *out_hwirq, unsigned int *out_flags)
 {
 	*out_hwirq = intspec[0];
diff --git a/arch/powerpc/sysdev/cpm_common.c b/arch/powerpc/sysdev/cpm_common.c
index e4b6d66..9de72c9 100644
--- a/arch/powerpc/sysdev/cpm_common.c
+++ b/arch/powerpc/sysdev/cpm_common.c
@@ -72,7 +72,7 @@
 /* Max address size we deal with */
 #define OF_MAX_ADDR_CELLS	4
 
-int __init cpm_muram_init(void)
+int cpm_muram_init(void)
 {
 	struct device_node *np;
 	struct resource r;
@@ -81,6 +81,9 @@
 	int i = 0;
 	int ret = 0;
 
+	if (muram_pbase)
+		return 0;
+
 	spin_lock_init(&cpm_muram_lock);
 	/* initialize the info header */
 	rh_init(&cpm_muram_info, 1,
diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c
index da38a1f..62e5025 100644
--- a/arch/powerpc/sysdev/fsl_msi.c
+++ b/arch/powerpc/sysdev/fsl_msi.c
@@ -47,7 +47,7 @@
 	.mask		= mask_msi_irq,
 	.unmask		= unmask_msi_irq,
 	.ack		= fsl_msi_end_irq,
-	.typename	= " FSL-MSI  ",
+	.name	= " FSL-MSI  ",
 };
 
 static int fsl_msi_host_map(struct irq_host *h, unsigned int virq,
@@ -55,7 +55,7 @@
 {
 	struct irq_chip *chip = &fsl_msi_chip;
 
-	get_irq_desc(virq)->status |= IRQ_TYPE_EDGE_FALLING;
+	irq_to_desc(virq)->status |= IRQ_TYPE_EDGE_FALLING;
 
 	set_irq_chip_and_handler(virq, chip, handle_edge_irq);
 
diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c
index ae88b14..4e3a3e3 100644
--- a/arch/powerpc/sysdev/fsl_pci.c
+++ b/arch/powerpc/sysdev/fsl_pci.c
@@ -56,7 +56,7 @@
 	return 0;
 }
 
-#if defined(CONFIG_PPC_85xx) || defined(CONFIG_PPC_86xx)
+#if defined(CONFIG_FSL_SOC_BOOKE) || defined(CONFIG_PPC_86xx)
 static int __init setup_one_atmu(struct ccsr_pci __iomem *pci,
 	unsigned int index, const struct resource *res,
 	resource_size_t offset)
@@ -392,9 +392,23 @@
 DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_MPC8641, quirk_fsl_pcie_header);
 DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_MPC8641D, quirk_fsl_pcie_header);
 DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_MPC8610, quirk_fsl_pcie_header);
+DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P1011E, quirk_fsl_pcie_header);
+DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P1011, quirk_fsl_pcie_header);
+DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P1013E, quirk_fsl_pcie_header);
+DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P1013, quirk_fsl_pcie_header);
+DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P1020E, quirk_fsl_pcie_header);
+DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P1020, quirk_fsl_pcie_header);
+DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P1022E, quirk_fsl_pcie_header);
+DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P1022, quirk_fsl_pcie_header);
+DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P2010E, quirk_fsl_pcie_header);
+DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P2010, quirk_fsl_pcie_header);
 DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P2020E, quirk_fsl_pcie_header);
 DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P2020, quirk_fsl_pcie_header);
-#endif /* CONFIG_PPC_85xx || CONFIG_PPC_86xx */
+DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P4040E, quirk_fsl_pcie_header);
+DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P4040, quirk_fsl_pcie_header);
+DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P4080E, quirk_fsl_pcie_header);
+DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P4080, quirk_fsl_pcie_header);
+#endif /* CONFIG_FSL_SOC_BOOKE || CONFIG_PPC_86xx */
 
 #if defined(CONFIG_PPC_83xx) || defined(CONFIG_PPC_MPC512x)
 DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_MPC8314E, quirk_fsl_pcie_header);
diff --git a/arch/powerpc/sysdev/fsl_pmc.c b/arch/powerpc/sysdev/fsl_pmc.c
new file mode 100644
index 0000000..a7635a9
--- /dev/null
+++ b/arch/powerpc/sysdev/fsl_pmc.c
@@ -0,0 +1,88 @@
+/*
+ * Suspend/resume support
+ *
+ * Copyright 2009  MontaVista Software, Inc.
+ *
+ * Author: Anton Vorontsov <avorontsov@ru.mvista.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/suspend.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/of_platform.h>
+
+struct pmc_regs {
+	__be32 devdisr;
+	__be32 devdisr2;
+	__be32 :32;
+	__be32 :32;
+	__be32 pmcsr;
+#define PMCSR_SLP	(1 << 17)
+};
+
+static struct device *pmc_dev;
+static struct pmc_regs __iomem *pmc_regs;
+
+static int pmc_suspend_enter(suspend_state_t state)
+{
+	int ret;
+
+	setbits32(&pmc_regs->pmcsr, PMCSR_SLP);
+	/* At this point, the CPU is asleep. */
+
+	/* Upon resume, wait for SLP bit to be clear. */
+	ret = spin_event_timeout((in_be32(&pmc_regs->pmcsr) & PMCSR_SLP) == 0,
+				 10000, 10) ? 0 : -ETIMEDOUT;
+	if (ret)
+		dev_err(pmc_dev, "tired waiting for SLP bit to clear\n");
+	return ret;
+}
+
+static int pmc_suspend_valid(suspend_state_t state)
+{
+	if (state != PM_SUSPEND_STANDBY)
+		return 0;
+	return 1;
+}
+
+static struct platform_suspend_ops pmc_suspend_ops = {
+	.valid = pmc_suspend_valid,
+	.enter = pmc_suspend_enter,
+};
+
+static int pmc_probe(struct of_device *ofdev, const struct of_device_id *id)
+{
+	pmc_regs = of_iomap(ofdev->node, 0);
+	if (!pmc_regs)
+		return -ENOMEM;
+
+	pmc_dev = &ofdev->dev;
+	suspend_set_ops(&pmc_suspend_ops);
+	return 0;
+}
+
+static const struct of_device_id pmc_ids[] = {
+	{ .compatible = "fsl,mpc8548-pmc", },
+	{ .compatible = "fsl,mpc8641d-pmc", },
+	{ },
+};
+
+static struct of_platform_driver pmc_driver = {
+	.driver.name = "fsl-pmc",
+	.match_table = pmc_ids,
+	.probe = pmc_probe,
+};
+
+static int __init pmc_init(void)
+{
+	return of_register_platform_driver(&pmc_driver);
+}
+device_initcall(pmc_init);
diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c
index adca4af..b91f7ac 100644
--- a/arch/powerpc/sysdev/fsl_soc.c
+++ b/arch/powerpc/sysdev/fsl_soc.c
@@ -372,7 +372,7 @@
 
 arch_initcall(fsl_usb_of_init);
 
-#if defined(CONFIG_PPC_85xx) || defined(CONFIG_PPC_86xx)
+#if defined(CONFIG_FSL_SOC_BOOKE) || defined(CONFIG_PPC_86xx)
 static __be32 __iomem *rstcr;
 
 static int __init setup_rstcr(void)
diff --git a/arch/powerpc/sysdev/i8259.c b/arch/powerpc/sysdev/i8259.c
index a96584a..0a55db8 100644
--- a/arch/powerpc/sysdev/i8259.c
+++ b/arch/powerpc/sysdev/i8259.c
@@ -135,7 +135,7 @@
 }
 
 static struct irq_chip i8259_pic = {
-	.typename	= " i8259    ",
+	.name		= " i8259    ",
 	.mask		= i8259_mask_irq,
 	.disable	= i8259_mask_irq,
 	.unmask		= i8259_unmask_irq,
@@ -175,12 +175,12 @@
 
 	/* We block the internal cascade */
 	if (hw == 2)
-		get_irq_desc(virq)->status |= IRQ_NOREQUEST;
+		irq_to_desc(virq)->status |= IRQ_NOREQUEST;
 
 	/* We use the level handler only for now, we might want to
 	 * be more cautious here but that works for now
 	 */
-	get_irq_desc(virq)->status |= IRQ_LEVEL;
+	irq_to_desc(virq)->status |= IRQ_LEVEL;
 	set_irq_chip_and_handler(virq, &i8259_pic, handle_level_irq);
 	return 0;
 }
@@ -198,7 +198,7 @@
 }
 
 static int i8259_host_xlate(struct irq_host *h, struct device_node *ct,
-			    u32 *intspec, unsigned int intsize,
+			    const u32 *intspec, unsigned int intsize,
 			    irq_hw_number_t *out_hwirq, unsigned int *out_flags)
 {
 	static unsigned char map_isa_senses[4] = {
diff --git a/arch/powerpc/sysdev/ipic.c b/arch/powerpc/sysdev/ipic.c
index cb7689c..28cdddd 100644
--- a/arch/powerpc/sysdev/ipic.c
+++ b/arch/powerpc/sysdev/ipic.c
@@ -605,7 +605,7 @@
 {
 	struct ipic *ipic = ipic_from_irq(virq);
 	unsigned int src = ipic_irq_to_hw(virq);
-	struct irq_desc *desc = get_irq_desc(virq);
+	struct irq_desc *desc = irq_to_desc(virq);
 	unsigned int vold, vnew, edibit;
 
 	if (flow_type == IRQ_TYPE_NONE)
@@ -660,7 +660,7 @@
 
 /* level interrupts and edge interrupts have different ack operations */
 static struct irq_chip ipic_level_irq_chip = {
-	.typename	= " IPIC  ",
+	.name		= " IPIC  ",
 	.unmask		= ipic_unmask_irq,
 	.mask		= ipic_mask_irq,
 	.mask_ack	= ipic_mask_irq,
@@ -668,7 +668,7 @@
 };
 
 static struct irq_chip ipic_edge_irq_chip = {
-	.typename	= " IPIC  ",
+	.name		= " IPIC  ",
 	.unmask		= ipic_unmask_irq,
 	.mask		= ipic_mask_irq,
 	.mask_ack	= ipic_mask_irq_and_ack,
@@ -697,7 +697,7 @@
 }
 
 static int ipic_host_xlate(struct irq_host *h, struct device_node *ct,
-			   u32 *intspec, unsigned int intsize,
+			   const u32 *intspec, unsigned int intsize,
 			   irq_hw_number_t *out_hwirq, unsigned int *out_flags)
 
 {
diff --git a/arch/powerpc/sysdev/mpc8xx_pic.c b/arch/powerpc/sysdev/mpc8xx_pic.c
index 5d2d552..69bd6f4 100644
--- a/arch/powerpc/sysdev/mpc8xx_pic.c
+++ b/arch/powerpc/sysdev/mpc8xx_pic.c
@@ -72,7 +72,7 @@
 
 static int mpc8xx_set_irq_type(unsigned int virq, unsigned int flow_type)
 {
-	struct irq_desc *desc = get_irq_desc(virq);
+	struct irq_desc *desc = irq_to_desc(virq);
 
 	desc->status &= ~(IRQ_TYPE_SENSE_MASK | IRQ_LEVEL);
 	desc->status |= flow_type & IRQ_TYPE_SENSE_MASK;
@@ -94,7 +94,7 @@
 }
 
 static struct irq_chip mpc8xx_pic = {
-	.typename = " MPC8XX SIU ",
+	.name = " MPC8XX SIU ",
 	.unmask = mpc8xx_unmask_irq,
 	.mask = mpc8xx_mask_irq,
 	.ack = mpc8xx_ack,
@@ -130,7 +130,7 @@
 
 
 static int mpc8xx_pic_host_xlate(struct irq_host *h, struct device_node *ct,
-			    u32 *intspec, unsigned int intsize,
+			    const u32 *intspec, unsigned int intsize,
 			    irq_hw_number_t *out_hwirq, unsigned int *out_flags)
 {
 	static unsigned char map_pic_senses[4] = {
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index 30c44e6..aa9d06e 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -572,7 +572,7 @@
 	cpumask_t mask;
 	int cpuid;
 
-	cpumask_copy(&mask, irq_desc[virt_irq].affinity);
+	cpumask_copy(&mask, irq_to_desc(virt_irq)->affinity);
 	if (cpus_equal(mask, CPU_MASK_ALL)) {
 		static int irq_rover;
 		static DEFINE_SPINLOCK(irq_rover_lock);
@@ -621,7 +621,7 @@
 	if (irq < NUM_ISA_INTERRUPTS)
 		return NULL;
 
-	return irq_desc[irq].chip_data;
+	return irq_to_desc(irq)->chip_data;
 }
 
 /* Determine if the linux irq is an IPI */
@@ -648,14 +648,14 @@
 /* Get the mpic structure from the IPI number */
 static inline struct mpic * mpic_from_ipi(unsigned int ipi)
 {
-	return irq_desc[ipi].chip_data;
+	return irq_to_desc(ipi)->chip_data;
 }
 #endif
 
 /* Get the mpic structure from the irq number */
 static inline struct mpic * mpic_from_irq(unsigned int irq)
 {
-	return irq_desc[irq].chip_data;
+	return irq_to_desc(irq)->chip_data;
 }
 
 /* Send an EOI */
@@ -735,7 +735,7 @@
 
 	mpic_unmask_irq(irq);
 
-	if (irq_desc[irq].status & IRQ_LEVEL)
+	if (irq_to_desc(irq)->status & IRQ_LEVEL)
 		mpic_ht_end_irq(mpic, src);
 }
 
@@ -745,7 +745,7 @@
 	unsigned int src = mpic_irq_to_hw(irq);
 
 	mpic_unmask_irq(irq);
-	mpic_startup_ht_interrupt(mpic, src, irq_desc[irq].status);
+	mpic_startup_ht_interrupt(mpic, src, irq_to_desc(irq)->status);
 
 	return 0;
 }
@@ -755,7 +755,7 @@
 	struct mpic *mpic = mpic_from_irq(irq);
 	unsigned int src = mpic_irq_to_hw(irq);
 
-	mpic_shutdown_ht_interrupt(mpic, src, irq_desc[irq].status);
+	mpic_shutdown_ht_interrupt(mpic, src, irq_to_desc(irq)->status);
 	mpic_mask_irq(irq);
 }
 
@@ -772,7 +772,7 @@
 	 * latched another edge interrupt coming in anyway
 	 */
 
-	if (irq_desc[irq].status & IRQ_LEVEL)
+	if (irq_to_desc(irq)->status & IRQ_LEVEL)
 		mpic_ht_end_irq(mpic, src);
 	mpic_eoi(mpic);
 }
@@ -856,7 +856,7 @@
 {
 	struct mpic *mpic = mpic_from_irq(virq);
 	unsigned int src = mpic_irq_to_hw(virq);
-	struct irq_desc *desc = get_irq_desc(virq);
+	struct irq_desc *desc = irq_to_desc(virq);
 	unsigned int vecpri, vold, vnew;
 
 	DBG("mpic: set_irq_type(mpic:@%p,virq:%d,src:0x%x,type:0x%x)\n",
@@ -994,7 +994,7 @@
 }
 
 static int mpic_host_xlate(struct irq_host *h, struct device_node *ct,
-			   u32 *intspec, unsigned int intsize,
+			   const u32 *intspec, unsigned int intsize,
 			   irq_hw_number_t *out_hwirq, unsigned int *out_flags)
 
 {
@@ -1062,19 +1062,19 @@
 	mpic->name = name;
 
 	mpic->hc_irq = mpic_irq_chip;
-	mpic->hc_irq.typename = name;
+	mpic->hc_irq.name = name;
 	if (flags & MPIC_PRIMARY)
 		mpic->hc_irq.set_affinity = mpic_set_affinity;
 #ifdef CONFIG_MPIC_U3_HT_IRQS
 	mpic->hc_ht_irq = mpic_irq_ht_chip;
-	mpic->hc_ht_irq.typename = name;
+	mpic->hc_ht_irq.name = name;
 	if (flags & MPIC_PRIMARY)
 		mpic->hc_ht_irq.set_affinity = mpic_set_affinity;
 #endif /* CONFIG_MPIC_U3_HT_IRQS */
 
 #ifdef CONFIG_SMP
 	mpic->hc_ipi = mpic_ipi_chip;
-	mpic->hc_ipi.typename = name;
+	mpic->hc_ipi.name = name;
 #endif /* CONFIG_SMP */
 
 	mpic->flags = flags;
diff --git a/arch/powerpc/sysdev/mpic_pasemi_msi.c b/arch/powerpc/sysdev/mpic_pasemi_msi.c
index 656cb77..0f6ab06 100644
--- a/arch/powerpc/sysdev/mpic_pasemi_msi.c
+++ b/arch/powerpc/sysdev/mpic_pasemi_msi.c
@@ -60,7 +60,7 @@
 	.eoi		= mpic_end_irq,
 	.set_type	= mpic_set_irq_type,
 	.set_affinity	= mpic_set_affinity,
-	.typename	= "PASEMI-MSI ",
+	.name		= "PASEMI-MSI ",
 };
 
 static int pasemi_msi_check_device(struct pci_dev *pdev, int nvec, int type)
diff --git a/arch/powerpc/sysdev/mpic_u3msi.c b/arch/powerpc/sysdev/mpic_u3msi.c
index 0a8f5a9..d3caf23 100644
--- a/arch/powerpc/sysdev/mpic_u3msi.c
+++ b/arch/powerpc/sysdev/mpic_u3msi.c
@@ -42,7 +42,7 @@
 	.eoi		= mpic_end_irq,
 	.set_type	= mpic_set_irq_type,
 	.set_affinity	= mpic_set_affinity,
-	.typename	= "MPIC-U3MSI",
+	.name		= "MPIC-U3MSI",
 };
 
 static u64 read_ht_magic_addr(struct pci_dev *pdev, unsigned int pos)
diff --git a/arch/powerpc/sysdev/mv64x60_pic.c b/arch/powerpc/sysdev/mv64x60_pic.c
index 2aa4ed0..485b924 100644
--- a/arch/powerpc/sysdev/mv64x60_pic.c
+++ b/arch/powerpc/sysdev/mv64x60_pic.c
@@ -213,7 +213,7 @@
 {
 	int level1;
 
-	get_irq_desc(virq)->status |= IRQ_LEVEL;
+	irq_to_desc(virq)->status |= IRQ_LEVEL;
 
 	level1 = (hwirq & MV64x60_LEVEL1_MASK) >> MV64x60_LEVEL1_OFFSET;
 	BUG_ON(level1 > MV64x60_LEVEL1_GPP);
diff --git a/arch/powerpc/sysdev/qe_lib/qe.c b/arch/powerpc/sysdev/qe_lib/qe.c
index 464271b..149393c 100644
--- a/arch/powerpc/sysdev/qe_lib/qe.c
+++ b/arch/powerpc/sysdev/qe_lib/qe.c
@@ -27,6 +27,8 @@
 #include <linux/delay.h>
 #include <linux/ioport.h>
 #include <linux/crc32.h>
+#include <linux/mod_devicetable.h>
+#include <linux/of_platform.h>
 #include <asm/irq.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
@@ -65,19 +67,6 @@
 
 static phys_addr_t qebase = -1;
 
-int qe_alive_during_sleep(void)
-{
-	static int ret = -1;
-
-	if (ret != -1)
-		return ret;
-
-	ret = !of_find_compatible_node(NULL, NULL, "fsl,mpc8569-pmc");
-
-	return ret;
-}
-EXPORT_SYMBOL(qe_alive_during_sleep);
-
 phys_addr_t get_qe_base(void)
 {
 	struct device_node *qe;
@@ -104,7 +93,7 @@
 
 EXPORT_SYMBOL(get_qe_base);
 
-void __init qe_reset(void)
+void qe_reset(void)
 {
 	if (qe_immr == NULL)
 		qe_immr = ioremap(get_qe_base(), QE_IMMAP_SIZE);
@@ -330,16 +319,18 @@
 static int qe_sdma_init(void)
 {
 	struct sdma __iomem *sdma = &qe_immr->sdma;
-	unsigned long sdma_buf_offset;
+	static unsigned long sdma_buf_offset = (unsigned long)-ENOMEM;
 
 	if (!sdma)
 		return -ENODEV;
 
 	/* allocate 2 internal temporary buffers (512 bytes size each) for
 	 * the SDMA */
- 	sdma_buf_offset = qe_muram_alloc(512 * 2, 4096);
-	if (IS_ERR_VALUE(sdma_buf_offset))
-		return -ENOMEM;
+	if (IS_ERR_VALUE(sdma_buf_offset)) {
+		sdma_buf_offset = qe_muram_alloc(512 * 2, 4096);
+		if (IS_ERR_VALUE(sdma_buf_offset))
+			return -ENOMEM;
+	}
 
 	out_be32(&sdma->sdebcr, (u32) sdma_buf_offset & QE_SDEBCR_BA_MASK);
  	out_be32(&sdma->sdmr, (QE_SDMR_GLB_1_MSK |
@@ -349,7 +340,7 @@
 }
 
 /* The maximum number of RISCs we support */
-#define MAX_QE_RISC     2
+#define MAX_QE_RISC     4
 
 /* Firmware information stored here for qe_get_firmware_info() */
 static struct qe_firmware_info qe_firmware_info;
@@ -658,3 +649,35 @@
 	return num_of_snums;
 }
 EXPORT_SYMBOL(qe_get_num_of_snums);
+
+#if defined(CONFIG_SUSPEND) && defined(CONFIG_PPC_85xx)
+static int qe_resume(struct of_device *ofdev)
+{
+	if (!qe_alive_during_sleep())
+		qe_reset();
+	return 0;
+}
+
+static int qe_probe(struct of_device *ofdev, const struct of_device_id *id)
+{
+	return 0;
+}
+
+static const struct of_device_id qe_ids[] = {
+	{ .compatible = "fsl,qe", },
+	{ },
+};
+
+static struct of_platform_driver qe_driver = {
+	.driver.name = "fsl-qe",
+	.match_table = qe_ids,
+	.probe = qe_probe,
+	.resume = qe_resume,
+};
+
+static int __init qe_drv_init(void)
+{
+	return of_register_platform_driver(&qe_driver);
+}
+device_initcall(qe_drv_init);
+#endif /* defined(CONFIG_SUSPEND) && defined(CONFIG_PPC_85xx) */
diff --git a/arch/powerpc/sysdev/qe_lib/qe_ic.c b/arch/powerpc/sysdev/qe_lib/qe_ic.c
index 3faa42e..2acc928 100644
--- a/arch/powerpc/sysdev/qe_lib/qe_ic.c
+++ b/arch/powerpc/sysdev/qe_lib/qe_ic.c
@@ -189,7 +189,7 @@
 
 static inline struct qe_ic *qe_ic_from_irq(unsigned int virq)
 {
-	return irq_desc[virq].chip_data;
+	return irq_to_desc(virq)->chip_data;
 }
 
 #define virq_to_hw(virq)	((unsigned int)irq_map[virq].hwirq)
@@ -237,7 +237,7 @@
 }
 
 static struct irq_chip qe_ic_irq_chip = {
-	.typename = " QEIC  ",
+	.name = " QEIC  ",
 	.unmask = qe_ic_unmask_irq,
 	.mask = qe_ic_mask_irq,
 	.mask_ack = qe_ic_mask_irq,
@@ -263,7 +263,7 @@
 	chip = &qe_ic->hc_irq;
 
 	set_irq_chip_data(virq, qe_ic);
-	get_irq_desc(virq)->status |= IRQ_LEVEL;
+	irq_to_desc(virq)->status |= IRQ_LEVEL;
 
 	set_irq_chip_and_handler(virq, chip, handle_level_irq);
 
@@ -271,7 +271,7 @@
 }
 
 static int qe_ic_host_xlate(struct irq_host *h, struct device_node *ct,
-			    u32 * intspec, unsigned int intsize,
+			    const u32 * intspec, unsigned int intsize,
 			    irq_hw_number_t * out_hwirq,
 			    unsigned int *out_flags)
 {
diff --git a/arch/powerpc/sysdev/tsi108_pci.c b/arch/powerpc/sysdev/tsi108_pci.c
index cf244a4..595034c 100644
--- a/arch/powerpc/sysdev/tsi108_pci.c
+++ b/arch/powerpc/sysdev/tsi108_pci.c
@@ -376,7 +376,7 @@
  */
 
 static struct irq_chip tsi108_pci_irq = {
-	.typename = "tsi108_PCI_int",
+	.name = "tsi108_PCI_int",
 	.mask = tsi108_pci_irq_disable,
 	.ack = tsi108_pci_irq_ack,
 	.end = tsi108_pci_irq_end,
@@ -384,7 +384,7 @@
 };
 
 static int pci_irq_host_xlate(struct irq_host *h, struct device_node *ct,
-			    u32 *intspec, unsigned int intsize,
+			    const u32 *intspec, unsigned int intsize,
 			    irq_hw_number_t *out_hwirq, unsigned int *out_flags)
 {
 	*out_hwirq = intspec[0];
@@ -398,7 +398,7 @@
 	DBG("%s(%d, 0x%lx)\n", __func__, virq, hw);
 	if ((virq >= 1) && (virq <= 4)){
 		irq = virq + IRQ_PCI_INTAD_BASE - 1;
-		get_irq_desc(irq)->status |= IRQ_LEVEL;
+		irq_to_desc(irq)->status |= IRQ_LEVEL;
 		set_irq_chip(irq, &tsi108_pci_irq);
 	}
 	return 0;
diff --git a/arch/powerpc/sysdev/uic.c b/arch/powerpc/sysdev/uic.c
index 466ce9a..7d10074 100644
--- a/arch/powerpc/sysdev/uic.c
+++ b/arch/powerpc/sysdev/uic.c
@@ -57,7 +57,7 @@
 
 static void uic_unmask_irq(unsigned int virq)
 {
-	struct irq_desc *desc = get_irq_desc(virq);
+	struct irq_desc *desc = irq_to_desc(virq);
 	struct uic *uic = get_irq_chip_data(virq);
 	unsigned int src = uic_irq_to_hw(virq);
 	unsigned long flags;
@@ -101,7 +101,7 @@
 
 static void uic_mask_ack_irq(unsigned int virq)
 {
-	struct irq_desc *desc = get_irq_desc(virq);
+	struct irq_desc *desc = irq_to_desc(virq);
 	struct uic *uic = get_irq_chip_data(virq);
 	unsigned int src = uic_irq_to_hw(virq);
 	unsigned long flags;
@@ -129,7 +129,7 @@
 {
 	struct uic *uic = get_irq_chip_data(virq);
 	unsigned int src = uic_irq_to_hw(virq);
-	struct irq_desc *desc = get_irq_desc(virq);
+	struct irq_desc *desc = irq_to_desc(virq);
 	unsigned long flags;
 	int trigger, polarity;
 	u32 tr, pr, mask;
@@ -177,7 +177,7 @@
 }
 
 static struct irq_chip uic_irq_chip = {
-	.typename	= " UIC  ",
+	.name		= " UIC  ",
 	.unmask		= uic_unmask_irq,
 	.mask		= uic_mask_irq,
  	.mask_ack	= uic_mask_ack_irq,
@@ -202,7 +202,7 @@
 }
 
 static int uic_host_xlate(struct irq_host *h, struct device_node *ct,
-			  u32 *intspec, unsigned int intsize,
+			  const u32 *intspec, unsigned int intsize,
 			  irq_hw_number_t *out_hwirq, unsigned int *out_type)
 
 {
diff --git a/arch/powerpc/sysdev/xilinx_intc.c b/arch/powerpc/sysdev/xilinx_intc.c
index 40edad5..1e0ccfa 100644
--- a/arch/powerpc/sysdev/xilinx_intc.c
+++ b/arch/powerpc/sysdev/xilinx_intc.c
@@ -79,7 +79,7 @@
 
 static int xilinx_intc_set_type(unsigned int virq, unsigned int flow_type)
 {
-	struct irq_desc *desc = get_irq_desc(virq);
+	struct irq_desc *desc = irq_to_desc(virq);
 
 	desc->status &= ~(IRQ_TYPE_SENSE_MASK | IRQ_LEVEL);
 	desc->status |= flow_type & IRQ_TYPE_SENSE_MASK;
@@ -106,7 +106,7 @@
 }
 
 static struct irq_chip xilinx_intc_level_irqchip = {
-	.typename = "Xilinx Level INTC",
+	.name = "Xilinx Level INTC",
 	.mask = xilinx_intc_mask,
 	.mask_ack = xilinx_intc_mask,
 	.unmask = xilinx_intc_level_unmask,
@@ -133,7 +133,7 @@
 }
 
 static struct irq_chip xilinx_intc_edge_irqchip = {
-	.typename = "Xilinx Edge  INTC",
+	.name = "Xilinx Edge  INTC",
 	.mask = xilinx_intc_mask,
 	.unmask = xilinx_intc_edge_unmask,
 	.ack = xilinx_intc_edge_ack,
@@ -148,7 +148,7 @@
  * xilinx_intc_xlate - translate virq# from device tree interrupts property
  */
 static int xilinx_intc_xlate(struct irq_host *h, struct device_node *ct,
-				u32 *intspec, unsigned int intsize,
+				const u32 *intspec, unsigned int intsize,
 				irq_hw_number_t *out_hwirq,
 				unsigned int *out_flags)
 {
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index bdbe96c..4e6152c 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -1641,7 +1641,8 @@
 			       ptrLpPaca->saved_srr0, ptrLpPaca->saved_srr1);
 			printf("    Saved Gpr3=%.16lx  Saved Gpr4=%.16lx \n",
 			       ptrLpPaca->saved_gpr3, ptrLpPaca->saved_gpr4);
-			printf("    Saved Gpr5=%.16lx \n", ptrLpPaca->saved_gpr5);
+			printf("    Saved Gpr5=%.16lx \n",
+				ptrLpPaca->gpr5_dword.saved_gpr5);
 		}
 #endif
 
diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig
index 676f08b..85844d0 100644
--- a/drivers/ata/Kconfig
+++ b/drivers/ata/Kconfig
@@ -790,5 +790,15 @@
 
 	  If unsure, say N.
 
+config PATA_MACIO
+	tristate "Apple PowerMac/PowerBook internal 'MacIO' IDE"
+	depends on PPC_PMAC
+	help
+	  Most IDE capable PowerMacs have IDE busses driven by a variant
+          of this controller which is part of the Apple chipset used on
+          most PowerMac models. Some models have multiple busses using
+          different chipsets, though generally, MacIO is one of them.
+
+
 endif # ATA_SFF
 endif # ATA
diff --git a/drivers/ata/Makefile b/drivers/ata/Makefile
index d909435..fc936d4 100644
--- a/drivers/ata/Makefile
+++ b/drivers/ata/Makefile
@@ -18,6 +18,7 @@
 obj-$(CONFIG_SATA_INIC162X)	+= sata_inic162x.o
 obj-$(CONFIG_PDC_ADMA)		+= pdc_adma.o
 obj-$(CONFIG_SATA_FSL)		+= sata_fsl.o
+obj-$(CONFIG_PATA_MACIO)	+= pata_macio.o
 
 obj-$(CONFIG_PATA_ALI)		+= pata_ali.o
 obj-$(CONFIG_PATA_AMD)		+= pata_amd.o
diff --git a/drivers/ata/pata_macio.c b/drivers/ata/pata_macio.c
new file mode 100644
index 0000000..4cc7bbd
--- /dev/null
+++ b/drivers/ata/pata_macio.c
@@ -0,0 +1,1427 @@
+/*
+ * Libata based driver for Apple "macio" family of PATA controllers
+ *
+ * Copyright 2008/2009 Benjamin Herrenschmidt, IBM Corp
+ *                     <benh@kernel.crashing.org>
+ *
+ * Some bits and pieces from drivers/ide/ppc/pmac.c
+ *
+ */
+
+#undef DEBUG
+#undef DEBUG_DMA
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/blkdev.h>
+#include <linux/ata.h>
+#include <linux/libata.h>
+#include <linux/adb.h>
+#include <linux/pmu.h>
+#include <linux/scatterlist.h>
+#include <linux/of.h>
+
+#include <scsi/scsi.h>
+#include <scsi/scsi_host.h>
+#include <scsi/scsi_device.h>
+
+#include <asm/macio.h>
+#include <asm/io.h>
+#include <asm/dbdma.h>
+#include <asm/pci-bridge.h>
+#include <asm/machdep.h>
+#include <asm/pmac_feature.h>
+#include <asm/mediabay.h>
+
+#ifdef DEBUG_DMA
+#define dev_dbgdma(dev, format, arg...)		\
+	dev_printk(KERN_DEBUG , dev , format , ## arg)
+#else
+#define dev_dbgdma(dev, format, arg...)		\
+	({ if (0) dev_printk(KERN_DEBUG, dev, format, ##arg); 0; })
+#endif
+
+#define DRV_NAME	"pata_macio"
+#define DRV_VERSION	"0.9"
+
+/* Models of macio ATA controller */
+enum {
+	controller_ohare,	/* OHare based */
+	controller_heathrow,	/* Heathrow/Paddington */
+	controller_kl_ata3,	/* KeyLargo ATA-3 */
+	controller_kl_ata4,	/* KeyLargo ATA-4 */
+	controller_un_ata6,	/* UniNorth2 ATA-6 */
+	controller_k2_ata6,	/* K2 ATA-6 */
+	controller_sh_ata6,	/* Shasta ATA-6 */
+};
+
+static const char* macio_ata_names[] = {
+	"OHare ATA",		/* OHare based */
+	"Heathrow ATA",		/* Heathrow/Paddington */
+	"KeyLargo ATA-3",	/* KeyLargo ATA-3 (MDMA only) */
+	"KeyLargo ATA-4",	/* KeyLargo ATA-4 (UDMA/66) */
+	"UniNorth ATA-6",	/* UniNorth2 ATA-6 (UDMA/100) */
+	"K2 ATA-6",		/* K2 ATA-6 (UDMA/100) */
+	"Shasta ATA-6",		/* Shasta ATA-6 (UDMA/133) */
+};
+
+/*
+ * Extra registers, both 32-bit little-endian
+ */
+#define IDE_TIMING_CONFIG	0x200
+#define IDE_INTERRUPT		0x300
+
+/* Kauai (U2) ATA has different register setup */
+#define IDE_KAUAI_PIO_CONFIG	0x200
+#define IDE_KAUAI_ULTRA_CONFIG	0x210
+#define IDE_KAUAI_POLL_CONFIG	0x220
+
+/*
+ * Timing configuration register definitions
+ */
+
+/* Number of IDE_SYSCLK_NS ticks, argument is in nanoseconds */
+#define SYSCLK_TICKS(t)		(((t) + IDE_SYSCLK_NS - 1) / IDE_SYSCLK_NS)
+#define SYSCLK_TICKS_66(t)	(((t) + IDE_SYSCLK_66_NS - 1) / IDE_SYSCLK_66_NS)
+#define IDE_SYSCLK_NS		30	/* 33Mhz cell */
+#define IDE_SYSCLK_66_NS	15	/* 66Mhz cell */
+
+/* 133Mhz cell, found in shasta.
+ * See comments about 100 Mhz Uninorth 2...
+ * Note that PIO_MASK and MDMA_MASK seem to overlap, that's just
+ * weird and I don't now why .. at this stage
+ */
+#define TR_133_PIOREG_PIO_MASK		0xff000fff
+#define TR_133_PIOREG_MDMA_MASK		0x00fff800
+#define TR_133_UDMAREG_UDMA_MASK	0x0003ffff
+#define TR_133_UDMAREG_UDMA_EN		0x00000001
+
+/* 100Mhz cell, found in Uninorth 2 and K2. It appears as a pci device
+ * (106b/0033) on uninorth or K2 internal PCI bus and it's clock is
+ * controlled like gem or fw. It appears to be an evolution of keylargo
+ * ATA4 with a timing register extended to 2x32bits registers (one
+ * for PIO & MWDMA and one for UDMA, and a similar DBDMA channel.
+ * It has it's own local feature control register as well.
+ *
+ * After scratching my mind over the timing values, at least for PIO
+ * and MDMA, I think I've figured the format of the timing register,
+ * though I use pre-calculated tables for UDMA as usual...
+ */
+#define TR_100_PIO_ADDRSETUP_MASK	0xff000000 /* Size of field unknown */
+#define TR_100_PIO_ADDRSETUP_SHIFT	24
+#define TR_100_MDMA_MASK		0x00fff000
+#define TR_100_MDMA_RECOVERY_MASK	0x00fc0000
+#define TR_100_MDMA_RECOVERY_SHIFT	18
+#define TR_100_MDMA_ACCESS_MASK		0x0003f000
+#define TR_100_MDMA_ACCESS_SHIFT	12
+#define TR_100_PIO_MASK			0xff000fff
+#define TR_100_PIO_RECOVERY_MASK	0x00000fc0
+#define TR_100_PIO_RECOVERY_SHIFT	6
+#define TR_100_PIO_ACCESS_MASK		0x0000003f
+#define TR_100_PIO_ACCESS_SHIFT		0
+
+#define TR_100_UDMAREG_UDMA_MASK	0x0000ffff
+#define TR_100_UDMAREG_UDMA_EN		0x00000001
+
+
+/* 66Mhz cell, found in KeyLargo. Can do ultra mode 0 to 2 on
+ * 40 connector cable and to 4 on 80 connector one.
+ * Clock unit is 15ns (66Mhz)
+ *
+ * 3 Values can be programmed:
+ *  - Write data setup, which appears to match the cycle time. They
+ *    also call it DIOW setup.
+ *  - Ready to pause time (from spec)
+ *  - Address setup. That one is weird. I don't see where exactly
+ *    it fits in UDMA cycles, I got it's name from an obscure piece
+ *    of commented out code in Darwin. They leave it to 0, we do as
+ *    well, despite a comment that would lead to think it has a
+ *    min value of 45ns.
+ * Apple also add 60ns to the write data setup (or cycle time ?) on
+ * reads.
+ */
+#define TR_66_UDMA_MASK			0xfff00000
+#define TR_66_UDMA_EN			0x00100000 /* Enable Ultra mode for DMA */
+#define TR_66_PIO_ADDRSETUP_MASK	0xe0000000 /* Address setup */
+#define TR_66_PIO_ADDRSETUP_SHIFT	29
+#define TR_66_UDMA_RDY2PAUS_MASK	0x1e000000 /* Ready 2 pause time */
+#define TR_66_UDMA_RDY2PAUS_SHIFT	25
+#define TR_66_UDMA_WRDATASETUP_MASK	0x01e00000 /* Write data setup time */
+#define TR_66_UDMA_WRDATASETUP_SHIFT	21
+#define TR_66_MDMA_MASK			0x000ffc00
+#define TR_66_MDMA_RECOVERY_MASK	0x000f8000
+#define TR_66_MDMA_RECOVERY_SHIFT	15
+#define TR_66_MDMA_ACCESS_MASK		0x00007c00
+#define TR_66_MDMA_ACCESS_SHIFT		10
+#define TR_66_PIO_MASK			0xe00003ff
+#define TR_66_PIO_RECOVERY_MASK		0x000003e0
+#define TR_66_PIO_RECOVERY_SHIFT	5
+#define TR_66_PIO_ACCESS_MASK		0x0000001f
+#define TR_66_PIO_ACCESS_SHIFT		0
+
+/* 33Mhz cell, found in OHare, Heathrow (& Paddington) and KeyLargo
+ * Can do pio & mdma modes, clock unit is 30ns (33Mhz)
+ *
+ * The access time and recovery time can be programmed. Some older
+ * Darwin code base limit OHare to 150ns cycle time. I decided to do
+ * the same here fore safety against broken old hardware ;)
+ * The HalfTick bit, when set, adds half a clock (15ns) to the access
+ * time and removes one from recovery. It's not supported on KeyLargo
+ * implementation afaik. The E bit appears to be set for PIO mode 0 and
+ * is used to reach long timings used in this mode.
+ */
+#define TR_33_MDMA_MASK			0x003ff800
+#define TR_33_MDMA_RECOVERY_MASK	0x001f0000
+#define TR_33_MDMA_RECOVERY_SHIFT	16
+#define TR_33_MDMA_ACCESS_MASK		0x0000f800
+#define TR_33_MDMA_ACCESS_SHIFT		11
+#define TR_33_MDMA_HALFTICK		0x00200000
+#define TR_33_PIO_MASK			0x000007ff
+#define TR_33_PIO_E			0x00000400
+#define TR_33_PIO_RECOVERY_MASK		0x000003e0
+#define TR_33_PIO_RECOVERY_SHIFT	5
+#define TR_33_PIO_ACCESS_MASK		0x0000001f
+#define TR_33_PIO_ACCESS_SHIFT		0
+
+/*
+ * Interrupt register definitions. Only present on newer cells
+ * (Keylargo and later afaik) so we don't use it.
+ */
+#define IDE_INTR_DMA			0x80000000
+#define IDE_INTR_DEVICE			0x40000000
+
+/*
+ * FCR Register on Kauai. Not sure what bit 0x4 is  ...
+ */
+#define KAUAI_FCR_UATA_MAGIC		0x00000004
+#define KAUAI_FCR_UATA_RESET_N		0x00000002
+#define KAUAI_FCR_UATA_ENABLE		0x00000001
+
+
+/* Allow up to 256 DBDMA commands per xfer */
+#define MAX_DCMDS		256
+
+/* Don't let a DMA segment go all the way to 64K */
+#define MAX_DBDMA_SEG		0xff00
+
+
+/*
+ * Wait 1s for disk to answer on IDE bus after a hard reset
+ * of the device (via GPIO/FCR).
+ *
+ * Some devices seem to "pollute" the bus even after dropping
+ * the BSY bit (typically some combo drives slave on the UDMA
+ * bus) after a hard reset. Since we hard reset all drives on
+ * KeyLargo ATA66, we have to keep that delay around. I may end
+ * up not hard resetting anymore on these and keep the delay only
+ * for older interfaces instead (we have to reset when coming
+ * from MacOS...) --BenH.
+ */
+#define IDE_WAKEUP_DELAY_MS	1000
+
+struct pata_macio_timing;
+
+struct pata_macio_priv {
+	int				kind;
+	int				aapl_bus_id;
+	int				mediabay : 1;
+	struct device_node		*node;
+	struct macio_dev		*mdev;
+	struct pci_dev			*pdev;
+	struct device			*dev;
+	int				irq;
+	u32				treg[2][2];
+	void __iomem			*tfregs;
+	void __iomem			*kauai_fcr;
+	struct dbdma_cmd *		dma_table_cpu;
+	dma_addr_t			dma_table_dma;
+	struct ata_host			*host;
+	const struct pata_macio_timing	*timings;
+};
+
+/* Previous variants of this driver used to calculate timings
+ * for various variants of the chip and use tables for others.
+ *
+ * Not only was this confusing, but in addition, it isn't clear
+ * whether our calculation code was correct. It didn't entirely
+ * match the darwin code and whatever documentation I could find
+ * on these cells
+ *
+ * I decided to entirely rely on a table instead for this version
+ * of the driver. Also, because I don't really care about derated
+ * modes and really old HW other than making it work, I'm not going
+ * to calculate / snoop timing values for something else than the
+ * standard modes.
+ */
+struct pata_macio_timing {
+	int	mode;
+	u32	reg1;	/* Bits to set in first timing reg */
+	u32	reg2;	/* Bits to set in second timing reg */
+};
+
+static const struct pata_macio_timing pata_macio_ohare_timings[] = {
+	{ XFER_PIO_0,		0x00000526,	0, },
+	{ XFER_PIO_1,		0x00000085,	0, },
+	{ XFER_PIO_2,		0x00000025,	0, },
+	{ XFER_PIO_3,		0x00000025,	0, },
+	{ XFER_PIO_4,		0x00000025,	0, },
+	{ XFER_MW_DMA_0,	0x00074000,	0, },
+	{ XFER_MW_DMA_1,	0x00221000,	0, },
+	{ XFER_MW_DMA_2,	0x00211000,	0, },
+	{ -1, 0, 0 }
+};
+
+static const struct pata_macio_timing pata_macio_heathrow_timings[] = {
+	{ XFER_PIO_0,		0x00000526,	0, },
+	{ XFER_PIO_1,		0x00000085,	0, },
+	{ XFER_PIO_2,		0x00000025,	0, },
+	{ XFER_PIO_3,		0x00000025,	0, },
+	{ XFER_PIO_4,		0x00000025,	0, },
+	{ XFER_MW_DMA_0,	0x00074000,	0, },
+	{ XFER_MW_DMA_1,	0x00221000,	0, },
+	{ XFER_MW_DMA_2,	0x00211000,	0, },
+	{ -1, 0, 0 }
+};
+
+static const struct pata_macio_timing pata_macio_kl33_timings[] = {
+	{ XFER_PIO_0,		0x00000526,	0, },
+	{ XFER_PIO_1,		0x00000085,	0, },
+	{ XFER_PIO_2,		0x00000025,	0, },
+	{ XFER_PIO_3,		0x00000025,	0, },
+	{ XFER_PIO_4,		0x00000025,	0, },
+	{ XFER_MW_DMA_0,	0x00084000,	0, },
+	{ XFER_MW_DMA_1,	0x00021800,	0, },
+	{ XFER_MW_DMA_2,	0x00011800,	0, },
+	{ -1, 0, 0 }
+};
+
+static const struct pata_macio_timing pata_macio_kl66_timings[] = {
+	{ XFER_PIO_0,		0x0000038c,	0, },
+	{ XFER_PIO_1,		0x0000020a,	0, },
+	{ XFER_PIO_2,		0x00000127,	0, },
+	{ XFER_PIO_3,		0x000000c6,	0, },
+	{ XFER_PIO_4,		0x00000065,	0, },
+	{ XFER_MW_DMA_0,	0x00084000,	0, },
+	{ XFER_MW_DMA_1,	0x00029800,	0, },
+	{ XFER_MW_DMA_2,	0x00019400,	0, },
+	{ XFER_UDMA_0,		0x19100000,	0, },
+	{ XFER_UDMA_1,		0x14d00000,	0, },
+	{ XFER_UDMA_2,		0x10900000,	0, },
+	{ XFER_UDMA_3,		0x0c700000,	0, },
+	{ XFER_UDMA_4,		0x0c500000,	0, },
+	{ -1, 0, 0 }
+};
+
+static const struct pata_macio_timing pata_macio_kauai_timings[] = {
+	{ XFER_PIO_0,		0x08000a92,	0, },
+	{ XFER_PIO_1,		0x0800060f,	0, },
+	{ XFER_PIO_2,		0x0800038b,	0, },
+	{ XFER_PIO_3,		0x05000249,	0, },
+	{ XFER_PIO_4,		0x04000148,	0, },
+	{ XFER_MW_DMA_0,	0x00618000,	0, },
+	{ XFER_MW_DMA_1,	0x00209000,	0, },
+	{ XFER_MW_DMA_2,	0x00148000,	0, },
+	{ XFER_UDMA_0,		         0,	0x000070c1, },
+	{ XFER_UDMA_1,		         0,	0x00005d81, },
+	{ XFER_UDMA_2,		         0,	0x00004a61, },
+	{ XFER_UDMA_3,		         0,	0x00003a51, },
+	{ XFER_UDMA_4,		         0,	0x00002a31, },
+	{ XFER_UDMA_5,		         0,	0x00002921, },
+	{ -1, 0, 0 }
+};
+
+static const struct pata_macio_timing pata_macio_shasta_timings[] = {
+	{ XFER_PIO_0,		0x0a000c97,	0, },
+	{ XFER_PIO_1,		0x07000712,	0, },
+	{ XFER_PIO_2,		0x040003cd,	0, },
+	{ XFER_PIO_3,		0x0500028b,	0, },
+	{ XFER_PIO_4,		0x0400010a,	0, },
+	{ XFER_MW_DMA_0,	0x00820800,	0, },
+	{ XFER_MW_DMA_1,	0x0028b000,	0, },
+	{ XFER_MW_DMA_2,	0x001ca000,	0, },
+	{ XFER_UDMA_0,		         0,	0x00035901, },
+	{ XFER_UDMA_1,		         0,	0x000348b1, },
+	{ XFER_UDMA_2,		         0,	0x00033881, },
+	{ XFER_UDMA_3,		         0,	0x00033861, },
+	{ XFER_UDMA_4,		         0,	0x00033841, },
+	{ XFER_UDMA_5,		         0,	0x00033031, },
+	{ XFER_UDMA_6,		         0,	0x00033021, },
+	{ -1, 0, 0 }
+};
+
+static const struct pata_macio_timing *pata_macio_find_timing(
+					    struct pata_macio_priv *priv,
+					    int mode)
+{
+	int i;
+
+	for (i = 0; priv->timings[i].mode > 0; i++) {
+		if (priv->timings[i].mode == mode)
+			return &priv->timings[i];
+	}
+	return NULL;
+}
+
+
+static void pata_macio_apply_timings(struct ata_port *ap, unsigned int device)
+{
+	struct pata_macio_priv *priv = ap->private_data;
+	void __iomem *rbase = ap->ioaddr.cmd_addr;
+
+	if (priv->kind == controller_sh_ata6 ||
+	    priv->kind == controller_un_ata6 ||
+	    priv->kind == controller_k2_ata6) {
+		writel(priv->treg[device][0], rbase + IDE_KAUAI_PIO_CONFIG);
+		writel(priv->treg[device][1], rbase + IDE_KAUAI_ULTRA_CONFIG);
+	} else
+		writel(priv->treg[device][0], rbase + IDE_TIMING_CONFIG);
+}
+
+static void pata_macio_dev_select(struct ata_port *ap, unsigned int device)
+{
+	ata_sff_dev_select(ap, device);
+
+	/* Apply timings */
+	pata_macio_apply_timings(ap, device);
+}
+
+static void pata_macio_set_timings(struct ata_port *ap,
+				   struct ata_device *adev)
+{
+	struct pata_macio_priv *priv = ap->private_data;
+	const struct pata_macio_timing *t;
+
+	dev_dbg(priv->dev, "Set timings: DEV=%d,PIO=0x%x (%s),DMA=0x%x (%s)\n",
+		adev->devno,
+		adev->pio_mode,
+		ata_mode_string(ata_xfer_mode2mask(adev->pio_mode)),
+		adev->dma_mode,
+		ata_mode_string(ata_xfer_mode2mask(adev->dma_mode)));
+
+	/* First clear timings */
+	priv->treg[adev->devno][0] = priv->treg[adev->devno][1] = 0;
+
+	/* Now get the PIO timings */
+	t = pata_macio_find_timing(priv, adev->pio_mode);
+	if (t == NULL) {
+		dev_warn(priv->dev, "Invalid PIO timing requested: 0x%x\n",
+			 adev->pio_mode);
+		t = pata_macio_find_timing(priv, XFER_PIO_0);
+	}
+	BUG_ON(t == NULL);
+
+	/* PIO timings only ever use the first treg */
+	priv->treg[adev->devno][0] |= t->reg1;
+
+	/* Now get DMA timings */
+	t = pata_macio_find_timing(priv, adev->dma_mode);
+	if (t == NULL || (t->reg1 == 0 && t->reg2 == 0)) {
+		dev_dbg(priv->dev, "DMA timing not set yet, using MW_DMA_0\n");
+		t = pata_macio_find_timing(priv, XFER_MW_DMA_0);
+	}
+	BUG_ON(t == NULL);
+
+	/* DMA timings can use both tregs */
+	priv->treg[adev->devno][0] |= t->reg1;
+	priv->treg[adev->devno][1] |= t->reg2;
+
+	dev_dbg(priv->dev, " -> %08x %08x\n",
+		priv->treg[adev->devno][0],
+		priv->treg[adev->devno][1]);
+
+	/* Apply to hardware */
+	pata_macio_apply_timings(ap, adev->devno);
+}
+
+/*
+ * Blast some well known "safe" values to the timing registers at init or
+ * wakeup from sleep time, before we do real calculation
+ */
+static void pata_macio_default_timings(struct pata_macio_priv *priv)
+{
+	unsigned int value, value2 = 0;
+
+	switch(priv->kind) {
+		case controller_sh_ata6:
+			value = 0x0a820c97;
+			value2 = 0x00033031;
+			break;
+		case controller_un_ata6:
+		case controller_k2_ata6:
+			value = 0x08618a92;
+			value2 = 0x00002921;
+			break;
+		case controller_kl_ata4:
+			value = 0x0008438c;
+			break;
+		case controller_kl_ata3:
+			value = 0x00084526;
+			break;
+		case controller_heathrow:
+		case controller_ohare:
+		default:
+			value = 0x00074526;
+			break;
+	}
+	priv->treg[0][0] = priv->treg[1][0] = value;
+	priv->treg[0][1] = priv->treg[1][1] = value2;
+}
+
+static int pata_macio_cable_detect(struct ata_port *ap)
+{
+	struct pata_macio_priv *priv = ap->private_data;
+
+	/* Get cable type from device-tree */
+	if (priv->kind == controller_kl_ata4 ||
+	    priv->kind == controller_un_ata6 ||
+	    priv->kind == controller_k2_ata6 ||
+	    priv->kind == controller_sh_ata6) {
+		const char* cable = of_get_property(priv->node, "cable-type",
+						    NULL);
+		struct device_node *root = of_find_node_by_path("/");
+		const char *model = of_get_property(root, "model", NULL);
+
+		if (cable && !strncmp(cable, "80-", 3)) {
+			/* Some drives fail to detect 80c cable in PowerBook
+			 * These machine use proprietary short IDE cable
+			 * anyway
+			 */
+			if (!strncmp(model, "PowerBook", 9))
+				return ATA_CBL_PATA40_SHORT;
+			else
+				return ATA_CBL_PATA80;
+		}
+	}
+
+	/* G5's seem to have incorrect cable type in device-tree.
+	 * Let's assume they always have a 80 conductor cable, this seem to
+	 * be always the case unless the user mucked around
+	 */
+	if (of_device_is_compatible(priv->node, "K2-UATA") ||
+	    of_device_is_compatible(priv->node, "shasta-ata"))
+		return ATA_CBL_PATA80;
+
+	/* Anything else is 40 connectors */
+	return ATA_CBL_PATA40;
+}
+
+static void pata_macio_qc_prep(struct ata_queued_cmd *qc)
+{
+	unsigned int write = (qc->tf.flags & ATA_TFLAG_WRITE);
+	struct ata_port *ap = qc->ap;
+	struct pata_macio_priv *priv = ap->private_data;
+	struct scatterlist *sg;
+	struct dbdma_cmd *table;
+	unsigned int si, pi;
+
+	dev_dbgdma(priv->dev, "%s: qc %p flags %lx, write %d dev %d\n",
+		   __func__, qc, qc->flags, write, qc->dev->devno);
+
+	if (!(qc->flags & ATA_QCFLAG_DMAMAP))
+		return;
+
+	table = (struct dbdma_cmd *) priv->dma_table_cpu;
+
+	pi = 0;
+	for_each_sg(qc->sg, sg, qc->n_elem, si) {
+		u32 addr, sg_len, len;
+
+		/* determine if physical DMA addr spans 64K boundary.
+		 * Note h/w doesn't support 64-bit, so we unconditionally
+		 * truncate dma_addr_t to u32.
+		 */
+		addr = (u32) sg_dma_address(sg);
+		sg_len = sg_dma_len(sg);
+
+		while (sg_len) {
+			/* table overflow should never happen */
+			BUG_ON (pi++ >= MAX_DCMDS);
+
+			len = (sg_len < MAX_DBDMA_SEG) ? sg_len : MAX_DBDMA_SEG;
+			st_le16(&table->command, write ? OUTPUT_MORE: INPUT_MORE);
+			st_le16(&table->req_count, len);
+			st_le32(&table->phy_addr, addr);
+			table->cmd_dep = 0;
+			table->xfer_status = 0;
+			table->res_count = 0;
+			addr += len;
+			sg_len -= len;
+			++table;
+		}
+	}
+
+	/* Should never happen according to Tejun */
+	BUG_ON(!pi);
+
+	/* Convert the last command to an input/output */
+	table--;
+	st_le16(&table->command, write ? OUTPUT_LAST: INPUT_LAST);
+	table++;
+
+	/* Add the stop command to the end of the list */
+	memset(table, 0, sizeof(struct dbdma_cmd));
+	st_le16(&table->command, DBDMA_STOP);
+
+	dev_dbgdma(priv->dev, "%s: %d DMA list entries\n", __func__, pi);
+}
+
+
+static void pata_macio_freeze(struct ata_port *ap)
+{
+	struct dbdma_regs __iomem *dma_regs = ap->ioaddr.bmdma_addr;
+
+	if (dma_regs) {
+		unsigned int timeout = 1000000;
+
+		/* Make sure DMA controller is stopped */
+		writel((RUN|PAUSE|FLUSH|WAKE|DEAD) << 16, &dma_regs->control);
+		while (--timeout && (readl(&dma_regs->status) & RUN))
+			udelay(1);
+	}
+
+	ata_sff_freeze(ap);
+}
+
+
+static void pata_macio_bmdma_setup(struct ata_queued_cmd *qc)
+{
+	struct ata_port *ap = qc->ap;
+	struct pata_macio_priv *priv = ap->private_data;
+	struct dbdma_regs __iomem *dma_regs = ap->ioaddr.bmdma_addr;
+	int dev = qc->dev->devno;
+
+	dev_dbgdma(priv->dev, "%s: qc %p\n", __func__, qc);
+
+	/* Make sure DMA commands updates are visible */
+	writel(priv->dma_table_dma, &dma_regs->cmdptr);
+
+	/* On KeyLargo 66Mhz cell, we need to add 60ns to wrDataSetup on
+	 * UDMA reads
+	 */
+	if (priv->kind == controller_kl_ata4 &&
+	    (priv->treg[dev][0] & TR_66_UDMA_EN)) {
+		void __iomem *rbase = ap->ioaddr.cmd_addr;
+		u32 reg = priv->treg[dev][0];
+
+		if (!(qc->tf.flags & ATA_TFLAG_WRITE))
+			reg += 0x00800000;
+		writel(reg, rbase + IDE_TIMING_CONFIG);
+	}
+
+	/* issue r/w command */
+	ap->ops->sff_exec_command(ap, &qc->tf);
+}
+
+static void pata_macio_bmdma_start(struct ata_queued_cmd *qc)
+{
+	struct ata_port *ap = qc->ap;
+	struct pata_macio_priv *priv = ap->private_data;
+	struct dbdma_regs __iomem *dma_regs = ap->ioaddr.bmdma_addr;
+
+	dev_dbgdma(priv->dev, "%s: qc %p\n", __func__, qc);
+
+	writel((RUN << 16) | RUN, &dma_regs->control);
+	/* Make sure it gets to the controller right now */
+	(void)readl(&dma_regs->control);
+}
+
+static void pata_macio_bmdma_stop(struct ata_queued_cmd *qc)
+{
+	struct ata_port *ap = qc->ap;
+	struct pata_macio_priv *priv = ap->private_data;
+	struct dbdma_regs __iomem *dma_regs = ap->ioaddr.bmdma_addr;
+	unsigned int timeout = 1000000;
+
+	dev_dbgdma(priv->dev, "%s: qc %p\n", __func__, qc);
+
+	/* Stop the DMA engine and wait for it to full halt */
+	writel (((RUN|WAKE|DEAD) << 16), &dma_regs->control);
+	while (--timeout && (readl(&dma_regs->status) & RUN))
+		udelay(1);
+}
+
+static u8 pata_macio_bmdma_status(struct ata_port *ap)
+{
+	struct pata_macio_priv *priv = ap->private_data;
+	struct dbdma_regs __iomem *dma_regs = ap->ioaddr.bmdma_addr;
+	u32 dstat, rstat = ATA_DMA_INTR;
+	unsigned long timeout = 0;
+
+	dstat = readl(&dma_regs->status);
+
+	dev_dbgdma(priv->dev, "%s: dstat=%x\n", __func__, dstat);
+
+	/* We have two things to deal with here:
+	 *
+	 * - The dbdma won't stop if the command was started
+	 * but completed with an error without transferring all
+	 * datas. This happens when bad blocks are met during
+	 * a multi-block transfer.
+	 *
+	 * - The dbdma fifo hasn't yet finished flushing to
+	 * to system memory when the disk interrupt occurs.
+	 *
+	 */
+
+	/* First check for errors */
+	if ((dstat & (RUN|DEAD)) != RUN)
+		rstat |= ATA_DMA_ERR;
+
+	/* If ACTIVE is cleared, the STOP command has been hit and
+	 * the transfer is complete. If not, we have to flush the
+	 * channel.
+	 */
+	if ((dstat & ACTIVE) == 0)
+		return rstat;
+
+	dev_dbgdma(priv->dev, "%s: DMA still active, flushing...\n", __func__);
+
+	/* If dbdma didn't execute the STOP command yet, the
+	 * active bit is still set. We consider that we aren't
+	 * sharing interrupts (which is hopefully the case with
+	 * those controllers) and so we just try to flush the
+	 * channel for pending data in the fifo
+	 */
+	udelay(1);
+	writel((FLUSH << 16) | FLUSH, &dma_regs->control);
+	for (;;) {
+		udelay(1);
+		dstat = readl(&dma_regs->status);
+		if ((dstat & FLUSH) == 0)
+			break;
+		if (++timeout > 1000) {
+			dev_warn(priv->dev, "timeout flushing DMA\n");
+			rstat |= ATA_DMA_ERR;
+			break;
+		}
+	}
+	return rstat;
+}
+
+/* port_start is when we allocate the DMA command list */
+static int pata_macio_port_start(struct ata_port *ap)
+{
+	struct pata_macio_priv *priv = ap->private_data;
+
+	if (ap->ioaddr.bmdma_addr == NULL)
+		return 0;
+
+	/* Allocate space for the DBDMA commands.
+	 *
+	 * The +2 is +1 for the stop command and +1 to allow for
+	 * aligning the start address to a multiple of 16 bytes.
+	 */
+	priv->dma_table_cpu =
+		dmam_alloc_coherent(priv->dev,
+				    (MAX_DCMDS + 2) * sizeof(struct dbdma_cmd),
+				    &priv->dma_table_dma, GFP_KERNEL);
+	if (priv->dma_table_cpu == NULL) {
+		dev_err(priv->dev, "Unable to allocate DMA command list\n");
+		ap->ioaddr.bmdma_addr = NULL;
+	}
+	return 0;
+}
+
+static void pata_macio_irq_clear(struct ata_port *ap)
+{
+	struct pata_macio_priv *priv = ap->private_data;
+
+	/* Nothing to do here */
+
+	dev_dbgdma(priv->dev, "%s\n", __func__);
+}
+
+static void pata_macio_reset_hw(struct pata_macio_priv *priv, int resume)
+{
+	dev_dbg(priv->dev, "Enabling & resetting... \n");
+
+	if (priv->mediabay)
+		return;
+
+	if (priv->kind == controller_ohare && !resume) {
+		/* The code below is having trouble on some ohare machines
+		 * (timing related ?). Until I can put my hand on one of these
+		 * units, I keep the old way
+		 */
+		ppc_md.feature_call(PMAC_FTR_IDE_ENABLE, priv->node, 0, 1);
+	} else {
+		int rc;
+
+ 		/* Reset and enable controller */
+		rc = ppc_md.feature_call(PMAC_FTR_IDE_RESET,
+					 priv->node, priv->aapl_bus_id, 1);
+		ppc_md.feature_call(PMAC_FTR_IDE_ENABLE,
+				    priv->node, priv->aapl_bus_id, 1);
+		msleep(10);
+		/* Only bother waiting if there's a reset control */
+		if (rc == 0) {
+			ppc_md.feature_call(PMAC_FTR_IDE_RESET,
+					    priv->node, priv->aapl_bus_id, 0);
+			msleep(IDE_WAKEUP_DELAY_MS);
+		}
+	}
+
+	/* If resuming a PCI device, restore the config space here */
+	if (priv->pdev && resume) {
+		int rc;
+
+		pci_restore_state(priv->pdev);
+		rc = pcim_enable_device(priv->pdev);
+		if (rc)
+			dev_printk(KERN_ERR, &priv->pdev->dev,
+				   "Failed to enable device after resume (%d)\n", rc);
+		else
+			pci_set_master(priv->pdev);
+	}
+
+	/* On Kauai, initialize the FCR. We don't perform a reset, doesn't really
+	 * seem necessary and speeds up the boot process
+	 */
+	if (priv->kauai_fcr)
+		writel(KAUAI_FCR_UATA_MAGIC |
+		       KAUAI_FCR_UATA_RESET_N |
+		       KAUAI_FCR_UATA_ENABLE, priv->kauai_fcr);
+}
+
+/* Hook the standard slave config to fixup some HW related alignment
+ * restrictions
+ */
+static int pata_macio_slave_config(struct scsi_device *sdev)
+{
+	struct ata_port *ap = ata_shost_to_port(sdev->host);
+	struct pata_macio_priv *priv = ap->private_data;
+	struct ata_device *dev;
+	u16 cmd;
+	int rc;
+
+	/* First call original */
+	rc = ata_scsi_slave_config(sdev);
+	if (rc)
+		return rc;
+
+	/* This is lifted from sata_nv */
+	dev = &ap->link.device[sdev->id];
+
+	/* OHare has issues with non cache aligned DMA on some chipsets */
+	if (priv->kind == controller_ohare) {
+		blk_queue_update_dma_alignment(sdev->request_queue, 31);
+		blk_queue_update_dma_pad(sdev->request_queue, 31);
+
+		/* Tell the world about it */
+		ata_dev_printk(dev, KERN_INFO, "OHare alignment limits applied\n");
+		return 0;
+	}
+
+	/* We only have issues with ATAPI */
+	if (dev->class != ATA_DEV_ATAPI)
+		return 0;
+
+	/* Shasta and K2 seem to have "issues" with reads ... */
+	if (priv->kind == controller_sh_ata6 || priv->kind == controller_k2_ata6) {
+		/* Allright these are bad, apply restrictions */
+		blk_queue_update_dma_alignment(sdev->request_queue, 15);
+		blk_queue_update_dma_pad(sdev->request_queue, 15);
+
+		/* We enable MWI and hack cache line size directly here, this
+		 * is specific to this chipset and not normal values, we happen
+		 * to somewhat know what we are doing here (which is basically
+		 * to do the same Apple does and pray they did not get it wrong :-)
+		 */
+		BUG_ON(!priv->pdev);
+		pci_write_config_byte(priv->pdev, PCI_CACHE_LINE_SIZE, 0x08);
+		pci_read_config_word(priv->pdev, PCI_COMMAND, &cmd);
+		pci_write_config_word(priv->pdev, PCI_COMMAND,
+				      cmd | PCI_COMMAND_INVALIDATE);
+
+		/* Tell the world about it */
+		ata_dev_printk(dev, KERN_INFO,
+			       "K2/Shasta alignment limits applied\n");
+	}
+
+	return 0;
+}
+
+#ifdef CONFIG_PM
+
+static int pata_macio_do_suspend(struct pata_macio_priv *priv, pm_message_t mesg)
+{
+	int rc;
+
+	/* First, core libata suspend to do most of the work */
+	rc = ata_host_suspend(priv->host, mesg);
+	if (rc)
+		return rc;
+
+	/* Restore to default timings */
+	pata_macio_default_timings(priv);
+
+	/* Mask interrupt. Not strictly necessary but old driver did
+	 * it and I'd rather not change that here */
+	disable_irq(priv->irq);
+
+	/* The media bay will handle itself just fine */
+	if (priv->mediabay)
+		return 0;
+
+	/* Kauai has bus control FCRs directly here */
+	if (priv->kauai_fcr) {
+		u32 fcr = readl(priv->kauai_fcr);
+		fcr &= ~(KAUAI_FCR_UATA_RESET_N | KAUAI_FCR_UATA_ENABLE);
+		writel(fcr, priv->kauai_fcr);
+	}
+
+	/* For PCI, save state and disable DMA. No need to call
+	 * pci_set_power_state(), the HW doesn't do D states that
+	 * way, the platform code will take care of suspending the
+	 * ASIC properly
+	 */
+	if (priv->pdev) {
+		pci_save_state(priv->pdev);
+		pci_disable_device(priv->pdev);
+	}
+
+	/* Disable the bus on older machines and the cell on kauai */
+	ppc_md.feature_call(PMAC_FTR_IDE_ENABLE, priv->node,
+			    priv->aapl_bus_id, 0);
+
+	return 0;
+}
+
+static int pata_macio_do_resume(struct pata_macio_priv *priv)
+{
+	/* Reset and re-enable the HW */
+	pata_macio_reset_hw(priv, 1);
+
+	/* Sanitize drive timings */
+	pata_macio_apply_timings(priv->host->ports[0], 0);
+
+	/* We want our IRQ back ! */
+	enable_irq(priv->irq);
+
+	/* Let the libata core take it from there */
+	ata_host_resume(priv->host);
+
+	return 0;
+}
+
+#endif /* CONFIG_PM */
+
+static struct scsi_host_template pata_macio_sht = {
+	ATA_BASE_SHT(DRV_NAME),
+	.sg_tablesize		= MAX_DCMDS,
+	/* We may not need that strict one */
+	.dma_boundary		= ATA_DMA_BOUNDARY,
+	.slave_configure	= pata_macio_slave_config,
+};
+
+static struct ata_port_operations pata_macio_ops = {
+	.inherits		= &ata_sff_port_ops,
+
+	.freeze			= pata_macio_freeze,
+	.set_piomode		= pata_macio_set_timings,
+	.set_dmamode		= pata_macio_set_timings,
+	.cable_detect		= pata_macio_cable_detect,
+	.sff_dev_select		= pata_macio_dev_select,
+	.qc_prep		= pata_macio_qc_prep,
+	.mode_filter		= ata_bmdma_mode_filter,
+	.bmdma_setup		= pata_macio_bmdma_setup,
+	.bmdma_start		= pata_macio_bmdma_start,
+	.bmdma_stop		= pata_macio_bmdma_stop,
+	.bmdma_status		= pata_macio_bmdma_status,
+	.port_start		= pata_macio_port_start,
+	.sff_irq_clear		= pata_macio_irq_clear,
+};
+
+static void __devinit pata_macio_invariants(struct pata_macio_priv *priv)
+{
+	const int *bidp;
+
+	/* Identify the type of controller */
+	if (of_device_is_compatible(priv->node, "shasta-ata")) {
+		priv->kind = controller_sh_ata6;
+	        priv->timings = pata_macio_shasta_timings;
+	} else if (of_device_is_compatible(priv->node, "kauai-ata")) {
+		priv->kind = controller_un_ata6;
+	        priv->timings = pata_macio_kauai_timings;
+	} else if (of_device_is_compatible(priv->node, "K2-UATA")) {
+		priv->kind = controller_k2_ata6;
+	        priv->timings = pata_macio_kauai_timings;
+	} else if (of_device_is_compatible(priv->node, "keylargo-ata")) {
+		if (strcmp(priv->node->name, "ata-4") == 0) {
+			priv->kind = controller_kl_ata4;
+			priv->timings = pata_macio_kl66_timings;
+		} else {
+			priv->kind = controller_kl_ata3;
+			priv->timings = pata_macio_kl33_timings;
+		}
+	} else if (of_device_is_compatible(priv->node, "heathrow-ata")) {
+		priv->kind = controller_heathrow;
+		priv->timings = pata_macio_heathrow_timings;
+	} else {
+		priv->kind = controller_ohare;
+		priv->timings = pata_macio_ohare_timings;
+	}
+
+	/* XXX FIXME --- setup priv->mediabay here */
+
+	/* Get Apple bus ID (for clock and ASIC control) */
+	bidp = of_get_property(priv->node, "AAPL,bus-id", NULL);
+	priv->aapl_bus_id =  bidp ? *bidp : 0;
+
+	/* Fixup missing Apple bus ID in case of media-bay */
+	if (priv->mediabay && bidp == 0)
+		priv->aapl_bus_id = 1;
+}
+
+static void __devinit pata_macio_setup_ios(struct ata_ioports *ioaddr,
+					   void __iomem * base,
+					   void __iomem * dma)
+{
+	/* cmd_addr is the base of regs for that port */
+	ioaddr->cmd_addr	= base;
+
+	/* taskfile registers */
+	ioaddr->data_addr	= base + (ATA_REG_DATA    << 4);
+	ioaddr->error_addr	= base + (ATA_REG_ERR     << 4);
+	ioaddr->feature_addr	= base + (ATA_REG_FEATURE << 4);
+	ioaddr->nsect_addr	= base + (ATA_REG_NSECT   << 4);
+	ioaddr->lbal_addr	= base + (ATA_REG_LBAL    << 4);
+	ioaddr->lbam_addr	= base + (ATA_REG_LBAM    << 4);
+	ioaddr->lbah_addr	= base + (ATA_REG_LBAH    << 4);
+	ioaddr->device_addr	= base + (ATA_REG_DEVICE  << 4);
+	ioaddr->status_addr	= base + (ATA_REG_STATUS  << 4);
+	ioaddr->command_addr	= base + (ATA_REG_CMD     << 4);
+	ioaddr->altstatus_addr	= base + 0x160;
+	ioaddr->ctl_addr	= base + 0x160;
+	ioaddr->bmdma_addr	= dma;
+}
+
+static void __devinit pmac_macio_calc_timing_masks(struct pata_macio_priv *priv,
+						   struct ata_port_info   *pinfo)
+{
+	int i = 0;
+
+	pinfo->pio_mask		= 0;
+	pinfo->mwdma_mask	= 0;
+	pinfo->udma_mask	= 0;
+
+	while (priv->timings[i].mode > 0) {
+		unsigned int mask = 1U << (priv->timings[i].mode & 0x0f);
+		switch(priv->timings[i].mode & 0xf0) {
+		case 0x00: /* PIO */
+			pinfo->pio_mask |= (mask >> 8);
+			break;
+		case 0x20: /* MWDMA */
+			pinfo->mwdma_mask |= mask;
+			break;
+		case 0x40: /* UDMA */
+			pinfo->udma_mask |= mask;
+			break;
+		}
+		i++;
+	}
+	dev_dbg(priv->dev, "Supported masks: PIO=%lx, MWDMA=%lx, UDMA=%lx\n",
+		pinfo->pio_mask, pinfo->mwdma_mask, pinfo->udma_mask);
+}
+
+static int __devinit pata_macio_common_init(struct pata_macio_priv	*priv,
+					    resource_size_t		tfregs,
+					    resource_size_t		dmaregs,
+					    resource_size_t		fcregs,
+					    unsigned long		irq)
+{
+	struct ata_port_info		pinfo;
+	const struct ata_port_info	*ppi[] = { &pinfo, NULL };
+	void __iomem			*dma_regs = NULL;
+
+	/* Fill up privates with various invariants collected from the
+	 * device-tree
+	 */
+	pata_macio_invariants(priv);
+
+	/* Make sure we have sane initial timings in the cache */
+	pata_macio_default_timings(priv);
+
+	/* Not sure what the real max is but we know it's less than 64K, let's
+	 * use 64K minus 256
+	 */
+	dma_set_max_seg_size(priv->dev, MAX_DBDMA_SEG);
+
+	/* Allocate libata host for 1 port */
+	memset(&pinfo, 0, sizeof(struct ata_port_info));
+	pmac_macio_calc_timing_masks(priv, &pinfo);
+	pinfo.flags		= ATA_FLAG_SLAVE_POSS | ATA_FLAG_MMIO |
+				  ATA_FLAG_NO_LEGACY;
+	pinfo.port_ops		= &pata_macio_ops;
+	pinfo.private_data	= priv;
+
+	priv->host = ata_host_alloc_pinfo(priv->dev, ppi, 1);
+	if (priv->host == NULL) {
+		dev_err(priv->dev, "Failed to allocate ATA port structure\n");
+		return -ENOMEM;
+	}
+
+	/* Setup the private data in host too */
+	priv->host->private_data = priv;
+
+	/* Map base registers */
+	priv->tfregs = devm_ioremap(priv->dev, tfregs, 0x100);
+	if (priv->tfregs == NULL) {
+		dev_err(priv->dev, "Failed to map ATA ports\n");
+		return -ENOMEM;
+	}
+	priv->host->iomap = &priv->tfregs;
+
+	/* Map DMA regs */
+	if (dmaregs != 0) {
+		dma_regs = devm_ioremap(priv->dev, dmaregs,
+					sizeof(struct dbdma_regs));
+		if (dma_regs == NULL)
+			dev_warn(priv->dev, "Failed to map ATA DMA registers\n");
+	}
+
+	/* If chip has local feature control, map those regs too */
+	if (fcregs != 0) {
+		priv->kauai_fcr = devm_ioremap(priv->dev, fcregs, 4);
+		if (priv->kauai_fcr == NULL) {
+			dev_err(priv->dev, "Failed to map ATA FCR register\n");
+			return -ENOMEM;
+		}
+	}
+
+	/* Setup port data structure */
+	pata_macio_setup_ios(&priv->host->ports[0]->ioaddr,
+			     priv->tfregs, dma_regs);
+	priv->host->ports[0]->private_data = priv;
+
+	/* hard-reset the controller */
+	pata_macio_reset_hw(priv, 0);
+	pata_macio_apply_timings(priv->host->ports[0], 0);
+
+	/* Enable bus master if necessary */
+	if (priv->pdev && dma_regs)
+		pci_set_master(priv->pdev);
+
+	dev_info(priv->dev, "Activating pata-macio chipset %s, Apple bus ID %d\n",
+		 macio_ata_names[priv->kind], priv->aapl_bus_id);
+
+	/* Start it up */
+	priv->irq = irq;
+	return ata_host_activate(priv->host, irq, ata_sff_interrupt, 0,
+				 &pata_macio_sht);
+}
+
+static int __devinit pata_macio_attach(struct macio_dev *mdev,
+				       const struct of_device_id *match)
+{
+	struct pata_macio_priv	*priv;
+	resource_size_t		tfregs, dmaregs = 0;
+	unsigned long		irq;
+	int			rc;
+
+	/* Check for broken device-trees */
+	if (macio_resource_count(mdev) == 0) {
+		dev_err(&mdev->ofdev.dev,
+			"No addresses for controller\n");
+		return -ENXIO;
+	}
+
+	/* Enable managed resources */
+	macio_enable_devres(mdev);
+
+	/* Allocate and init private data structure */
+	priv = devm_kzalloc(&mdev->ofdev.dev,
+			    sizeof(struct pata_macio_priv), GFP_KERNEL);
+	if (priv == NULL) {
+		dev_err(&mdev->ofdev.dev,
+			"Failed to allocate private memory\n");
+		return -ENOMEM;
+	}
+	priv->node = of_node_get(mdev->ofdev.node);
+	priv->mdev = mdev;
+	priv->dev = &mdev->ofdev.dev;
+
+	/* Request memory resource for taskfile registers */
+	if (macio_request_resource(mdev, 0, "pata-macio")) {
+		dev_err(&mdev->ofdev.dev,
+			"Cannot obtain taskfile resource\n");
+		return -EBUSY;
+	}
+	tfregs = macio_resource_start(mdev, 0);
+
+	/* Request resources for DMA registers if any */
+	if (macio_resource_count(mdev) >= 2) {
+		if (macio_request_resource(mdev, 1, "pata-macio-dma"))
+			dev_err(&mdev->ofdev.dev,
+				"Cannot obtain DMA resource\n");
+		else
+			dmaregs = macio_resource_start(mdev, 1);
+	}
+
+	/*
+	 * Fixup missing IRQ for some old implementations with broken
+	 * device-trees.
+	 *
+	 * This is a bit bogus, it should be fixed in the device-tree itself,
+	 * via the existing macio fixups, based on the type of interrupt
+	 * controller in the machine. However, I have no test HW for this case,
+	 * and this trick works well enough on those old machines...
+	 */
+	if (macio_irq_count(mdev) == 0) {
+		dev_warn(&mdev->ofdev.dev,
+			 "No interrupts for controller, using 13\n");
+		irq = irq_create_mapping(NULL, 13);
+	} else
+		irq = macio_irq(mdev, 0);
+
+	/* Prevvent media bay callbacks until fully registered */
+	lock_media_bay(priv->mdev->media_bay);
+
+	/* Get register addresses and call common initialization */
+	rc = pata_macio_common_init(priv,
+				    tfregs,		/* Taskfile regs */
+				    dmaregs,		/* DBDMA regs */
+				    0,			/* Feature control */
+				    irq);
+	unlock_media_bay(priv->mdev->media_bay);
+
+	return rc;
+}
+
+static int __devexit pata_macio_detach(struct macio_dev *mdev)
+{
+	struct ata_host *host = macio_get_drvdata(mdev);
+	struct pata_macio_priv *priv = host->private_data;
+
+	lock_media_bay(priv->mdev->media_bay);
+
+	/* Make sure the mediabay callback doesn't try to access
+	 * dead stuff
+	 */
+	priv->host->private_data = NULL;
+
+	ata_host_detach(host);
+
+	unlock_media_bay(priv->mdev->media_bay);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM
+
+static int pata_macio_suspend(struct macio_dev *mdev, pm_message_t mesg)
+{
+	struct ata_host *host = macio_get_drvdata(mdev);
+
+	return pata_macio_do_suspend(host->private_data, mesg);
+}
+
+static int pata_macio_resume(struct macio_dev *mdev)
+{
+	struct ata_host *host = macio_get_drvdata(mdev);
+
+	return pata_macio_do_resume(host->private_data);
+}
+
+#endif /* CONFIG_PM */
+
+#ifdef CONFIG_PMAC_MEDIABAY
+static void pata_macio_mb_event(struct macio_dev* mdev, int mb_state)
+{
+	struct ata_host *host = macio_get_drvdata(mdev);
+	struct ata_port *ap;
+	struct ata_eh_info *ehi;
+	struct ata_device *dev;
+	unsigned long flags;
+
+	if (!host || !host->private_data)
+		return;
+	ap = host->ports[0];
+	spin_lock_irqsave(ap->lock, flags);
+	ehi = &ap->link.eh_info;
+	if (mb_state == MB_CD) {
+		ata_ehi_push_desc(ehi, "mediabay plug");
+		ata_ehi_hotplugged(ehi);
+		ata_port_freeze(ap);
+	} else {
+		ata_ehi_push_desc(ehi, "mediabay unplug");
+		ata_for_each_dev(dev, &ap->link, ALL)
+			dev->flags |= ATA_DFLAG_DETACH;
+		ata_port_abort(ap);
+	}
+	spin_unlock_irqrestore(ap->lock, flags);
+
+}
+#endif /* CONFIG_PMAC_MEDIABAY */
+
+
+static int __devinit pata_macio_pci_attach(struct pci_dev *pdev,
+					   const struct pci_device_id *id)
+{
+	struct pata_macio_priv	*priv;
+	struct device_node	*np;
+	resource_size_t		rbase;
+
+	/* We cannot use a MacIO controller without its OF device node */
+	np = pci_device_to_OF_node(pdev);
+	if (np == NULL) {
+		dev_err(&pdev->dev,
+			"Cannot find OF device node for controller\n");
+		return -ENODEV;
+	}
+
+	/* Check that it can be enabled */
+	if (pcim_enable_device(pdev)) {
+		dev_err(&pdev->dev,
+			"Cannot enable controller PCI device\n");
+		return -ENXIO;
+	}
+
+	/* Allocate and init private data structure */
+	priv = devm_kzalloc(&pdev->dev,
+			    sizeof(struct pata_macio_priv), GFP_KERNEL);
+	if (priv == NULL) {
+		dev_err(&pdev->dev,
+			"Failed to allocate private memory\n");
+		return -ENOMEM;
+	}
+	priv->node = of_node_get(np);
+	priv->pdev = pdev;
+	priv->dev = &pdev->dev;
+
+	/* Get MMIO regions */
+	if (pci_request_regions(pdev, "pata-macio")) {
+		dev_err(&pdev->dev,
+			"Cannot obtain PCI resources\n");
+		return -EBUSY;
+	}
+
+	/* Get register addresses and call common initialization */
+	rbase = pci_resource_start(pdev, 0);
+	if (pata_macio_common_init(priv,
+				   rbase + 0x2000,	/* Taskfile regs */
+				   rbase + 0x1000,	/* DBDMA regs */
+				   rbase,		/* Feature control */
+				   pdev->irq))
+		return -ENXIO;
+
+	return 0;
+}
+
+static void __devexit pata_macio_pci_detach(struct pci_dev *pdev)
+{
+	struct ata_host *host = dev_get_drvdata(&pdev->dev);
+
+	ata_host_detach(host);
+}
+
+#ifdef CONFIG_PM
+
+static int pata_macio_pci_suspend(struct pci_dev *pdev, pm_message_t mesg)
+{
+	struct ata_host *host = dev_get_drvdata(&pdev->dev);
+
+	return pata_macio_do_suspend(host->private_data, mesg);
+}
+
+static int pata_macio_pci_resume(struct pci_dev *pdev)
+{
+	struct ata_host *host = dev_get_drvdata(&pdev->dev);
+
+	return pata_macio_do_resume(host->private_data);
+}
+
+#endif /* CONFIG_PM */
+
+static struct of_device_id pata_macio_match[] =
+{
+	{
+	.name 		= "IDE",
+	},
+	{
+	.name 		= "ATA",
+	},
+	{
+	.type		= "ide",
+	},
+	{
+	.type		= "ata",
+	},
+	{},
+};
+
+static struct macio_driver pata_macio_driver =
+{
+	.name 		= "pata-macio",
+	.match_table	= pata_macio_match,
+	.probe		= pata_macio_attach,
+	.remove		= pata_macio_detach,
+#ifdef CONFIG_PM
+	.suspend	= pata_macio_suspend,
+	.resume		= pata_macio_resume,
+#endif
+#ifdef CONFIG_PMAC_MEDIABAY
+	.mediabay_event	= pata_macio_mb_event,
+#endif
+	.driver = {
+		.owner		= THIS_MODULE,
+	},
+};
+
+static const struct pci_device_id pata_macio_pci_match[] = {
+	{ PCI_VDEVICE(APPLE, PCI_DEVICE_ID_APPLE_UNI_N_ATA),	0 },
+	{ PCI_VDEVICE(APPLE, PCI_DEVICE_ID_APPLE_IPID_ATA100),	0 },
+	{ PCI_VDEVICE(APPLE, PCI_DEVICE_ID_APPLE_K2_ATA100),	0 },
+	{ PCI_VDEVICE(APPLE, PCI_DEVICE_ID_APPLE_SH_ATA),	0 },
+	{ PCI_VDEVICE(APPLE, PCI_DEVICE_ID_APPLE_IPID2_ATA),	0 },
+	{},
+};
+
+static struct pci_driver pata_macio_pci_driver = {
+	.name		= "pata-pci-macio",
+	.id_table	= pata_macio_pci_match,
+	.probe		= pata_macio_pci_attach,
+	.remove		= pata_macio_pci_detach,
+#ifdef CONFIG_PM
+	.suspend	= pata_macio_pci_suspend,
+	.resume		= pata_macio_pci_resume,
+#endif
+	.driver = {
+		.owner		= THIS_MODULE,
+	},
+};
+MODULE_DEVICE_TABLE(pci, pata_macio_pci_match);
+
+
+static int __init pata_macio_init(void)
+{
+	int rc;
+
+	if (!machine_is(powermac))
+		return -ENODEV;
+
+	rc = pci_register_driver(&pata_macio_pci_driver);
+	if (rc)
+		return rc;
+	rc = macio_register_driver(&pata_macio_driver);
+	if (rc) {
+		pci_unregister_driver(&pata_macio_pci_driver);
+		return rc;
+	}
+	return 0;
+}
+
+static void __exit pata_macio_exit(void)
+{
+	macio_unregister_driver(&pata_macio_driver);
+	pci_unregister_driver(&pata_macio_pci_driver);
+}
+
+module_init(pata_macio_init);
+module_exit(pata_macio_exit);
+
+MODULE_AUTHOR("Benjamin Herrenschmidt");
+MODULE_DESCRIPTION("Apple MacIO PATA driver");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DRV_VERSION);
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index e62a4cc..27fd775 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -35,6 +35,7 @@
 	struct cpu *cpu = container_of(dev, struct cpu, sysdev);
 	ssize_t ret;
 
+	cpu_hotplug_driver_lock();
 	switch (buf[0]) {
 	case '0':
 		ret = cpu_down(cpu->sysdev.id);
@@ -49,6 +50,7 @@
 	default:
 		ret = -EINVAL;
 	}
+	cpu_hotplug_driver_unlock();
 
 	if (ret >= 0)
 		ret = count;
@@ -72,6 +74,38 @@
 	per_cpu(cpu_sys_devices, logical_cpu) = NULL;
 	return;
 }
+
+#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
+static ssize_t cpu_probe_store(struct class *class, const char *buf,
+			       size_t count)
+{
+	return arch_cpu_probe(buf, count);
+}
+
+static ssize_t cpu_release_store(struct class *class, const char *buf,
+				 size_t count)
+{
+	return arch_cpu_release(buf, count);
+}
+
+static CLASS_ATTR(probe, S_IWUSR, NULL, cpu_probe_store);
+static CLASS_ATTR(release, S_IWUSR, NULL, cpu_release_store);
+
+int __init cpu_probe_release_init(void)
+{
+	int rc;
+
+	rc = sysfs_create_file(&cpu_sysdev_class.kset.kobj,
+			       &class_attr_probe.attr);
+	if (!rc)
+		rc = sysfs_create_file(&cpu_sysdev_class.kset.kobj,
+				       &class_attr_release.attr);
+
+	return rc;
+}
+device_initcall(cpu_probe_release_init);
+#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */
+
 #else /* ... !CONFIG_HOTPLUG_CPU */
 static inline void register_cpu_control(struct cpu *cpu)
 {
diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c
index 6380ad8..59ca2b7 100644
--- a/drivers/block/swim3.c
+++ b/drivers/block/swim3.c
@@ -200,7 +200,7 @@
 	int	ejected;
 	wait_queue_head_t wait;
 	int	wanted;
-	struct device_node*	media_bay; /* NULL when not in bay */
+	struct macio_dev *mdev;
 	char	dbdma_cmd_space[5 * sizeof(struct dbdma_cmd)];
 };
 
@@ -303,14 +303,13 @@
 static void do_fd_request(struct request_queue * q)
 {
 	int i;
-	for(i=0;i<floppy_count;i++)
-	{
-#ifdef CONFIG_PMAC_MEDIABAY
-		if (floppy_states[i].media_bay &&
-			check_media_bay(floppy_states[i].media_bay, MB_FD))
+
+	for(i=0; i<floppy_count; i++) {
+		struct floppy_state *fs = &floppy_states[i];
+		if (fs->mdev->media_bay &&
+		    check_media_bay(fs->mdev->media_bay) != MB_FD)
 			continue;
-#endif /* CONFIG_PMAC_MEDIABAY */
-		start_request(&floppy_states[i]);
+		start_request(fs);
 	}
 }
 
@@ -849,10 +848,9 @@
 	if ((cmd & 0x80) && !capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-#ifdef CONFIG_PMAC_MEDIABAY
-	if (fs->media_bay && check_media_bay(fs->media_bay, MB_FD))
+	if (fs->mdev->media_bay &&
+	    check_media_bay(fs->mdev->media_bay) != MB_FD)
 		return -ENXIO;
-#endif
 
 	switch (cmd) {
 	case FDEJECT:
@@ -876,10 +874,9 @@
 	int n, err = 0;
 
 	if (fs->ref_count == 0) {
-#ifdef CONFIG_PMAC_MEDIABAY
-		if (fs->media_bay && check_media_bay(fs->media_bay, MB_FD))
+		if (fs->mdev->media_bay &&
+		    check_media_bay(fs->mdev->media_bay) != MB_FD)
 			return -ENXIO;
-#endif
 		out_8(&sw->setup, S_IBM_DRIVE | S_FCLK_DIV2);
 		out_8(&sw->control_bic, 0xff);
 		out_8(&sw->mode, 0x95);
@@ -963,10 +960,9 @@
 	struct swim3 __iomem *sw;
 	int ret, n;
 
-#ifdef CONFIG_PMAC_MEDIABAY
-	if (fs->media_bay && check_media_bay(fs->media_bay, MB_FD))
+	if (fs->mdev->media_bay &&
+	    check_media_bay(fs->mdev->media_bay) != MB_FD)
 		return -ENXIO;
-#endif
 
 	sw = fs->swim3;
 	grab_drive(fs, revalidating, 0);
@@ -1009,7 +1005,6 @@
 static int swim3_add_device(struct macio_dev *mdev, int index)
 {
 	struct device_node *swim = mdev->ofdev.node;
-	struct device_node *mediabay;
 	struct floppy_state *fs = &floppy_states[index];
 	int rc = -EBUSY;
 
@@ -1036,9 +1031,7 @@
 	}
 	dev_set_drvdata(&mdev->ofdev.dev, fs);
 
-	mediabay = (strcasecmp(swim->parent->type, "media-bay") == 0) ?
-		swim->parent : NULL;
-	if (mediabay == NULL)
+	if (mdev->media_bay == NULL)
 		pmac_call_feature(PMAC_FTR_SWIM3_ENABLE, swim, 0, 1);
 	
 	memset(fs, 0, sizeof(*fs));
@@ -1068,7 +1061,7 @@
 	fs->secpercyl = 36;
 	fs->secpertrack = 18;
 	fs->total_secs = 2880;
-	fs->media_bay = mediabay;
+	fs->mdev = mdev;
 	init_waitqueue_head(&fs->wait);
 
 	fs->dma_cmd = (struct dbdma_cmd *) DBDMA_ALIGN(fs->dbdma_cmd_space);
@@ -1093,7 +1086,7 @@
 	init_timer(&fs->timeout);
 
 	printk(KERN_INFO "fd%d: SWIM3 floppy controller %s\n", floppy_count,
-		mediabay ? "in media bay" : "");
+		mdev->media_bay ? "in media bay" : "");
 
 	return 0;
 
diff --git a/drivers/char/agp/uninorth-agp.c b/drivers/char/agp/uninorth-agp.c
index 703959e..d89da4a 100644
--- a/drivers/char/agp/uninorth-agp.c
+++ b/drivers/char/agp/uninorth-agp.c
@@ -144,59 +144,13 @@
 	return 0;
 }
 
-static int uninorth_insert_memory(struct agp_memory *mem, off_t pg_start,
-				int type)
-{
-	int i, j, num_entries;
-	void *temp;
-	int mask_type;
-
-	temp = agp_bridge->current_size;
-	num_entries = A_SIZE_32(temp)->num_entries;
-
-	if (type != mem->type)
-		return -EINVAL;
-
-	mask_type = agp_bridge->driver->agp_type_to_mask_type(agp_bridge, type);
-	if (mask_type != 0) {
-		/* We know nothing of memory types */
-		return -EINVAL;
-	}
-
-	if ((pg_start + mem->page_count) > num_entries)
-		return -EINVAL;
-
-	j = pg_start;
-
-	while (j < (pg_start + mem->page_count)) {
-		if (agp_bridge->gatt_table[j])
-			return -EBUSY;
-		j++;
-	}
-
-	for (i = 0, j = pg_start; i < mem->page_count; i++, j++) {
-		agp_bridge->gatt_table[j] =
-			cpu_to_le32((page_to_phys(mem->pages[i]) & 0xFFFFF000UL) | 0x1UL);
-		flush_dcache_range((unsigned long)__va(page_to_phys(mem->pages[i])),
-				   (unsigned long)__va(page_to_phys(mem->pages[i]))+0x1000);
-	}
-	(void)in_le32((volatile u32*)&agp_bridge->gatt_table[pg_start]);
-	mb();
-
-	uninorth_tlbflush(mem);
-	return 0;
-}
-
-static int u3_insert_memory(struct agp_memory *mem, off_t pg_start, int type)
+static int uninorth_insert_memory(struct agp_memory *mem, off_t pg_start, int type)
 {
 	int i, num_entries;
 	void *temp;
 	u32 *gp;
 	int mask_type;
 
-	temp = agp_bridge->current_size;
-	num_entries = A_SIZE_32(temp)->num_entries;
-
 	if (type != mem->type)
 		return -EINVAL;
 
@@ -206,6 +160,12 @@
 		return -EINVAL;
 	}
 
+	if (mem->page_count == 0)
+		return 0;
+
+	temp = agp_bridge->current_size;
+	num_entries = A_SIZE_32(temp)->num_entries;
+
 	if ((pg_start + mem->page_count) > num_entries)
 		return -EINVAL;
 
@@ -213,14 +173,18 @@
 	for (i = 0; i < mem->page_count; ++i) {
 		if (gp[i]) {
 			dev_info(&agp_bridge->dev->dev,
-				 "u3_insert_memory: entry 0x%x occupied (%x)\n",
+				 "uninorth_insert_memory: entry 0x%x occupied (%x)\n",
 				 i, gp[i]);
 			return -EBUSY;
 		}
 	}
 
 	for (i = 0; i < mem->page_count; i++) {
-		gp[i] = (page_to_phys(mem->pages[i]) >> PAGE_SHIFT) | 0x80000000UL;
+		if (is_u3)
+			gp[i] = (page_to_phys(mem->pages[i]) >> PAGE_SHIFT) | 0x80000000UL;
+		else
+			gp[i] =	cpu_to_le32((page_to_phys(mem->pages[i]) & 0xFFFFF000UL) |
+					    0x1UL);
 		flush_dcache_range((unsigned long)__va(page_to_phys(mem->pages[i])),
 				   (unsigned long)__va(page_to_phys(mem->pages[i]))+0x1000);
 	}
@@ -230,14 +194,23 @@
 	return 0;
 }
 
-int u3_remove_memory(struct agp_memory *mem, off_t pg_start, int type)
+int uninorth_remove_memory(struct agp_memory *mem, off_t pg_start, int type)
 {
 	size_t i;
 	u32 *gp;
+	int mask_type;
 
-	if (type != 0 || mem->type != 0)
+	if (type != mem->type)
+		return -EINVAL;
+
+	mask_type = agp_bridge->driver->agp_type_to_mask_type(agp_bridge, type);
+	if (mask_type != 0) {
 		/* We know nothing of memory types */
 		return -EINVAL;
+	}
+
+	if (mem->page_count == 0)
+		return 0;
 
 	gp = (u32 *) &agp_bridge->gatt_table[pg_start];
 	for (i = 0; i < mem->page_count; ++i)
@@ -536,7 +509,7 @@
 	.create_gatt_table	= uninorth_create_gatt_table,
 	.free_gatt_table	= uninorth_free_gatt_table,
 	.insert_memory		= uninorth_insert_memory,
-	.remove_memory		= agp_generic_remove_memory,
+	.remove_memory		= uninorth_remove_memory,
 	.alloc_by_type		= agp_generic_alloc_by_type,
 	.free_by_type		= agp_generic_free_by_type,
 	.agp_alloc_page		= agp_generic_alloc_page,
@@ -562,8 +535,8 @@
 	.agp_enable		= uninorth_agp_enable,
 	.create_gatt_table	= uninorth_create_gatt_table,
 	.free_gatt_table	= uninorth_free_gatt_table,
-	.insert_memory		= u3_insert_memory,
-	.remove_memory		= u3_remove_memory,
+	.insert_memory		= uninorth_insert_memory,
+	.remove_memory		= uninorth_remove_memory,
 	.alloc_by_type		= agp_generic_alloc_by_type,
 	.free_by_type		= agp_generic_free_by_type,
 	.agp_alloc_page		= agp_generic_alloc_page,
diff --git a/drivers/char/hvc_console.c b/drivers/char/hvc_console.c
index a632f25..416d342 100644
--- a/drivers/char/hvc_console.c
+++ b/drivers/char/hvc_console.c
@@ -832,6 +832,7 @@
 		tty_hangup(tty);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(hvc_remove);
 
 /* Driver initialization: called as soon as someone uses hvc_alloc(). */
 static int hvc_init(void)
diff --git a/drivers/ide/pmac.c b/drivers/ide/pmac.c
index 97642a7a..7a4e788 100644
--- a/drivers/ide/pmac.c
+++ b/drivers/ide/pmac.c
@@ -43,10 +43,7 @@
 #include <asm/pmac_feature.h>
 #include <asm/sections.h>
 #include <asm/irq.h>
-
-#ifndef CONFIG_PPC64
 #include <asm/mediabay.h>
-#endif
 
 #define DRV_NAME "ide-pmac"
 
@@ -59,13 +56,14 @@
 	int				irq;
 	int				kind;
 	int				aapl_bus_id;
-	unsigned			mediabay : 1;
 	unsigned			broken_dma : 1;
 	unsigned			broken_dma_warn : 1;
 	struct device_node*		node;
 	struct macio_dev		*mdev;
 	u32				timings[4];
 	volatile u32 __iomem *		*kauai_fcr;
+	ide_hwif_t			*hwif;
+
 	/* Those fields are duplicating what is in hwif. We currently
 	 * can't use the hwif ones because of some assumptions that are
 	 * beeing done by the generic code about the kind of dma controller
@@ -854,6 +852,11 @@
 	pmif->timings[2] = pmif->timings[3] = value2;
 }
 
+static int on_media_bay(pmac_ide_hwif_t *pmif)
+{
+	return pmif->mdev && pmif->mdev->media_bay != NULL;
+}
+
 /* Suspend call back, should be called after the child devices
  * have actually been suspended
  */
@@ -866,7 +869,7 @@
 	disable_irq(pmif->irq);
 
 	/* The media bay will handle itself just fine */
-	if (pmif->mediabay)
+	if (on_media_bay(pmif))
 		return 0;
 	
 	/* Kauai has bus control FCRs directly here */
@@ -889,7 +892,7 @@
 static int pmac_ide_do_resume(pmac_ide_hwif_t *pmif)
 {
 	/* Hard reset & re-enable controller (do we really need to reset ? -BenH) */
-	if (!pmif->mediabay) {
+	if (!on_media_bay(pmif)) {
 		ppc_md.feature_call(PMAC_FTR_IDE_RESET, pmif->node, pmif->aapl_bus_id, 1);
 		ppc_md.feature_call(PMAC_FTR_IDE_ENABLE, pmif->node, pmif->aapl_bus_id, 1);
 		msleep(10);
@@ -950,13 +953,11 @@
 	pmac_ide_hwif_t *pmif =
 		(pmac_ide_hwif_t *)dev_get_drvdata(hwif->gendev.parent);
 
-	if (pmif->mediabay) {
-#ifdef CONFIG_PMAC_MEDIABAY
-		if (check_media_bay_by_base(pmif->regbase, MB_CD) == 0) {
+	if (on_media_bay(pmif)) {
+		if (check_media_bay(pmif->mdev->media_bay) == MB_CD) {
 			drive->dev_flags &= ~IDE_DFLAG_NOPROBE;
 			return;
 		}
-#endif
 		drive->dev_flags |= IDE_DFLAG_NOPROBE;
 	}
 }
@@ -1072,26 +1073,23 @@
 		writel(KAUAI_FCR_UATA_MAGIC |
 		       KAUAI_FCR_UATA_RESET_N |
 		       KAUAI_FCR_UATA_ENABLE, pmif->kauai_fcr);
-
-	pmif->mediabay = 0;
 	
 	/* Make sure we have sane timings */
 	sanitize_timings(pmif);
 
-	host = ide_host_alloc(&d, hws, 1);
-	if (host == NULL)
-		return -ENOMEM;
-	hwif = host->ports[0];
+	/* If we are on a media bay, wait for it to settle and lock it */
+	if (pmif->mdev)
+		lock_media_bay(pmif->mdev->media_bay);
 
-#ifndef CONFIG_PPC64
-	/* XXX FIXME: Media bay stuff need re-organizing */
-	if (np->parent && np->parent->name
-	    && strcasecmp(np->parent->name, "media-bay") == 0) {
-#ifdef CONFIG_PMAC_MEDIABAY
-		media_bay_set_ide_infos(np->parent, pmif->regbase, pmif->irq,
-					hwif);
-#endif /* CONFIG_PMAC_MEDIABAY */
-		pmif->mediabay = 1;
+	host = ide_host_alloc(&d, hws, 1);
+	if (host == NULL) {
+		rc = -ENOMEM;
+		goto bail;
+	}
+	hwif = pmif->hwif = host->ports[0];
+
+	if (on_media_bay(pmif)) {
+		/* Fixup bus ID for media bay */
 		if (!bidp)
 			pmif->aapl_bus_id = 1;
 	} else if (pmif->kind == controller_ohare) {
@@ -1100,9 +1098,7 @@
 		 * units, I keep the old way
 		 */
 		ppc_md.feature_call(PMAC_FTR_IDE_ENABLE, np, 0, 1);
-	} else
-#endif
-	{
+	} else {
  		/* This is necessary to enable IDE when net-booting */
 		ppc_md.feature_call(PMAC_FTR_IDE_RESET, np, pmif->aapl_bus_id, 1);
 		ppc_md.feature_call(PMAC_FTR_IDE_ENABLE, np, pmif->aapl_bus_id, 1);
@@ -1112,17 +1108,21 @@
 	}
 
 	printk(KERN_INFO DRV_NAME ": Found Apple %s controller (%s), "
-			 "bus ID %d%s, irq %d\n", model_name[pmif->kind],
-			 pmif->mdev ? "macio" : "PCI", pmif->aapl_bus_id,
-			 pmif->mediabay ? " (mediabay)" : "", hw->irq);
+	       "bus ID %d%s, irq %d\n", model_name[pmif->kind],
+	       pmif->mdev ? "macio" : "PCI", pmif->aapl_bus_id,
+	       on_media_bay(pmif) ? " (mediabay)" : "", hw->irq);
 
 	rc = ide_host_register(host, &d, hws);
-	if (rc) {
-		ide_host_free(host);
-		return rc;
-	}
+	if (rc)
+		pmif->hwif = NULL;
 
-	return 0;
+	if (pmif->mdev)
+		unlock_media_bay(pmif->mdev->media_bay);
+
+ bail:
+	if (rc && host)
+		ide_host_free(host);
+	return rc;
 }
 
 static void __devinit pmac_ide_init_ports(struct ide_hw *hw, unsigned long base)
@@ -1362,6 +1362,25 @@
 	return rc;
 }
 
+#ifdef CONFIG_PMAC_MEDIABAY
+static void pmac_ide_macio_mb_event(struct macio_dev* mdev, int mb_state)
+{
+	pmac_ide_hwif_t *pmif =
+		(pmac_ide_hwif_t *)dev_get_drvdata(&mdev->ofdev.dev);
+
+	switch(mb_state) {
+	case MB_CD:
+		if (!pmif->hwif->present)
+			ide_port_scan(pmif->hwif);
+		break;
+	default:
+		if (pmif->hwif->present)
+			ide_port_unregister_devices(pmif->hwif);
+	}
+}
+#endif /* CONFIG_PMAC_MEDIABAY */
+
+
 static struct of_device_id pmac_ide_macio_match[] = 
 {
 	{
@@ -1386,6 +1405,9 @@
 	.probe		= pmac_ide_macio_attach,
 	.suspend	= pmac_ide_macio_suspend,
 	.resume		= pmac_ide_macio_resume,
+#ifdef CONFIG_PMAC_MEDIABAY
+	.mediabay_event	= pmac_ide_macio_mb_event,
+#endif
 };
 
 static const struct pci_device_id pmac_ide_pci_match[] = {
diff --git a/drivers/macintosh/macio_asic.c b/drivers/macintosh/macio_asic.c
index 588a5b0..26a303a 100644
--- a/drivers/macintosh/macio_asic.c
+++ b/drivers/macintosh/macio_asic.c
@@ -379,6 +379,11 @@
 	dev->ofdev.dev.parent = parent;
 	dev->ofdev.dev.bus = &macio_bus_type;
 	dev->ofdev.dev.release = macio_release_dev;
+	dev->ofdev.dev.dma_parms = &dev->dma_parms;
+
+	/* Standard DMA paremeters */
+	dma_set_max_seg_size(&dev->ofdev.dev, 65536);
+	dma_set_seg_boundary(&dev->ofdev.dev, 0xffffffff);
 
 #ifdef CONFIG_PCI
 	/* Set the DMA ops to the ones from the PCI device, this could be
@@ -538,6 +543,42 @@
 	driver_unregister(&drv->driver);
 }
 
+/* Managed MacIO resources */
+struct macio_devres {
+	u32	res_mask;
+};
+
+static void maciom_release(struct device *gendev, void *res)
+{
+	struct macio_dev *dev = to_macio_device(gendev);
+	struct macio_devres *dr = res;
+	int i, max;
+
+	max = min(dev->n_resources, 32);
+	for (i = 0; i < max; i++) {
+		if (dr->res_mask & (1 << i))
+			macio_release_resource(dev, i);
+	}
+}
+
+int macio_enable_devres(struct macio_dev *dev)
+{
+	struct macio_devres *dr;
+
+	dr = devres_find(&dev->ofdev.dev, maciom_release, NULL, NULL);
+	if (!dr) {
+		dr = devres_alloc(maciom_release, sizeof(*dr), GFP_KERNEL);
+		if (!dr)
+			return -ENOMEM;
+	}
+	return devres_get(&dev->ofdev.dev, dr, NULL, NULL) != NULL;
+}
+
+static struct macio_devres * find_macio_dr(struct macio_dev *dev)
+{
+	return devres_find(&dev->ofdev.dev, maciom_release, NULL, NULL);
+}
+
 /**
  *	macio_request_resource - Request an MMIO resource
  * 	@dev: pointer to the device holding the resource
@@ -555,6 +596,8 @@
 int macio_request_resource(struct macio_dev *dev, int resource_no,
 			   const char *name)
 {
+	struct macio_devres *dr = find_macio_dr(dev);
+
 	if (macio_resource_len(dev, resource_no) == 0)
 		return 0;
 		
@@ -562,6 +605,9 @@
 				macio_resource_len(dev, resource_no),
 				name))
 		goto err_out;
+
+	if (dr && resource_no < 32)
+		dr->res_mask |= 1 << resource_no;
 	
 	return 0;
 
@@ -582,10 +628,14 @@
  */
 void macio_release_resource(struct macio_dev *dev, int resource_no)
 {
+	struct macio_devres *dr = find_macio_dr(dev);
+
 	if (macio_resource_len(dev, resource_no) == 0)
 		return;
 	release_mem_region(macio_resource_start(dev, resource_no),
 			   macio_resource_len(dev, resource_no));
+	if (dr && resource_no < 32)
+		dr->res_mask &= ~(1 << resource_no);
 }
 
 /**
@@ -744,3 +794,5 @@
 EXPORT_SYMBOL(macio_release_resource);
 EXPORT_SYMBOL(macio_request_resources);
 EXPORT_SYMBOL(macio_release_resources);
+EXPORT_SYMBOL(macio_enable_devres);
+
diff --git a/drivers/macintosh/mediabay.c b/drivers/macintosh/mediabay.c
index 029ad8c..08002b8 100644
--- a/drivers/macintosh/mediabay.c
+++ b/drivers/macintosh/mediabay.c
@@ -33,15 +33,6 @@
 #include <linux/adb.h>
 #include <linux/pmu.h>
 
-
-#define MB_DEBUG
-
-#ifdef MB_DEBUG
-#define MBDBG(fmt, arg...)	printk(KERN_INFO fmt , ## arg)
-#else
-#define MBDBG(fmt, arg...)	do { } while (0)
-#endif
-
 #define MB_FCR32(bay, r)	((bay)->base + ((r) >> 2))
 #define MB_FCR8(bay, r)		(((volatile u8 __iomem *)((bay)->base)) + (r))
 
@@ -76,28 +67,14 @@
 	int				index;
 	int				cached_gpio;
 	int				sleeping;
+	int				user_lock;
 	struct mutex			lock;
-#ifdef CONFIG_BLK_DEV_IDE_PMAC
-	ide_hwif_t			*cd_port;
-	void __iomem			*cd_base;
-	int				cd_irq;
-	int				cd_retry;
-#endif
-#if defined(CONFIG_BLK_DEV_IDE_PMAC)
-	int 				cd_index;
-#endif
 };
 
 #define MAX_BAYS	2
 
 static struct media_bay_info media_bays[MAX_BAYS];
-int media_bay_count = 0;
-
-#ifdef CONFIG_BLK_DEV_IDE_PMAC
-/* check the busy bit in the media-bay ide interface
-   (assumes the media-bay contains an ide device) */
-#define MB_IDE_READY(i)	((readb(media_bays[i].cd_base + 0x70) & 0x80) == 0)
-#endif
+static int media_bay_count = 0;
 
 /*
  * Wait that number of ms between each step in normal polling mode
@@ -130,21 +107,11 @@
 
 /*
  * Wait this many ticks after an IDE device (e.g. CD-ROM) is inserted
- * (or until the device is ready) before waiting for busy bit to disappear
+ * (or until the device is ready) before calling into the driver
  */
 #define MB_IDE_WAIT	1000
 
 /*
- * Timeout waiting for busy bit of an IDE device to go down
- */
-#define MB_IDE_TIMEOUT	5000
-
-/*
- * Max retries of the full power up/down sequence for an IDE device
- */
-#define MAX_CD_RETRIES	3
-
-/*
  * States of a media bay
  */
 enum {
@@ -153,7 +120,6 @@
 	mb_enabling_bay,	/* enable bits set, waiting MB_RESET_DELAY */
 	mb_resetting,		/* reset bit unset, waiting MB_SETUP_DELAY */
 	mb_ide_resetting,	/* IDE reset bit unser, waiting MB_IDE_WAIT */
-	mb_ide_waiting,		/* Waiting for BUSY bit to go away until MB_IDE_TIMEOUT */
 	mb_up,			/* Media bay full */
 	mb_powering_down	/* Powering down (avoid too fast down/up) */
 };
@@ -373,12 +339,12 @@
 	if (onoff) {
 		bay->ops->power(bay, 1);
 		bay->state = mb_powering_up;
-		MBDBG("mediabay%d: powering up\n", bay->index);
+		pr_debug("mediabay%d: powering up\n", bay->index);
 	} else { 
 		/* Make sure everything is powered down & disabled */
 		bay->ops->power(bay, 0);
 		bay->state = mb_powering_down;
-		MBDBG("mediabay%d: powering down\n", bay->index);
+		pr_debug("mediabay%d: powering down\n", bay->index);
 	}
 	bay->timer = msecs_to_jiffies(MB_POWER_DELAY);
 }
@@ -387,107 +353,118 @@
 {
 	int id = bay->ops->content(bay);
 
-	if (id == bay->last_value) {
-		if (id != bay->content_id) {
-			bay->value_count += msecs_to_jiffies(MB_POLL_DELAY);
-			if (bay->value_count >= msecs_to_jiffies(MB_STABLE_DELAY)) {
-				/* If the device type changes without going thru
-				 * "MB_NO", we force a pass by "MB_NO" to make sure
-				 * things are properly reset
-				 */
-				if ((id != MB_NO) && (bay->content_id != MB_NO)) {
-					id = MB_NO;
-					MBDBG("mediabay%d: forcing MB_NO\n", bay->index);
-				}
-				MBDBG("mediabay%d: switching to %d\n", bay->index, id);
-				set_mb_power(bay, id != MB_NO);
-				bay->content_id = id;
-				if (id == MB_NO) {
-#ifdef CONFIG_BLK_DEV_IDE_PMAC
-					bay->cd_retry = 0;
-#endif
-					printk(KERN_INFO "media bay %d is empty\n", bay->index);
-				}
-			}
-		}
-	} else {
+	static char *mb_content_types[] = {
+		"a floppy drive",
+		"a floppy drive",
+		"an unsuported audio device",
+		"an ATA device",
+		"an unsupported PCI device",
+		"an unknown device",
+	};
+
+	if (id != bay->last_value) {
 		bay->last_value = id;
 		bay->value_count = 0;
+		return;
+	}
+	if (id == bay->content_id)
+		return;
+
+	bay->value_count += msecs_to_jiffies(MB_POLL_DELAY);
+	if (bay->value_count >= msecs_to_jiffies(MB_STABLE_DELAY)) {
+		/* If the device type changes without going thru
+		 * "MB_NO", we force a pass by "MB_NO" to make sure
+		 * things are properly reset
+		 */
+		if ((id != MB_NO) && (bay->content_id != MB_NO)) {
+			id = MB_NO;
+			pr_debug("mediabay%d: forcing MB_NO\n", bay->index);
+		}
+		pr_debug("mediabay%d: switching to %d\n", bay->index, id);
+		set_mb_power(bay, id != MB_NO);
+		bay->content_id = id;
+		if (id >= MB_NO || id < 0)
+			printk(KERN_INFO "mediabay%d: Bay is now empty\n", bay->index);
+		else
+			printk(KERN_INFO "mediabay%d: Bay contains %s\n",
+			       bay->index, mb_content_types[id]);
 	}
 }
 
-#ifdef CONFIG_BLK_DEV_IDE_PMAC
-int check_media_bay(struct device_node *which_bay, int what)
+int check_media_bay(struct macio_dev *baydev)
 {
-	int	i;
+	struct media_bay_info* bay;
+	int id;
 
-	for (i=0; i<media_bay_count; i++)
-		if (media_bays[i].mdev && which_bay == media_bays[i].mdev->ofdev.node) {
-			if ((what == media_bays[i].content_id) && media_bays[i].state == mb_up)
-				return 0;
-			media_bays[i].cd_index = -1;
-			return -EINVAL;
-		}
-	return -ENODEV;
+	if (baydev == NULL)
+		return MB_NO;
+
+	/* This returns an instant snapshot, not locking, sine
+	 * we may be called with the bay lock held. The resulting
+	 * fuzzyness of the result if called at the wrong time is
+	 * not actually a huge deal
+	 */
+	bay = macio_get_drvdata(baydev);
+	if (bay == NULL)
+		return MB_NO;
+	id = bay->content_id;
+	if (bay->state != mb_up)
+		return MB_NO;
+	if (id == MB_FD1)
+		return MB_FD;
+	return id;
 }
-EXPORT_SYMBOL(check_media_bay);
+EXPORT_SYMBOL_GPL(check_media_bay);
 
-int check_media_bay_by_base(unsigned long base, int what)
+void lock_media_bay(struct macio_dev *baydev)
 {
-	int	i;
+	struct media_bay_info* bay;
 
-	for (i=0; i<media_bay_count; i++)
-		if (media_bays[i].mdev && base == (unsigned long) media_bays[i].cd_base) {
-			if ((what == media_bays[i].content_id) && media_bays[i].state == mb_up)
-				return 0;
-			media_bays[i].cd_index = -1;
-			return -EINVAL;
-		} 
-
-	return -ENODEV;
+	if (baydev == NULL)
+		return;
+	bay = macio_get_drvdata(baydev);
+	if (bay == NULL)
+		return;
+	mutex_lock(&bay->lock);
+	bay->user_lock = 1;
 }
-EXPORT_SYMBOL_GPL(check_media_bay_by_base);
+EXPORT_SYMBOL_GPL(lock_media_bay);
 
-int media_bay_set_ide_infos(struct device_node* which_bay, unsigned long base,
-			    int irq, ide_hwif_t *hwif)
+void unlock_media_bay(struct macio_dev *baydev)
 {
-	int	i;
+	struct media_bay_info* bay;
 
-	for (i=0; i<media_bay_count; i++) {
-		struct media_bay_info* bay = &media_bays[i];
-
-		if (bay->mdev && which_bay == bay->mdev->ofdev.node) {
-			int timeout = 5000, index = hwif->index;
-			
-			mutex_lock(&bay->lock);
-
-			bay->cd_port	= hwif;
- 			bay->cd_base	= (void __iomem *) base;
-			bay->cd_irq	= irq;
-
-			if ((MB_CD != bay->content_id) || bay->state != mb_up) {
-				mutex_unlock(&bay->lock);
-				return 0;
-			}
-			printk(KERN_DEBUG "Registered ide%d for media bay %d\n", index, i);
-			do {
-				if (MB_IDE_READY(i)) {
-					bay->cd_index	= index;
-					mutex_unlock(&bay->lock);
-					return 0;
-				}
-				mdelay(1);
-			} while(--timeout);
-			printk(KERN_DEBUG "Timeount waiting IDE in bay %d\n", i);
-			mutex_unlock(&bay->lock);
-			return -ENODEV;
-		}
+	if (baydev == NULL)
+		return;
+	bay = macio_get_drvdata(baydev);
+	if (bay == NULL)
+		return;
+	if (bay->user_lock) {
+		bay->user_lock = 0;
+		mutex_unlock(&bay->lock);
 	}
-
-	return -ENODEV;
 }
-EXPORT_SYMBOL_GPL(media_bay_set_ide_infos);
-#endif /* CONFIG_BLK_DEV_IDE_PMAC */
+EXPORT_SYMBOL_GPL(unlock_media_bay);
+
+static int mb_broadcast_hotplug(struct device *dev, void *data)
+{
+	struct media_bay_info* bay = data;
+	struct macio_dev *mdev;
+	struct macio_driver *drv;
+	int state;
+
+	if (dev->bus != &macio_bus_type)
+		return 0;
+
+	state = bay->state == mb_up ? bay->content_id : MB_NO;
+	if (state == MB_FD1)
+		state = MB_FD;
+	mdev = to_macio_device(dev);
+	drv = to_macio_driver(dev->driver);
+	if (dev->driver && drv->mediabay_event)
+		drv->mediabay_event(mdev, state);
+	return 0;
+}
 
 static void media_bay_step(int i)
 {
@@ -497,8 +474,8 @@
 	if (bay->state != mb_powering_down)
 	    poll_media_bay(bay);
 
-	/* If timer expired or polling IDE busy, run state machine */
-	if ((bay->state != mb_ide_waiting) && (bay->timer != 0)) {
+	/* If timer expired run state machine */
+	if (bay->timer != 0) {
 		bay->timer -= msecs_to_jiffies(MB_POLL_DELAY);
 		if (bay->timer > 0)
 			return;
@@ -508,100 +485,50 @@
 	switch(bay->state) {
 	case mb_powering_up:
 	    	if (bay->ops->setup_bus(bay, bay->last_value) < 0) {
-			MBDBG("mediabay%d: device not supported (kind:%d)\n", i, bay->content_id);
+			pr_debug("mediabay%d: device not supported (kind:%d)\n",
+				 i, bay->content_id);
 	    		set_mb_power(bay, 0);
 	    		break;
 	    	}
 	    	bay->timer = msecs_to_jiffies(MB_RESET_DELAY);
 	    	bay->state = mb_enabling_bay;
-		MBDBG("mediabay%d: enabling (kind:%d)\n", i, bay->content_id);
+		pr_debug("mediabay%d: enabling (kind:%d)\n", i, bay->content_id);
 		break;
 	case mb_enabling_bay:
 		bay->ops->un_reset(bay);
 	    	bay->timer = msecs_to_jiffies(MB_SETUP_DELAY);
 	    	bay->state = mb_resetting;
-		MBDBG("mediabay%d: waiting reset (kind:%d)\n", i, bay->content_id);
+		pr_debug("mediabay%d: releasing bay reset (kind:%d)\n",
+			 i, bay->content_id);
 	    	break;
 	case mb_resetting:
 		if (bay->content_id != MB_CD) {
-			MBDBG("mediabay%d: bay is up (kind:%d)\n", i, bay->content_id);
+			pr_debug("mediabay%d: bay is up (kind:%d)\n", i,
+				 bay->content_id);
 			bay->state = mb_up;
+			device_for_each_child(&bay->mdev->ofdev.dev,
+					      bay, mb_broadcast_hotplug);
 			break;
 	    	}
-#ifdef CONFIG_BLK_DEV_IDE_PMAC
-		MBDBG("mediabay%d: waiting IDE reset (kind:%d)\n", i, bay->content_id);
+		pr_debug("mediabay%d: releasing ATA reset (kind:%d)\n",
+			 i, bay->content_id);
 		bay->ops->un_reset_ide(bay);
 	    	bay->timer = msecs_to_jiffies(MB_IDE_WAIT);
 	    	bay->state = mb_ide_resetting;
-#else
-		printk(KERN_DEBUG "media-bay %d is ide (not compiled in kernel)\n", i);
-		set_mb_power(bay, 0);
-#endif /* CONFIG_BLK_DEV_IDE_PMAC */
 	    	break;
-#ifdef CONFIG_BLK_DEV_IDE_PMAC
+
 	case mb_ide_resetting:
-	    	bay->timer = msecs_to_jiffies(MB_IDE_TIMEOUT);
-	    	bay->state = mb_ide_waiting;
-		MBDBG("mediabay%d: waiting IDE ready (kind:%d)\n", i, bay->content_id);
+		pr_debug("mediabay%d: bay is up (kind:%d)\n", i, bay->content_id);
+		bay->state = mb_up;
+		device_for_each_child(&bay->mdev->ofdev.dev,
+				      bay, mb_broadcast_hotplug);
 	    	break;
-	case mb_ide_waiting:
-		if (bay->cd_base == NULL) {
-			bay->timer = 0;
-			bay->state = mb_up;
-			MBDBG("mediabay%d: up before IDE init\n", i);
-			break;
-		} else if (MB_IDE_READY(i)) {
-			bay->timer = 0;
-			bay->state = mb_up;
-			if (bay->cd_index < 0) {
-				printk("mediabay %d, registering IDE...\n", i);
-				pmu_suspend();
-				ide_port_scan(bay->cd_port);
-				if (bay->cd_port->present)
-					bay->cd_index = bay->cd_port->index;
-				pmu_resume();
-			}
-			if (bay->cd_index == -1) {
-				/* We eventually do a retry */
-				bay->cd_retry++;
-				printk("IDE register error\n");
-				set_mb_power(bay, 0);
-			} else {
-				printk(KERN_DEBUG "media-bay %d is ide%d\n", i, bay->cd_index);
-				MBDBG("mediabay %d IDE ready\n", i);
-			}
-			break;
-	    	} else if (bay->timer > 0)
-			bay->timer -= msecs_to_jiffies(MB_POLL_DELAY);
-	    	if (bay->timer <= 0) {
-			printk("\nIDE Timeout in bay %d !, IDE state is: 0x%02x\n",
-			       i, readb(bay->cd_base + 0x70));
-			MBDBG("mediabay%d: nIDE Timeout !\n", i);
-			set_mb_power(bay, 0);
-			bay->timer = 0;
-	    	}
-		break;
-#endif /* CONFIG_BLK_DEV_IDE_PMAC */
+
 	case mb_powering_down:
 	    	bay->state = mb_empty;
-#ifdef CONFIG_BLK_DEV_IDE_PMAC
-    	        if (bay->cd_index >= 0) {
-			printk(KERN_DEBUG "Unregistering mb %d ide, index:%d\n", i,
-			       bay->cd_index);
-			ide_port_unregister_devices(bay->cd_port);
-			bay->cd_index = -1;
-		}
-	    	if (bay->cd_retry) {
-			if (bay->cd_retry > MAX_CD_RETRIES) {
-				/* Should add an error sound (sort of beep in dmasound) */
-				printk("\nmedia-bay %d, IDE device badly inserted or unrecognised\n", i);
-			} else {
-				/* Force a new power down/up sequence */
-				bay->content_id = MB_NO;
-			}
-	    	}
-#endif /* CONFIG_BLK_DEV_IDE_PMAC */
-		MBDBG("mediabay%d: end of power down\n", i);
+		device_for_each_child(&bay->mdev->ofdev.dev,
+				      bay, mb_broadcast_hotplug);
+		pr_debug("mediabay%d: end of power down\n", i);
 	    	break;
 	}
 }
@@ -676,11 +603,6 @@
 	bay->last_value = bay->ops->content(bay);
 	bay->value_count = msecs_to_jiffies(MB_STABLE_DELAY);
 	bay->state = mb_empty;
-	do {
-		msleep(MB_POLL_DELAY);
-		media_bay_step(i);
-	} while((bay->state != mb_empty) &&
-		(bay->state != mb_up));
 
 	/* Mark us ready by filling our mdev data */
 	macio_set_drvdata(mdev, bay);
@@ -725,7 +647,7 @@
 	       	set_mb_power(bay, 0);
 		msleep(MB_POWER_DELAY);
 	       	if (bay->ops->content(bay) != bay->content_id) {
-			printk("mediabay%d: content changed during sleep...\n", bay->index);
+			printk("mediabay%d: Content changed during sleep...\n", bay->index);
 			mutex_unlock(&bay->lock);
 	       		return 0;
 		}
@@ -733,9 +655,6 @@
 	       	bay->last_value = bay->content_id;
 	       	bay->value_count = msecs_to_jiffies(MB_STABLE_DELAY);
 	       	bay->timer = msecs_to_jiffies(MB_POWER_DELAY);
-#ifdef CONFIG_BLK_DEV_IDE_PMAC
-	       	bay->cd_retry = 0;
-#endif
 	       	do {
 			msleep(MB_POLL_DELAY);
 	       		media_bay_step(bay->index);
@@ -823,9 +742,6 @@
 	for (i=0; i<MAX_BAYS; i++) {
 		memset((char *)&media_bays[i], 0, sizeof(struct media_bay_info));
 		media_bays[i].content_id	= -1;
-#ifdef CONFIG_BLK_DEV_IDE_PMAC
-		media_bays[i].cd_index		= -1;
-#endif
 	}
 	if (!machine_is(powermac))
 		return 0;
diff --git a/drivers/macintosh/nvram.c b/drivers/macintosh/nvram.c
index b195d75..c876349 100644
--- a/drivers/macintosh/nvram.c
+++ b/drivers/macintosh/nvram.c
@@ -13,7 +13,6 @@
 #include <linux/fcntl.h>
 #include <linux/nvram.h>
 #include <linux/init.h>
-#include <linux/smp_lock.h>
 #include <asm/uaccess.h>
 #include <asm/nvram.h>
 
@@ -21,7 +20,6 @@
 
 static loff_t nvram_llseek(struct file *file, loff_t offset, int origin)
 {
-	lock_kernel();
 	switch (origin) {
 	case 1:
 		offset += file->f_pos;
@@ -30,12 +28,10 @@
 		offset += NVRAM_SIZE;
 		break;
 	}
-	if (offset < 0) {
-		unlock_kernel();
+	if (offset < 0)
 		return -EINVAL;
-	}
+
 	file->f_pos = offset;
-	unlock_kernel();
 	return file->f_pos;
 }
 
@@ -76,8 +72,7 @@
 	return p - buf;
 }
 
-static int nvram_ioctl(struct inode *inode, struct file *file,
-	unsigned int cmd, unsigned long arg)
+static long nvram_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
 	switch(cmd) {
 		case PMAC_NVRAM_GET_OFFSET:
diff --git a/drivers/macintosh/therm_adt746x.c b/drivers/macintosh/therm_adt746x.c
index 556f0fe..5ff47ba 100644
--- a/drivers/macintosh/therm_adt746x.c
+++ b/drivers/macintosh/therm_adt746x.c
@@ -79,6 +79,7 @@
 	u8			limits[3];
 	int			last_speed[2];
 	int			last_var[2];
+	int			pwm_inv[2];
 };
 
 static enum {ADT7460, ADT7467} therm_type;
@@ -229,19 +230,23 @@
 	
 	if (speed >= 0) {
 		manual = read_reg(th, MANUAL_MODE[fan]);
+		manual &= ~INVERT_MASK;
 		write_reg(th, MANUAL_MODE[fan],
-			(manual|MANUAL_MASK) & (~INVERT_MASK));
+			manual | MANUAL_MASK | th->pwm_inv[fan]);
 		write_reg(th, FAN_SPD_SET[fan], speed);
 	} else {
 		/* back to automatic */
 		if(therm_type == ADT7460) {
 			manual = read_reg(th,
 				MANUAL_MODE[fan]) & (~MANUAL_MASK);
-
+			manual &= ~INVERT_MASK;
+			manual |= th->pwm_inv[fan];
 			write_reg(th,
 				MANUAL_MODE[fan], manual|REM_CONTROL[fan]);
 		} else {
 			manual = read_reg(th, MANUAL_MODE[fan]);
+			manual &= ~INVERT_MASK;
+			manual |= th->pwm_inv[fan];
 			write_reg(th, MANUAL_MODE[fan], manual&(~AUTO_MASK));
 		}
 	}
@@ -387,7 +392,7 @@
 	i2c_set_clientdata(client, th);
 	th->clt = client;
 
-	rc = read_reg(th, 0);
+	rc = read_reg(th, CONFIG_REG);
 	if (rc < 0) {
 		dev_err(&client->dev, "Thermostat failed to read config!\n");
 		kfree(th);
@@ -418,6 +423,10 @@
 
 	thermostat = th;
 
+	/* record invert bit status because fw can corrupt it after suspend */
+	th->pwm_inv[0] = read_reg(th, MANUAL_MODE[0]) & INVERT_MASK;
+	th->pwm_inv[1] = read_reg(th, MANUAL_MODE[1]) & INVERT_MASK;
+
 	/* be sure to really write fan speed the first time */
 	th->last_speed[0] = -2;
 	th->last_speed[1] = -2;
diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c
index 6f308a4..db379c3 100644
--- a/drivers/macintosh/via-pmu.c
+++ b/drivers/macintosh/via-pmu.c
@@ -36,6 +36,7 @@
 #include <linux/spinlock.h>
 #include <linux/pm.h>
 #include <linux/proc_fs.h>
+#include <linux/seq_file.h>
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/device.h>
@@ -186,17 +187,11 @@
 static void pmu_start(void);
 static irqreturn_t via_pmu_interrupt(int irq, void *arg);
 static irqreturn_t gpio1_interrupt(int irq, void *arg);
-static int proc_get_info(char *page, char **start, off_t off,
-			  int count, int *eof, void *data);
-static int proc_get_irqstats(char *page, char **start, off_t off,
-			  int count, int *eof, void *data);
+static const struct file_operations pmu_info_proc_fops;
+static const struct file_operations pmu_irqstats_proc_fops;
 static void pmu_pass_intr(unsigned char *data, int len);
-static int proc_get_batt(char *page, char **start, off_t off,
-			int count, int *eof, void *data);
-static int proc_read_options(char *page, char **start, off_t off,
-			int count, int *eof, void *data);
-static int proc_write_options(struct file *file, const char __user *buffer,
-			unsigned long count, void *data);
+static const struct file_operations pmu_battery_proc_fops;
+static const struct file_operations pmu_options_proc_fops;
 
 #ifdef CONFIG_ADB
 struct adb_driver via_pmu_driver = {
@@ -507,19 +502,15 @@
 		for (i=0; i<pmu_battery_count; i++) {
 			char title[16];
 			sprintf(title, "battery_%ld", i);
-			proc_pmu_batt[i] = create_proc_read_entry(title, 0, proc_pmu_root,
-						proc_get_batt, (void *)i);
+			proc_pmu_batt[i] = proc_create_data(title, 0, proc_pmu_root,
+					&pmu_battery_proc_fops, (void *)i);
 		}
 
-		proc_pmu_info = create_proc_read_entry("info", 0, proc_pmu_root,
-					proc_get_info, NULL);
-		proc_pmu_irqstats = create_proc_read_entry("interrupts", 0, proc_pmu_root,
-					proc_get_irqstats, NULL);
-		proc_pmu_options = create_proc_entry("options", 0600, proc_pmu_root);
-		if (proc_pmu_options) {
-			proc_pmu_options->read_proc = proc_read_options;
-			proc_pmu_options->write_proc = proc_write_options;
-		}
+		proc_pmu_info = proc_create("info", 0, proc_pmu_root, &pmu_info_proc_fops);
+		proc_pmu_irqstats = proc_create("interrupts", 0, proc_pmu_root,
+						&pmu_irqstats_proc_fops);
+		proc_pmu_options = proc_create("options", 0600, proc_pmu_root,
+						&pmu_options_proc_fops);
 	}
 	return 0;
 }
@@ -799,27 +790,33 @@
 			2, PMU_SMART_BATTERY_STATE, pmu_cur_battery+1);
 }
 
-static int
-proc_get_info(char *page, char **start, off_t off,
-		int count, int *eof, void *data)
+static int pmu_info_proc_show(struct seq_file *m, void *v)
 {
-	char* p = page;
-
-	p += sprintf(p, "PMU driver version     : %d\n", PMU_DRIVER_VERSION);
-	p += sprintf(p, "PMU firmware version   : %02x\n", pmu_version);
-	p += sprintf(p, "AC Power               : %d\n",
+	seq_printf(m, "PMU driver version     : %d\n", PMU_DRIVER_VERSION);
+	seq_printf(m, "PMU firmware version   : %02x\n", pmu_version);
+	seq_printf(m, "AC Power               : %d\n",
 		((pmu_power_flags & PMU_PWR_AC_PRESENT) != 0) || pmu_battery_count == 0);
-	p += sprintf(p, "Battery count          : %d\n", pmu_battery_count);
+	seq_printf(m, "Battery count          : %d\n", pmu_battery_count);
 
-	return p - page;
+	return 0;
 }
 
-static int
-proc_get_irqstats(char *page, char **start, off_t off,
-		  int count, int *eof, void *data)
+static int pmu_info_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, pmu_info_proc_show, NULL);
+}
+
+static const struct file_operations pmu_info_proc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= pmu_info_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static int pmu_irqstats_proc_show(struct seq_file *m, void *v)
 {
 	int i;
-	char* p = page;
 	static const char *irq_names[] = {
 		"Total CB1 triggered events",
 		"Total GPIO1 triggered events",
@@ -835,60 +832,76 @@
         };
 
 	for (i=0; i<11; i++) {
-		p += sprintf(p, " %2u: %10u (%s)\n",
+		seq_printf(m, " %2u: %10u (%s)\n",
 			     i, pmu_irq_stats[i], irq_names[i]);
 	}
-	return p - page;
+	return 0;
 }
 
-static int
-proc_get_batt(char *page, char **start, off_t off,
-		int count, int *eof, void *data)
+static int pmu_irqstats_proc_open(struct inode *inode, struct file *file)
 {
-	long batnum = (long)data;
-	char *p = page;
+	return single_open(file, pmu_irqstats_proc_show, NULL);
+}
+
+static const struct file_operations pmu_irqstats_proc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= pmu_irqstats_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static int pmu_battery_proc_show(struct seq_file *m, void *v)
+{
+	long batnum = (long)m->private;
 	
-	p += sprintf(p, "\n");
-	p += sprintf(p, "flags      : %08x\n",
-		pmu_batteries[batnum].flags);
-	p += sprintf(p, "charge     : %d\n",
-		pmu_batteries[batnum].charge);
-	p += sprintf(p, "max_charge : %d\n",
-		pmu_batteries[batnum].max_charge);
-	p += sprintf(p, "current    : %d\n",
-		pmu_batteries[batnum].amperage);
-	p += sprintf(p, "voltage    : %d\n",
-		pmu_batteries[batnum].voltage);
-	p += sprintf(p, "time rem.  : %d\n",
-		pmu_batteries[batnum].time_remaining);
-
-	return p - page;
+	seq_putc(m, '\n');
+	seq_printf(m, "flags      : %08x\n", pmu_batteries[batnum].flags);
+	seq_printf(m, "charge     : %d\n", pmu_batteries[batnum].charge);
+	seq_printf(m, "max_charge : %d\n", pmu_batteries[batnum].max_charge);
+	seq_printf(m, "current    : %d\n", pmu_batteries[batnum].amperage);
+	seq_printf(m, "voltage    : %d\n", pmu_batteries[batnum].voltage);
+	seq_printf(m, "time rem.  : %d\n", pmu_batteries[batnum].time_remaining);
+	return 0;
 }
 
-static int
-proc_read_options(char *page, char **start, off_t off,
-			int count, int *eof, void *data)
+static int pmu_battery_proc_open(struct inode *inode, struct file *file)
 {
-	char *p = page;
+	return single_open(file, pmu_battery_proc_show, PDE(inode)->data);
+}
 
+static const struct file_operations pmu_battery_proc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= pmu_battery_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static int pmu_options_proc_show(struct seq_file *m, void *v)
+{
 #if defined(CONFIG_SUSPEND) && defined(CONFIG_PPC32)
 	if (pmu_kind == PMU_KEYLARGO_BASED &&
 	    pmac_call_feature(PMAC_FTR_SLEEP_STATE,NULL,0,-1) >= 0)
-		p += sprintf(p, "lid_wakeup=%d\n", option_lid_wakeup);
+		seq_printf(m, "lid_wakeup=%d\n", option_lid_wakeup);
 #endif
 	if (pmu_kind == PMU_KEYLARGO_BASED)
-		p += sprintf(p, "server_mode=%d\n", option_server_mode);
+		seq_printf(m, "server_mode=%d\n", option_server_mode);
 
-	return p - page;
+	return 0;
 }
-			
-static int
-proc_write_options(struct file *file, const char __user *buffer,
-			unsigned long count, void *data)
+
+static int pmu_options_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, pmu_options_proc_show, NULL);
+}
+
+static ssize_t pmu_options_proc_write(struct file *file,
+		const char __user *buffer, size_t count, loff_t *pos)
 {
 	char tmp[33];
 	char *label, *val;
-	unsigned long fcount = count;
+	size_t fcount = count;
 	
 	if (!count)
 		return -EINVAL;
@@ -927,6 +940,15 @@
 	return fcount;
 }
 
+static const struct file_operations pmu_options_proc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= pmu_options_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+	.write		= pmu_options_proc_write,
+};
+
 #ifdef CONFIG_ADB
 /* Send an ADB command */
 static int pmu_send_request(struct adb_request *req, int sync)
diff --git a/drivers/macintosh/windfarm_smu_controls.c b/drivers/macintosh/windfarm_smu_controls.c
index 961fa0e..6c68b9e 100644
--- a/drivers/macintosh/windfarm_smu_controls.c
+++ b/drivers/macintosh/windfarm_smu_controls.c
@@ -202,6 +202,8 @@
 		fct->ctrl.name = "cpu-front-fan-1";
 	else if (!strcmp(l, "CPU A PUMP"))
 		fct->ctrl.name = "cpu-pump-0";
+	else if (!strcmp(l, "CPU B PUMP"))
+		fct->ctrl.name = "cpu-pump-1";
 	else if (!strcmp(l, "Slots Fan") || !strcmp(l, "Slots fan") ||
 		 !strcmp(l, "EXPANSION SLOTS INTAKE"))
 		fct->ctrl.name = "slots-fan";
diff --git a/drivers/mmc/host/of_mmc_spi.c b/drivers/mmc/host/of_mmc_spi.c
index 0c44d56..0c7a63c1 100644
--- a/drivers/mmc/host/of_mmc_spi.c
+++ b/drivers/mmc/host/of_mmc_spi.c
@@ -22,6 +22,8 @@
 #include <linux/mmc/core.h>
 #include <linux/mmc/host.h>
 
+MODULE_LICENSE("GPL");
+
 enum {
 	CD_GPIO = 0,
 	WP_GPIO,
diff --git a/drivers/net/ehea/ehea_hcall.h b/drivers/net/ehea/ehea_hcall.h
deleted file mode 100644
index 8e7d1c3..0000000
--- a/drivers/net/ehea/ehea_hcall.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- *  linux/drivers/net/ehea/ehea_hcall.h
- *
- *  eHEA ethernet device driver for IBM eServer System p
- *
- *  (C) Copyright IBM Corp. 2006
- *
- *  Authors:
- *       Christoph Raisch <raisch@de.ibm.com>
- *       Jan-Bernd Themann <themann@de.ibm.com>
- *       Thomas Klein <tklein@de.ibm.com>
- *
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#ifndef __EHEA_HCALL_H__
-#define __EHEA_HCALL_H__
-
-/**
- * This file contains HCALL defines that are to be included in the appropriate
- * kernel files later
- */
-
-#define H_ALLOC_HEA_RESOURCE   0x278
-#define H_MODIFY_HEA_QP        0x250
-#define H_QUERY_HEA_QP         0x254
-#define H_QUERY_HEA            0x258
-#define H_QUERY_HEA_PORT       0x25C
-#define H_MODIFY_HEA_PORT      0x260
-#define H_REG_BCMC             0x264
-#define H_DEREG_BCMC           0x268
-#define H_REGISTER_HEA_RPAGES  0x26C
-#define H_DISABLE_AND_GET_HEA  0x270
-#define H_GET_HEA_INFO         0x274
-#define H_ADD_CONN             0x284
-#define H_DEL_CONN             0x288
-
-#endif	/* __EHEA_HCALL_H__ */
diff --git a/drivers/net/ehea/ehea_phyp.h b/drivers/net/ehea/ehea_phyp.h
index f3628c8..2f8174c 100644
--- a/drivers/net/ehea/ehea_phyp.h
+++ b/drivers/net/ehea/ehea_phyp.h
@@ -33,7 +33,6 @@
 #include <asm/hvcall.h>
 #include "ehea.h"
 #include "ehea_hw.h"
-#include "ehea_hcall.h"
 
 /* Some abbreviations used here:
  *
diff --git a/drivers/of/platform.c b/drivers/of/platform.c
index 298de0f..d58ade1 100644
--- a/drivers/of/platform.c
+++ b/drivers/of/platform.c
@@ -65,28 +65,6 @@
 	return 0;
 }
 
-static int of_platform_device_suspend(struct device *dev, pm_message_t state)
-{
-	struct of_device *of_dev = to_of_device(dev);
-	struct of_platform_driver *drv = to_of_platform_driver(dev->driver);
-	int error = 0;
-
-	if (dev->driver && drv->suspend)
-		error = drv->suspend(of_dev, state);
-	return error;
-}
-
-static int of_platform_device_resume(struct device * dev)
-{
-	struct of_device *of_dev = to_of_device(dev);
-	struct of_platform_driver *drv = to_of_platform_driver(dev->driver);
-	int error = 0;
-
-	if (dev->driver && drv->resume)
-		error = drv->resume(of_dev);
-	return error;
-}
-
 static void of_platform_device_shutdown(struct device *dev)
 {
 	struct of_device *of_dev = to_of_device(dev);
@@ -96,16 +74,313 @@
 		drv->shutdown(of_dev);
 }
 
+#ifdef CONFIG_PM_SLEEP
+
+static int of_platform_legacy_suspend(struct device *dev, pm_message_t mesg)
+{
+	struct of_device *of_dev = to_of_device(dev);
+	struct of_platform_driver *drv = to_of_platform_driver(dev->driver);
+	int ret = 0;
+
+	if (dev->driver && drv->suspend)
+		ret = drv->suspend(of_dev, mesg);
+	return ret;
+}
+
+static int of_platform_legacy_resume(struct device *dev)
+{
+	struct of_device *of_dev = to_of_device(dev);
+	struct of_platform_driver *drv = to_of_platform_driver(dev->driver);
+	int ret = 0;
+
+	if (dev->driver && drv->resume)
+		ret = drv->resume(of_dev);
+	return ret;
+}
+
+static int of_platform_pm_prepare(struct device *dev)
+{
+	struct device_driver *drv = dev->driver;
+	int ret = 0;
+
+	if (drv && drv->pm && drv->pm->prepare)
+		ret = drv->pm->prepare(dev);
+
+	return ret;
+}
+
+static void of_platform_pm_complete(struct device *dev)
+{
+	struct device_driver *drv = dev->driver;
+
+	if (drv && drv->pm && drv->pm->complete)
+		drv->pm->complete(dev);
+}
+
+#ifdef CONFIG_SUSPEND
+
+static int of_platform_pm_suspend(struct device *dev)
+{
+	struct device_driver *drv = dev->driver;
+	int ret = 0;
+
+	if (!drv)
+		return 0;
+
+	if (drv->pm) {
+		if (drv->pm->suspend)
+			ret = drv->pm->suspend(dev);
+	} else {
+		ret = of_platform_legacy_suspend(dev, PMSG_SUSPEND);
+	}
+
+	return ret;
+}
+
+static int of_platform_pm_suspend_noirq(struct device *dev)
+{
+	struct device_driver *drv = dev->driver;
+	int ret = 0;
+
+	if (!drv)
+		return 0;
+
+	if (drv->pm) {
+		if (drv->pm->suspend_noirq)
+			ret = drv->pm->suspend_noirq(dev);
+	}
+
+	return ret;
+}
+
+static int of_platform_pm_resume(struct device *dev)
+{
+	struct device_driver *drv = dev->driver;
+	int ret = 0;
+
+	if (!drv)
+		return 0;
+
+	if (drv->pm) {
+		if (drv->pm->resume)
+			ret = drv->pm->resume(dev);
+	} else {
+		ret = of_platform_legacy_resume(dev);
+	}
+
+	return ret;
+}
+
+static int of_platform_pm_resume_noirq(struct device *dev)
+{
+	struct device_driver *drv = dev->driver;
+	int ret = 0;
+
+	if (!drv)
+		return 0;
+
+	if (drv->pm) {
+		if (drv->pm->resume_noirq)
+			ret = drv->pm->resume_noirq(dev);
+	}
+
+	return ret;
+}
+
+#else /* !CONFIG_SUSPEND */
+
+#define of_platform_pm_suspend		NULL
+#define of_platform_pm_resume		NULL
+#define of_platform_pm_suspend_noirq	NULL
+#define of_platform_pm_resume_noirq	NULL
+
+#endif /* !CONFIG_SUSPEND */
+
+#ifdef CONFIG_HIBERNATION
+
+static int of_platform_pm_freeze(struct device *dev)
+{
+	struct device_driver *drv = dev->driver;
+	int ret = 0;
+
+	if (!drv)
+		return 0;
+
+	if (drv->pm) {
+		if (drv->pm->freeze)
+			ret = drv->pm->freeze(dev);
+	} else {
+		ret = of_platform_legacy_suspend(dev, PMSG_FREEZE);
+	}
+
+	return ret;
+}
+
+static int of_platform_pm_freeze_noirq(struct device *dev)
+{
+	struct device_driver *drv = dev->driver;
+	int ret = 0;
+
+	if (!drv)
+		return 0;
+
+	if (drv->pm) {
+		if (drv->pm->freeze_noirq)
+			ret = drv->pm->freeze_noirq(dev);
+	}
+
+	return ret;
+}
+
+static int of_platform_pm_thaw(struct device *dev)
+{
+	struct device_driver *drv = dev->driver;
+	int ret = 0;
+
+	if (!drv)
+		return 0;
+
+	if (drv->pm) {
+		if (drv->pm->thaw)
+			ret = drv->pm->thaw(dev);
+	} else {
+		ret = of_platform_legacy_resume(dev);
+	}
+
+	return ret;
+}
+
+static int of_platform_pm_thaw_noirq(struct device *dev)
+{
+	struct device_driver *drv = dev->driver;
+	int ret = 0;
+
+	if (!drv)
+		return 0;
+
+	if (drv->pm) {
+		if (drv->pm->thaw_noirq)
+			ret = drv->pm->thaw_noirq(dev);
+	}
+
+	return ret;
+}
+
+static int of_platform_pm_poweroff(struct device *dev)
+{
+	struct device_driver *drv = dev->driver;
+	int ret = 0;
+
+	if (!drv)
+		return 0;
+
+	if (drv->pm) {
+		if (drv->pm->poweroff)
+			ret = drv->pm->poweroff(dev);
+	} else {
+		ret = of_platform_legacy_suspend(dev, PMSG_HIBERNATE);
+	}
+
+	return ret;
+}
+
+static int of_platform_pm_poweroff_noirq(struct device *dev)
+{
+	struct device_driver *drv = dev->driver;
+	int ret = 0;
+
+	if (!drv)
+		return 0;
+
+	if (drv->pm) {
+		if (drv->pm->poweroff_noirq)
+			ret = drv->pm->poweroff_noirq(dev);
+	}
+
+	return ret;
+}
+
+static int of_platform_pm_restore(struct device *dev)
+{
+	struct device_driver *drv = dev->driver;
+	int ret = 0;
+
+	if (!drv)
+		return 0;
+
+	if (drv->pm) {
+		if (drv->pm->restore)
+			ret = drv->pm->restore(dev);
+	} else {
+		ret = of_platform_legacy_resume(dev);
+	}
+
+	return ret;
+}
+
+static int of_platform_pm_restore_noirq(struct device *dev)
+{
+	struct device_driver *drv = dev->driver;
+	int ret = 0;
+
+	if (!drv)
+		return 0;
+
+	if (drv->pm) {
+		if (drv->pm->restore_noirq)
+			ret = drv->pm->restore_noirq(dev);
+	}
+
+	return ret;
+}
+
+#else /* !CONFIG_HIBERNATION */
+
+#define of_platform_pm_freeze		NULL
+#define of_platform_pm_thaw		NULL
+#define of_platform_pm_poweroff		NULL
+#define of_platform_pm_restore		NULL
+#define of_platform_pm_freeze_noirq	NULL
+#define of_platform_pm_thaw_noirq		NULL
+#define of_platform_pm_poweroff_noirq	NULL
+#define of_platform_pm_restore_noirq	NULL
+
+#endif /* !CONFIG_HIBERNATION */
+
+static struct dev_pm_ops of_platform_dev_pm_ops = {
+	.prepare = of_platform_pm_prepare,
+	.complete = of_platform_pm_complete,
+	.suspend = of_platform_pm_suspend,
+	.resume = of_platform_pm_resume,
+	.freeze = of_platform_pm_freeze,
+	.thaw = of_platform_pm_thaw,
+	.poweroff = of_platform_pm_poweroff,
+	.restore = of_platform_pm_restore,
+	.suspend_noirq = of_platform_pm_suspend_noirq,
+	.resume_noirq = of_platform_pm_resume_noirq,
+	.freeze_noirq = of_platform_pm_freeze_noirq,
+	.thaw_noirq = of_platform_pm_thaw_noirq,
+	.poweroff_noirq = of_platform_pm_poweroff_noirq,
+	.restore_noirq = of_platform_pm_restore_noirq,
+};
+
+#define OF_PLATFORM_PM_OPS_PTR	(&of_platform_dev_pm_ops)
+
+#else /* !CONFIG_PM_SLEEP */
+
+#define OF_PLATFORM_PM_OPS_PTR	NULL
+
+#endif /* !CONFIG_PM_SLEEP */
+
 int of_bus_type_init(struct bus_type *bus, const char *name)
 {
 	bus->name = name;
 	bus->match = of_platform_bus_match;
 	bus->probe = of_platform_device_probe;
 	bus->remove = of_platform_device_remove;
-	bus->suspend = of_platform_device_suspend;
-	bus->resume = of_platform_device_resume;
 	bus->shutdown = of_platform_device_shutdown;
 	bus->dev_attrs = of_platform_device_attrs;
+	bus->pm = OF_PLATFORM_PM_OPS_PTR;
 	return bus_register(bus);
 }
 
diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index 4b6f7cb..28fce65 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -133,6 +133,14 @@
 	  which interfaces to an LM70 temperature sensor using
 	  a parallel port.
 
+config SPI_MPC52xx
+	tristate "Freescale MPC52xx SPI (non-PSC) controller support"
+	depends on PPC_MPC52xx && SPI
+	select SPI_MASTER_OF
+	help
+	  This drivers supports the MPC52xx SPI controller in master SPI
+	  mode.
+
 config SPI_MPC52xx_PSC
 	tristate "Freescale MPC52xx PSC SPI controller"
 	depends on PPC_MPC52xx && EXPERIMENTAL
@@ -147,9 +155,6 @@
 	  This enables using the Freescale MPC8xxx SPI controllers in master
 	  mode.
 
-	  This driver uses a simple set of shift registers for data (opposed
-	  to the CPM based descriptor model).
-
 config SPI_OMAP_UWIRE
 	tristate "OMAP1 MicroWire"
 	depends on ARCH_OMAP1
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index 21a1182..e3f092a 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -25,6 +25,7 @@
 obj-$(CONFIG_SPI_ORION)			+= orion_spi.o
 obj-$(CONFIG_SPI_PL022)			+= amba-pl022.o
 obj-$(CONFIG_SPI_MPC52xx_PSC)		+= mpc52xx_psc_spi.o
+obj-$(CONFIG_SPI_MPC52xx)		+= mpc52xx_spi.o
 obj-$(CONFIG_SPI_MPC8xxx)		+= spi_mpc8xxx.o
 obj-$(CONFIG_SPI_PPC4xx)		+= spi_ppc4xx.o
 obj-$(CONFIG_SPI_S3C24XX_GPIO)		+= spi_s3c24xx_gpio.o
diff --git a/drivers/spi/mpc52xx_psc_spi.c b/drivers/spi/mpc52xx_psc_spi.c
index 1b74d5c..f50c81d 100644
--- a/drivers/spi/mpc52xx_psc_spi.c
+++ b/drivers/spi/mpc52xx_psc_spi.c
@@ -17,6 +17,7 @@
 #include <linux/errno.h>
 #include <linux/interrupt.h>
 #include <linux/of_platform.h>
+#include <linux/of_spi.h>
 #include <linux/workqueue.h>
 #include <linux/completion.h>
 #include <linux/io.h>
@@ -313,11 +314,13 @@
 	struct mpc52xx_psc __iomem *psc = mps->psc;
 	struct mpc52xx_psc_fifo __iomem *fifo = mps->fifo;
 	u32 mclken_div;
-	int ret = 0;
+	int ret;
 
 	/* default sysclk is 512MHz */
 	mclken_div = (mps->sysclk ? mps->sysclk : 512000000) / MCLK;
-	mpc52xx_set_psc_clkdiv(psc_id, mclken_div);
+	ret = mpc52xx_set_psc_clkdiv(psc_id, mclken_div);
+	if (ret)
+		return ret;
 
 	/* Reset the PSC into a known state */
 	out_8(&psc->command, MPC52xx_PSC_RST_RX);
@@ -341,7 +344,7 @@
 
 	mps->bits_per_word = 8;
 
-	return ret;
+	return 0;
 }
 
 static irqreturn_t mpc52xx_psc_spi_isr(int irq, void *dev_id)
@@ -410,8 +413,10 @@
 		goto free_master;
 
 	ret = mpc52xx_psc_spi_port_config(master->bus_num, mps);
-	if (ret < 0)
+	if (ret < 0) {
+		dev_err(dev, "can't configure PSC! Is it capable of SPI?\n");
 		goto free_irq;
+	}
 
 	spin_lock_init(&mps->lock);
 	init_completion(&mps->done);
@@ -464,10 +469,11 @@
 	const u32 *regaddr_p;
 	u64 regaddr64, size64;
 	s16 id = -1;
+	int rc;
 
 	regaddr_p = of_get_address(op->node, 0, &size64, NULL);
 	if (!regaddr_p) {
-		printk(KERN_ERR "Invalid PSC address\n");
+		dev_err(&op->dev, "Invalid PSC address\n");
 		return -EINVAL;
 	}
 	regaddr64 = of_translate_address(op->node, regaddr_p);
@@ -478,15 +484,18 @@
 
 		psc_nump = of_get_property(op->node, "cell-index", NULL);
 		if (!psc_nump || *psc_nump > 5) {
-			printk(KERN_ERR "mpc52xx_psc_spi: Device node %s has invalid "
-					"cell-index property\n", op->node->full_name);
+			dev_err(&op->dev, "Invalid cell-index property\n");
 			return -EINVAL;
 		}
 		id = *psc_nump + 1;
 	}
 
-	return mpc52xx_psc_spi_do_probe(&op->dev, (u32)regaddr64, (u32)size64,
+	rc = mpc52xx_psc_spi_do_probe(&op->dev, (u32)regaddr64, (u32)size64,
 					irq_of_parse_and_map(op->node, 0), id);
+	if (rc == 0)
+		of_register_spi_devices(dev_get_drvdata(&op->dev), op->node);
+
+	return rc;
 }
 
 static int __exit mpc52xx_psc_spi_of_remove(struct of_device *op)
diff --git a/drivers/spi/mpc52xx_spi.c b/drivers/spi/mpc52xx_spi.c
new file mode 100644
index 0000000..ef8379b
--- /dev/null
+++ b/drivers/spi/mpc52xx_spi.c
@@ -0,0 +1,520 @@
+/*
+ * MPC52xx SPI bus driver.
+ *
+ * Copyright (C) 2008 Secret Lab Technologies Ltd.
+ *
+ * This file is released under the GPLv2
+ *
+ * This is the driver for the MPC5200's dedicated SPI controller.
+ *
+ * Note: this driver does not support the MPC5200 PSC in SPI mode.  For
+ * that driver see drivers/spi/mpc52xx_psc_spi.c
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/of_platform.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/spi/spi.h>
+#include <linux/spi/mpc52xx_spi.h>
+#include <linux/of_spi.h>
+#include <linux/io.h>
+#include <asm/time.h>
+#include <asm/mpc52xx.h>
+
+MODULE_AUTHOR("Grant Likely <grant.likely@secretlab.ca>");
+MODULE_DESCRIPTION("MPC52xx SPI (non-PSC) Driver");
+MODULE_LICENSE("GPL");
+
+/* Register offsets */
+#define SPI_CTRL1	0x00
+#define SPI_CTRL1_SPIE		(1 << 7)
+#define SPI_CTRL1_SPE		(1 << 6)
+#define SPI_CTRL1_MSTR		(1 << 4)
+#define SPI_CTRL1_CPOL		(1 << 3)
+#define SPI_CTRL1_CPHA		(1 << 2)
+#define SPI_CTRL1_SSOE		(1 << 1)
+#define SPI_CTRL1_LSBFE		(1 << 0)
+
+#define SPI_CTRL2	0x01
+#define SPI_BRR		0x04
+
+#define SPI_STATUS	0x05
+#define SPI_STATUS_SPIF		(1 << 7)
+#define SPI_STATUS_WCOL		(1 << 6)
+#define SPI_STATUS_MODF		(1 << 4)
+
+#define SPI_DATA	0x09
+#define SPI_PORTDATA	0x0d
+#define SPI_DATADIR	0x10
+
+/* FSM state return values */
+#define FSM_STOP	0	/* Nothing more for the state machine to */
+				/* do.  If something interesting happens */
+				/* then and IRQ will be received */
+#define FSM_POLL	1	/* need to poll for completion, an IRQ is */
+				/* not expected */
+#define FSM_CONTINUE	2	/* Keep iterating the state machine */
+
+/* Driver internal data */
+struct mpc52xx_spi {
+	struct spi_master *master;
+	u32 sysclk;
+	void __iomem *regs;
+	int irq0;	/* MODF irq */
+	int irq1;	/* SPIF irq */
+	int ipb_freq;
+
+	/* Statistics */
+	int msg_count;
+	int wcol_count;
+	int wcol_ticks;
+	u32 wcol_tx_timestamp;
+	int modf_count;
+	int byte_count;
+
+	struct list_head queue;		/* queue of pending messages */
+	spinlock_t lock;
+	struct work_struct work;
+
+
+	/* Details of current transfer (length, and buffer pointers) */
+	struct spi_message *message;	/* current message */
+	struct spi_transfer *transfer;	/* current transfer */
+	int (*state)(int irq, struct mpc52xx_spi *ms, u8 status, u8 data);
+	int len;
+	int timestamp;
+	u8 *rx_buf;
+	const u8 *tx_buf;
+	int cs_change;
+};
+
+/*
+ * CS control function
+ */
+static void mpc52xx_spi_chipsel(struct mpc52xx_spi *ms, int value)
+{
+	out_8(ms->regs + SPI_PORTDATA, value ? 0 : 0x08);
+}
+
+/*
+ * Start a new transfer.  This is called both by the idle state
+ * for the first transfer in a message, and by the wait state when the
+ * previous transfer in a message is complete.
+ */
+static void mpc52xx_spi_start_transfer(struct mpc52xx_spi *ms)
+{
+	ms->rx_buf = ms->transfer->rx_buf;
+	ms->tx_buf = ms->transfer->tx_buf;
+	ms->len = ms->transfer->len;
+
+	/* Activate the chip select */
+	if (ms->cs_change)
+		mpc52xx_spi_chipsel(ms, 1);
+	ms->cs_change = ms->transfer->cs_change;
+
+	/* Write out the first byte */
+	ms->wcol_tx_timestamp = get_tbl();
+	if (ms->tx_buf)
+		out_8(ms->regs + SPI_DATA, *ms->tx_buf++);
+	else
+		out_8(ms->regs + SPI_DATA, 0);
+}
+
+/* Forward declaration of state handlers */
+static int mpc52xx_spi_fsmstate_transfer(int irq, struct mpc52xx_spi *ms,
+					 u8 status, u8 data);
+static int mpc52xx_spi_fsmstate_wait(int irq, struct mpc52xx_spi *ms,
+				     u8 status, u8 data);
+
+/*
+ * IDLE state
+ *
+ * No transfers are in progress; if another transfer is pending then retrieve
+ * it and kick it off.  Otherwise, stop processing the state machine
+ */
+static int
+mpc52xx_spi_fsmstate_idle(int irq, struct mpc52xx_spi *ms, u8 status, u8 data)
+{
+	struct spi_device *spi;
+	int spr, sppr;
+	u8 ctrl1;
+
+	if (status && (irq != NO_IRQ))
+		dev_err(&ms->master->dev, "spurious irq, status=0x%.2x\n",
+			status);
+
+	/* Check if there is another transfer waiting. */
+	if (list_empty(&ms->queue))
+		return FSM_STOP;
+
+	/* get the head of the queue */
+	ms->message = list_first_entry(&ms->queue, struct spi_message, queue);
+	list_del_init(&ms->message->queue);
+
+	/* Setup the controller parameters */
+	ctrl1 = SPI_CTRL1_SPIE | SPI_CTRL1_SPE | SPI_CTRL1_MSTR;
+	spi = ms->message->spi;
+	if (spi->mode & SPI_CPHA)
+		ctrl1 |= SPI_CTRL1_CPHA;
+	if (spi->mode & SPI_CPOL)
+		ctrl1 |= SPI_CTRL1_CPOL;
+	if (spi->mode & SPI_LSB_FIRST)
+		ctrl1 |= SPI_CTRL1_LSBFE;
+	out_8(ms->regs + SPI_CTRL1, ctrl1);
+
+	/* Setup the controller speed */
+	/* minimum divider is '2'.  Also, add '1' to force rounding the
+	 * divider up. */
+	sppr = ((ms->ipb_freq / ms->message->spi->max_speed_hz) + 1) >> 1;
+	spr = 0;
+	if (sppr < 1)
+		sppr = 1;
+	while (((sppr - 1) & ~0x7) != 0) {
+		sppr = (sppr + 1) >> 1; /* add '1' to force rounding up */
+		spr++;
+	}
+	sppr--;		/* sppr quantity in register is offset by 1 */
+	if (spr > 7) {
+		/* Don't overrun limits of SPI baudrate register */
+		spr = 7;
+		sppr = 7;
+	}
+	out_8(ms->regs + SPI_BRR, sppr << 4 | spr); /* Set speed */
+
+	ms->cs_change = 1;
+	ms->transfer = container_of(ms->message->transfers.next,
+				    struct spi_transfer, transfer_list);
+
+	mpc52xx_spi_start_transfer(ms);
+	ms->state = mpc52xx_spi_fsmstate_transfer;
+
+	return FSM_CONTINUE;
+}
+
+/*
+ * TRANSFER state
+ *
+ * In the middle of a transfer.  If the SPI core has completed processing
+ * a byte, then read out the received data and write out the next byte
+ * (unless this transfer is finished; in which case go on to the wait
+ * state)
+ */
+static int mpc52xx_spi_fsmstate_transfer(int irq, struct mpc52xx_spi *ms,
+					 u8 status, u8 data)
+{
+	if (!status)
+		return ms->irq0 ? FSM_STOP : FSM_POLL;
+
+	if (status & SPI_STATUS_WCOL) {
+		/* The SPI controller is stoopid.  At slower speeds, it may
+		 * raise the SPIF flag before the state machine is actually
+		 * finished, which causes a collision (internal to the state
+		 * machine only).  The manual recommends inserting a delay
+		 * between receiving the interrupt and sending the next byte,
+		 * but it can also be worked around simply by retrying the
+		 * transfer which is what we do here. */
+		ms->wcol_count++;
+		ms->wcol_ticks += get_tbl() - ms->wcol_tx_timestamp;
+		ms->wcol_tx_timestamp = get_tbl();
+		data = 0;
+		if (ms->tx_buf)
+			data = *(ms->tx_buf-1);
+		out_8(ms->regs + SPI_DATA, data); /* try again */
+		return FSM_CONTINUE;
+	} else if (status & SPI_STATUS_MODF) {
+		ms->modf_count++;
+		dev_err(&ms->master->dev, "mode fault\n");
+		mpc52xx_spi_chipsel(ms, 0);
+		ms->message->status = -EIO;
+		ms->message->complete(ms->message->context);
+		ms->state = mpc52xx_spi_fsmstate_idle;
+		return FSM_CONTINUE;
+	}
+
+	/* Read data out of the spi device */
+	ms->byte_count++;
+	if (ms->rx_buf)
+		*ms->rx_buf++ = data;
+
+	/* Is the transfer complete? */
+	ms->len--;
+	if (ms->len == 0) {
+		ms->timestamp = get_tbl();
+		ms->timestamp += ms->transfer->delay_usecs * tb_ticks_per_usec;
+		ms->state = mpc52xx_spi_fsmstate_wait;
+		return FSM_CONTINUE;
+	}
+
+	/* Write out the next byte */
+	ms->wcol_tx_timestamp = get_tbl();
+	if (ms->tx_buf)
+		out_8(ms->regs + SPI_DATA, *ms->tx_buf++);
+	else
+		out_8(ms->regs + SPI_DATA, 0);
+
+	return FSM_CONTINUE;
+}
+
+/*
+ * WAIT state
+ *
+ * A transfer has completed; need to wait for the delay period to complete
+ * before starting the next transfer
+ */
+static int
+mpc52xx_spi_fsmstate_wait(int irq, struct mpc52xx_spi *ms, u8 status, u8 data)
+{
+	if (status && irq)
+		dev_err(&ms->master->dev, "spurious irq, status=0x%.2x\n",
+			status);
+
+	if (((int)get_tbl()) - ms->timestamp < 0)
+		return FSM_POLL;
+
+	ms->message->actual_length += ms->transfer->len;
+
+	/* Check if there is another transfer in this message.  If there
+	 * aren't then deactivate CS, notify sender, and drop back to idle
+	 * to start the next message. */
+	if (ms->transfer->transfer_list.next == &ms->message->transfers) {
+		ms->msg_count++;
+		mpc52xx_spi_chipsel(ms, 0);
+		ms->message->status = 0;
+		ms->message->complete(ms->message->context);
+		ms->state = mpc52xx_spi_fsmstate_idle;
+		return FSM_CONTINUE;
+	}
+
+	/* There is another transfer; kick it off */
+
+	if (ms->cs_change)
+		mpc52xx_spi_chipsel(ms, 0);
+
+	ms->transfer = container_of(ms->transfer->transfer_list.next,
+				    struct spi_transfer, transfer_list);
+	mpc52xx_spi_start_transfer(ms);
+	ms->state = mpc52xx_spi_fsmstate_transfer;
+	return FSM_CONTINUE;
+}
+
+/**
+ * mpc52xx_spi_fsm_process - Finite State Machine iteration function
+ * @irq: irq number that triggered the FSM or 0 for polling
+ * @ms: pointer to mpc52xx_spi driver data
+ */
+static void mpc52xx_spi_fsm_process(int irq, struct mpc52xx_spi *ms)
+{
+	int rc = FSM_CONTINUE;
+	u8 status, data;
+
+	while (rc == FSM_CONTINUE) {
+		/* Interrupt cleared by read of STATUS followed by
+		 * read of DATA registers */
+		status = in_8(ms->regs + SPI_STATUS);
+		data = in_8(ms->regs + SPI_DATA);
+		rc = ms->state(irq, ms, status, data);
+	}
+
+	if (rc == FSM_POLL)
+		schedule_work(&ms->work);
+}
+
+/**
+ * mpc52xx_spi_irq - IRQ handler
+ */
+static irqreturn_t mpc52xx_spi_irq(int irq, void *_ms)
+{
+	struct mpc52xx_spi *ms = _ms;
+	spin_lock(&ms->lock);
+	mpc52xx_spi_fsm_process(irq, ms);
+	spin_unlock(&ms->lock);
+	return IRQ_HANDLED;
+}
+
+/**
+ * mpc52xx_spi_wq - Workqueue function for polling the state machine
+ */
+static void mpc52xx_spi_wq(struct work_struct *work)
+{
+	struct mpc52xx_spi *ms = container_of(work, struct mpc52xx_spi, work);
+	unsigned long flags;
+
+	spin_lock_irqsave(&ms->lock, flags);
+	mpc52xx_spi_fsm_process(0, ms);
+	spin_unlock_irqrestore(&ms->lock, flags);
+}
+
+/*
+ * spi_master ops
+ */
+
+static int mpc52xx_spi_setup(struct spi_device *spi)
+{
+	if (spi->bits_per_word % 8)
+		return -EINVAL;
+
+	if (spi->mode & ~(SPI_CPOL | SPI_CPHA | SPI_LSB_FIRST))
+		return -EINVAL;
+
+	if (spi->chip_select >= spi->master->num_chipselect)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int mpc52xx_spi_transfer(struct spi_device *spi, struct spi_message *m)
+{
+	struct mpc52xx_spi *ms = spi_master_get_devdata(spi->master);
+	unsigned long flags;
+
+	m->actual_length = 0;
+	m->status = -EINPROGRESS;
+
+	spin_lock_irqsave(&ms->lock, flags);
+	list_add_tail(&m->queue, &ms->queue);
+	spin_unlock_irqrestore(&ms->lock, flags);
+	schedule_work(&ms->work);
+
+	return 0;
+}
+
+/*
+ * OF Platform Bus Binding
+ */
+static int __devinit mpc52xx_spi_probe(struct of_device *op,
+				       const struct of_device_id *match)
+{
+	struct spi_master *master;
+	struct mpc52xx_spi *ms;
+	void __iomem *regs;
+	int rc;
+
+	/* MMIO registers */
+	dev_dbg(&op->dev, "probing mpc5200 SPI device\n");
+	regs = of_iomap(op->node, 0);
+	if (!regs)
+		return -ENODEV;
+
+	/* initialize the device */
+	out_8(regs+SPI_CTRL1, SPI_CTRL1_SPIE | SPI_CTRL1_SPE | SPI_CTRL1_MSTR);
+	out_8(regs + SPI_CTRL2, 0x0);
+	out_8(regs + SPI_DATADIR, 0xe);	/* Set output pins */
+	out_8(regs + SPI_PORTDATA, 0x8);	/* Deassert /SS signal */
+
+	/* Clear the status register and re-read it to check for a MODF
+	 * failure.  This driver cannot currently handle multiple masters
+	 * on the SPI bus.  This fault will also occur if the SPI signals
+	 * are not connected to any pins (port_config setting) */
+	in_8(regs + SPI_STATUS);
+	in_8(regs + SPI_DATA);
+	if (in_8(regs + SPI_STATUS) & SPI_STATUS_MODF) {
+		dev_err(&op->dev, "mode fault; is port_config correct?\n");
+		rc = -EIO;
+		goto err_init;
+	}
+
+	dev_dbg(&op->dev, "allocating spi_master struct\n");
+	master = spi_alloc_master(&op->dev, sizeof *ms);
+	if (!master) {
+		rc = -ENOMEM;
+		goto err_alloc;
+	}
+	master->bus_num = -1;
+	master->num_chipselect = 1;
+	master->setup = mpc52xx_spi_setup;
+	master->transfer = mpc52xx_spi_transfer;
+	dev_set_drvdata(&op->dev, master);
+
+	ms = spi_master_get_devdata(master);
+	ms->master = master;
+	ms->regs = regs;
+	ms->irq0 = irq_of_parse_and_map(op->node, 0);
+	ms->irq1 = irq_of_parse_and_map(op->node, 1);
+	ms->state = mpc52xx_spi_fsmstate_idle;
+	ms->ipb_freq = mpc5xxx_get_bus_frequency(op->node);
+	spin_lock_init(&ms->lock);
+	INIT_LIST_HEAD(&ms->queue);
+	INIT_WORK(&ms->work, mpc52xx_spi_wq);
+
+	/* Decide if interrupts can be used */
+	if (ms->irq0 && ms->irq1) {
+		rc = request_irq(ms->irq0, mpc52xx_spi_irq, IRQF_SAMPLE_RANDOM,
+				  "mpc5200-spi-modf", ms);
+		rc |= request_irq(ms->irq1, mpc52xx_spi_irq, IRQF_SAMPLE_RANDOM,
+				  "mpc5200-spi-spiF", ms);
+		if (rc) {
+			free_irq(ms->irq0, ms);
+			free_irq(ms->irq1, ms);
+			ms->irq0 = ms->irq1 = 0;
+		}
+	} else {
+		/* operate in polled mode */
+		ms->irq0 = ms->irq1 = 0;
+	}
+
+	if (!ms->irq0)
+		dev_info(&op->dev, "using polled mode\n");
+
+	dev_dbg(&op->dev, "registering spi_master struct\n");
+	rc = spi_register_master(master);
+	if (rc)
+		goto err_register;
+
+	of_register_spi_devices(master, op->node);
+	dev_info(&ms->master->dev, "registered MPC5200 SPI bus\n");
+
+	return rc;
+
+ err_register:
+	dev_err(&ms->master->dev, "initialization failed\n");
+	spi_master_put(master);
+ err_alloc:
+ err_init:
+	iounmap(regs);
+	return rc;
+}
+
+static int __devexit mpc52xx_spi_remove(struct of_device *op)
+{
+	struct spi_master *master = dev_get_drvdata(&op->dev);
+	struct mpc52xx_spi *ms = spi_master_get_devdata(master);
+
+	free_irq(ms->irq0, ms);
+	free_irq(ms->irq1, ms);
+
+	spi_unregister_master(master);
+	spi_master_put(master);
+	iounmap(ms->regs);
+
+	return 0;
+}
+
+static struct of_device_id mpc52xx_spi_match[] __devinitdata = {
+	{ .compatible = "fsl,mpc5200-spi", },
+	{}
+};
+MODULE_DEVICE_TABLE(of, mpc52xx_spi_match);
+
+static struct of_platform_driver mpc52xx_spi_of_driver = {
+	.owner = THIS_MODULE,
+	.name = "mpc52xx-spi",
+	.match_table = mpc52xx_spi_match,
+	.probe = mpc52xx_spi_probe,
+	.remove = __exit_p(mpc52xx_spi_remove),
+};
+
+static int __init mpc52xx_spi_init(void)
+{
+	return of_register_platform_driver(&mpc52xx_spi_of_driver);
+}
+module_init(mpc52xx_spi_init);
+
+static void __exit mpc52xx_spi_exit(void)
+{
+	of_unregister_platform_driver(&mpc52xx_spi_of_driver);
+}
+module_exit(mpc52xx_spi_exit);
+
diff --git a/drivers/spi/spi_mpc8xxx.c b/drivers/spi/spi_mpc8xxx.c
index 0fd0ec4..930135d 100644
--- a/drivers/spi/spi_mpc8xxx.c
+++ b/drivers/spi/spi_mpc8xxx.c
@@ -5,6 +5,10 @@
  *
  * Copyright (C) 2006 Polycom, Inc.
  *
+ * CPM SPI and QE buffer descriptors mode support:
+ * Copyright (c) 2009  MontaVista Software, Inc.
+ * Author: Anton Vorontsov <avorontsov@ru.mvista.com>
+ *
  * This program is free software; you can redistribute  it and/or modify it
  * under  the terms of  the GNU General  Public License as published by the
  * Free Software Foundation;  either version 2 of the  License, or (at your
@@ -27,6 +31,9 @@
 #include <linux/spi/spi_bitbang.h>
 #include <linux/platform_device.h>
 #include <linux/fsl_devices.h>
+#include <linux/dma-mapping.h>
+#include <linux/mm.h>
+#include <linux/mutex.h>
 #include <linux/of.h>
 #include <linux/of_platform.h>
 #include <linux/gpio.h>
@@ -34,8 +41,19 @@
 #include <linux/of_spi.h>
 
 #include <sysdev/fsl_soc.h>
+#include <asm/cpm.h>
+#include <asm/qe.h>
 #include <asm/irq.h>
 
+/* CPM1 and CPM2 are mutually exclusive. */
+#ifdef CONFIG_CPM1
+#include <asm/cpm1.h>
+#define CPM_SPI_CMD mk_cr_cmd(CPM_CR_CH_SPI, 0)
+#else
+#include <asm/cpm2.h>
+#define CPM_SPI_CMD mk_cr_cmd(CPM_CR_SPI_PAGE, CPM_CR_SPI_SBLOCK, 0, 0)
+#endif
+
 /* SPI Controller registers */
 struct mpc8xxx_spi_reg {
 	u8 res1[0x20];
@@ -47,6 +65,28 @@
 	__be32 receive;
 };
 
+/* SPI Parameter RAM */
+struct spi_pram {
+	__be16	rbase;	/* Rx Buffer descriptor base address */
+	__be16	tbase;	/* Tx Buffer descriptor base address */
+	u8	rfcr;	/* Rx function code */
+	u8	tfcr;	/* Tx function code */
+	__be16	mrblr;	/* Max receive buffer length */
+	__be32	rstate;	/* Internal */
+	__be32	rdp;	/* Internal */
+	__be16	rbptr;	/* Internal */
+	__be16	rbc;	/* Internal */
+	__be32	rxtmp;	/* Internal */
+	__be32	tstate;	/* Internal */
+	__be32	tdp;	/* Internal */
+	__be16	tbptr;	/* Internal */
+	__be16	tbc;	/* Internal */
+	__be32	txtmp;	/* Internal */
+	__be32	res;	/* Tx temp. */
+	__be16  rpbase;	/* Relocation pointer (CPM1 only) */
+	__be16	res1;	/* Reserved */
+};
+
 /* SPI Controller mode register definitions */
 #define	SPMODE_LOOP		(1 << 30)
 #define	SPMODE_CI_INACTIVEHIGH	(1 << 29)
@@ -75,14 +115,40 @@
 #define	SPIM_NE		0x00000200	/* Not empty */
 #define	SPIM_NF		0x00000100	/* Not full */
 
+#define	SPIE_TXB	0x00000200	/* Last char is written to tx fifo */
+#define	SPIE_RXB	0x00000100	/* Last char is written to rx buf */
+
+/* SPCOM register values */
+#define	SPCOM_STR	(1 << 23)	/* Start transmit */
+
+#define	SPI_PRAM_SIZE	0x100
+#define	SPI_MRBLR	((unsigned int)PAGE_SIZE)
+
 /* SPI Controller driver's private data. */
 struct mpc8xxx_spi {
+	struct device *dev;
 	struct mpc8xxx_spi_reg __iomem *base;
 
 	/* rx & tx bufs from the spi_transfer */
 	const void *tx;
 	void *rx;
 
+	int subblock;
+	struct spi_pram __iomem *pram;
+	struct cpm_buf_desc __iomem *tx_bd;
+	struct cpm_buf_desc __iomem *rx_bd;
+
+	struct spi_transfer *xfer_in_progress;
+
+	/* dma addresses for CPM transfers */
+	dma_addr_t tx_dma;
+	dma_addr_t rx_dma;
+	bool map_tx_dma;
+	bool map_rx_dma;
+
+	dma_addr_t dma_dummy_tx;
+	dma_addr_t dma_dummy_rx;
+
 	/* functions to deal with different sized buffers */
 	void (*get_rx) (u32 rx_data, struct mpc8xxx_spi *);
 	u32(*get_tx) (struct mpc8xxx_spi *);
@@ -96,7 +162,7 @@
 	u32 rx_shift;		/* RX data reg shift when in qe mode */
 	u32 tx_shift;		/* TX data reg shift when in qe mode */
 
-	bool qe_mode;
+	unsigned int flags;
 
 	struct workqueue_struct *workqueue;
 	struct work_struct work;
@@ -107,6 +173,10 @@
 	struct completion done;
 };
 
+static void *mpc8xxx_dummy_rx;
+static DEFINE_MUTEX(mpc8xxx_dummy_rx_lock);
+static int mpc8xxx_dummy_rx_refcnt;
+
 struct spi_mpc8xxx_cs {
 	/* functions to deal with different sized buffers */
 	void (*get_rx) (u32 rx_data, struct mpc8xxx_spi *);
@@ -155,6 +225,42 @@
 MPC83XX_SPI_TX_BUF(u16)
 MPC83XX_SPI_TX_BUF(u32)
 
+static void mpc8xxx_spi_change_mode(struct spi_device *spi)
+{
+	struct mpc8xxx_spi *mspi = spi_master_get_devdata(spi->master);
+	struct spi_mpc8xxx_cs *cs = spi->controller_state;
+	__be32 __iomem *mode = &mspi->base->mode;
+	unsigned long flags;
+
+	if (cs->hw_mode == mpc8xxx_spi_read_reg(mode))
+		return;
+
+	/* Turn off IRQs locally to minimize time that SPI is disabled. */
+	local_irq_save(flags);
+
+	/* Turn off SPI unit prior changing mode */
+	mpc8xxx_spi_write_reg(mode, cs->hw_mode & ~SPMODE_ENABLE);
+	mpc8xxx_spi_write_reg(mode, cs->hw_mode);
+
+	/* When in CPM mode, we need to reinit tx and rx. */
+	if (mspi->flags & SPI_CPM_MODE) {
+		if (mspi->flags & SPI_QE) {
+			qe_issue_cmd(QE_INIT_TX_RX, mspi->subblock,
+				     QE_CR_PROTOCOL_UNSPECIFIED, 0);
+		} else {
+			cpm_command(CPM_SPI_CMD, CPM_CR_INIT_TRX);
+			if (mspi->flags & SPI_CPM1) {
+				out_be16(&mspi->pram->rbptr,
+					 in_be16(&mspi->pram->rbase));
+				out_be16(&mspi->pram->tbptr,
+					 in_be16(&mspi->pram->tbase));
+			}
+		}
+	}
+
+	local_irq_restore(flags);
+}
+
 static void mpc8xxx_spi_chipselect(struct spi_device *spi, int value)
 {
 	struct mpc8xxx_spi *mpc8xxx_spi = spi_master_get_devdata(spi->master);
@@ -168,27 +274,13 @@
 	}
 
 	if (value == BITBANG_CS_ACTIVE) {
-		u32 regval = mpc8xxx_spi_read_reg(&mpc8xxx_spi->base->mode);
-
 		mpc8xxx_spi->rx_shift = cs->rx_shift;
 		mpc8xxx_spi->tx_shift = cs->tx_shift;
 		mpc8xxx_spi->get_rx = cs->get_rx;
 		mpc8xxx_spi->get_tx = cs->get_tx;
 
-		if (cs->hw_mode != regval) {
-			unsigned long flags;
-			__be32 __iomem *mode = &mpc8xxx_spi->base->mode;
+		mpc8xxx_spi_change_mode(spi);
 
-			regval = cs->hw_mode;
-			/* Turn off IRQs locally to minimize time that
-			 * SPI is disabled
-			 */
-			local_irq_save(flags);
-			/* Turn off SPI unit prior changing mode */
-			mpc8xxx_spi_write_reg(mode, regval & ~SPMODE_ENABLE);
-			mpc8xxx_spi_write_reg(mode, regval);
-			local_irq_restore(flags);
-		}
 		if (pdata->cs_control)
 			pdata->cs_control(spi, pol);
 	}
@@ -198,7 +290,6 @@
 int mpc8xxx_spi_setup_transfer(struct spi_device *spi, struct spi_transfer *t)
 {
 	struct mpc8xxx_spi *mpc8xxx_spi;
-	u32 regval;
 	u8 bits_per_word, pm;
 	u32 hz;
 	struct spi_mpc8xxx_cs	*cs = spi->controller_state;
@@ -230,14 +321,14 @@
 	if (bits_per_word <= 8) {
 		cs->get_rx = mpc8xxx_spi_rx_buf_u8;
 		cs->get_tx = mpc8xxx_spi_tx_buf_u8;
-		if (mpc8xxx_spi->qe_mode) {
+		if (mpc8xxx_spi->flags & SPI_QE_CPU_MODE) {
 			cs->rx_shift = 16;
 			cs->tx_shift = 24;
 		}
 	} else if (bits_per_word <= 16) {
 		cs->get_rx = mpc8xxx_spi_rx_buf_u16;
 		cs->get_tx = mpc8xxx_spi_tx_buf_u16;
-		if (mpc8xxx_spi->qe_mode) {
+		if (mpc8xxx_spi->flags & SPI_QE_CPU_MODE) {
 			cs->rx_shift = 16;
 			cs->tx_shift = 16;
 		}
@@ -247,7 +338,8 @@
 	} else
 		return -EINVAL;
 
-	if (mpc8xxx_spi->qe_mode && spi->mode & SPI_LSB_FIRST) {
+	if (mpc8xxx_spi->flags & SPI_QE_CPU_MODE &&
+			spi->mode & SPI_LSB_FIRST) {
 		cs->tx_shift = 0;
 		if (bits_per_word <= 8)
 			cs->rx_shift = 8;
@@ -286,37 +378,138 @@
 		pm--;
 
 	cs->hw_mode |= SPMODE_PM(pm);
-	regval =  mpc8xxx_spi_read_reg(&mpc8xxx_spi->base->mode);
-	if (cs->hw_mode != regval) {
-		unsigned long flags;
-		__be32 __iomem *mode = &mpc8xxx_spi->base->mode;
 
-		regval = cs->hw_mode;
-		/* Turn off IRQs locally to minimize time
-		 * that SPI is disabled
-		 */
-		local_irq_save(flags);
-		/* Turn off SPI unit prior changing mode */
-		mpc8xxx_spi_write_reg(mode, regval & ~SPMODE_ENABLE);
-		mpc8xxx_spi_write_reg(mode, regval);
-		local_irq_restore(flags);
-	}
+	mpc8xxx_spi_change_mode(spi);
 	return 0;
 }
 
-static int mpc8xxx_spi_bufs(struct spi_device *spi, struct spi_transfer *t)
+static void mpc8xxx_spi_cpm_bufs_start(struct mpc8xxx_spi *mspi)
 {
-	struct mpc8xxx_spi *mpc8xxx_spi;
-	u32 word, len, bits_per_word;
+	struct cpm_buf_desc __iomem *tx_bd = mspi->tx_bd;
+	struct cpm_buf_desc __iomem *rx_bd = mspi->rx_bd;
+	unsigned int xfer_len = min(mspi->count, SPI_MRBLR);
+	unsigned int xfer_ofs;
 
-	mpc8xxx_spi = spi_master_get_devdata(spi->master);
+	xfer_ofs = mspi->xfer_in_progress->len - mspi->count;
 
-	mpc8xxx_spi->tx = t->tx_buf;
-	mpc8xxx_spi->rx = t->rx_buf;
+	out_be32(&rx_bd->cbd_bufaddr, mspi->rx_dma + xfer_ofs);
+	out_be16(&rx_bd->cbd_datlen, 0);
+	out_be16(&rx_bd->cbd_sc, BD_SC_EMPTY | BD_SC_INTRPT | BD_SC_WRAP);
+
+	out_be32(&tx_bd->cbd_bufaddr, mspi->tx_dma + xfer_ofs);
+	out_be16(&tx_bd->cbd_datlen, xfer_len);
+	out_be16(&tx_bd->cbd_sc, BD_SC_READY | BD_SC_INTRPT | BD_SC_WRAP |
+				 BD_SC_LAST);
+
+	/* start transfer */
+	mpc8xxx_spi_write_reg(&mspi->base->command, SPCOM_STR);
+}
+
+static int mpc8xxx_spi_cpm_bufs(struct mpc8xxx_spi *mspi,
+				struct spi_transfer *t, bool is_dma_mapped)
+{
+	struct device *dev = mspi->dev;
+
+	if (is_dma_mapped) {
+		mspi->map_tx_dma = 0;
+		mspi->map_rx_dma = 0;
+	} else {
+		mspi->map_tx_dma = 1;
+		mspi->map_rx_dma = 1;
+	}
+
+	if (!t->tx_buf) {
+		mspi->tx_dma = mspi->dma_dummy_tx;
+		mspi->map_tx_dma = 0;
+	}
+
+	if (!t->rx_buf) {
+		mspi->rx_dma = mspi->dma_dummy_rx;
+		mspi->map_rx_dma = 0;
+	}
+
+	if (mspi->map_tx_dma) {
+		void *nonconst_tx = (void *)mspi->tx; /* shut up gcc */
+
+		mspi->tx_dma = dma_map_single(dev, nonconst_tx, t->len,
+					      DMA_TO_DEVICE);
+		if (dma_mapping_error(dev, mspi->tx_dma)) {
+			dev_err(dev, "unable to map tx dma\n");
+			return -ENOMEM;
+		}
+	} else {
+		mspi->tx_dma = t->tx_dma;
+	}
+
+	if (mspi->map_rx_dma) {
+		mspi->rx_dma = dma_map_single(dev, mspi->rx, t->len,
+					      DMA_FROM_DEVICE);
+		if (dma_mapping_error(dev, mspi->rx_dma)) {
+			dev_err(dev, "unable to map rx dma\n");
+			goto err_rx_dma;
+		}
+	} else {
+		mspi->rx_dma = t->rx_dma;
+	}
+
+	/* enable rx ints */
+	mpc8xxx_spi_write_reg(&mspi->base->mask, SPIE_RXB);
+
+	mspi->xfer_in_progress = t;
+	mspi->count = t->len;
+
+	/* start CPM transfers */
+	mpc8xxx_spi_cpm_bufs_start(mspi);
+
+	return 0;
+
+err_rx_dma:
+	if (mspi->map_tx_dma)
+		dma_unmap_single(dev, mspi->tx_dma, t->len, DMA_TO_DEVICE);
+	return -ENOMEM;
+}
+
+static void mpc8xxx_spi_cpm_bufs_complete(struct mpc8xxx_spi *mspi)
+{
+	struct device *dev = mspi->dev;
+	struct spi_transfer *t = mspi->xfer_in_progress;
+
+	if (mspi->map_tx_dma)
+		dma_unmap_single(dev, mspi->tx_dma, t->len, DMA_TO_DEVICE);
+	if (mspi->map_tx_dma)
+		dma_unmap_single(dev, mspi->rx_dma, t->len, DMA_FROM_DEVICE);
+	mspi->xfer_in_progress = NULL;
+}
+
+static int mpc8xxx_spi_cpu_bufs(struct mpc8xxx_spi *mspi,
+				struct spi_transfer *t, unsigned int len)
+{
+	u32 word;
+
+	mspi->count = len;
+
+	/* enable rx ints */
+	mpc8xxx_spi_write_reg(&mspi->base->mask, SPIM_NE);
+
+	/* transmit word */
+	word = mspi->get_tx(mspi);
+	mpc8xxx_spi_write_reg(&mspi->base->transmit, word);
+
+	return 0;
+}
+
+static int mpc8xxx_spi_bufs(struct spi_device *spi, struct spi_transfer *t,
+			    bool is_dma_mapped)
+{
+	struct mpc8xxx_spi *mpc8xxx_spi = spi_master_get_devdata(spi->master);
+	unsigned int len = t->len;
+	u8 bits_per_word;
+	int ret;
+
 	bits_per_word = spi->bits_per_word;
 	if (t->bits_per_word)
 		bits_per_word = t->bits_per_word;
-	len = t->len;
+
 	if (bits_per_word > 8) {
 		/* invalid length? */
 		if (len & 1)
@@ -329,22 +522,27 @@
 			return -EINVAL;
 		len /= 2;
 	}
-	mpc8xxx_spi->count = len;
+
+	mpc8xxx_spi->tx = t->tx_buf;
+	mpc8xxx_spi->rx = t->rx_buf;
 
 	INIT_COMPLETION(mpc8xxx_spi->done);
 
-	/* enable rx ints */
-	mpc8xxx_spi_write_reg(&mpc8xxx_spi->base->mask, SPIM_NE);
-
-	/* transmit word */
-	word = mpc8xxx_spi->get_tx(mpc8xxx_spi);
-	mpc8xxx_spi_write_reg(&mpc8xxx_spi->base->transmit, word);
+	if (mpc8xxx_spi->flags & SPI_CPM_MODE)
+		ret = mpc8xxx_spi_cpm_bufs(mpc8xxx_spi, t, is_dma_mapped);
+	else
+		ret = mpc8xxx_spi_cpu_bufs(mpc8xxx_spi, t, len);
+	if (ret)
+		return ret;
 
 	wait_for_completion(&mpc8xxx_spi->done);
 
 	/* disable rx ints */
 	mpc8xxx_spi_write_reg(&mpc8xxx_spi->base->mask, 0);
 
+	if (mpc8xxx_spi->flags & SPI_CPM_MODE)
+		mpc8xxx_spi_cpm_bufs_complete(mpc8xxx_spi);
+
 	return mpc8xxx_spi->count;
 }
 
@@ -375,7 +573,7 @@
 		}
 		cs_change = t->cs_change;
 		if (t->len)
-			status = mpc8xxx_spi_bufs(spi, t);
+			status = mpc8xxx_spi_bufs(spi, t, m->is_dma_mapped);
 		if (status) {
 			status = -EMSGSIZE;
 			break;
@@ -464,45 +662,80 @@
 	return 0;
 }
 
-static irqreturn_t mpc8xxx_spi_irq(s32 irq, void *context_data)
+static void mpc8xxx_spi_cpm_irq(struct mpc8xxx_spi *mspi, u32 events)
 {
-	struct mpc8xxx_spi *mpc8xxx_spi = context_data;
-	u32 event;
-	irqreturn_t ret = IRQ_NONE;
+	u16 len;
 
-	/* Get interrupt events(tx/rx) */
-	event = mpc8xxx_spi_read_reg(&mpc8xxx_spi->base->event);
+	dev_dbg(mspi->dev, "%s: bd datlen %d, count %d\n", __func__,
+		in_be16(&mspi->rx_bd->cbd_datlen), mspi->count);
 
-	/* We need handle RX first */
-	if (event & SPIE_NE) {
-		u32 rx_data = mpc8xxx_spi_read_reg(&mpc8xxx_spi->base->receive);
-
-		if (mpc8xxx_spi->rx)
-			mpc8xxx_spi->get_rx(rx_data, mpc8xxx_spi);
-
-		ret = IRQ_HANDLED;
-	}
-
-	if ((event & SPIE_NF) == 0)
-		/* spin until TX is done */
-		while (((event =
-			 mpc8xxx_spi_read_reg(&mpc8xxx_spi->base->event)) &
-						SPIE_NF) == 0)
-			cpu_relax();
-
-	mpc8xxx_spi->count -= 1;
-	if (mpc8xxx_spi->count) {
-		u32 word = mpc8xxx_spi->get_tx(mpc8xxx_spi);
-		mpc8xxx_spi_write_reg(&mpc8xxx_spi->base->transmit, word);
-	} else {
-		complete(&mpc8xxx_spi->done);
+	len = in_be16(&mspi->rx_bd->cbd_datlen);
+	if (len > mspi->count) {
+		WARN_ON(1);
+		len = mspi->count;
 	}
 
 	/* Clear the events */
-	mpc8xxx_spi_write_reg(&mpc8xxx_spi->base->event, event);
+	mpc8xxx_spi_write_reg(&mspi->base->event, events);
+
+	mspi->count -= len;
+	if (mspi->count)
+		mpc8xxx_spi_cpm_bufs_start(mspi);
+	else
+		complete(&mspi->done);
+}
+
+static void mpc8xxx_spi_cpu_irq(struct mpc8xxx_spi *mspi, u32 events)
+{
+	/* We need handle RX first */
+	if (events & SPIE_NE) {
+		u32 rx_data = mpc8xxx_spi_read_reg(&mspi->base->receive);
+
+		if (mspi->rx)
+			mspi->get_rx(rx_data, mspi);
+	}
+
+	if ((events & SPIE_NF) == 0)
+		/* spin until TX is done */
+		while (((events =
+			mpc8xxx_spi_read_reg(&mspi->base->event)) &
+						SPIE_NF) == 0)
+			cpu_relax();
+
+	/* Clear the events */
+	mpc8xxx_spi_write_reg(&mspi->base->event, events);
+
+	mspi->count -= 1;
+	if (mspi->count) {
+		u32 word = mspi->get_tx(mspi);
+
+		mpc8xxx_spi_write_reg(&mspi->base->transmit, word);
+	} else {
+		complete(&mspi->done);
+	}
+}
+
+static irqreturn_t mpc8xxx_spi_irq(s32 irq, void *context_data)
+{
+	struct mpc8xxx_spi *mspi = context_data;
+	irqreturn_t ret = IRQ_NONE;
+	u32 events;
+
+	/* Get interrupt events(tx/rx) */
+	events = mpc8xxx_spi_read_reg(&mspi->base->event);
+	if (events)
+		ret = IRQ_HANDLED;
+
+	dev_dbg(mspi->dev, "%s: events %x\n", __func__, events);
+
+	if (mspi->flags & SPI_CPM_MODE)
+		mpc8xxx_spi_cpm_irq(mspi, events);
+	else
+		mpc8xxx_spi_cpu_irq(mspi, events);
 
 	return ret;
 }
+
 static int mpc8xxx_spi_transfer(struct spi_device *spi,
 				struct spi_message *m)
 {
@@ -526,6 +759,215 @@
 	kfree(spi->controller_state);
 }
 
+static void *mpc8xxx_spi_alloc_dummy_rx(void)
+{
+	mutex_lock(&mpc8xxx_dummy_rx_lock);
+
+	if (!mpc8xxx_dummy_rx)
+		mpc8xxx_dummy_rx = kmalloc(SPI_MRBLR, GFP_KERNEL);
+	if (mpc8xxx_dummy_rx)
+		mpc8xxx_dummy_rx_refcnt++;
+
+	mutex_unlock(&mpc8xxx_dummy_rx_lock);
+
+	return mpc8xxx_dummy_rx;
+}
+
+static void mpc8xxx_spi_free_dummy_rx(void)
+{
+	mutex_lock(&mpc8xxx_dummy_rx_lock);
+
+	switch (mpc8xxx_dummy_rx_refcnt) {
+	case 0:
+		WARN_ON(1);
+		break;
+	case 1:
+		kfree(mpc8xxx_dummy_rx);
+		mpc8xxx_dummy_rx = NULL;
+		/* fall through */
+	default:
+		mpc8xxx_dummy_rx_refcnt--;
+		break;
+	}
+
+	mutex_unlock(&mpc8xxx_dummy_rx_lock);
+}
+
+static unsigned long mpc8xxx_spi_cpm_get_pram(struct mpc8xxx_spi *mspi)
+{
+	struct device *dev = mspi->dev;
+	struct device_node *np = dev_archdata_get_node(&dev->archdata);
+	const u32 *iprop;
+	int size;
+	unsigned long spi_base_ofs;
+	unsigned long pram_ofs = -ENOMEM;
+
+	/* Can't use of_address_to_resource(), QE muram isn't at 0. */
+	iprop = of_get_property(np, "reg", &size);
+
+	/* QE with a fixed pram location? */
+	if (mspi->flags & SPI_QE && iprop && size == sizeof(*iprop) * 4)
+		return cpm_muram_alloc_fixed(iprop[2], SPI_PRAM_SIZE);
+
+	/* QE but with a dynamic pram location? */
+	if (mspi->flags & SPI_QE) {
+		pram_ofs = cpm_muram_alloc(SPI_PRAM_SIZE, 64);
+		qe_issue_cmd(QE_ASSIGN_PAGE_TO_DEVICE, mspi->subblock,
+				QE_CR_PROTOCOL_UNSPECIFIED, pram_ofs);
+		return pram_ofs;
+	}
+
+	/* CPM1 and CPM2 pram must be at a fixed addr. */
+	if (!iprop || size != sizeof(*iprop) * 4)
+		return -ENOMEM;
+
+	spi_base_ofs = cpm_muram_alloc_fixed(iprop[2], 2);
+	if (IS_ERR_VALUE(spi_base_ofs))
+		return -ENOMEM;
+
+	if (mspi->flags & SPI_CPM2) {
+		pram_ofs = cpm_muram_alloc(SPI_PRAM_SIZE, 64);
+		if (!IS_ERR_VALUE(pram_ofs)) {
+			u16 __iomem *spi_base = cpm_muram_addr(spi_base_ofs);
+
+			out_be16(spi_base, pram_ofs);
+		}
+	} else {
+		struct spi_pram __iomem *pram = cpm_muram_addr(spi_base_ofs);
+		u16 rpbase = in_be16(&pram->rpbase);
+
+		/* Microcode relocation patch applied? */
+		if (rpbase)
+			pram_ofs = rpbase;
+		else
+			return spi_base_ofs;
+	}
+
+	cpm_muram_free(spi_base_ofs);
+	return pram_ofs;
+}
+
+static int mpc8xxx_spi_cpm_init(struct mpc8xxx_spi *mspi)
+{
+	struct device *dev = mspi->dev;
+	struct device_node *np = dev_archdata_get_node(&dev->archdata);
+	const u32 *iprop;
+	int size;
+	unsigned long pram_ofs;
+	unsigned long bds_ofs;
+
+	if (!(mspi->flags & SPI_CPM_MODE))
+		return 0;
+
+	if (!mpc8xxx_spi_alloc_dummy_rx())
+		return -ENOMEM;
+
+	if (mspi->flags & SPI_QE) {
+		iprop = of_get_property(np, "cell-index", &size);
+		if (iprop && size == sizeof(*iprop))
+			mspi->subblock = *iprop;
+
+		switch (mspi->subblock) {
+		default:
+			dev_warn(dev, "cell-index unspecified, assuming SPI1");
+			/* fall through */
+		case 0:
+			mspi->subblock = QE_CR_SUBBLOCK_SPI1;
+			break;
+		case 1:
+			mspi->subblock = QE_CR_SUBBLOCK_SPI2;
+			break;
+		}
+	}
+
+	pram_ofs = mpc8xxx_spi_cpm_get_pram(mspi);
+	if (IS_ERR_VALUE(pram_ofs)) {
+		dev_err(dev, "can't allocate spi parameter ram\n");
+		goto err_pram;
+	}
+
+	bds_ofs = cpm_muram_alloc(sizeof(*mspi->tx_bd) +
+				  sizeof(*mspi->rx_bd), 8);
+	if (IS_ERR_VALUE(bds_ofs)) {
+		dev_err(dev, "can't allocate bds\n");
+		goto err_bds;
+	}
+
+	mspi->dma_dummy_tx = dma_map_single(dev, empty_zero_page, PAGE_SIZE,
+					    DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, mspi->dma_dummy_tx)) {
+		dev_err(dev, "unable to map dummy tx buffer\n");
+		goto err_dummy_tx;
+	}
+
+	mspi->dma_dummy_rx = dma_map_single(dev, mpc8xxx_dummy_rx, SPI_MRBLR,
+					    DMA_FROM_DEVICE);
+	if (dma_mapping_error(dev, mspi->dma_dummy_rx)) {
+		dev_err(dev, "unable to map dummy rx buffer\n");
+		goto err_dummy_rx;
+	}
+
+	mspi->pram = cpm_muram_addr(pram_ofs);
+
+	mspi->tx_bd = cpm_muram_addr(bds_ofs);
+	mspi->rx_bd = cpm_muram_addr(bds_ofs + sizeof(*mspi->tx_bd));
+
+	/* Initialize parameter ram. */
+	out_be16(&mspi->pram->tbase, cpm_muram_offset(mspi->tx_bd));
+	out_be16(&mspi->pram->rbase, cpm_muram_offset(mspi->rx_bd));
+	out_8(&mspi->pram->tfcr, CPMFCR_EB | CPMFCR_GBL);
+	out_8(&mspi->pram->rfcr, CPMFCR_EB | CPMFCR_GBL);
+	out_be16(&mspi->pram->mrblr, SPI_MRBLR);
+	out_be32(&mspi->pram->rstate, 0);
+	out_be32(&mspi->pram->rdp, 0);
+	out_be16(&mspi->pram->rbptr, 0);
+	out_be16(&mspi->pram->rbc, 0);
+	out_be32(&mspi->pram->rxtmp, 0);
+	out_be32(&mspi->pram->tstate, 0);
+	out_be32(&mspi->pram->tdp, 0);
+	out_be16(&mspi->pram->tbptr, 0);
+	out_be16(&mspi->pram->tbc, 0);
+	out_be32(&mspi->pram->txtmp, 0);
+
+	return 0;
+
+err_dummy_rx:
+	dma_unmap_single(dev, mspi->dma_dummy_tx, PAGE_SIZE, DMA_TO_DEVICE);
+err_dummy_tx:
+	cpm_muram_free(bds_ofs);
+err_bds:
+	cpm_muram_free(pram_ofs);
+err_pram:
+	mpc8xxx_spi_free_dummy_rx();
+	return -ENOMEM;
+}
+
+static void mpc8xxx_spi_cpm_free(struct mpc8xxx_spi *mspi)
+{
+	struct device *dev = mspi->dev;
+
+	dma_unmap_single(dev, mspi->dma_dummy_rx, SPI_MRBLR, DMA_FROM_DEVICE);
+	dma_unmap_single(dev, mspi->dma_dummy_tx, PAGE_SIZE, DMA_TO_DEVICE);
+	cpm_muram_free(cpm_muram_offset(mspi->tx_bd));
+	cpm_muram_free(cpm_muram_offset(mspi->pram));
+	mpc8xxx_spi_free_dummy_rx();
+}
+
+static const char *mpc8xxx_spi_strmode(unsigned int flags)
+{
+	if (flags & SPI_QE_CPU_MODE) {
+		return "QE CPU";
+	} else if (flags & SPI_CPM_MODE) {
+		if (flags & SPI_QE)
+			return "QE";
+		else if (flags & SPI_CPM2)
+			return "CPM2";
+		else
+			return "CPM1";
+	}
+	return "CPU";
+}
+
 static struct spi_master * __devinit
 mpc8xxx_spi_probe(struct device *dev, struct resource *mem, unsigned int irq)
 {
@@ -552,14 +994,19 @@
 	master->cleanup = mpc8xxx_spi_cleanup;
 
 	mpc8xxx_spi = spi_master_get_devdata(master);
-	mpc8xxx_spi->qe_mode = pdata->qe_mode;
+	mpc8xxx_spi->dev = dev;
 	mpc8xxx_spi->get_rx = mpc8xxx_spi_rx_buf_u8;
 	mpc8xxx_spi->get_tx = mpc8xxx_spi_tx_buf_u8;
+	mpc8xxx_spi->flags = pdata->flags;
 	mpc8xxx_spi->spibrg = pdata->sysclk;
 
+	ret = mpc8xxx_spi_cpm_init(mpc8xxx_spi);
+	if (ret)
+		goto err_cpm_init;
+
 	mpc8xxx_spi->rx_shift = 0;
 	mpc8xxx_spi->tx_shift = 0;
-	if (mpc8xxx_spi->qe_mode) {
+	if (mpc8xxx_spi->flags & SPI_QE_CPU_MODE) {
 		mpc8xxx_spi->rx_shift = 16;
 		mpc8xxx_spi->tx_shift = 24;
 	}
@@ -569,7 +1016,7 @@
 	mpc8xxx_spi->base = ioremap(mem->start, mem->end - mem->start + 1);
 	if (mpc8xxx_spi->base == NULL) {
 		ret = -ENOMEM;
-		goto put_master;
+		goto err_ioremap;
 	}
 
 	mpc8xxx_spi->irq = irq;
@@ -592,7 +1039,7 @@
 
 	/* Enable SPI interface */
 	regval = pdata->initial_spmode | SPMODE_INIT_VAL | SPMODE_ENABLE;
-	if (pdata->qe_mode)
+	if (mpc8xxx_spi->flags & SPI_QE_CPU_MODE)
 		regval |= SPMODE_OP;
 
 	mpc8xxx_spi_write_reg(&mpc8xxx_spi->base->mode, regval);
@@ -612,9 +1059,8 @@
 	if (ret < 0)
 		goto unreg_master;
 
-	printk(KERN_INFO
-	       "%s: MPC8xxx SPI Controller driver at 0x%p (irq = %d)\n",
-	       dev_name(dev), mpc8xxx_spi->base, mpc8xxx_spi->irq);
+	dev_info(dev, "at 0x%p (irq = %d), %s mode\n", mpc8xxx_spi->base,
+		 mpc8xxx_spi->irq, mpc8xxx_spi_strmode(mpc8xxx_spi->flags));
 
 	return master;
 
@@ -624,7 +1070,9 @@
 	free_irq(mpc8xxx_spi->irq, mpc8xxx_spi);
 unmap_io:
 	iounmap(mpc8xxx_spi->base);
-put_master:
+err_ioremap:
+	mpc8xxx_spi_cpm_free(mpc8xxx_spi);
+err_cpm_init:
 	spi_master_put(master);
 err:
 	return ERR_PTR(ret);
@@ -644,6 +1092,7 @@
 
 	free_irq(mpc8xxx_spi->irq, mpc8xxx_spi);
 	iounmap(mpc8xxx_spi->base);
+	mpc8xxx_spi_cpm_free(mpc8xxx_spi);
 
 	return 0;
 }
@@ -709,6 +1158,7 @@
 		gpio = of_get_gpio_flags(np, i, &flags);
 		if (!gpio_is_valid(gpio)) {
 			dev_err(dev, "invalid gpio #%d: %d\n", i, gpio);
+			ret = gpio;
 			goto err_loop;
 		}
 
@@ -804,7 +1254,13 @@
 
 	prop = of_get_property(np, "mode", NULL);
 	if (prop && !strcmp(prop, "cpu-qe"))
-		pdata->qe_mode = 1;
+		pdata->flags = SPI_QE_CPU_MODE;
+	else if (prop && !strcmp(prop, "qe"))
+		pdata->flags = SPI_CPM_MODE | SPI_QE;
+	else if (of_device_is_compatible(np, "fsl,cpm2-spi"))
+		pdata->flags = SPI_CPM_MODE | SPI_CPM2;
+	else if (of_device_is_compatible(np, "fsl,cpm1-spi"))
+		pdata->flags = SPI_CPM_MODE | SPI_CPM1;
 
 	ret = of_mpc8xxx_spi_get_chipselects(dev);
 	if (ret)
diff --git a/drivers/spi/xilinx_spi.c b/drivers/spi/xilinx_spi.c
index 46b8c5c..5a143b9 100644
--- a/drivers/spi/xilinx_spi.c
+++ b/drivers/spi/xilinx_spi.c
@@ -148,7 +148,8 @@
 {
 	u8 bits_per_word;
 
-	bits_per_word = (t) ? t->bits_per_word : spi->bits_per_word;
+	bits_per_word = (t && t->bits_per_word)
+			 ? t->bits_per_word : spi->bits_per_word;
 	if (bits_per_word != 8) {
 		dev_err(&spi->dev, "%s, unsupported bits_per_word=%d\n",
 			__func__, bits_per_word);
diff --git a/drivers/usb/gadget/fsl_qe_udc.h b/drivers/usb/gadget/fsl_qe_udc.h
index 31b2710..bea5b82 100644
--- a/drivers/usb/gadget/fsl_qe_udc.h
+++ b/drivers/usb/gadget/fsl_qe_udc.h
@@ -419,19 +419,4 @@
 #define CPM_USB_RESTART_TX_OPCODE 0x0b
 #define CPM_USB_EP_SHIFT 5
 
-#ifndef CONFIG_CPM
-inline int cpm_command(u32 command, u8 opcode)
-{
-	return -EOPNOTSUPP;
-}
-#endif
-
-#ifndef CONFIG_QUICC_ENGINE
-inline int qe_issue_cmd(u32 cmd, u32 device, u8 mcn_protocol,
-	u32 cmd_input)
-{
-	return -EOPNOTSUPP;
-}
-#endif
-
 #endif  /* __FSL_QE_UDC_H */
diff --git a/drivers/video/offb.c b/drivers/video/offb.c
index 4d8c54c..b043ac8 100644
--- a/drivers/video/offb.c
+++ b/drivers/video/offb.c
@@ -282,8 +282,17 @@
 	return 0;
 }
 
+static void offb_destroy(struct fb_info *info)
+{
+	if (info->screen_base)
+		iounmap(info->screen_base);
+	release_mem_region(info->aperture_base, info->aperture_size);
+	framebuffer_release(info);
+}
+
 static struct fb_ops offb_ops = {
 	.owner		= THIS_MODULE,
+	.fb_destroy	= offb_destroy,
 	.fb_setcolreg	= offb_setcolreg,
 	.fb_set_par	= offb_set_par,
 	.fb_blank	= offb_blank,
@@ -482,10 +491,14 @@
 	var->sync = 0;
 	var->vmode = FB_VMODE_NONINTERLACED;
 
+	/* set offb aperture size for generic probing */
+	info->aperture_base = address;
+	info->aperture_size = fix->smem_len;
+
 	info->fbops = &offb_ops;
 	info->screen_base = ioremap(address, fix->smem_len);
 	info->pseudo_palette = (void *) (info + 1);
-	info->flags = FBINFO_DEFAULT | foreign_endian;
+	info->flags = FBINFO_DEFAULT | FBINFO_MISC_FIRMWARE | foreign_endian;
 
 	fb_alloc_cmap(&info->cmap, 256, 0);
 
diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index 3711b88..d958b76 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -861,8 +861,10 @@
 	  Watchdog timer found in a number of GE Fanuc single board computers.
 
 config MPC5200_WDT
-	tristate "MPC5200 Watchdog Timer"
+	bool "MPC52xx Watchdog Timer"
 	depends on PPC_MPC52xx
+	help
+	  Use General Purpose Timer (GPT) 0 on the MPC5200 as Watchdog.
 
 config 8xxx_WDT
 	tristate "MPC8xxx Platform Watchdog Timer"
diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile
index 699199b..89c045d 100644
--- a/drivers/watchdog/Makefile
+++ b/drivers/watchdog/Makefile
@@ -118,7 +118,6 @@
 
 # POWERPC Architecture
 obj-$(CONFIG_GEF_WDT) += gef_wdt.o
-obj-$(CONFIG_MPC5200_WDT) += mpc5200_wdt.o
 obj-$(CONFIG_8xxx_WDT) += mpc8xxx_wdt.o
 obj-$(CONFIG_MV64X60_WDT) += mv64x60_wdt.o
 obj-$(CONFIG_PIKA_WDT) += pika_wdt.o
diff --git a/drivers/watchdog/mpc5200_wdt.c b/drivers/watchdog/mpc5200_wdt.c
deleted file mode 100644
index fa9c47c..0000000
--- a/drivers/watchdog/mpc5200_wdt.c
+++ /dev/null
@@ -1,293 +0,0 @@
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/miscdevice.h>
-#include <linux/watchdog.h>
-#include <linux/io.h>
-#include <linux/spinlock.h>
-#include <linux/of_platform.h>
-#include <linux/uaccess.h>
-#include <asm/mpc52xx.h>
-
-
-#define GPT_MODE_WDT		(1 << 15)
-#define GPT_MODE_CE		(1 << 12)
-#define GPT_MODE_MS_TIMER	(0x4)
-
-
-struct mpc5200_wdt {
-	unsigned count;	/* timer ticks before watchdog kicks in */
-	long ipb_freq;
-	struct miscdevice miscdev;
-	struct resource mem;
-	struct mpc52xx_gpt __iomem *regs;
-	spinlock_t io_lock;
-};
-
-/* is_active stores wether or not the /dev/watchdog device is opened */
-static unsigned long is_active;
-
-/* misc devices don't provide a way, to get back to 'dev' or 'miscdev' from
- * file operations, which sucks. But there can be max 1 watchdog anyway, so...
- */
-static struct mpc5200_wdt *wdt_global;
-
-
-/* helper to calculate timeout in timer counts */
-static void mpc5200_wdt_set_timeout(struct mpc5200_wdt *wdt, int timeout)
-{
-	/* use biggest prescaler of 64k */
-	wdt->count = (wdt->ipb_freq + 0xffff) / 0x10000 * timeout;
-
-	if (wdt->count > 0xffff)
-		wdt->count = 0xffff;
-}
-/* return timeout in seconds (calculated from timer count) */
-static int mpc5200_wdt_get_timeout(struct mpc5200_wdt *wdt)
-{
-	return wdt->count * 0x10000 / wdt->ipb_freq;
-}
-
-
-/* watchdog operations */
-static int mpc5200_wdt_start(struct mpc5200_wdt *wdt)
-{
-	spin_lock(&wdt->io_lock);
-	/* disable */
-	out_be32(&wdt->regs->mode, 0);
-	/* set timeout, with maximum prescaler */
-	out_be32(&wdt->regs->count, 0x0 | wdt->count);
-	/* enable watchdog */
-	out_be32(&wdt->regs->mode, GPT_MODE_CE | GPT_MODE_WDT |
-						GPT_MODE_MS_TIMER);
-	spin_unlock(&wdt->io_lock);
-
-	return 0;
-}
-static int mpc5200_wdt_ping(struct mpc5200_wdt *wdt)
-{
-	spin_lock(&wdt->io_lock);
-	/* writing A5 to OCPW resets the watchdog */
-	out_be32(&wdt->regs->mode, 0xA5000000 |
-				(0xffffff & in_be32(&wdt->regs->mode)));
-	spin_unlock(&wdt->io_lock);
-	return 0;
-}
-static int mpc5200_wdt_stop(struct mpc5200_wdt *wdt)
-{
-	spin_lock(&wdt->io_lock);
-	/* disable */
-	out_be32(&wdt->regs->mode, 0);
-	spin_unlock(&wdt->io_lock);
-	return 0;
-}
-
-
-/* file operations */
-static ssize_t mpc5200_wdt_write(struct file *file, const char __user *data,
-		size_t len, loff_t *ppos)
-{
-	struct mpc5200_wdt *wdt = file->private_data;
-	mpc5200_wdt_ping(wdt);
-	return 0;
-}
-static struct watchdog_info mpc5200_wdt_info = {
-	.options	= WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING,
-	.identity	= "mpc5200 watchdog on GPT0",
-};
-static long mpc5200_wdt_ioctl(struct file *file, unsigned int cmd,
-							unsigned long arg)
-{
-	struct mpc5200_wdt *wdt = file->private_data;
-	int __user *data = (int __user *)arg;
-	int timeout;
-	int ret = 0;
-
-	switch (cmd) {
-	case WDIOC_GETSUPPORT:
-		ret = copy_to_user(data, &mpc5200_wdt_info,
-						sizeof(mpc5200_wdt_info));
-		if (ret)
-			ret = -EFAULT;
-		break;
-
-	case WDIOC_GETSTATUS:
-	case WDIOC_GETBOOTSTATUS:
-		ret = put_user(0, data);
-		break;
-
-	case WDIOC_KEEPALIVE:
-		mpc5200_wdt_ping(wdt);
-		break;
-
-	case WDIOC_SETTIMEOUT:
-		ret = get_user(timeout, data);
-		if (ret)
-			break;
-		mpc5200_wdt_set_timeout(wdt, timeout);
-		mpc5200_wdt_start(wdt);
-		/* fall through and return the timeout */
-
-	case WDIOC_GETTIMEOUT:
-		timeout = mpc5200_wdt_get_timeout(wdt);
-		ret = put_user(timeout, data);
-		break;
-
-	default:
-		ret = -ENOTTY;
-	}
-	return ret;
-}
-
-static int mpc5200_wdt_open(struct inode *inode, struct file *file)
-{
-	/* /dev/watchdog can only be opened once */
-	if (test_and_set_bit(0, &is_active))
-		return -EBUSY;
-
-	/* Set and activate the watchdog */
-	mpc5200_wdt_set_timeout(wdt_global, 30);
-	mpc5200_wdt_start(wdt_global);
-	file->private_data = wdt_global;
-	return nonseekable_open(inode, file);
-}
-static int mpc5200_wdt_release(struct inode *inode, struct file *file)
-{
-#if WATCHDOG_NOWAYOUT == 0
-	struct mpc5200_wdt *wdt = file->private_data;
-	mpc5200_wdt_stop(wdt);
-	wdt->count = 0;		/* == disabled */
-#endif
-	clear_bit(0, &is_active);
-	return 0;
-}
-
-static const struct file_operations mpc5200_wdt_fops = {
-	.owner	= THIS_MODULE,
-	.write	= mpc5200_wdt_write,
-	.unlocked_ioctl	= mpc5200_wdt_ioctl,
-	.open	= mpc5200_wdt_open,
-	.release = mpc5200_wdt_release,
-};
-
-/* module operations */
-static int mpc5200_wdt_probe(struct of_device *op,
-					const struct of_device_id *match)
-{
-	struct mpc5200_wdt *wdt;
-	int err;
-	const void *has_wdt;
-	int size;
-
-	has_wdt = of_get_property(op->node, "has-wdt", NULL);
-	if (!has_wdt)
-		has_wdt = of_get_property(op->node, "fsl,has-wdt", NULL);
-	if (!has_wdt)
-		return -ENODEV;
-
-	wdt = kzalloc(sizeof(*wdt), GFP_KERNEL);
-	if (!wdt)
-		return -ENOMEM;
-
-	wdt->ipb_freq = mpc5xxx_get_bus_frequency(op->node);
-
-	err = of_address_to_resource(op->node, 0, &wdt->mem);
-	if (err)
-		goto out_free;
-	size = wdt->mem.end - wdt->mem.start + 1;
-	if (!request_mem_region(wdt->mem.start, size, "mpc5200_wdt")) {
-		err = -ENODEV;
-		goto out_free;
-	}
-	wdt->regs = ioremap(wdt->mem.start, size);
-	if (!wdt->regs) {
-		err = -ENODEV;
-		goto out_release;
-	}
-
-	dev_set_drvdata(&op->dev, wdt);
-	spin_lock_init(&wdt->io_lock);
-
-	wdt->miscdev = (struct miscdevice) {
-		.minor	= WATCHDOG_MINOR,
-		.name	= "watchdog",
-		.fops	= &mpc5200_wdt_fops,
-		.parent = &op->dev,
-	};
-	wdt_global = wdt;
-	err = misc_register(&wdt->miscdev);
-	if (!err)
-		return 0;
-
-	iounmap(wdt->regs);
-out_release:
-	release_mem_region(wdt->mem.start, size);
-out_free:
-	kfree(wdt);
-	return err;
-}
-
-static int mpc5200_wdt_remove(struct of_device *op)
-{
-	struct mpc5200_wdt *wdt = dev_get_drvdata(&op->dev);
-
-	mpc5200_wdt_stop(wdt);
-	misc_deregister(&wdt->miscdev);
-	iounmap(wdt->regs);
-	release_mem_region(wdt->mem.start, wdt->mem.end - wdt->mem.start + 1);
-	kfree(wdt);
-
-	return 0;
-}
-static int mpc5200_wdt_suspend(struct of_device *op, pm_message_t state)
-{
-	struct mpc5200_wdt *wdt = dev_get_drvdata(&op->dev);
-	mpc5200_wdt_stop(wdt);
-	return 0;
-}
-static int mpc5200_wdt_resume(struct of_device *op)
-{
-	struct mpc5200_wdt *wdt = dev_get_drvdata(&op->dev);
-	if (wdt->count)
-		mpc5200_wdt_start(wdt);
-	return 0;
-}
-static int mpc5200_wdt_shutdown(struct of_device *op)
-{
-	struct mpc5200_wdt *wdt = dev_get_drvdata(&op->dev);
-	mpc5200_wdt_stop(wdt);
-	return 0;
-}
-
-static struct of_device_id mpc5200_wdt_match[] = {
-	{ .compatible = "mpc5200-gpt", },
-	{ .compatible = "fsl,mpc5200-gpt", },
-	{},
-};
-static struct of_platform_driver mpc5200_wdt_driver = {
-	.owner		= THIS_MODULE,
-	.name		= "mpc5200-gpt-wdt",
-	.match_table	= mpc5200_wdt_match,
-	.probe		= mpc5200_wdt_probe,
-	.remove		= mpc5200_wdt_remove,
-	.suspend	= mpc5200_wdt_suspend,
-	.resume		= mpc5200_wdt_resume,
-	.shutdown	= mpc5200_wdt_shutdown,
-};
-
-
-static int __init mpc5200_wdt_init(void)
-{
-	return of_register_platform_driver(&mpc5200_wdt_driver);
-}
-
-static void __exit mpc5200_wdt_exit(void)
-{
-	of_unregister_platform_driver(&mpc5200_wdt_driver);
-}
-
-module_init(mpc5200_wdt_init);
-module_exit(mpc5200_wdt_exit);
-
-MODULE_AUTHOR("Domen Puncer <domen.puncer@telargo.com>");
-MODULE_LICENSE("Dual BSD/GPL");
-MODULE_ALIAS_MISCDEV(WATCHDOG_MINOR);
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c
index 7ba79a5..123257b 100644
--- a/fs/proc/proc_devtree.c
+++ b/fs/proc/proc_devtree.c
@@ -7,6 +7,7 @@
 #include <linux/init.h>
 #include <linux/time.h>
 #include <linux/proc_fs.h>
+#include <linux/seq_file.h>
 #include <linux/stat.h>
 #include <linux/string.h>
 #include <asm/prom.h>
@@ -25,26 +26,27 @@
 /*
  * Supply data on a read from /proc/device-tree/node/property.
  */
-static int property_read_proc(char *page, char **start, off_t off,
-			      int count, int *eof, void *data)
+static int property_proc_show(struct seq_file *m, void *v)
 {
-	struct property *pp = data;
-	int n;
+	struct property *pp = m->private;
 
-	if (off >= pp->length) {
-		*eof = 1;
-		return 0;
-	}
-	n = pp->length - off;
-	if (n > count)
-		n = count;
-	else
-		*eof = 1;
-	memcpy(page, (char *)pp->value + off, n);
-	*start = page;
-	return n;
+	seq_write(m, pp->value, pp->length);
+	return 0;
 }
 
+static int property_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, property_proc_show, PDE(inode)->data);
+}
+
+static const struct file_operations property_proc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= property_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
 /*
  * For a node with a name like "gc@10", we make symlinks called "gc"
  * and "@10" to it.
@@ -63,10 +65,9 @@
 	 * Unfortunately proc_register puts each new entry
 	 * at the beginning of the list.  So we rearrange them.
 	 */
-	ent = create_proc_read_entry(name,
-				     strncmp(name, "security-", 9)
-				     ? S_IRUGO : S_IRUSR, de,
-				     property_read_proc, pp);
+	ent = proc_create_data(name,
+			       strncmp(name, "security-", 9) ? S_IRUGO : S_IRUSR,
+			       de, &property_proc_fops, pp);
 	if (ent == NULL)
 		return NULL;
 
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 4753619..e287863 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -43,6 +43,8 @@
 
 #ifdef CONFIG_HOTPLUG_CPU
 extern void unregister_cpu(struct cpu *cpu);
+extern ssize_t arch_cpu_probe(const char *, size_t);
+extern ssize_t arch_cpu_release(const char *, size_t);
 #endif
 struct notifier_block;
 
@@ -115,6 +117,19 @@
 #define unregister_hotcpu_notifier(nb)	unregister_cpu_notifier(nb)
 int cpu_down(unsigned int cpu);
 
+#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
+extern void cpu_hotplug_driver_lock(void);
+extern void cpu_hotplug_driver_unlock(void);
+#else
+static inline void cpu_hotplug_driver_lock(void)
+{
+}
+
+static inline void cpu_hotplug_driver_unlock(void)
+{
+}
+#endif
+
 #else		/* CONFIG_HOTPLUG_CPU */
 
 #define get_online_cpus()	do { } while (0)
diff --git a/include/linux/fsl_devices.h b/include/linux/fsl_devices.h
index 43fc95d..28e33fe 100644
--- a/include/linux/fsl_devices.h
+++ b/include/linux/fsl_devices.h
@@ -74,7 +74,12 @@
 struct fsl_spi_platform_data {
 	u32 	initial_spmode;	/* initial SPMODE value */
 	s16	bus_num;
-	bool	qe_mode;
+	unsigned int flags;
+#define SPI_QE_CPU_MODE		(1 << 0) /* QE CPU ("PIO") mode */
+#define SPI_CPM_MODE		(1 << 1) /* CPM/QE ("DMA") mode */
+#define SPI_CPM1		(1 << 2) /* SPI unit is in CPM1 block */
+#define SPI_CPM2		(1 << 3) /* SPI unit is in CPM2 block */
+#define SPI_QE			(1 << 4) /* SPI unit is in QE block */
 	/* board specific information */
 	u16	max_chipselect;
 	void	(*cs_control)(struct spi_device *spi, bool on);
@@ -90,6 +95,10 @@
  * lead to a deep sleep (i.e. power removed from the core,
  * instead of just the clock).
  */
+#if defined(CONFIG_PPC_83xx) && defined(CONFIG_SUSPEND)
 int fsl_deep_sleep(void);
+#else
+static inline int fsl_deep_sleep(void) { return 0; }
+#endif
 
 #endif /* _FSL_DEVICE_H_ */
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 2d241da..a24de0b 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -496,6 +496,7 @@
 #define KVM_CAP_VCPU_EVENTS 41
 #endif
 #define KVM_CAP_S390_PSW 42
+#define KVM_CAP_PPC_SEGSTATE 43
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index eae1f86..cca8a04 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2295,6 +2295,20 @@
 #define PCI_DEVICE_ID_MPC8536		0x0051
 #define PCI_DEVICE_ID_P2020E		0x0070
 #define PCI_DEVICE_ID_P2020		0x0071
+#define PCI_DEVICE_ID_P2010E		0x0078
+#define PCI_DEVICE_ID_P2010		0x0079
+#define PCI_DEVICE_ID_P1020E		0x0100
+#define PCI_DEVICE_ID_P1020		0x0101
+#define PCI_DEVICE_ID_P1011E		0x0108
+#define PCI_DEVICE_ID_P1011		0x0109
+#define PCI_DEVICE_ID_P1022E		0x0110
+#define PCI_DEVICE_ID_P1022		0x0111
+#define PCI_DEVICE_ID_P1013E		0x0118
+#define PCI_DEVICE_ID_P1013		0x0119
+#define PCI_DEVICE_ID_P4080E		0x0400
+#define PCI_DEVICE_ID_P4080		0x0401
+#define PCI_DEVICE_ID_P4040E		0x0408
+#define PCI_DEVICE_ID_P4040		0x0409
 #define PCI_DEVICE_ID_MPC8641		0x7010
 #define PCI_DEVICE_ID_MPC8641D		0x7011
 #define PCI_DEVICE_ID_MPC8610		0x7018
diff --git a/include/linux/spi/mpc52xx_spi.h b/include/linux/spi/mpc52xx_spi.h
new file mode 100644
index 0000000..d1004cf
--- /dev/null
+++ b/include/linux/spi/mpc52xx_spi.h
@@ -0,0 +1,10 @@
+
+#ifndef INCLUDE_MPC5200_SPI_H
+#define INCLUDE_MPC5200_SPI_H
+
+extern void mpc52xx_spi_set_premessage_hook(struct spi_master *master,
+					    void (*hook)(struct spi_message *m,
+							 void *context),
+					    void *hook_context);
+
+#endif
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index f92ba13..e1f2bf8 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -49,6 +49,7 @@
 #include <asm/io.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
+#include <asm-generic/bitops/le.h>
 
 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
 #include "coalesced_mmio.h"
@@ -1071,8 +1072,8 @@
 		unsigned long rel_gfn = gfn - memslot->base_gfn;
 
 		/* avoid RMW */
-		if (!test_bit(rel_gfn, memslot->dirty_bitmap))
-			set_bit(rel_gfn, memslot->dirty_bitmap);
+		if (!generic_test_le_bit(rel_gfn, memslot->dirty_bitmap))
+			generic___set_le_bit(rel_gfn, memslot->dirty_bitmap);
 	}
 }