Merge tag 'msi-3.12-2' into for-3.12/soc

pci msi changes for v3.12 (round 2)

 - fix build breakage for s390 allyesconfig due to !HAVE_GENERIC_HARDIRQS
diff --git a/arch/arm/mach-tegra/Kconfig b/arch/arm/mach-tegra/Kconfig
index ef3a8da..e963db2 100644
--- a/arch/arm/mach-tegra/Kconfig
+++ b/arch/arm/mach-tegra/Kconfig
@@ -2,18 +2,24 @@
 	bool "NVIDIA Tegra" if ARCH_MULTI_V7
 	select ARCH_HAS_CPUFREQ
 	select ARCH_REQUIRE_GPIOLIB
+	select ARM_GIC
 	select CLKDEV_LOOKUP
 	select CLKSRC_MMIO
 	select CLKSRC_OF
 	select COMMON_CLK
+	select CPU_V7
 	select GENERIC_CLOCKEVENTS
 	select HAVE_ARM_SCU if SMP
 	select HAVE_ARM_TWD if LOCAL_TIMERS
 	select HAVE_CLK
 	select HAVE_SMP
 	select MIGHT_HAVE_CACHE_L2X0
+	select PINCTRL
 	select SOC_BUS
 	select SPARSE_IRQ
+	select USB_ARCH_HAS_EHCI if USB_SUPPORT
+	select USB_ULPI if USB_PHY
+	select USB_ULPI_VIEWPORT if USB_PHY
 	select USE_OF
 	help
 	  This enables support for NVIDIA Tegra based systems.
@@ -27,15 +33,9 @@
 	select ARM_ERRATA_720789
 	select ARM_ERRATA_754327 if SMP
 	select ARM_ERRATA_764369 if SMP
-	select ARM_GIC
-	select CPU_V7
-	select PINCTRL
 	select PINCTRL_TEGRA20
 	select PL310_ERRATA_727915 if CACHE_L2X0
 	select PL310_ERRATA_769419 if CACHE_L2X0
-	select USB_ARCH_HAS_EHCI if USB_SUPPORT
-	select USB_ULPI if USB_PHY
-	select USB_ULPI_VIEWPORT if USB_PHY
 	help
 	  Support for NVIDIA Tegra AP20 and T20 processors, based on the
 	  ARM CortexA9MP CPU and the ARM PL310 L2 cache controller
@@ -44,14 +44,8 @@
 	bool "Enable support for Tegra30 family"
 	select ARM_ERRATA_754322
 	select ARM_ERRATA_764369 if SMP
-	select ARM_GIC
-	select CPU_V7
-	select PINCTRL
 	select PINCTRL_TEGRA30
 	select PL310_ERRATA_769419 if CACHE_L2X0
-	select USB_ARCH_HAS_EHCI if USB_SUPPORT
-	select USB_ULPI if USB_PHY
-	select USB_ULPI_VIEWPORT if USB_PHY
 	help
 	  Support for NVIDIA Tegra T30 processor family, based on the
 	  ARM CortexA9MP CPU and the ARM PL310 L2 cache controller
@@ -59,10 +53,8 @@
 config ARCH_TEGRA_114_SOC
 	bool "Enable support for Tegra114 family"
 	select HAVE_ARM_ARCH_TIMER
-	select ARM_GIC
+	select ARM_ERRATA_798181
 	select ARM_L1_CACHE_SHIFT_6
-	select CPU_V7
-	select PINCTRL
 	select PINCTRL_TEGRA114
 	help
 	  Support for NVIDIA Tegra T114 processor family, based on the
diff --git a/arch/arm/mach-tegra/Makefile b/arch/arm/mach-tegra/Makefile
index 98b184e..f4e7063 100644
--- a/arch/arm/mach-tegra/Makefile
+++ b/arch/arm/mach-tegra/Makefile
@@ -17,11 +17,13 @@
 obj-$(CONFIG_ARCH_TEGRA_2x_SOC)		+= tegra20_speedo.o
 obj-$(CONFIG_ARCH_TEGRA_2x_SOC)		+= tegra2_emc.o
 obj-$(CONFIG_ARCH_TEGRA_2x_SOC)		+= sleep-tegra20.o
+obj-$(CONFIG_ARCH_TEGRA_2x_SOC)		+= pm-tegra20.o
 ifeq ($(CONFIG_CPU_IDLE),y)
 obj-$(CONFIG_ARCH_TEGRA_2x_SOC)		+= cpuidle-tegra20.o
 endif
 obj-$(CONFIG_ARCH_TEGRA_3x_SOC)		+= tegra30_speedo.o
 obj-$(CONFIG_ARCH_TEGRA_3x_SOC)		+= sleep-tegra30.o
+obj-$(CONFIG_ARCH_TEGRA_3x_SOC)		+= pm-tegra30.o
 ifeq ($(CONFIG_CPU_IDLE),y)
 obj-$(CONFIG_ARCH_TEGRA_3x_SOC)		+= cpuidle-tegra30.o
 endif
@@ -31,6 +33,7 @@
 
 obj-$(CONFIG_ARCH_TEGRA_114_SOC)	+= tegra114_speedo.o
 obj-$(CONFIG_ARCH_TEGRA_114_SOC)	+= sleep-tegra30.o
+obj-$(CONFIG_ARCH_TEGRA_114_SOC)	+= pm-tegra30.o
 ifeq ($(CONFIG_CPU_IDLE),y)
 obj-$(CONFIG_ARCH_TEGRA_114_SOC)	+= cpuidle-tegra114.o
 endif
diff --git a/arch/arm/mach-tegra/common.h b/arch/arm/mach-tegra/common.h
index 32f8eb3..5900cc4 100644
--- a/arch/arm/mach-tegra/common.h
+++ b/arch/arm/mach-tegra/common.h
@@ -2,4 +2,3 @@
 
 extern int tegra_cpu_kill(unsigned int cpu);
 extern void tegra_cpu_die(unsigned int cpu);
-extern int tegra_cpu_disable(unsigned int cpu);
diff --git a/arch/arm/mach-tegra/cpuidle-tegra114.c b/arch/arm/mach-tegra/cpuidle-tegra114.c
index 1d1c602..e0b8730 100644
--- a/arch/arm/mach-tegra/cpuidle-tegra114.c
+++ b/arch/arm/mach-tegra/cpuidle-tegra114.c
@@ -17,15 +17,64 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/cpuidle.h>
+#include <linux/cpu_pm.h>
+#include <linux/clockchips.h>
 
 #include <asm/cpuidle.h>
+#include <asm/suspend.h>
+#include <asm/smp_plat.h>
+
+#include "pm.h"
+#include "sleep.h"
+
+#ifdef CONFIG_PM_SLEEP
+#define TEGRA114_MAX_STATES 2
+#else
+#define TEGRA114_MAX_STATES 1
+#endif
+
+#ifdef CONFIG_PM_SLEEP
+static int tegra114_idle_power_down(struct cpuidle_device *dev,
+				    struct cpuidle_driver *drv,
+				    int index)
+{
+	local_fiq_disable();
+
+	tegra_set_cpu_in_lp2();
+	cpu_pm_enter();
+
+	clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu);
+
+	cpu_suspend(0, tegra30_sleep_cpu_secondary_finish);
+
+	clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu);
+
+	cpu_pm_exit();
+	tegra_clear_cpu_in_lp2();
+
+	local_fiq_enable();
+
+	return index;
+}
+#endif
 
 static struct cpuidle_driver tegra_idle_driver = {
 	.name = "tegra_idle",
 	.owner = THIS_MODULE,
-	.state_count = 1,
+	.state_count = TEGRA114_MAX_STATES,
 	.states = {
 		[0] = ARM_CPUIDLE_WFI_STATE_PWR(600),
+#ifdef CONFIG_PM_SLEEP
+		[1] = {
+			.enter			= tegra114_idle_power_down,
+			.exit_latency		= 500,
+			.target_residency	= 1000,
+			.power_usage		= 0,
+			.flags			= CPUIDLE_FLAG_TIME_VALID,
+			.name			= "powered-down",
+			.desc			= "CPU power gated",
+		},
+#endif
 	},
 };
 
diff --git a/arch/arm/mach-tegra/flowctrl.c b/arch/arm/mach-tegra/flowctrl.c
index b477ef3..5348543 100644
--- a/arch/arm/mach-tegra/flowctrl.c
+++ b/arch/arm/mach-tegra/flowctrl.c
@@ -86,6 +86,7 @@
 		reg |= TEGRA20_FLOW_CTRL_CSR_WFE_CPU0 << cpuid;
 		break;
 	case TEGRA30:
+	case TEGRA114:
 		/* clear wfe bitmap */
 		reg &= ~TEGRA30_FLOW_CTRL_CSR_WFE_BITMAP;
 		/* clear wfi bitmap */
@@ -123,6 +124,7 @@
 		reg &= ~TEGRA20_FLOW_CTRL_CSR_WFI_BITMAP;
 		break;
 	case TEGRA30:
+	case TEGRA114:
 		/* clear wfe bitmap */
 		reg &= ~TEGRA30_FLOW_CTRL_CSR_WFE_BITMAP;
 		/* clear wfi bitmap */
diff --git a/arch/arm/mach-tegra/flowctrl.h b/arch/arm/mach-tegra/flowctrl.h
index 7a29bae..c89aac6 100644
--- a/arch/arm/mach-tegra/flowctrl.h
+++ b/arch/arm/mach-tegra/flowctrl.h
@@ -28,9 +28,18 @@
 #define FLOW_CTRL_SCLK_RESUME		(1 << 27)
 #define FLOW_CTRL_HALT_CPU_IRQ		(1 << 10)
 #define	FLOW_CTRL_HALT_CPU_FIQ		(1 << 8)
+#define FLOW_CTRL_HALT_LIC_IRQ		(1 << 11)
+#define FLOW_CTRL_HALT_LIC_FIQ		(1 << 10)
+#define FLOW_CTRL_HALT_GIC_IRQ		(1 << 9)
+#define FLOW_CTRL_HALT_GIC_FIQ		(1 << 8)
 #define FLOW_CTRL_CPU0_CSR		0x8
 #define	FLOW_CTRL_CSR_INTR_FLAG		(1 << 15)
 #define FLOW_CTRL_CSR_EVENT_FLAG	(1 << 14)
+#define FLOW_CTRL_CSR_ENABLE_EXT_CRAIL	(1 << 13)
+#define FLOW_CTRL_CSR_ENABLE_EXT_NCPU	(1 << 12)
+#define FLOW_CTRL_CSR_ENABLE_EXT_MASK ( \
+		FLOW_CTRL_CSR_ENABLE_EXT_NCPU | \
+		FLOW_CTRL_CSR_ENABLE_EXT_CRAIL)
 #define FLOW_CTRL_CSR_ENABLE		(1 << 0)
 #define FLOW_CTRL_HALT_CPU1_EVENTS	0x14
 #define FLOW_CTRL_CPU1_CSR		0x18
diff --git a/arch/arm/mach-tegra/headsmp.S b/arch/arm/mach-tegra/headsmp.S
index 045c16f..2072e73 100644
--- a/arch/arm/mach-tegra/headsmp.S
+++ b/arch/arm/mach-tegra/headsmp.S
@@ -6,6 +6,7 @@
         .section ".text.head", "ax"
 
 ENTRY(tegra_secondary_startup)
-        bl      v7_invalidate_l1
+        check_cpu_part_num 0xc09, r8, r9
+        bleq    v7_invalidate_l1
         b       secondary_startup
 ENDPROC(tegra_secondary_startup)
diff --git a/arch/arm/mach-tegra/hotplug.c b/arch/arm/mach-tegra/hotplug.c
index a52c10e..04de2e8 100644
--- a/arch/arm/mach-tegra/hotplug.c
+++ b/arch/arm/mach-tegra/hotplug.c
@@ -37,7 +37,7 @@
 void __ref tegra_cpu_die(unsigned int cpu)
 {
 	/* Clean L1 data cache */
-	tegra_disable_clean_inv_dcache();
+	tegra_disable_clean_inv_dcache(TEGRA_FLUSH_CACHE_LOUIS);
 
 	/* Shut down the current CPU. */
 	tegra_hotplug_shutdown();
@@ -46,17 +46,6 @@
 	BUG();
 }
 
-int tegra_cpu_disable(unsigned int cpu)
-{
-	switch (tegra_chip_id) {
-	case TEGRA20:
-	case TEGRA30:
-		return cpu == 0 ? -EPERM : 0;
-	default:
-		return 0;
-	}
-}
-
 void __init tegra_hotplug_init(void)
 {
 	if (!IS_ENABLED(CONFIG_HOTPLUG_CPU))
diff --git a/arch/arm/mach-tegra/iomap.h b/arch/arm/mach-tegra/iomap.h
index 399fbca..aba3629 100644
--- a/arch/arm/mach-tegra/iomap.h
+++ b/arch/arm/mach-tegra/iomap.h
@@ -24,6 +24,8 @@
 #define TEGRA_IRAM_BASE			0x40000000
 #define TEGRA_IRAM_SIZE			SZ_256K
 
+#define TEGRA_IRAM_CODE_AREA		(TEGRA_IRAM_BASE + SZ_4K)
+
 #define TEGRA_HOST1X_BASE		0x50000000
 #define TEGRA_HOST1X_SIZE		0x24000
 
@@ -237,6 +239,12 @@
 #define TEGRA_KFUSE_BASE		0x7000FC00
 #define TEGRA_KFUSE_SIZE		SZ_1K
 
+#define TEGRA_EMC0_BASE			0x7001A000
+#define TEGRA_EMC0_SIZE			SZ_2K
+
+#define TEGRA_EMC1_BASE			0x7001A800
+#define TEGRA_EMC1_SIZE			SZ_2K
+
 #define TEGRA_CSITE_BASE		0x70040000
 #define TEGRA_CSITE_SIZE		SZ_256K
 
diff --git a/arch/arm/mach-tegra/irq.c b/arch/arm/mach-tegra/irq.c
index 0de4eed..1a74d56 100644
--- a/arch/arm/mach-tegra/irq.c
+++ b/arch/arm/mach-tegra/irq.c
@@ -18,10 +18,12 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/cpu_pm.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
 #include <linux/io.h>
 #include <linux/of.h>
+#include <linux/of_address.h>
 #include <linux/irqchip/arm-gic.h>
 #include <linux/syscore_ops.h>
 
@@ -65,6 +67,7 @@
 static u32 cpu_iep[TEGRA_MAX_NUM_ICTLRS];
 
 static u32 ictlr_wake_mask[TEGRA_MAX_NUM_ICTLRS];
+static void __iomem *tegra_gic_cpu_base;
 #endif
 
 bool tegra_pending_sgi(void)
@@ -213,8 +216,43 @@
 
 	return 0;
 }
+
+static int tegra_gic_notifier(struct notifier_block *self,
+			      unsigned long cmd, void *v)
+{
+	switch (cmd) {
+	case CPU_PM_ENTER:
+		writel_relaxed(0x1E0, tegra_gic_cpu_base + GIC_CPU_CTRL);
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block tegra_gic_notifier_block = {
+	.notifier_call = tegra_gic_notifier,
+};
+
+static const struct of_device_id tegra114_dt_gic_match[] __initconst = {
+	{ .compatible = "arm,cortex-a15-gic" },
+	{ }
+};
+
+static void tegra114_gic_cpu_pm_registration(void)
+{
+	struct device_node *dn;
+
+	dn = of_find_matching_node(NULL, tegra114_dt_gic_match);
+	if (!dn)
+		return;
+	tegra_gic_cpu_base = of_iomap(dn, 1);
+	of_node_put(dn);	/* drop the ref taken by the lookup */
+	if (tegra_gic_cpu_base)	/* the notifier writes through this mapping */
+		cpu_pm_register_notifier(&tegra_gic_notifier_block);
+}
 #else
 #define tegra_set_wake NULL
+static void tegra114_gic_cpu_pm_registration(void) { }
 #endif
 
 void __init tegra_init_irq(void)
@@ -252,4 +290,6 @@
 	if (!of_have_populated_dt())
 		gic_init(0, 29, distbase,
 			IO_ADDRESS(TEGRA_ARM_PERIF_BASE + 0x100));
+
+	tegra114_gic_cpu_pm_registration();
 }
diff --git a/arch/arm/mach-tegra/platsmp.c b/arch/arm/mach-tegra/platsmp.c
index 24db4ac..554aedc 100644
--- a/arch/arm/mach-tegra/platsmp.c
+++ b/arch/arm/mach-tegra/platsmp.c
@@ -196,6 +196,5 @@
 #ifdef CONFIG_HOTPLUG_CPU
 	.cpu_kill		= tegra_cpu_kill,
 	.cpu_die		= tegra_cpu_die,
-	.cpu_disable		= tegra_cpu_disable,
 #endif
 };
diff --git a/arch/arm/mach-tegra/pm-tegra20.c b/arch/arm/mach-tegra/pm-tegra20.c
new file mode 100644
index 0000000..d65e1d7
--- /dev/null
+++ b/arch/arm/mach-tegra/pm-tegra20.c
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2013, NVIDIA Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/kernel.h>
+
+#include "pm.h"
+
+#ifdef CONFIG_PM_SLEEP
+extern u32 tegra20_iram_start, tegra20_iram_end;
+extern void tegra20_sleep_core_finish(unsigned long);
+
+void tegra20_lp1_iram_hook(void)
+{
+	tegra_lp1_iram.start_addr = &tegra20_iram_start;
+	tegra_lp1_iram.end_addr = &tegra20_iram_end;
+}
+
+void tegra20_sleep_core_init(void)
+{
+	tegra_sleep_core_finish = tegra20_sleep_core_finish;
+}
+#endif
diff --git a/arch/arm/mach-tegra/pm-tegra30.c b/arch/arm/mach-tegra/pm-tegra30.c
new file mode 100644
index 0000000..8fa326d
--- /dev/null
+++ b/arch/arm/mach-tegra/pm-tegra30.c
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2013, NVIDIA Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/kernel.h>
+
+#include "pm.h"
+
+#ifdef CONFIG_PM_SLEEP
+extern u32 tegra30_iram_start, tegra30_iram_end;
+extern void tegra30_sleep_core_finish(unsigned long);
+
+void tegra30_lp1_iram_hook(void)
+{
+	tegra_lp1_iram.start_addr = &tegra30_iram_start;
+	tegra_lp1_iram.end_addr = &tegra30_iram_end;
+}
+
+void tegra30_sleep_core_init(void)
+{
+	tegra_sleep_core_finish = tegra30_sleep_core_finish;
+}
+#endif
diff --git a/arch/arm/mach-tegra/pm.c b/arch/arm/mach-tegra/pm.c
index 94e69be..eaf6bd3 100644
--- a/arch/arm/mach-tegra/pm.c
+++ b/arch/arm/mach-tegra/pm.c
@@ -37,12 +37,18 @@
 #include "reset.h"
 #include "flowctrl.h"
 #include "fuse.h"
+#include "pm.h"
 #include "pmc.h"
 #include "sleep.h"
 
 #ifdef CONFIG_PM_SLEEP
 static DEFINE_SPINLOCK(tegra_lp2_lock);
+static u32 iram_save_size;
+static void *iram_save_addr;
+struct tegra_lp1_iram tegra_lp1_iram;
 void (*tegra_tear_down_cpu)(void);
+void (*tegra_sleep_core_finish)(unsigned long v2p);
+static int (*tegra_sleep_func)(unsigned long v2p);
 
 static void tegra_tear_down_cpu_init(void)
 {
@@ -52,7 +58,9 @@
 			tegra_tear_down_cpu = tegra20_tear_down_cpu;
 		break;
 	case TEGRA30:
-		if (IS_ENABLED(CONFIG_ARCH_TEGRA_3x_SOC))
+	case TEGRA114:
+		if (IS_ENABLED(CONFIG_ARCH_TEGRA_3x_SOC) ||
+		    IS_ENABLED(CONFIG_ARCH_TEGRA_114_SOC))
 			tegra_tear_down_cpu = tegra30_tear_down_cpu;
 		break;
 	}
@@ -171,19 +179,109 @@
 enum tegra_suspend_mode tegra_pm_validate_suspend_mode(
 				enum tegra_suspend_mode mode)
 {
-	/* Tegra114 didn't support any suspending mode yet. */
-	if (tegra_chip_id == TEGRA114)
-		return TEGRA_SUSPEND_NONE;
-
 	/*
-	 * The Tegra devices only support suspending to LP2 currently.
+	 * The Tegra devices support suspending to LP1 or lower currently.
 	 */
-	if (mode > TEGRA_SUSPEND_LP2)
-		return TEGRA_SUSPEND_LP2;
+	if (mode > TEGRA_SUSPEND_LP1)
+		return TEGRA_SUSPEND_LP1;
 
 	return mode;
 }
 
+static int tegra_sleep_core(unsigned long v2p)
+{
+	setup_mm_for_reboot();
+	tegra_sleep_core_finish(v2p);
+
+	/* should never get here */
+	BUG();
+
+	return 0;
+}
+
+/*
+ * tegra_lp1_iram_hook
+ *
+ * Hooks the addresses of the LP1 reset vector and the SDRAM self-refresh
+ * code in SDRAM. This code is not copied to IRAM in this function. We need
+ * to copy this code to IRAM before LP0/LP1 suspend and restore the content
+ * of IRAM after resume.
+ */
+static bool tegra_lp1_iram_hook(void)
+{
+	switch (tegra_chip_id) {
+	case TEGRA20:
+		if (IS_ENABLED(CONFIG_ARCH_TEGRA_2x_SOC))
+			tegra20_lp1_iram_hook();
+		break;
+	case TEGRA30:
+	case TEGRA114:
+		if (IS_ENABLED(CONFIG_ARCH_TEGRA_3x_SOC) ||
+		    IS_ENABLED(CONFIG_ARCH_TEGRA_114_SOC))
+			tegra30_lp1_iram_hook();
+		break;
+	default:
+		break;
+	}
+
+	if (!tegra_lp1_iram.start_addr || !tegra_lp1_iram.end_addr)
+		return false;
+
+	iram_save_size = tegra_lp1_iram.end_addr - tegra_lp1_iram.start_addr;
+	iram_save_addr = kmalloc(iram_save_size, GFP_KERNEL);
+	if (!iram_save_addr)
+		return false;
+
+	return true;
+}
+
+static bool tegra_sleep_core_init(void)
+{
+	switch (tegra_chip_id) {
+	case TEGRA20:
+		if (IS_ENABLED(CONFIG_ARCH_TEGRA_2x_SOC))
+			tegra20_sleep_core_init();
+		break;
+	case TEGRA30:
+	case TEGRA114:
+		if (IS_ENABLED(CONFIG_ARCH_TEGRA_3x_SOC) ||
+		    IS_ENABLED(CONFIG_ARCH_TEGRA_114_SOC))
+			tegra30_sleep_core_init();
+		break;
+	default:
+		break;
+	}
+
+	if (!tegra_sleep_core_finish)
+		return false;
+
+	return true;
+}
+
+static void tegra_suspend_enter_lp1(void)
+{
+	tegra_pmc_suspend();
+
+	/* save IRAM, then copy the reset vector & SDRAM shutdown code into it */
+	memcpy(iram_save_addr, IO_ADDRESS(TEGRA_IRAM_CODE_AREA),
+		iram_save_size);
+	memcpy(IO_ADDRESS(TEGRA_IRAM_CODE_AREA), tegra_lp1_iram.start_addr,
+		iram_save_size);
+
+	*((u32 *)tegra_cpu_lp1_mask) = 1;
+}
+
+static void tegra_suspend_exit_lp1(void)
+{
+	tegra_pmc_resume();
+
+	/* restore IRAM */
+	memcpy(IO_ADDRESS(TEGRA_IRAM_CODE_AREA), iram_save_addr,
+		iram_save_size);
+
+	*(u32 *)tegra_cpu_lp1_mask = 0;
+}
+
 static const char *lp_state[TEGRA_MAX_SUSPEND_MODE] = {
 	[TEGRA_SUSPEND_NONE] = "none",
 	[TEGRA_SUSPEND_LP2] = "LP2",
@@ -207,6 +305,9 @@
 
 	suspend_cpu_complex();
 	switch (mode) {
+	case TEGRA_SUSPEND_LP1:
+		tegra_suspend_enter_lp1();
+		break;
 	case TEGRA_SUSPEND_LP2:
 		tegra_set_cpu_in_lp2();
 		break;
@@ -214,9 +315,12 @@
 		break;
 	}
 
-	cpu_suspend(PHYS_OFFSET - PAGE_OFFSET, &tegra_sleep_cpu);
+	cpu_suspend(PHYS_OFFSET - PAGE_OFFSET, tegra_sleep_func);
 
 	switch (mode) {
+	case TEGRA_SUSPEND_LP1:
+		tegra_suspend_exit_lp1();
+		break;
 	case TEGRA_SUSPEND_LP2:
 		tegra_clear_cpu_in_lp2();
 		break;
@@ -237,12 +341,44 @@
 
+/*
+ * tegra_init_suspend
+ *
+ * Reads the suspend mode from the PMC and, unless it is TEGRA_SUSPEND_NONE,
+ * installs the Tegra suspend ops. For LP1 and deeper modes both the LP1 IRAM
+ * hook and the sleep-core hook must succeed; otherwise the mode is demoted
+ * to LP2. tegra_sleep_func is set for LP1 and LP2 only.
+ */
 void __init tegra_init_suspend(void)
 {
-	if (tegra_pmc_get_suspend_mode() == TEGRA_SUSPEND_NONE)
+	enum tegra_suspend_mode mode = tegra_pmc_get_suspend_mode();
+
+	if (mode == TEGRA_SUSPEND_NONE)
 		return;
 
 	tegra_tear_down_cpu_init();
 	tegra_pmc_suspend_init();
 
+	if (mode >= TEGRA_SUSPEND_LP1) {
+		if (!tegra_lp1_iram_hook() || !tegra_sleep_core_init()) {
+			pr_err("%s: unable to allocate memory for SDRAM "
+			       "self-refresh -- LP0/LP1 unavailable\n",
+			       __func__);
+			tegra_pmc_set_suspend_mode(TEGRA_SUSPEND_LP2);
+			mode = TEGRA_SUSPEND_LP2;
+		}
+	}
+
+	/* set up sleep function for cpu_suspend */
+	switch (mode) {
+	case TEGRA_SUSPEND_LP1:
+		tegra_sleep_func = tegra_sleep_core;
+		break;
+	case TEGRA_SUSPEND_LP2:
+		tegra_sleep_func = tegra_sleep_cpu;
+		break;
+	default:
+		break;
+	}
+
 	suspend_set_ops(&tegra_suspend_ops);
 }
 #endif
diff --git a/arch/arm/mach-tegra/pm.h b/arch/arm/mach-tegra/pm.h
index 94c4b9d..fe204e5 100644
--- a/arch/arm/mach-tegra/pm.h
+++ b/arch/arm/mach-tegra/pm.h
@@ -23,6 +23,18 @@
 
 #include "pmc.h"
 
+struct tegra_lp1_iram {
+	void	*start_addr;
+	void	*end_addr;
+};
+extern struct tegra_lp1_iram tegra_lp1_iram;
+extern void (*tegra_sleep_core_finish)(unsigned long v2p);
+
+void tegra20_lp1_iram_hook(void);
+void tegra20_sleep_core_init(void);
+void tegra30_lp1_iram_hook(void);
+void tegra30_sleep_core_init(void);
+
 extern unsigned long l2x0_saved_regs_addr;
 
 void save_cpu_arch_register(void);
diff --git a/arch/arm/mach-tegra/pmc.c b/arch/arm/mach-tegra/pmc.c
index eb3fa4a..8acb881 100644
--- a/arch/arm/mach-tegra/pmc.c
+++ b/arch/arm/mach-tegra/pmc.c
@@ -21,11 +21,14 @@
 #include <linux/of.h>
 #include <linux/of_address.h>
 
+#include "flowctrl.h"
 #include "fuse.h"
 #include "pm.h"
 #include "pmc.h"
 #include "sleep.h"
 
+#define TEGRA_POWER_SYSCLK_POLARITY	(1 << 10)  /* sys clk polarity */
+#define TEGRA_POWER_SYSCLK_OE		(1 << 11)  /* system clock enable */
 #define TEGRA_POWER_EFFECT_LP0		(1 << 14)  /* LP0 when CPU pwr gated */
 #define TEGRA_POWER_CPU_PWRREQ_POLARITY	(1 << 15)  /* CPU pwr req polarity */
 #define TEGRA_POWER_CPU_PWRREQ_OE	(1 << 16)  /* CPU pwr req enable */
@@ -193,16 +196,50 @@
 	return pmc_pm_data.suspend_mode;
 }
 
+void tegra_pmc_set_suspend_mode(enum tegra_suspend_mode mode)
+{
+	if (mode < TEGRA_SUSPEND_NONE || mode >= TEGRA_MAX_SUSPEND_MODE)
+		return;
+
+	pmc_pm_data.suspend_mode = mode;
+}
+
+void tegra_pmc_suspend(void)
+{
+	tegra_pmc_writel(virt_to_phys(tegra_resume), PMC_SCRATCH41);
+}
+
+void tegra_pmc_resume(void)
+{
+	tegra_pmc_writel(0x0, PMC_SCRATCH41);
+}
+
 void tegra_pmc_pm_set(enum tegra_suspend_mode mode)
 {
-	u32 reg;
+	u32 reg, csr_reg;
 	unsigned long rate = 0;
 
 	reg = tegra_pmc_readl(PMC_CTRL);
 	reg |= TEGRA_POWER_CPU_PWRREQ_OE;
 	reg &= ~TEGRA_POWER_EFFECT_LP0;
 
+	switch (tegra_chip_id) {
+	case TEGRA20:
+	case TEGRA30:
+		break;
+	default:
+		/* Turn off CRAIL */
+		csr_reg = flowctrl_read_cpu_csr(0);
+		csr_reg &= ~FLOW_CTRL_CSR_ENABLE_EXT_MASK;
+		csr_reg |= FLOW_CTRL_CSR_ENABLE_EXT_CRAIL;
+		flowctrl_write_cpu_csr(0, csr_reg);
+		break;
+	}
+
 	switch (mode) {
+	case TEGRA_SUSPEND_LP1:
+		rate = 32768;
+		break;
 	case TEGRA_SUSPEND_LP2:
 		rate = clk_get_rate(tegra_pclk);
 		break;
@@ -224,6 +261,20 @@
 	reg = tegra_pmc_readl(PMC_CTRL);
 	reg |= TEGRA_POWER_CPU_PWRREQ_OE;
 	tegra_pmc_writel(reg, PMC_CTRL);
+
+	reg = tegra_pmc_readl(PMC_CTRL);
+
+	if (!pmc_pm_data.sysclkreq_high)
+		reg |= TEGRA_POWER_SYSCLK_POLARITY;
+	else
+		reg &= ~TEGRA_POWER_SYSCLK_POLARITY;
+
+	/* configure the output polarity while the request is tristated */
+	tegra_pmc_writel(reg, PMC_CTRL);
+
+	/* now enable the request */
+	reg |= TEGRA_POWER_SYSCLK_OE;
+	tegra_pmc_writel(reg, PMC_CTRL);
 }
 #endif
 
diff --git a/arch/arm/mach-tegra/pmc.h b/arch/arm/mach-tegra/pmc.h
index e1c2df2..549f8c7 100644
--- a/arch/arm/mach-tegra/pmc.h
+++ b/arch/arm/mach-tegra/pmc.h
@@ -28,6 +28,9 @@
 
 #ifdef CONFIG_PM_SLEEP
 enum tegra_suspend_mode tegra_pmc_get_suspend_mode(void);
+void tegra_pmc_set_suspend_mode(enum tegra_suspend_mode mode);
+void tegra_pmc_suspend(void);
+void tegra_pmc_resume(void);
 void tegra_pmc_pm_set(enum tegra_suspend_mode mode);
 void tegra_pmc_suspend_init(void);
 #endif
diff --git a/arch/arm/mach-tegra/reset-handler.S b/arch/arm/mach-tegra/reset-handler.S
index 39dc9e7..f527b2c 100644
--- a/arch/arm/mach-tegra/reset-handler.S
+++ b/arch/arm/mach-tegra/reset-handler.S
@@ -40,9 +40,12 @@
  *	  re-enabling sdram.
  *
  *	r6: SoC ID
+ *	r8: CPU part number
  */
 ENTRY(tegra_resume)
-	bl	v7_invalidate_l1
+	check_cpu_part_num 0xc09, r8, r9
+	bleq	v7_invalidate_l1
+	blne	tegra_init_l2_for_a15
 
 	cpu_id	r0
 	tegra_get_soc_id TEGRA_APB_MISC_BASE, r6
@@ -70,7 +73,8 @@
 	str	r1, [r2]
 1:
 
-	check_cpu_part_num 0xc09, r8, r9
+	mov32	r9, 0xc09
+	cmp	r8, r9
 	bne	not_ca9
 #ifdef CONFIG_HAVE_ARM_SCU
 	/* enable SCU */
@@ -178,6 +182,19 @@
 1:
 #endif
 
+	/* Waking up from LP1? */
+	ldr	r8, [r12, #RESET_DATA(MASK_LP1)]
+	tst	r8, r11				@ if in_lp1
+	beq	__is_not_lp1
+	cmp	r10, #0
+	bne	__die				@ only CPU0 can be here
+	ldr	lr, [r12, #RESET_DATA(STARTUP_LP1)]
+	cmp	lr, #0
+	bleq	__die				@ no LP1 startup handler
+ THUMB(	add	lr, lr, #1 )			@ switch to Thumb mode
+	bx	lr
+__is_not_lp1:
+
 	/* Waking up from LP2? */
 	ldr	r9, [r12, #RESET_DATA(MASK_LP2)]
 	tst	r9, r11				@ if in_lp2
diff --git a/arch/arm/mach-tegra/reset.c b/arch/arm/mach-tegra/reset.c
index 1ac434e..fd0bbf8 100644
--- a/arch/arm/mach-tegra/reset.c
+++ b/arch/arm/mach-tegra/reset.c
@@ -81,6 +81,8 @@
 #endif
 
 #ifdef CONFIG_PM_SLEEP
+	__tegra_cpu_reset_handler_data[TEGRA_RESET_STARTUP_LP1] =
+		TEGRA_IRAM_CODE_AREA;
 	__tegra_cpu_reset_handler_data[TEGRA_RESET_STARTUP_LP2] =
 		virt_to_phys((void *)tegra_resume);
 #endif
diff --git a/arch/arm/mach-tegra/reset.h b/arch/arm/mach-tegra/reset.h
index c90d8e9..76a9343 100644
--- a/arch/arm/mach-tegra/reset.h
+++ b/arch/arm/mach-tegra/reset.h
@@ -39,6 +39,10 @@
 void tegra_secondary_startup(void);
 
 #ifdef CONFIG_PM_SLEEP
+#define tegra_cpu_lp1_mask \
+	(IO_ADDRESS(TEGRA_IRAM_BASE + TEGRA_IRAM_RESET_HANDLER_OFFSET + \
+	((u32)&__tegra_cpu_reset_handler_data[TEGRA_RESET_MASK_LP1] - \
+	 (u32)__tegra_cpu_reset_handler_start)))
 #define tegra_cpu_lp2_mask \
 	(IO_ADDRESS(TEGRA_IRAM_BASE + TEGRA_IRAM_RESET_HANDLER_OFFSET + \
 	((u32)&__tegra_cpu_reset_handler_data[TEGRA_RESET_MASK_LP2] - \
diff --git a/arch/arm/mach-tegra/sleep-tegra20.S b/arch/arm/mach-tegra/sleep-tegra20.S
index e3f2417..5c3bd11 100644
--- a/arch/arm/mach-tegra/sleep-tegra20.S
+++ b/arch/arm/mach-tegra/sleep-tegra20.S
@@ -23,10 +23,49 @@
 #include <asm/assembler.h>
 #include <asm/proc-fns.h>
 #include <asm/cp15.h>
+#include <asm/cache.h>
 
 #include "sleep.h"
 #include "flowctrl.h"
 
+#define EMC_CFG				0xc
+#define EMC_ADR_CFG			0x10
+#define EMC_REFRESH			0x70
+#define EMC_NOP				0xdc
+#define EMC_SELF_REF			0xe0
+#define EMC_REQ_CTRL			0x2b0
+#define EMC_EMC_STATUS			0x2b4
+
+#define CLK_RESET_CCLK_BURST		0x20
+#define CLK_RESET_CCLK_DIVIDER		0x24
+#define CLK_RESET_SCLK_BURST		0x28
+#define CLK_RESET_SCLK_DIVIDER		0x2c
+#define CLK_RESET_PLLC_BASE		0x80
+#define CLK_RESET_PLLM_BASE		0x90
+#define CLK_RESET_PLLP_BASE		0xa0
+
+#define APB_MISC_XM2CFGCPADCTRL		0x8c8
+#define APB_MISC_XM2CFGDPADCTRL		0x8cc
+#define APB_MISC_XM2CLKCFGPADCTRL	0x8d0
+#define APB_MISC_XM2COMPPADCTRL		0x8d4
+#define APB_MISC_XM2VTTGENPADCTRL	0x8d8
+#define APB_MISC_XM2CFGCPADCTRL2	0x8e4
+#define APB_MISC_XM2CFGDPADCTRL2	0x8e8
+
+.macro pll_enable, rd, r_car_base, pll_base
+	ldr	\rd, [\r_car_base, #\pll_base]
+	tst	\rd, #(1 << 30)
+	orreq	\rd, \rd, #(1 << 30)
+	streq	\rd, [\r_car_base, #\pll_base]
+.endm
+
+.macro emc_device_mask, rd, base
+	ldr	\rd, [\base, #EMC_ADR_CFG]
+	tst	\rd, #(0x3 << 24)
+	moveq	\rd, #(0x1 << 8)		@ just 1 device
+	movne	\rd, #(0x3 << 8)		@ 2 devices
+.endm
+
 #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_PM_SLEEP)
 /*
  * tegra20_hotplug_shutdown(void)
@@ -181,6 +220,28 @@
 ENDPROC(tegra20_cpu_is_resettable_soon)
 
 /*
+ * tegra20_sleep_core_finish(unsigned long v2p)
+ *
+ * Enters suspend in LP0 or LP1 by turning off the mmu and jumping to
+ * tegra20_tear_down_core in IRAM
+ */
+ENTRY(tegra20_sleep_core_finish)
+	/* Flush, disable the L1 data cache and exit SMP */
+	bl	tegra_disable_clean_inv_dcache
+
+	mov32	r3, tegra_shut_off_mmu
+	add	r3, r3, r0
+
+	mov32	r0, tegra20_tear_down_core
+	mov32	r1, tegra20_iram_start
+	sub	r0, r0, r1
+	mov32	r1, TEGRA_IRAM_CODE_AREA
+	add	r0, r0, r1
+
+	mov	pc, r3
+ENDPROC(tegra20_sleep_core_finish)
+
+/*
  * tegra20_sleep_cpu_secondary_finish(unsigned long v2p)
  *
  * Enters WFI on secondary CPU by exiting coherency.
@@ -191,6 +252,7 @@
 	mrc	p15, 0, r11, c1, c0, 1  @ save actlr before exiting coherency
 
 	/* Flush and disable the L1 data cache */
+	mov	r0, #TEGRA_FLUSH_CACHE_LOUIS
 	bl	tegra_disable_clean_inv_dcache
 
 	mov32	r0, TEGRA_PMC_VIRT + PMC_SCRATCH41
@@ -250,6 +312,150 @@
 	b	tegra20_enter_sleep
 ENDPROC(tegra20_tear_down_cpu)
 
+/* START OF ROUTINES COPIED TO IRAM */
+	.align L1_CACHE_SHIFT
+	.globl tegra20_iram_start
+tegra20_iram_start:
+
+/*
+ * tegra20_lp1_reset
+ *
+ * reset vector for LP1 restore; copied into IRAM during suspend.
+ * Brings the system back up to a safe starting point (SDRAM out of
+ * self-refresh, PLLC, PLLM and PLLP reenabled, CPU running on PLLP,
+ * system clock running on the same PLL that it suspended at), and
+ * jumps to tegra_resume to restore virtual addressing and PLLX.
+ * The physical address of tegra_resume is expected to be stored in
+ * PMC_SCRATCH41.
+ *
+ * NOTE: THIS *MUST* BE RELOCATED TO TEGRA_IRAM_CODE_AREA.
+ */
+ENTRY(tegra20_lp1_reset)
+	/*
+	 * The CPU and system bus are running at 32KHz and executing from
+	 * IRAM when this code is executed; immediately switch to CLKM and
+	 * enable PLLM, PLLP, PLLC.
+	 */
+	mov32	r0, TEGRA_CLK_RESET_BASE
+
+	mov	r1, #(1 << 28)
+	str	r1, [r0, #CLK_RESET_SCLK_BURST]
+	str	r1, [r0, #CLK_RESET_CCLK_BURST]
+	mov	r1, #0
+	str	r1, [r0, #CLK_RESET_CCLK_DIVIDER]
+	str	r1, [r0, #CLK_RESET_SCLK_DIVIDER]
+
+	pll_enable r1, r0, CLK_RESET_PLLM_BASE
+	pll_enable r1, r0, CLK_RESET_PLLP_BASE
+	pll_enable r1, r0, CLK_RESET_PLLC_BASE
+
+	adr	r2, tegra20_sdram_pad_address
+	adr	r4, tegra20_sdram_pad_save
+	mov	r5, #0
+
+	ldr	r6, tegra20_sdram_pad_size
+padload:
+	ldr	r7, [r2, r5]		@ r7 is the addr in the pad_address
+
+	ldr	r1, [r4, r5]
+	str	r1, [r7]		@ restore the value in pad_save
+
+	add	r5, r5, #4
+	cmp	r6, r5
+	bne	padload
+
+padload_done:
+	/* 255uS delay for PLL stabilization */
+	mov32	r7, TEGRA_TMRUS_BASE
+	ldr	r1, [r7]
+	add	r1, r1, #0xff
+	wait_until r1, r7, r9
+
+	adr	r4, tegra20_sclk_save
+	ldr	r4, [r4]
+	str	r4, [r0, #CLK_RESET_SCLK_BURST]
+	mov32	r4, ((1 << 28) | (4))	@ burst policy is PLLP
+	str	r4, [r0, #CLK_RESET_CCLK_BURST]
+
+	mov32	r0, TEGRA_EMC_BASE
+	ldr	r1, [r0, #EMC_CFG]
+	bic	r1, r1, #(1 << 31)	@ disable DRAM_CLK_STOP
+	str	r1, [r0, #EMC_CFG]
+
+	mov	r1, #0
+	str	r1, [r0, #EMC_SELF_REF]	@ take DRAM out of self refresh
+	mov	r1, #1
+	str	r1, [r0, #EMC_NOP]
+	str	r1, [r0, #EMC_NOP]
+	str	r1, [r0, #EMC_REFRESH]
+
+	emc_device_mask r1, r0
+
+exit_selfrefresh_loop:
+	ldr	r2, [r0, #EMC_EMC_STATUS]
+	ands	r2, r2, r1
+	bne	exit_selfrefresh_loop
+
+	mov	r1, #0			@ unstall all transactions
+	str	r1, [r0, #EMC_REQ_CTRL]
+
+	mov32	r0, TEGRA_PMC_BASE
+	ldr	r0, [r0, #PMC_SCRATCH41]
+	mov	pc, r0			@ jump to tegra_resume
+ENDPROC(tegra20_lp1_reset)
+
+/*
+ * tegra20_tear_down_core
+ *
+ * copied into and executed from IRAM
+ * puts memory in self-refresh for LP0 and LP1
+ */
+tegra20_tear_down_core:
+	bl	tegra20_sdram_self_refresh
+	bl	tegra20_switch_cpu_to_clk32k
+	b	tegra20_enter_sleep
+
+/*
+ * tegra20_switch_cpu_to_clk32k
+ *
+ * In LP0 and LP1 all PLLs will be turned off. Switch the CPU and system clock
+ * to the 32KHz clock.
+ */
+tegra20_switch_cpu_to_clk32k:
+	/*
+	 * start by switching to CLKM to safely disable PLLs, then switch to
+	 * CLKS.
+	 */
+	mov	r0, #(1 << 28)
+	str	r0, [r5, #CLK_RESET_SCLK_BURST]
+	str	r0, [r5, #CLK_RESET_CCLK_BURST]
+	mov	r0, #0
+	str	r0, [r5, #CLK_RESET_CCLK_DIVIDER]
+	str	r0, [r5, #CLK_RESET_SCLK_DIVIDER]
+
+	/* 2uS delay between changing SCLK and disabling PLLs */
+	mov32	r7, TEGRA_TMRUS_BASE
+	ldr	r1, [r7]
+	add	r1, r1, #2
+	wait_until r1, r7, r9
+
+	/* disable PLLM, PLLP and PLLC */
+	ldr	r0, [r5, #CLK_RESET_PLLM_BASE]
+	bic	r0, r0, #(1 << 30)
+	str	r0, [r5, #CLK_RESET_PLLM_BASE]
+	ldr	r0, [r5, #CLK_RESET_PLLP_BASE]
+	bic	r0, r0, #(1 << 30)
+	str	r0, [r5, #CLK_RESET_PLLP_BASE]
+	ldr	r0, [r5, #CLK_RESET_PLLC_BASE]
+	bic	r0, r0, #(1 << 30)
+	str	r0, [r5, #CLK_RESET_PLLC_BASE]
+
+	/* switch to CLKS */
+	mov	r0, #0	/* burst policy = 32KHz */
+	str	r0, [r5, #CLK_RESET_SCLK_BURST]
+
+	mov	pc, lr
+
 /*
  * tegra20_enter_sleep
  *
@@ -274,4 +480,95 @@
 	isb
 	b	halted
 
+/*
+ * tegra20_sdram_self_refresh
+ *
+ * called with MMU off and caches disabled
+ * puts sdram in self refresh
+ * must be executed from IRAM
+ */
+tegra20_sdram_self_refresh:
+	mov32	r1, TEGRA_EMC_BASE	@ r1 reserved for emc base addr
+
+	mov	r2, #3
+	str	r2, [r1, #EMC_REQ_CTRL]	@ stall incoming DRAM requests
+
+emcidle:
+	ldr	r2, [r1, #EMC_EMC_STATUS]
+	tst	r2, #4
+	beq	emcidle
+
+	mov	r2, #1
+	str	r2, [r1, #EMC_SELF_REF]
+
+	emc_device_mask r2, r1
+
+emcself:
+	ldr	r3, [r1, #EMC_EMC_STATUS]
+	and	r3, r3, r2
+	cmp	r3, r2
+	bne	emcself			@ loop until DDR in self-refresh
+
+	adr	r2, tegra20_sdram_pad_address
+	adr	r3, tegra20_sdram_pad_safe
+	adr	r4, tegra20_sdram_pad_save
+	mov	r5, #0
+
+	ldr	r6, tegra20_sdram_pad_size
+padsave:
+	ldr	r0, [r2, r5]		@ r0 is the addr in the pad_address
+
+	ldr	r1, [r0]
+	str	r1, [r4, r5]		@ save the content of the addr
+
+	ldr	r1, [r3, r5]
+	str	r1, [r0]		@ set the safe val to the addr
+
+	add	r5, r5, #4
+	cmp	r6, r5
+	bne	padsave
+padsave_done:
+
+	mov32	r5, TEGRA_CLK_RESET_BASE
+	ldr	r0, [r5, #CLK_RESET_SCLK_BURST]
+	adr	r2, tegra20_sclk_save
+	str	r0, [r2]
+	dsb
+	mov	pc, lr
+
+tegra20_sdram_pad_address:
+	.word	TEGRA_APB_MISC_BASE + APB_MISC_XM2CFGCPADCTRL
+	.word	TEGRA_APB_MISC_BASE + APB_MISC_XM2CFGDPADCTRL
+	.word	TEGRA_APB_MISC_BASE + APB_MISC_XM2CLKCFGPADCTRL
+	.word	TEGRA_APB_MISC_BASE + APB_MISC_XM2COMPPADCTRL
+	.word	TEGRA_APB_MISC_BASE + APB_MISC_XM2VTTGENPADCTRL
+	.word	TEGRA_APB_MISC_BASE + APB_MISC_XM2CFGCPADCTRL2
+	.word	TEGRA_APB_MISC_BASE + APB_MISC_XM2CFGDPADCTRL2
+
+tegra20_sdram_pad_size:
+	.word	tegra20_sdram_pad_size - tegra20_sdram_pad_address
+
+tegra20_sdram_pad_safe:
+	.word	0x8
+	.word	0x8
+	.word	0x0
+	.word	0x8
+	.word	0x5500
+	.word	0x08080040
+	.word	0x0
+
+tegra20_sclk_save:
+	.word	0x0
+
+tegra20_sdram_pad_save:
+	.rept (tegra20_sdram_pad_size - tegra20_sdram_pad_address) / 4
+	.long	0
+	.endr
+
+	.ltorg
+/* dummy symbol for end of IRAM */
+	.align L1_CACHE_SHIFT
+	.globl tegra20_iram_end
+tegra20_iram_end:
+	b	.
 #endif
diff --git a/arch/arm/mach-tegra/sleep-tegra30.S b/arch/arm/mach-tegra/sleep-tegra30.S
index ada8821..63fa91b 100644
--- a/arch/arm/mach-tegra/sleep-tegra30.S
+++ b/arch/arm/mach-tegra/sleep-tegra30.S
@@ -18,13 +18,118 @@
 
 #include <asm/assembler.h>
 #include <asm/asm-offsets.h>
+#include <asm/cache.h>
 
 #include "fuse.h"
 #include "sleep.h"
 #include "flowctrl.h"
 
+#define EMC_CFG				0xc
+#define EMC_ADR_CFG			0x10	/* bit 0 is tested by emc_device_mask */
+#define EMC_TIMING_CONTROL		0x28
+#define EMC_REFRESH			0x70
+#define EMC_NOP				0xdc
+#define EMC_SELF_REF			0xe0
+#define EMC_MRW				0xe8
+#define EMC_FBIO_CFG5			0x104
+#define EMC_AUTO_CAL_CONFIG		0x2a4
+#define EMC_AUTO_CAL_INTERVAL		0x2a8
+#define EMC_AUTO_CAL_STATUS		0x2ac
+#define EMC_REQ_CTRL			0x2b0
+#define EMC_CFG_DIG_DLL			0x2bc
+#define EMC_EMC_STATUS			0x2b4
+#define EMC_ZCAL_INTERVAL		0x2e0
+#define EMC_ZQ_CAL			0x2ec
+#define EMC_XM2VTTGENPADCTRL		0x310
+#define EMC_XM2VTTGENPADCTRL2		0x314
+
+#define PMC_CTRL			0x0
+#define PMC_CTRL_SIDE_EFFECT_LP0 (1 << 14) /* enter LP0 when CPU pwr gated */
+
+#define PMC_PLLP_WB0_OVERRIDE		0xf8
+#define PMC_IO_DPD_REQ			0x1b8
+#define PMC_IO_DPD_STATUS		0x1bc
+
+#define CLK_RESET_CCLK_BURST		0x20
+#define CLK_RESET_CCLK_DIVIDER		0x24
+#define CLK_RESET_SCLK_BURST		0x28
+#define CLK_RESET_SCLK_DIVIDER		0x2c
+
+#define CLK_RESET_PLLC_BASE		0x80
+#define CLK_RESET_PLLC_MISC		0x8c
+#define CLK_RESET_PLLM_BASE		0x90
+#define CLK_RESET_PLLM_MISC		0x9c
+#define CLK_RESET_PLLP_BASE		0xa0
+#define CLK_RESET_PLLP_MISC		0xac
+#define CLK_RESET_PLLA_BASE		0xb0
+#define CLK_RESET_PLLA_MISC		0xbc
+#define CLK_RESET_PLLX_BASE		0xe0
+#define CLK_RESET_PLLX_MISC		0xe4
+#define CLK_RESET_PLLX_MISC3		0x518
+#define CLK_RESET_PLLX_MISC3_IDDQ	3	/* bit number; the pll_iddq_* macros shift 1 by it */
+#define CLK_RESET_PLLM_MISC_IDDQ	5	/* bit number, same usage */
+#define CLK_RESET_PLLC_MISC_IDDQ	26	/* bit number, same usage */
+
+#define CLK_RESET_CLK_SOURCE_MSELECT	0x3b4
+
+#define MSELECT_CLKM			(0x3 << 30)	/* OR-ed into CLK_SOURCE_MSELECT by tegra30_switch_cpu_to_clk32k */
+
+#define LOCK_DELAY 50 /* safety delay after lock is detected, in ticks of the counter at TEGRA_TMRUS_BASE */
+
 #define TEGRA30_POWER_HOTPLUG_SHUTDOWN	(1 << 27) /* Hotplug shutdown */
 
+.macro emc_device_mask, rd, base	/* rd = mask for EMC_EMC_STATUS, picked from bit 0 of EMC_ADR_CFG at base; changes the flags */
+	ldr	\rd, [\base, #EMC_ADR_CFG]
+	tst	\rd, #0x1
+	moveq	\rd, #(0x1 << 8)		@ just 1 device
+	movne	\rd, #(0x3 << 8)		@ 2 devices
+.endm
+
+.macro emc_timing_update, rd, base	/* write 1 to EMC_TIMING_CONTROL, then poll EMC_EMC_STATUS; rd is scratch */
+	mov	\rd, #1
+	str	\rd, [\base, #EMC_TIMING_CONTROL]
+1001:
+	ldr	\rd, [\base, #EMC_EMC_STATUS]
+	tst	\rd, #(0x1<<23)	@ wait EMC_STATUS_TIMING_UPDATE_STALLED is clear
+	bne	1001b
+.endm
+
+.macro pll_enable, rd, r_car_base, pll_base, pll_misc	/* rd is scratch; pll_misc may be 0 to skip the second half */
+	ldr	\rd, [\r_car_base, #\pll_base]
+	tst	\rd, #(1 << 30)
+	orreq	\rd, \rd, #(1 << 30)	/* bit 30 was clear: set it ... */
+	streq	\rd, [\r_car_base, #\pll_base]	/* ... and write the register back */
+	/* Enable lock detector */
+	.if	\pll_misc	/* assembled only when pll_misc is non-zero */
+	ldr	\rd, [\r_car_base, #\pll_misc]
+	bic	\rd, \rd, #(1 << 18)	/* first write bit 18 as 0 */
+	str	\rd, [\r_car_base, #\pll_misc]
+	ldr	\rd, [\r_car_base, #\pll_misc]	/* two reads in a row; presumably a short settle delay - TODO confirm */
+	ldr	\rd, [\r_car_base, #\pll_misc]
+	orr	\rd, \rd, #(1 << 18)	/* then write bit 18 as 1 */
+	str	\rd, [\r_car_base, #\pll_misc]
+	.endif
+.endm
+
+.macro pll_locked, rd, r_car_base, pll_base	/* spin until bit 27 of pll_base reads 1; rd is scratch */
+1:
+	ldr	\rd, [\r_car_base, #\pll_base]
+	tst	\rd, #(1 << 27)
+	beq	1b	/* bit 27 still clear: read again (no timeout) */
+.endm
+
+.macro pll_iddq_exit, rd, car, iddq, iddq_bit	/* clear bit iddq_bit of the register at car + iddq; rd is scratch */
+	ldr	\rd, [\car, #\iddq]
+	bic	\rd, \rd, #(1<<\iddq_bit)
+	str	\rd, [\car, #\iddq]
+.endm
+
+.macro pll_iddq_entry, rd, car, iddq, iddq_bit	/* set bit iddq_bit of the register at car + iddq; rd is scratch */
+	ldr	\rd, [\car, #\iddq]
+	orr	\rd, \rd, #(1<<\iddq_bit)
+	str	\rd, [\car, #\iddq]
+.endm
+
 #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_PM_SLEEP)
 /*
  * tegra30_hotplug_shutdown(void)
@@ -99,6 +204,8 @@
 	cmp	r10, #TEGRA30
 	moveq   r3, #FLOW_CTRL_WAIT_FOR_INTERRUPT	@ For LP2
 	movne	r3, #FLOW_CTRL_WAITEVENT
+	orrne	r3, r3, #FLOW_CTRL_HALT_GIC_IRQ
+	orrne	r3, r3, #FLOW_CTRL_HALT_GIC_FIQ
 flow_ctrl_done:
 	cmp	r10, #TEGRA30
 	str	r3, [r2]
@@ -127,6 +234,44 @@
 
 #ifdef CONFIG_PM_SLEEP
 /*
+ * tegra30_sleep_core_finish(unsigned long v2p)
+ *
+ * Enters suspend in LP0 or LP1 by turning off the MMU and jumping to
+ * tegra30_tear_down_core in IRAM
+ */
+ENTRY(tegra30_sleep_core_finish)
+	mov	r4, r0			@ park v2p: r0 must carry the flush flag
+	/* Flush, disable the L1 data cache and exit SMP */
+	mov	r0, #TEGRA_FLUSH_CACHE_ALL
+	bl	tegra_disable_clean_inv_dcache
+	mov	r0, r4			@ v2p again, r4 is reloaded just below
+
+	/*
+	 * Preload all the address literals that are needed for the
+	 * CPU power-gating process, to avoid loading from SDRAM, which
+	 * is not supported once SDRAM is put into self-refresh.
+	 * LP0 / LP1 use physical address, since the MMU needs to be
+	 * disabled before putting SDRAM into self-refresh to avoid
+	 * memory access due to page table walks.
+	 */
+	mov32	r4, TEGRA_PMC_BASE
+	mov32	r5, TEGRA_CLK_RESET_BASE
+	mov32	r6, TEGRA_FLOW_CTRL_BASE
+	mov32	r7, TEGRA_TMRUS_BASE
+
+	mov32	r3, tegra_shut_off_mmu
+	add	r3, r3, r0		@ r3 = tegra_shut_off_mmu + v2p
+
+	mov32	r0, tegra30_tear_down_core
+	mov32	r1, tegra30_iram_start
+	sub	r0, r0, r1		@ offset of tear_down_core in the IRAM image
+	mov32	r1, TEGRA_IRAM_CODE_AREA
+	add	r0, r0, r1		@ r0 = where tegra_shut_off_mmu jumps to
+
+	mov	pc, r3
+ENDPROC(tegra30_sleep_core_finish)
+
+/*
  * tegra30_sleep_cpu_secondary_finish(unsigned long v2p)
  *
  * Enters LP2 on secondary CPU by exiting coherency and powergating the CPU.
@@ -135,6 +277,7 @@
 	mov	r7, lr
 
 	/* Flush and disable the L1 data cache */
+	mov 	r0, #TEGRA_FLUSH_CACHE_LOUIS
 	bl	tegra_disable_clean_inv_dcache
 
 	/* Powergate this CPU. */
@@ -155,6 +298,351 @@
 	b	tegra30_enter_sleep
 ENDPROC(tegra30_tear_down_cpu)
 
+/* START OF ROUTINES COPIED TO IRAM */
+	.align L1_CACHE_SHIFT
+	.globl tegra30_iram_start
+tegra30_iram_start:	/* tegra30_sleep_core_finish subtracts this label to get offsets into the IRAM copy */
+
+/*
+ * tegra30_lp1_reset
+ *
+ * reset vector for LP1 restore; copied into IRAM during suspend.
+ * Brings the system back up to a safe starting point (SDRAM out of
+ * self-refresh, PLLC, PLLM and PLLP reenabled, CPU running on PLLX,
+ * system clock running on the same PLL that it suspended at), and
+ * jumps to tegra_resume to restore virtual addressing.
+ * The physical address of tegra_resume expected to be stored in
+ * PMC_SCRATCH41.
+ *
+ * NOTE: THIS *MUST* BE RELOCATED TO TEGRA_IRAM_CODE_AREA.
+ */
+ENTRY(tegra30_lp1_reset)
+	/*
+	 * The CPU and system bus are running at 32KHz and executing from
+	 * IRAM when this code is executed; immediately switch to CLKM and
+	 * enable PLLP, PLLM, PLLC, PLLA and PLLX.
+	 */
+	mov32	r0, TEGRA_CLK_RESET_BASE
+
+	mov	r1, #(1 << 28)
+	str	r1, [r0, #CLK_RESET_SCLK_BURST]
+	str	r1, [r0, #CLK_RESET_CCLK_BURST]
+	mov	r1, #0
+	str	r1, [r0, #CLK_RESET_CCLK_DIVIDER]
+	str	r1, [r0, #CLK_RESET_SCLK_DIVIDER]
+
+	tegra_get_soc_id TEGRA_APB_MISC_BASE, r10	/* macro defined elsewhere; presumably leaves the SoC ID in r10 */
+	cmp	r10, #TEGRA30
+	beq	_no_pll_iddq_exit	/* Tegra30 skips the IDDQ exit below */
+
+	pll_iddq_exit r1, r0, CLK_RESET_PLLM_MISC, CLK_RESET_PLLM_MISC_IDDQ
+	pll_iddq_exit r1, r0, CLK_RESET_PLLC_MISC, CLK_RESET_PLLC_MISC_IDDQ
+	pll_iddq_exit r1, r0, CLK_RESET_PLLX_MISC3, CLK_RESET_PLLX_MISC3_IDDQ
+
+	mov32	r7, TEGRA_TMRUS_BASE
+	ldr	r1, [r7]
+	add	r1, r1, #2	/* wait for the counter to pass now + 2 */
+	wait_until r1, r7, r3
+
+	/* enable PLLM via PMC */
+	mov32	r2, TEGRA_PMC_BASE
+	ldr	r1, [r2, #PMC_PLLP_WB0_OVERRIDE]
+	orr	r1, r1, #(1 << 12)
+	str	r1, [r2, #PMC_PLLP_WB0_OVERRIDE]
+
+	pll_enable r1, r0, CLK_RESET_PLLM_BASE, 0	/* pll_misc = 0: the bit-18 toggle in pll_enable is assembled out */
+	pll_enable r1, r0, CLK_RESET_PLLC_BASE, 0
+	pll_enable r1, r0, CLK_RESET_PLLX_BASE, 0
+
+	b	_pll_m_c_x_done
+
+_no_pll_iddq_exit:
+	/* enable PLLM via PMC */
+	mov32	r2, TEGRA_PMC_BASE
+	ldr	r1, [r2, #PMC_PLLP_WB0_OVERRIDE]
+	orr	r1, r1, #(1 << 12)
+	str	r1, [r2, #PMC_PLLP_WB0_OVERRIDE]
+
+	pll_enable r1, r0, CLK_RESET_PLLM_BASE, CLK_RESET_PLLM_MISC
+	pll_enable r1, r0, CLK_RESET_PLLC_BASE, CLK_RESET_PLLC_MISC
+	pll_enable r1, r0, CLK_RESET_PLLX_BASE, CLK_RESET_PLLX_MISC
+
+_pll_m_c_x_done:
+	pll_enable r1, r0, CLK_RESET_PLLP_BASE, CLK_RESET_PLLP_MISC
+	pll_enable r1, r0, CLK_RESET_PLLA_BASE, CLK_RESET_PLLA_MISC
+
+	pll_locked r1, r0, CLK_RESET_PLLM_BASE
+	pll_locked r1, r0, CLK_RESET_PLLP_BASE
+	pll_locked r1, r0, CLK_RESET_PLLA_BASE
+	pll_locked r1, r0, CLK_RESET_PLLC_BASE
+	pll_locked r1, r0, CLK_RESET_PLLX_BASE
+
+	mov32	r7, TEGRA_TMRUS_BASE
+	ldr	r1, [r7]
+	add	r1, r1, #LOCK_DELAY
+	wait_until r1, r7, r3
+
+	adr	r5, tegra30_sdram_pad_save	/* r5 = area filled by the padsave loop of tegra30_sdram_self_refresh */
+
+	ldr	r4, [r5, #0x18]		@ restore CLK_SOURCE_MSELECT
+	str	r4, [r0, #CLK_RESET_CLK_SOURCE_MSELECT]
+
+	ldr	r4, [r5, #0x1C]		@ restore SCLK_BURST
+	str	r4, [r0, #CLK_RESET_SCLK_BURST]
+
+	cmp	r10, #TEGRA30
+	movweq	r4, #:lower16:((1 << 28) | (0x8))	@ burst policy is PLLX
+	movteq	r4, #:upper16:((1 << 28) | (0x8))
+	movwne	r4, #:lower16:((1 << 28) | (0xe))
+	movtne	r4, #:upper16:((1 << 28) | (0xe))
+	str	r4, [r0, #CLK_RESET_CCLK_BURST]
+
+	/* Restore pad power state to normal */
+	ldr	r1, [r5, #0x14]		@ PMC_IO_DPD_STATUS
+	mvn	r1, r1
+	bic	r1, r1, #(1 << 31)
+	orr	r1, r1, #(1 << 30)
+	str	r1, [r2, #PMC_IO_DPD_REQ]	@ DPD_OFF
+
+	cmp	r10, #TEGRA30
+	movweq	r0, #:lower16:TEGRA_EMC_BASE	@ r0 reserved for emc base
+	movteq	r0, #:upper16:TEGRA_EMC_BASE
+	movwne	r0, #:lower16:TEGRA_EMC0_BASE
+	movtne	r0, #:upper16:TEGRA_EMC0_BASE
+
+exit_self_refresh:
+	ldr	r1, [r5, #0xC]		@ restore EMC_XM2VTTGENPADCTRL
+	str	r1, [r0, #EMC_XM2VTTGENPADCTRL]
+	ldr	r1, [r5, #0x10]		@ restore EMC_XM2VTTGENPADCTRL2
+	str	r1, [r0, #EMC_XM2VTTGENPADCTRL2]
+	ldr	r1, [r5, #0x8]		@ restore EMC_AUTO_CAL_INTERVAL
+	str	r1, [r0, #EMC_AUTO_CAL_INTERVAL]
+
+	/* Relock DLL */
+	ldr	r1, [r0, #EMC_CFG_DIG_DLL]
+	orr	r1, r1, #(1 << 30)	@ set DLL_RESET
+	str	r1, [r0, #EMC_CFG_DIG_DLL]
+
+	emc_timing_update r1, r0
+
+	cmp	r10, #TEGRA114
+	movweq	r1, #:lower16:TEGRA_EMC1_BASE
+	movteq	r1, #:upper16:TEGRA_EMC1_BASE
+	cmpeq	r0, r1	/* EQ now means: Tegra114 and r0 is TEGRA_EMC1_BASE */
+
+	ldr	r1, [r0, #EMC_AUTO_CAL_CONFIG]
+	orr	r1, r1, #(1 << 31)	@ set AUTO_CAL_ACTIVE
+	orreq	r1, r1, #(1 << 27)	@ set slave mode for channel 1
+	str	r1, [r0, #EMC_AUTO_CAL_CONFIG]
+
+emc_wait_auto_cal_onetime:
+	ldr	r1, [r0, #EMC_AUTO_CAL_STATUS]
+	tst	r1, #(1 << 31)		@ wait until AUTO_CAL_ACTIVE is cleared
+	bne	emc_wait_auto_cal_onetime
+
+	ldr	r1, [r0, #EMC_CFG]
+	bic	r1, r1, #(1 << 31)	@ disable DRAM_CLK_STOP_PD
+	str	r1, [r0, #EMC_CFG]
+
+	mov	r1, #0
+	str	r1, [r0, #EMC_SELF_REF]	@ take DRAM out of self refresh
+	mov	r1, #1
+	cmp	r10, #TEGRA30
+	streq	r1, [r0, #EMC_NOP]	/* the NOP, NOP, REFRESH writes happen on Tegra30 only */
+	streq	r1, [r0, #EMC_NOP]
+	streq	r1, [r0, #EMC_REFRESH]
+
+	emc_device_mask r1, r0
+
+exit_selfrefresh_loop:
+	ldr	r2, [r0, #EMC_EMC_STATUS]
+	ands	r2, r2, r1
+	bne	exit_selfrefresh_loop	/* loop while any masked status bit is still set */
+
+	lsr	r1, r1, #8		@ devSel, bit0:dev0, bit1:dev1
+
+	mov32	r7, TEGRA_TMRUS_BASE
+	ldr	r2, [r0, #EMC_FBIO_CFG5]
+
+	and	r2, r2,	#3		@ check DRAM_TYPE
+	cmp	r2, #2
+	beq	emc_lpddr2
+
+	/* Issue a ZQ_CAL for dev0 - DDR3 */
+	mov32	r2, 0x80000011		@ DEV_SELECTION=2, LENGTH=LONG, CMD=1
+	str	r2, [r0, #EMC_ZQ_CAL]
+	ldr	r2, [r7]
+	add	r2, r2, #10
+	wait_until r2, r7, r3
+
+	tst	r1, #2
+	beq	zcal_done	/* dev1 bit clear: nothing more to calibrate */
+
+	/* Issue a ZQ_CAL for dev1 - DDR3 */
+	mov32	r2, 0x40000011		@ DEV_SELECTION=1, LENGTH=LONG, CMD=1
+	str	r2, [r0, #EMC_ZQ_CAL]
+	ldr	r2, [r7]
+	add	r2, r2, #10
+	wait_until r2, r7, r3
+	b	zcal_done
+
+emc_lpddr2:
+	/* Issue a ZQ_CAL for dev0 - LPDDR2 */
+	mov32	r2, 0x800A00AB		@ DEV_SELECTION=2, MA=10, OP=0xAB
+	str	r2, [r0, #EMC_MRW]
+	ldr	r2, [r7]
+	add	r2, r2, #1
+	wait_until r2, r7, r3
+
+	tst	r1, #2
+	beq	zcal_done
+
+	/* Issue a ZQ_CAL for dev1 - LPDDR2 */
+	mov32	r2, 0x400A00AB		@ DEV_SELECTION=1, MA=10, OP=0xAB
+	str	r2, [r0, #EMC_MRW]
+	ldr	r2, [r7]
+	add	r2, r2, #1
+	wait_until r2, r7, r3
+
+zcal_done:
+	mov	r1, #0			@ unstall all transactions
+	str	r1, [r0, #EMC_REQ_CTRL]
+	ldr	r1, [r5, #0x4]		@ restore EMC_ZCAL_INTERVAL
+	str	r1, [r0, #EMC_ZCAL_INTERVAL]
+	ldr	r1, [r5, #0x0]		@ restore EMC_CFG
+	str	r1, [r0, #EMC_CFG]
+
+	/* Tegra114 has dual EMC channels, now configure the other one */
+	cmp	r10, #TEGRA114
+	bne	__no_dual_emc_chanl
+	mov32	r1, TEGRA_EMC1_BASE
+	cmp	r0, r1
+	movne	r0, r1
+	addne	r5, r5, #0x20	/* the EMC1 entries start 0x20 bytes into the table and save area */
+	bne	exit_self_refresh
+__no_dual_emc_chanl:
+
+	mov32	r0, TEGRA_PMC_BASE
+	ldr	r0, [r0, #PMC_SCRATCH41]
+	mov	pc, r0			@ jump to tegra_resume
+ENDPROC(tegra30_lp1_reset)
+
+	.align	L1_CACHE_SHIFT
+tegra30_sdram_pad_address:	/* registers saved by tegra30_sdram_self_refresh when the SoC ID is TEGRA30 */
+	.word	TEGRA_EMC_BASE + EMC_CFG				@0x0
+	.word	TEGRA_EMC_BASE + EMC_ZCAL_INTERVAL			@0x4
+	.word	TEGRA_EMC_BASE + EMC_AUTO_CAL_INTERVAL			@0x8
+	.word	TEGRA_EMC_BASE + EMC_XM2VTTGENPADCTRL			@0xc
+	.word	TEGRA_EMC_BASE + EMC_XM2VTTGENPADCTRL2			@0x10
+	.word	TEGRA_PMC_BASE + PMC_IO_DPD_STATUS			@0x14
+	.word	TEGRA_CLK_RESET_BASE + CLK_RESET_CLK_SOURCE_MSELECT	@0x18
+	.word	TEGRA_CLK_RESET_BASE + CLK_RESET_SCLK_BURST		@0x1c
+
+tegra114_sdram_pad_address:	/* table used for any other SoC ID; adds a second set of EMC registers at 0x20 */
+	.word	TEGRA_EMC0_BASE + EMC_CFG				@0x0
+	.word	TEGRA_EMC0_BASE + EMC_ZCAL_INTERVAL			@0x4
+	.word	TEGRA_EMC0_BASE + EMC_AUTO_CAL_INTERVAL			@0x8
+	.word	TEGRA_EMC0_BASE + EMC_XM2VTTGENPADCTRL			@0xc
+	.word	TEGRA_EMC0_BASE + EMC_XM2VTTGENPADCTRL2			@0x10
+	.word	TEGRA_PMC_BASE + PMC_IO_DPD_STATUS			@0x14
+	.word	TEGRA_CLK_RESET_BASE + CLK_RESET_CLK_SOURCE_MSELECT	@0x18
+	.word	TEGRA_CLK_RESET_BASE + CLK_RESET_SCLK_BURST		@0x1c
+	.word	TEGRA_EMC1_BASE + EMC_CFG				@0x20
+	.word	TEGRA_EMC1_BASE + EMC_ZCAL_INTERVAL			@0x24
+	.word	TEGRA_EMC1_BASE + EMC_AUTO_CAL_INTERVAL			@0x28
+	.word	TEGRA_EMC1_BASE + EMC_XM2VTTGENPADCTRL			@0x2c
+	.word	TEGRA_EMC1_BASE + EMC_XM2VTTGENPADCTRL2			@0x30
+
+tegra30_sdram_pad_size:		/* byte length of tegra30_sdram_pad_address */
+	.word	tegra114_sdram_pad_address - tegra30_sdram_pad_address
+
+tegra114_sdram_pad_size:	/* byte length of tegra114_sdram_pad_address */
+	.word	tegra30_sdram_pad_size - tegra114_sdram_pad_address
+
+	.type	tegra30_sdram_pad_save, %object
+tegra30_sdram_pad_save:		/* shared save area, sized from the tegra114 table (the longer of the two) */
+	.rept (tegra30_sdram_pad_size - tegra114_sdram_pad_address) / 4
+	.long	0
+	.endr
+
+/*
+ * tegra30_tear_down_core
+ *
+ * copied into and executed from IRAM
+ * puts memory in self-refresh for LP0 and LP1
+ */
+tegra30_tear_down_core:
+	bl	tegra30_sdram_self_refresh	/* also loads r10, which the next call compares against TEGRA30 */
+	bl	tegra30_switch_cpu_to_clk32k
+	b	tegra30_enter_sleep		/* plain branch, not a call */
+
+/*
+ * tegra30_switch_cpu_to_clk32k
+ *
+ * In LP0 and LP1 all PLLs will be turned off. Switching the CPU and System CLK
+ * to the 32KHz clock.
+ * r4 = TEGRA_PMC_BASE
+ * r5 = TEGRA_CLK_RESET_BASE
+ * r6 = TEGRA_FLOW_CTRL_BASE
+ * r7 = TEGRA_TMRUS_BASE
+ * r10= SoC ID
+ */
+tegra30_switch_cpu_to_clk32k:
+	/*
+	 * start by jumping to CLKM to safely disable PLLs, then jump to
+	 * CLKS.
+	 */
+	mov	r0, #(1 << 28)
+	str	r0, [r5, #CLK_RESET_SCLK_BURST]
+	/* 2uS delay between changing SCLK and CCLK */
+	ldr	r1, [r7]
+	add	r1, r1, #2
+	wait_until r1, r7, r9	/* r9 is the scratch register here */
+	str	r0, [r5, #CLK_RESET_CCLK_BURST]
+	mov	r0, #0
+	str	r0, [r5, #CLK_RESET_CCLK_DIVIDER]
+	str	r0, [r5, #CLK_RESET_SCLK_DIVIDER]
+
+	/* switch the clock source of mselect to be CLK_M */
+	ldr	r0, [r5, #CLK_RESET_CLK_SOURCE_MSELECT]
+	orr	r0, r0, #MSELECT_CLKM
+	str	r0, [r5, #CLK_RESET_CLK_SOURCE_MSELECT]
+
+	/* 2uS delay between changing SCLK and disabling PLLs */
+	ldr	r1, [r7]
+	add	r1, r1, #2
+	wait_until r1, r7, r9
+
+	/* disable PLLM via PMC in LP1 */
+	ldr	r0, [r4, #PMC_PLLP_WB0_OVERRIDE]
+	bic	r0, r0, #(1 << 12)
+	str	r0, [r4, #PMC_PLLP_WB0_OVERRIDE]
+
+	/* disable PLLP, PLLA, PLLC and PLLX */
+	ldr	r0, [r5, #CLK_RESET_PLLP_BASE]
+	bic	r0, r0, #(1 << 30)	/* the same bit 30 that pll_enable sets */
+	str	r0, [r5, #CLK_RESET_PLLP_BASE]
+	ldr	r0, [r5, #CLK_RESET_PLLA_BASE]
+	bic	r0, r0, #(1 << 30)
+	str	r0, [r5, #CLK_RESET_PLLA_BASE]
+	ldr	r0, [r5, #CLK_RESET_PLLC_BASE]
+	bic	r0, r0, #(1 << 30)
+	str	r0, [r5, #CLK_RESET_PLLC_BASE]
+	ldr	r0, [r5, #CLK_RESET_PLLX_BASE]
+	bic	r0, r0, #(1 << 30)
+	str	r0, [r5, #CLK_RESET_PLLX_BASE]
+
+	cmp	r10, #TEGRA30
+	beq	_no_pll_in_iddq	/* Tegra30 skips the IDDQ write */
+	pll_iddq_entry r1, r5, CLK_RESET_PLLX_MISC3, CLK_RESET_PLLX_MISC3_IDDQ
+_no_pll_in_iddq:
+
+	/* switch to CLKS */
+	mov	r0, #0	/* burst policy = 32KHz */
+	str	r0, [r5, #CLK_RESET_SCLK_BURST]
+
+	mov	pc, lr
+
 /*
  * tegra30_enter_sleep
  *
@@ -172,8 +660,12 @@
 	orr	r0, r0, #FLOW_CTRL_CSR_ENABLE
 	str	r0, [r6, r2]
 
+	tegra_get_soc_id TEGRA_APB_MISC_BASE, r10
+	cmp	r10, #TEGRA30
 	mov	r0, #FLOW_CTRL_WAIT_FOR_INTERRUPT
-	orr	r0, r0, #FLOW_CTRL_HALT_CPU_IRQ | FLOW_CTRL_HALT_CPU_FIQ
+	orreq	r0, r0, #FLOW_CTRL_HALT_CPU_IRQ | FLOW_CTRL_HALT_CPU_FIQ
+	orrne   r0, r0, #FLOW_CTRL_HALT_LIC_IRQ | FLOW_CTRL_HALT_LIC_FIQ
+
 	cpu_to_halt_reg r2, r1
 	str	r0, [r6, r2]
 	dsb
@@ -187,4 +679,126 @@
 	/* !!!FIXME!!! Implement halt failure handler */
 	b	halted
 
+/*
+ * tegra30_sdram_self_refresh
+ *
+ * called with MMU off and caches disabled
+ * must be executed from IRAM
+ * r4 = TEGRA_PMC_BASE
+ * r5 = TEGRA_CLK_RESET_BASE
+ * r6 = TEGRA_FLOW_CTRL_BASE
+ * r7 = TEGRA_TMRUS_BASE
+ * r10= SoC ID
+ */
+tegra30_sdram_self_refresh:
+
+	adr	r8, tegra30_sdram_pad_save
+	tegra_get_soc_id TEGRA_APB_MISC_BASE, r10	/* macro defined elsewhere; presumably loads the SoC ID into r10 here */
+	cmp	r10, #TEGRA30
+	adreq	r2, tegra30_sdram_pad_address
+	ldreq	r3, tegra30_sdram_pad_size
+	adrne	r2, tegra114_sdram_pad_address
+	ldrne	r3, tegra114_sdram_pad_size
+	mov	r9, #0	/* r9 = byte offset into the table and the save area */
+
+padsave:
+	ldr	r0, [r2, r9]		@ r0 is the addr in the pad_address
+
+	ldr	r1, [r0]
+	str	r1, [r8, r9]		@ save the content of the addr
+
+	add	r9, r9, #4
+	cmp	r3, r9
+	bne	padsave
+padsave_done:
+
+	dsb
+
+	cmp	r10, #TEGRA30
+	ldreq	r0, =TEGRA_EMC_BASE	@ r0 reserved for emc base addr
+	ldrne	r0, =TEGRA_EMC0_BASE
+
+enter_self_refresh:
+	cmp	r10, #TEGRA30	/* flags from this compare are consumed by the bicne below */
+	mov	r1, #0
+	str	r1, [r0, #EMC_ZCAL_INTERVAL]
+	str	r1, [r0, #EMC_AUTO_CAL_INTERVAL]
+	ldr	r1, [r0, #EMC_CFG]
+	bic	r1, r1, #(1 << 28)
+	bicne	r1, r1, #(1 << 29)
+	str	r1, [r0, #EMC_CFG]	@ disable DYN_SELF_REF
+
+	emc_timing_update r1, r0
+
+	ldr	r1, [r7]
+	add	r1, r1, #5
+	wait_until r1, r7, r2
+
+emc_wait_auto_cal:
+	ldr	r1, [r0, #EMC_AUTO_CAL_STATUS]
+	tst	r1, #(1 << 31)		@ wait until AUTO_CAL_ACTIVE is cleared
+	bne	emc_wait_auto_cal
+
+	mov	r1, #3
+	str	r1, [r0, #EMC_REQ_CTRL]	@ stall incoming DRAM requests
+
+emcidle:
+	ldr	r1, [r0, #EMC_EMC_STATUS]
+	tst	r1, #4
+	beq	emcidle	/* spin until bit 2 of EMC_EMC_STATUS is set */
+
+	mov	r1, #1
+	str	r1, [r0, #EMC_SELF_REF]
+
+	emc_device_mask r1, r0
+
+emcself:
+	ldr	r2, [r0, #EMC_EMC_STATUS]
+	and	r2, r2, r1
+	cmp	r2, r1
+	bne	emcself			@ loop until DDR in self-refresh
+
+	/* Put VTTGEN in the lowest power mode */
+	ldr	r1, [r0, #EMC_XM2VTTGENPADCTRL]
+	mov32	r2, 0xF8F8FFFF	@ clear XM2VTTGEN_DRVUP and XM2VTTGEN_DRVDN
+	and	r1, r1, r2
+	str	r1, [r0, #EMC_XM2VTTGENPADCTRL]
+	ldr	r1, [r0, #EMC_XM2VTTGENPADCTRL2]
+	cmp	r10, #TEGRA30
+	orreq	r1, r1, #7		@ set E_NO_VTTGEN
+	orrne	r1, r1, #0x3f
+	str	r1, [r0, #EMC_XM2VTTGENPADCTRL2]
+
+	emc_timing_update r1, r0
+
+	/* Tegra114 has dual EMC channels, now configure the other one */
+	cmp	r10, #TEGRA114
+	bne	no_dual_emc_chanl
+	mov32	r1, TEGRA_EMC1_BASE
+	cmp	r0, r1
+	movne	r0, r1
+	bne	enter_self_refresh	/* second pass runs with r0 = TEGRA_EMC1_BASE */
+no_dual_emc_chanl:
+
+	ldr	r1, [r4, #PMC_CTRL]
+	tst	r1, #PMC_CTRL_SIDE_EFFECT_LP0
+	bne	pmc_io_dpd_skip	/* LP0 bit set: leave PMC_IO_DPD_REQ alone */
+	/*
+	 * Put DDR_DATA, DISC_ADDR_CMD, DDR_ADDR_CMD, POP_ADDR_CMD, POP_CLK
+	 * and COMP in the lowest power mode when LP1.
+	 */
+	mov32	r1, 0x8EC00000
+	str	r1, [r4, #PMC_IO_DPD_REQ]
+pmc_io_dpd_skip:
+
+	dsb
+
+	mov	pc, lr
+
+	.ltorg			/* emit the literal pool (e.g. for the ldr =TEGRA_EMC_BASE above) before the end marker */
+/* dummy symbol for end of IRAM */
+	.align L1_CACHE_SHIFT
+	.global tegra30_iram_end
+tegra30_iram_end:
+	b	.		/* branch to self */
 #endif
diff --git a/arch/arm/mach-tegra/sleep.S b/arch/arm/mach-tegra/sleep.S
index 9daaef2..8d06213 100644
--- a/arch/arm/mach-tegra/sleep.S
+++ b/arch/arm/mach-tegra/sleep.S
@@ -56,7 +56,9 @@
 	isb
 
 	/* Flush the D-cache */
-	bl	v7_flush_dcache_louis
+	cmp	r0, #TEGRA_FLUSH_CACHE_ALL
+	blne	v7_flush_dcache_louis
+	bleq	v7_flush_dcache_all
 
 	/* Trun off coherency */
 	exit_smp r4, r5
@@ -67,15 +69,40 @@
 
 #ifdef CONFIG_PM_SLEEP
 /*
+ * tegra_init_l2_for_a15
+ *
+ * set up the correct L2 cache data RAM latency
+ */
+ENTRY(tegra_init_l2_for_a15)
+	mrc	p15, 0, r0, c0, c0, 5	/* read MPIDR */
+	ubfx	r0, r0, #8, #4		/* keep the 4-bit field at bits [11:8] */
+	tst	r0, #1				@ only need for cluster 0
+	bne	_exit_init_l2_a15
+
+	mrc	p15, 0x1, r0, c9, c0, 2	/* presumably L2CTLR on Cortex-A15 - confirm against the TRM */
+	and	r0, r0, #7		/* NOTE(review): drops every bit above [2:0] from r0 */
+	cmp	r0, #2
+	bicne	r0, r0, #7
+	orrne	r0, r0, #2
+	mcrne	p15, 0x1, r0, c9, c0, 2	/* NOTE(review): writes the value 2 with all other bits zero - confirm intended */
+_exit_init_l2_a15:
+
+	mov	pc, lr
+ENDPROC(tegra_init_l2_for_a15)
+
+/*
  * tegra_sleep_cpu_finish(unsigned long v2p)
  *
  * enters suspend in LP2 by turning off the mmu and jumping to
  * tegra?_tear_down_cpu
  */
 ENTRY(tegra_sleep_cpu_finish)
+	mov	r4, r0
 	/* Flush and disable the L1 data cache */
+	mov	r0, #TEGRA_FLUSH_CACHE_ALL
 	bl	tegra_disable_clean_inv_dcache
 
+	mov	r0, r4
 	mov32	r6, tegra_tear_down_cpu
 	ldr	r1, [r6]
 	add	r1, r1, r0
@@ -107,10 +134,10 @@
 #ifdef CONFIG_CACHE_L2X0
 	/* Disable L2 cache */
 	check_cpu_part_num 0xc09, r9, r10
-	movweq	r4, #:lower16:(TEGRA_ARM_PERIF_BASE + 0x3000)
-	movteq	r4, #:upper16:(TEGRA_ARM_PERIF_BASE + 0x3000)
-	moveq	r5, #0
-	streq	r5, [r4, #L2X0_CTRL]
+	movweq	r2, #:lower16:(TEGRA_ARM_PERIF_BASE + 0x3000)
+	movteq	r2, #:upper16:(TEGRA_ARM_PERIF_BASE + 0x3000)
+	moveq	r3, #0
+	streq	r3, [r2, #L2X0_CTRL]
 #endif
 	mov	pc, r0
 ENDPROC(tegra_shut_off_mmu)
diff --git a/arch/arm/mach-tegra/sleep.h b/arch/arm/mach-tegra/sleep.h
index 98b7da6..a4edbb3 100644
--- a/arch/arm/mach-tegra/sleep.h
+++ b/arch/arm/mach-tegra/sleep.h
@@ -41,7 +41,19 @@
 #define CPU_NOT_RESETTABLE	0
 #endif
 
+/* flag of tegra_disable_clean_inv_dcache to do LoUIS or all */
+#define TEGRA_FLUSH_CACHE_LOUIS	0
+#define TEGRA_FLUSH_CACHE_ALL	1
+
 #ifdef __ASSEMBLY__
+/* waits until the microsecond counter (base) is > rn; rn is incremented, tmp and the flags are overwritten */
+.macro wait_until, rn, base, tmp
+	add	\rn, \rn, #1
+1001:	ldr	\tmp, [\base]
+	cmp	\tmp, \rn
+	bmi	1001b		/* loop while (tmp - rn) is negative */
+.endm
+
 /* returns the offset of the flow controller halt register for a cpu */
 .macro cpu_to_halt_reg rd, rcpu
 	cmp	\rcpu, #0
@@ -144,7 +156,7 @@
 void tegra_pen_unlock(void);
 void tegra_resume(void);
 int tegra_sleep_cpu_finish(unsigned long);
-void tegra_disable_clean_inv_dcache(void);
+void tegra_disable_clean_inv_dcache(u32 flag);
 
 #ifdef CONFIG_HOTPLUG_CPU
 void tegra20_hotplug_shutdown(void);
diff --git a/drivers/clk/tegra/clk-tegra114.c b/drivers/clk/tegra/clk-tegra114.c
index b6015cb..806d803 100644
--- a/drivers/clk/tegra/clk-tegra114.c
+++ b/drivers/clk/tegra/clk-tegra114.c
@@ -290,6 +290,14 @@
 /* Tegra CPU clock and reset control regs */
 #define CLK_RST_CONTROLLER_CPU_CMPLX_STATUS	0x470
 
+#ifdef CONFIG_PM_SLEEP
+static struct cpu_clk_suspend_context {
+	u32 clk_csite_src;	/* CLK_SOURCE_CSITE as read by tegra114_cpu_clock_suspend() */
+	u32 cclkg_burst;	/* CCLKG_BURST_POLICY as read at suspend */
+	u32 cclkg_divider;	/* register at CCLKG_BURST_POLICY + 4 as read at suspend */
+} tegra114_cpu_clk_sctx;
+#endif
+
 static int periph_clk_enb_refcnt[CLK_OUT_ENB_NUM * 32];
 
 static void __iomem *clk_base;
@@ -2142,9 +2150,39 @@
 	/* flow controller would take care in the power sequence. */
 }
 
+#ifdef CONFIG_PM_SLEEP
+static void tegra114_cpu_clock_suspend(void)
+{
+	struct cpu_clk_suspend_context *ctx = &tegra114_cpu_clk_sctx;
+
+	/* save off the original coresite source, then switch it to clk_m */
+	ctx->clk_csite_src = readl(clk_base + CLK_SOURCE_CSITE);
+	writel(3 << 30, clk_base + CLK_SOURCE_CSITE);
+
+	/* capture the CCLKG burst policy register and the word after it */
+	ctx->cclkg_burst = readl(clk_base + CCLKG_BURST_POLICY);
+	ctx->cclkg_divider = readl(clk_base + CCLKG_BURST_POLICY + 4);
+}
+
+static void tegra114_cpu_clock_resume(void)
+{
+	const struct cpu_clk_suspend_context *ctx = &tegra114_cpu_clk_sctx;
+
+	/* put back what tegra114_cpu_clock_suspend() stored, in capture order */
+	writel(ctx->clk_csite_src, clk_base + CLK_SOURCE_CSITE);
+
+	writel(ctx->cclkg_burst, clk_base + CCLKG_BURST_POLICY);
+	writel(ctx->cclkg_divider, clk_base + CCLKG_BURST_POLICY + 4);
+}
+#endif
+
 static struct tegra_cpu_car_ops tegra114_cpu_car_ops = {
 	.wait_for_reset	= tegra114_wait_cpu_in_reset,
 	.disable_clock	= tegra114_disable_cpu_clock,
+#ifdef CONFIG_PM_SLEEP
+	.suspend	= tegra114_cpu_clock_suspend,
+	.resume		= tegra114_cpu_clock_resume,
+#endif
 };
 
 static const struct of_device_id pmc_match[] __initconst = {