Merge branch 'devel-stable' into for-next
diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt
index f32ce54..397b815 100644
--- a/Documentation/power/runtime_pm.txt
+++ b/Documentation/power/runtime_pm.txt
@@ -468,6 +468,10 @@
     - set the power.irq_safe flag for the device, causing the runtime-PM
       callbacks to be invoked with interrupts off
 
+  bool pm_runtime_is_irq_safe(struct device *dev);
+    - return true if power.irq_safe flag was set for the device, causing
+      the runtime-PM callbacks to be invoked with interrupts off
+
   void pm_runtime_mark_last_busy(struct device *dev);
     - set the power.last_busy field to the current time
 
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 89c4b5c..622e364 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -702,7 +702,9 @@
 	select CPU_SA1100
 	select GENERIC_CLOCKEVENTS
 	select HAVE_IDE
+	select IRQ_DOMAIN
 	select ISA
+	select MULTI_IRQ_HANDLER
 	select NEED_MACH_MEMORY_H
 	select SPARSE_IRQ
 	help
diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
index 413fd94..68be901 100644
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -397,8 +397,7 @@
 		add	sp, sp, r6
 #endif
 
-		tst	r4, #1
-		bleq	cache_clean_flush
+		bl	cache_clean_flush
 
 		adr	r0, BSYM(restart)
 		add	r0, r0, r6
@@ -1047,6 +1046,8 @@
 		b	call_cache_fn
 
 __armv4_mpu_cache_flush:
+		tst	r4, #1
+		movne	pc, lr
 		mov	r2, #1
 		mov	r3, #0
 		mcr	p15, 0, ip, c7, c6, 0	@ invalidate D cache
@@ -1064,6 +1065,8 @@
 		mov	pc, lr
 		
 __fa526_cache_flush:
+		tst	r4, #1
+		movne	pc, lr
 		mov	r1, #0
 		mcr	p15, 0, r1, c7, c14, 0	@ clean and invalidate D cache
 		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
@@ -1072,13 +1075,16 @@
 
 __armv6_mmu_cache_flush:
 		mov	r1, #0
-		mcr	p15, 0, r1, c7, c14, 0	@ clean+invalidate D
+		tst	r4, #1
+		mcreq	p15, 0, r1, c7, c14, 0	@ clean+invalidate D
 		mcr	p15, 0, r1, c7, c5, 0	@ invalidate I+BTB
-		mcr	p15, 0, r1, c7, c15, 0	@ clean+invalidate unified
+		mcreq	p15, 0, r1, c7, c15, 0	@ clean+invalidate unified
 		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
 		mov	pc, lr
 
 __armv7_mmu_cache_flush:
+		tst	r4, #1
+		bne	iflush
 		mrc	p15, 0, r10, c0, c1, 5	@ read ID_MMFR1
 		tst	r10, #0xf << 16		@ hierarchical cache (ARMv7)
 		mov	r10, #0
@@ -1139,6 +1145,8 @@
 		mov	pc, lr
 
 __armv5tej_mmu_cache_flush:
+		tst	r4, #1
+		movne	pc, lr
 1:		mrc	p15, 0, r15, c7, c14, 3	@ test,clean,invalidate D cache
 		bne	1b
 		mcr	p15, 0, r0, c7, c5, 0	@ flush I cache
@@ -1146,6 +1154,8 @@
 		mov	pc, lr
 
 __armv4_mmu_cache_flush:
+		tst	r4, #1
+		movne	pc, lr
 		mov	r2, #64*1024		@ default: 32K dcache size (*2)
 		mov	r11, #32		@ default: 32 byte line size
 		mrc	p15, 0, r3, c0, c0, 1	@ read cache type
@@ -1179,6 +1189,8 @@
 
 __armv3_mmu_cache_flush:
 __armv3_mpu_cache_flush:
+		tst	r4, #1
+		movne	pc, lr
 		mov	r1, #0
 		mcr	p15, 0, r1, c7, c0, 0	@ invalidate whole cache v3
 		mov	pc, lr
diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c
index e57d7e5..7b69c5f 100644
--- a/arch/arm/common/sa1111.c
+++ b/arch/arm/common/sa1111.c
@@ -282,8 +282,8 @@
 	}
 
 	if (i == 8)
-		printk(KERN_ERR "Danger Will Robinson: failed to "
-			"re-trigger IRQ%d\n", d->irq);
+		pr_err("Danger Will Robinson: failed to re-trigger IRQ%d\n",
+		       d->irq);
 	return i == 8 ? -1 : 0;
 }
 
@@ -384,8 +384,8 @@
 	}
 
 	if (i == 8)
-		printk(KERN_ERR "Danger Will Robinson: failed to "
-			"re-trigger IRQ%d\n", d->irq);
+		pr_err("Danger Will Robinson: failed to re-trigger IRQ%d\n",
+		       d->irq);
 	return i == 8 ? -1 : 0;
 }
 
@@ -740,9 +740,8 @@
 		goto err_unmap;
 	}
 
-	printk(KERN_INFO "SA1111 Microprocessor Companion Chip: "
-		"silicon revision %lx, metal revision %lx\n",
-		(id & SKID_SIREV_MASK)>>4, (id & SKID_MTREV_MASK));
+	pr_info("SA1111 Microprocessor Companion Chip: silicon revision %lx, metal revision %lx\n",
+		(id & SKID_SIREV_MASK) >> 4, id & SKID_MTREV_MASK);
 
 	/*
 	 * We found it.  Wake the chip up, and initialise.
diff --git a/arch/arm/include/asm/hw_irq.h b/arch/arm/include/asm/hw_irq.h
index a71b417..af79da4 100644
--- a/arch/arm/include/asm/hw_irq.h
+++ b/arch/arm/include/asm/hw_irq.h
@@ -8,6 +8,7 @@
 {
 	extern unsigned long irq_err_count;
 	irq_err_count++;
+	pr_crit("unexpected IRQ trap at vector %02x\n", irq);
 }
 
 void set_irq_flags(unsigned int irq, unsigned int flags);
diff --git a/arch/arm/include/asm/mcpm.h b/arch/arm/include/asm/mcpm.h
index d428e38..3446f6a 100644
--- a/arch/arm/include/asm/mcpm.h
+++ b/arch/arm/include/asm/mcpm.h
@@ -219,6 +219,23 @@
 bool __mcpm_outbound_enter_critical(unsigned int this_cpu, unsigned int cluster);
 int __mcpm_cluster_state(unsigned int cluster);
 
+/**
+ * mcpm_sync_init - Initialize the cluster synchronization support
+ *
+ * @power_up_setup: platform specific function invoked during very
+ * 		    early CPU/cluster bringup stage.
+ *
+ * This prepares memory used by vlocks and the MCPM state machine used
+ * across CPUs that may have their caches active or inactive. Must be
+ * called only after a successful call to mcpm_platform_register().
+ *
+ * The power_up_setup argument is a pointer to assembly code called when
+ * the MMU and caches are still disabled during boot  and no stack space is
+ * available. The affinity level passed to that code corresponds to the
+ * resource that needs to be initialized (e.g. 1 for cluster level, 0 for
+ * CPU level).  Proper exclusion mechanisms are already activated at that
+ * point.
+ */
 int __init mcpm_sync_init(
 	void (*power_up_setup)(unsigned int affinity_level));
 
diff --git a/arch/arm/include/asm/percpu.h b/arch/arm/include/asm/percpu.h
index 209e650..a89b407 100644
--- a/arch/arm/include/asm/percpu.h
+++ b/arch/arm/include/asm/percpu.h
@@ -30,14 +30,14 @@
 static inline unsigned long __my_cpu_offset(void)
 {
 	unsigned long off;
-	register unsigned long *sp asm ("sp");
 
 	/*
 	 * Read TPIDRPRW.
 	 * We want to allow caching the value, so avoid using volatile and
 	 * instead use a fake stack read to hazard against barrier().
 	 */
-	asm("mrc p15, 0, %0, c13, c0, 4" : "=r" (off) : "Q" (*sp));
+	asm("mrc p15, 0, %0, c13, c0, 4" : "=r" (off)
+		: "Q" (*(const unsigned long *)current_stack_pointer));
 
 	return off;
 }
diff --git a/arch/arm/include/asm/pgalloc.h b/arch/arm/include/asm/pgalloc.h
index 78a7793..19cfab5 100644
--- a/arch/arm/include/asm/pgalloc.h
+++ b/arch/arm/include/asm/pgalloc.h
@@ -157,7 +157,15 @@
 static inline void
 pmd_populate(struct mm_struct *mm, pmd_t *pmdp, pgtable_t ptep)
 {
-	__pmd_populate(pmdp, page_to_phys(ptep), _PAGE_USER_TABLE);
+	extern pmdval_t user_pmd_table;
+	pmdval_t prot;
+
+	if (__LINUX_ARM_ARCH__ >= 6 && !IS_ENABLED(CONFIG_ARM_LPAE))
+		prot = user_pmd_table;
+	else
+		prot = _PAGE_USER_TABLE;
+
+	__pmd_populate(pmdp, page_to_phys(ptep), prot);
 }
 #define pmd_pgtable(pmd) pmd_page(pmd)
 
diff --git a/arch/arm/include/asm/pgtable-2level-hwdef.h b/arch/arm/include/asm/pgtable-2level-hwdef.h
index 5cfba15..5e68278 100644
--- a/arch/arm/include/asm/pgtable-2level-hwdef.h
+++ b/arch/arm/include/asm/pgtable-2level-hwdef.h
@@ -20,12 +20,14 @@
 #define PMD_TYPE_FAULT		(_AT(pmdval_t, 0) << 0)
 #define PMD_TYPE_TABLE		(_AT(pmdval_t, 1) << 0)
 #define PMD_TYPE_SECT		(_AT(pmdval_t, 2) << 0)
+#define PMD_PXNTABLE		(_AT(pmdval_t, 1) << 2)     /* v7 */
 #define PMD_BIT4		(_AT(pmdval_t, 1) << 4)
 #define PMD_DOMAIN(x)		(_AT(pmdval_t, (x)) << 5)
 #define PMD_PROTECTION		(_AT(pmdval_t, 1) << 9)		/* v5 */
 /*
  *   - section
  */
+#define PMD_SECT_PXN    (_AT(pmdval_t, 1) << 0)     /* v7 */
 #define PMD_SECT_BUFFERABLE	(_AT(pmdval_t, 1) << 2)
 #define PMD_SECT_CACHEABLE	(_AT(pmdval_t, 1) << 3)
 #define PMD_SECT_XN		(_AT(pmdval_t, 1) << 4)		/* v6 */
diff --git a/arch/arm/include/asm/pgtable-3level-hwdef.h b/arch/arm/include/asm/pgtable-3level-hwdef.h
index 9fd61c7..f8f1cff 100644
--- a/arch/arm/include/asm/pgtable-3level-hwdef.h
+++ b/arch/arm/include/asm/pgtable-3level-hwdef.h
@@ -76,6 +76,7 @@
 #define PTE_EXT_SHARED		(_AT(pteval_t, 3) << 8)		/* SH[1:0], inner shareable */
 #define PTE_EXT_AF		(_AT(pteval_t, 1) << 10)	/* Access Flag */
 #define PTE_EXT_NG		(_AT(pteval_t, 1) << 11)	/* nG */
+#define PTE_EXT_PXN		(_AT(pteval_t, 1) << 53)	/* PXN */
 #define PTE_EXT_XN		(_AT(pteval_t, 1) << 54)	/* XN */
 
 /*
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index 3b30062..d5cac54 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -252,17 +252,57 @@
 	set_pte_ext(ptep, pteval, ext);
 }
 
-#define PTE_BIT_FUNC(fn,op) \
-static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; }
+static inline pte_t clear_pte_bit(pte_t pte, pgprot_t prot)
+{
+	pte_val(pte) &= ~pgprot_val(prot);
+	return pte;
+}
 
-PTE_BIT_FUNC(wrprotect, |= L_PTE_RDONLY);
-PTE_BIT_FUNC(mkwrite,   &= ~L_PTE_RDONLY);
-PTE_BIT_FUNC(mkclean,   &= ~L_PTE_DIRTY);
-PTE_BIT_FUNC(mkdirty,   |= L_PTE_DIRTY);
-PTE_BIT_FUNC(mkold,     &= ~L_PTE_YOUNG);
-PTE_BIT_FUNC(mkyoung,   |= L_PTE_YOUNG);
-PTE_BIT_FUNC(mkexec,   &= ~L_PTE_XN);
-PTE_BIT_FUNC(mknexec,   |= L_PTE_XN);
+static inline pte_t set_pte_bit(pte_t pte, pgprot_t prot)
+{
+	pte_val(pte) |= pgprot_val(prot);
+	return pte;
+}
+
+static inline pte_t pte_wrprotect(pte_t pte)
+{
+	return set_pte_bit(pte, __pgprot(L_PTE_RDONLY));
+}
+
+static inline pte_t pte_mkwrite(pte_t pte)
+{
+	return clear_pte_bit(pte, __pgprot(L_PTE_RDONLY));
+}
+
+static inline pte_t pte_mkclean(pte_t pte)
+{
+	return clear_pte_bit(pte, __pgprot(L_PTE_DIRTY));
+}
+
+static inline pte_t pte_mkdirty(pte_t pte)
+{
+	return set_pte_bit(pte, __pgprot(L_PTE_DIRTY));
+}
+
+static inline pte_t pte_mkold(pte_t pte)
+{
+	return clear_pte_bit(pte, __pgprot(L_PTE_YOUNG));
+}
+
+static inline pte_t pte_mkyoung(pte_t pte)
+{
+	return set_pte_bit(pte, __pgprot(L_PTE_YOUNG));
+}
+
+static inline pte_t pte_mkexec(pte_t pte)
+{
+	return clear_pte_bit(pte, __pgprot(L_PTE_XN));
+}
+
+static inline pte_t pte_mknexec(pte_t pte)
+{
+	return set_pte_bit(pte, __pgprot(L_PTE_XN));
+}
 
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 {
diff --git a/arch/arm/include/asm/ptrace.h b/arch/arm/include/asm/ptrace.h
index 601264d..51622ba 100644
--- a/arch/arm/include/asm/ptrace.h
+++ b/arch/arm/include/asm/ptrace.h
@@ -154,9 +154,8 @@
 	return regs->ARM_sp;
 }
 
-#define current_pt_regs(void) ({				\
-	register unsigned long sp asm ("sp");			\
-	(struct pt_regs *)((sp | (THREAD_SIZE - 1)) - 7) - 1;	\
+#define current_pt_regs(void) ({ (struct pt_regs *)			\
+		((current_stack_pointer | (THREAD_SIZE - 1)) - 7) - 1;	\
 })
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index fc44d37..d890e41 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -44,16 +44,6 @@
 	__u32	extra[2];		/* Xscale 'acc' register, etc */
 };
 
-struct arm_restart_block {
-	union {
-		/* For user cache flushing */
-		struct {
-			unsigned long start;
-			unsigned long end;
-		} cache;
-	};
-};
-
 /*
  * low level task data that entry.S needs immediate access to.
  * __switch_to() assumes cpu_context follows immediately after cpu_domain.
@@ -79,7 +69,6 @@
 	unsigned long		thumbee_state;	/* ThumbEE Handler Base register */
 #endif
 	struct restart_block	restart_block;
-	struct arm_restart_block	arm_restart_block;
 };
 
 #define INIT_THREAD_INFO(tsk)						\
@@ -101,14 +90,19 @@
 #define init_stack		(init_thread_union.stack)
 
 /*
+ * how to get the current stack pointer in C
+ */
+register unsigned long current_stack_pointer asm ("sp");
+
+/*
  * how to get the thread information struct from C
  */
 static inline struct thread_info *current_thread_info(void) __attribute_const__;
 
 static inline struct thread_info *current_thread_info(void)
 {
-	register unsigned long sp asm ("sp");
-	return (struct thread_info *)(sp & ~(THREAD_SIZE - 1));
+	return (struct thread_info *)
+		(current_stack_pointer & ~(THREAD_SIZE - 1));
 }
 
 #define thread_saved_pc(tsk)	\
diff --git a/arch/arm/include/asm/vfp.h b/arch/arm/include/asm/vfp.h
index f4ab34f..ee5f308 100644
--- a/arch/arm/include/asm/vfp.h
+++ b/arch/arm/include/asm/vfp.h
@@ -22,6 +22,7 @@
 #define FPSID_NODOUBLE		(1<<20)
 #define FPSID_ARCH_BIT		(16)
 #define FPSID_ARCH_MASK		(0xF  << FPSID_ARCH_BIT)
+#define FPSID_CPUID_ARCH_MASK	(0x7F  << FPSID_ARCH_BIT)
 #define FPSID_PART_BIT		(8)
 #define FPSID_PART_MASK		(0xFF << FPSID_PART_BIT)
 #define FPSID_VARIANT_BIT	(4)
@@ -75,6 +76,10 @@
 /* MVFR0 bits */
 #define MVFR0_A_SIMD_BIT	(0)
 #define MVFR0_A_SIMD_MASK	(0xf << MVFR0_A_SIMD_BIT)
+#define MVFR0_SP_BIT		(4)
+#define MVFR0_SP_MASK		(0xf << MVFR0_SP_BIT)
+#define MVFR0_DP_BIT		(8)
+#define MVFR0_DP_MASK		(0xf << MVFR0_DP_BIT)
 
 /* Bit patterns for decoding the packaged operation descriptors */
 #define VFPOPDESC_LENGTH_BIT	(9)
diff --git a/arch/arm/include/uapi/asm/unistd.h b/arch/arm/include/uapi/asm/unistd.h
index 3aaa75c..705bb76 100644
--- a/arch/arm/include/uapi/asm/unistd.h
+++ b/arch/arm/include/uapi/asm/unistd.h
@@ -412,6 +412,7 @@
 #define __NR_seccomp			(__NR_SYSCALL_BASE+383)
 #define __NR_getrandom			(__NR_SYSCALL_BASE+384)
 #define __NR_memfd_create		(__NR_SYSCALL_BASE+385)
+#define __NR_bpf			(__NR_SYSCALL_BASE+386)
 
 /*
  * The following SWIs are ARM private.
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 70b7307..2ac3920 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -47,6 +47,7 @@
 obj-$(CONFIG_HAVE_ARM_SCU)	+= smp_scu.o
 obj-$(CONFIG_HAVE_ARM_TWD)	+= smp_twd.o
 obj-$(CONFIG_ARM_ARCH_TIMER)	+= arch_timer.o
+obj-$(CONFIG_FUNCTION_TRACER)	+= entry-ftrace.o
 obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o insn.o
 obj-$(CONFIG_FUNCTION_GRAPH_TRACER)	+= ftrace.o insn.o
 obj-$(CONFIG_JUMP_LABEL)	+= jump_label.o insn.o patch.o
@@ -84,6 +85,7 @@
 obj-$(CONFIG_IWMMXT)		+= iwmmxt.o
 obj-$(CONFIG_PERF_EVENTS)	+= perf_regs.o
 obj-$(CONFIG_HW_PERF_EVENTS)	+= perf_event.o perf_event_cpu.o
+CFLAGS_pj4-cp0.o		:= -marm
 AFLAGS_iwmmxt.o			:= -Wa,-mcpu=iwmmxt
 obj-$(CONFIG_ARM_CPU_TOPOLOGY)  += topology.o
 
diff --git a/arch/arm/kernel/atags_compat.c b/arch/arm/kernel/atags_compat.c
index 5236ad3..05c28b1 100644
--- a/arch/arm/kernel/atags_compat.c
+++ b/arch/arm/kernel/atags_compat.c
@@ -97,8 +97,7 @@
 	struct tag *tag = taglist;
 
 	if (params->u1.s.page_size != PAGE_SIZE) {
-		printk(KERN_WARNING "Warning: bad configuration page, "
-		       "trying to continue\n");
+		pr_warn("Warning: bad configuration page, trying to continue\n");
 		return;
 	}
 
@@ -109,8 +108,7 @@
 	    params->u1.s.nr_pages != 0x04000 &&
 	    params->u1.s.nr_pages != 0x08000 &&
 	    params->u1.s.nr_pages != 0x10000) {
-		printk(KERN_WARNING "Warning: bad NeTTrom parameters "
-		       "detected, using defaults\n");
+		pr_warn("Warning: bad NeTTrom parameters detected, using defaults\n");
 
 		params->u1.s.nr_pages = 0x1000;	/* 16MB */
 		params->u1.s.ramdisk_size = 0;
diff --git a/arch/arm/kernel/atags_parse.c b/arch/arm/kernel/atags_parse.c
index 528f8af..68c6ae0 100644
--- a/arch/arm/kernel/atags_parse.c
+++ b/arch/arm/kernel/atags_parse.c
@@ -167,8 +167,7 @@
 {
 	for (; t->hdr.size; t = tag_next(t))
 		if (!parse_tag(t))
-			printk(KERN_WARNING
-				"Ignoring unrecognised tag 0x%08x\n",
+			pr_warn("Ignoring unrecognised tag 0x%08x\n",
 				t->hdr.tag);
 }
 
@@ -193,7 +192,7 @@
 	 */
 	for_each_machine_desc(p)
 		if (machine_nr == p->nr) {
-			printk("Machine: %s\n", p->name);
+			pr_info("Machine: %s\n", p->name);
 			mdesc = p;
 			break;
 		}
diff --git a/arch/arm/kernel/atags_proc.c b/arch/arm/kernel/atags_proc.c
index c7ff807..5a33790 100644
--- a/arch/arm/kernel/atags_proc.c
+++ b/arch/arm/kernel/atags_proc.c
@@ -41,7 +41,7 @@
 	size_t size;
 
 	if (tag->hdr.tag != ATAG_CORE) {
-		printk(KERN_INFO "No ATAGs?");
+		pr_info("No ATAGs?");
 		return -EINVAL;
 	}
 
@@ -68,7 +68,7 @@
 
 nomem:
 	kfree(b);
-	printk(KERN_ERR "Exporting ATAGs: not enough memory\n");
+	pr_err("Exporting ATAGs: not enough memory\n");
 
 	return -ENOMEM;
 }
diff --git a/arch/arm/kernel/bios32.c b/arch/arm/kernel/bios32.c
index 17a26c1..391ffdf 100644
--- a/arch/arm/kernel/bios32.c
+++ b/arch/arm/kernel/bios32.c
@@ -355,7 +355,7 @@
 	/*
 	 * Report what we did for this bus
 	 */
-	printk(KERN_INFO "PCI: bus%d: Fast back to back transfers %sabled\n",
+	pr_info("PCI: bus%d: Fast back to back transfers %sabled\n",
 		bus->number, (features & PCI_COMMAND_FAST_BACK) ? "en" : "dis");
 }
 EXPORT_SYMBOL(pcibios_fixup_bus);
diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S
index 9f899d8..e51833f 100644
--- a/arch/arm/kernel/calls.S
+++ b/arch/arm/kernel/calls.S
@@ -395,6 +395,7 @@
 		CALL(sys_seccomp)
 		CALL(sys_getrandom)
 /* 385 */	CALL(sys_memfd_create)
+		CALL(sys_bpf)
 #ifndef syscalls_counted
 .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
 #define syscalls_counted
diff --git a/arch/arm/kernel/dma-isa.c b/arch/arm/kernel/dma-isa.c
index 360bb6d..84363fe 100644
--- a/arch/arm/kernel/dma-isa.c
+++ b/arch/arm/kernel/dma-isa.c
@@ -213,8 +213,8 @@
 		for (chan = 0; chan < 8; chan++) {
 			int ret = isa_dma_add(chan, &isa_dma[chan]);
 			if (ret)
-				printk(KERN_ERR "ISADMA%u: unable to register: %d\n",
-					chan, ret);
+				pr_err("ISADMA%u: unable to register: %d\n",
+				       chan, ret);
 		}
 
 		request_dma(DMA_ISA_CASCADE, "cascade");
diff --git a/arch/arm/kernel/dma.c b/arch/arm/kernel/dma.c
index 7b829d9..e651c4d 100644
--- a/arch/arm/kernel/dma.c
+++ b/arch/arm/kernel/dma.c
@@ -79,7 +79,7 @@
 	return ret;
 
 bad_dma:
-	printk(KERN_ERR "dma: trying to allocate DMA%d\n", chan);
+	pr_err("dma: trying to allocate DMA%d\n", chan);
 	return -EINVAL;
 
 busy:
@@ -100,7 +100,7 @@
 		goto bad_dma;
 
 	if (dma->active) {
-		printk(KERN_ERR "dma%d: freeing active DMA\n", chan);
+		pr_err("dma%d: freeing active DMA\n", chan);
 		dma->d_ops->disable(chan, dma);
 		dma->active = 0;
 	}
@@ -111,11 +111,11 @@
 		return;
 	}
 
-	printk(KERN_ERR "dma%d: trying to free free DMA\n", chan);
+	pr_err("dma%d: trying to free free DMA\n", chan);
 	return;
 
 bad_dma:
-	printk(KERN_ERR "dma: trying to free DMA%d\n", chan);
+	pr_err("dma: trying to free DMA%d\n", chan);
 }
 EXPORT_SYMBOL(free_dma);
 
@@ -126,8 +126,7 @@
 	dma_t *dma = dma_channel(chan);
 
 	if (dma->active)
-		printk(KERN_ERR "dma%d: altering DMA SG while "
-		       "DMA active\n", chan);
+		pr_err("dma%d: altering DMA SG while DMA active\n", chan);
 
 	dma->sg = sg;
 	dma->sgcount = nr_sg;
@@ -144,8 +143,7 @@
 	dma_t *dma = dma_channel(chan);
 
 	if (dma->active)
-		printk(KERN_ERR "dma%d: altering DMA address while "
-		       "DMA active\n", chan);
+		pr_err("dma%d: altering DMA address while DMA active\n", chan);
 
 	dma->sg = NULL;
 	dma->addr = addr;
@@ -162,8 +160,7 @@
 	dma_t *dma = dma_channel(chan);
 
 	if (dma->active)
-		printk(KERN_ERR "dma%d: altering DMA count while "
-		       "DMA active\n", chan);
+		pr_err("dma%d: altering DMA count while DMA active\n", chan);
 
 	dma->sg = NULL;
 	dma->count = count;
@@ -178,8 +175,7 @@
 	dma_t *dma = dma_channel(chan);
 
 	if (dma->active)
-		printk(KERN_ERR "dma%d: altering DMA mode while "
-		       "DMA active\n", chan);
+		pr_err("dma%d: altering DMA mode while DMA active\n", chan);
 
 	dma->dma_mode = mode;
 	dma->invalid = 1;
@@ -202,7 +198,7 @@
 	return;
 
 free_dma:
-	printk(KERN_ERR "dma%d: trying to enable free DMA\n", chan);
+	pr_err("dma%d: trying to enable free DMA\n", chan);
 	BUG();
 }
 EXPORT_SYMBOL(enable_dma);
@@ -223,7 +219,7 @@
 	return;
 
 free_dma:
-	printk(KERN_ERR "dma%d: trying to disable free DMA\n", chan);
+	pr_err("dma%d: trying to disable free DMA\n", chan);
 	BUG();
 }
 EXPORT_SYMBOL(disable_dma);
@@ -240,7 +236,7 @@
 
 void set_dma_page(unsigned int chan, char pagenr)
 {
-	printk(KERN_ERR "dma%d: trying to set_dma_page\n", chan);
+	pr_err("dma%d: trying to set_dma_page\n", chan);
 }
 EXPORT_SYMBOL(set_dma_page);
 
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index 6bb09d4..f8ccc21 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -109,241 +109,6 @@
 #undef CALL
 #define CALL(x) .long x
 
-#ifdef CONFIG_FUNCTION_TRACER
-/*
- * When compiling with -pg, gcc inserts a call to the mcount routine at the
- * start of every function.  In mcount, apart from the function's address (in
- * lr), we need to get hold of the function's caller's address.
- *
- * Older GCCs (pre-4.4) inserted a call to a routine called mcount like this:
- *
- *	bl	mcount
- *
- * These versions have the limitation that in order for the mcount routine to
- * be able to determine the function's caller's address, an APCS-style frame
- * pointer (which is set up with something like the code below) is required.
- *
- *	mov     ip, sp
- *	push    {fp, ip, lr, pc}
- *	sub     fp, ip, #4
- *
- * With EABI, these frame pointers are not available unless -mapcs-frame is
- * specified, and if building as Thumb-2, not even then.
- *
- * Newer GCCs (4.4+) solve this problem by introducing a new version of mcount,
- * with call sites like:
- *
- *	push	{lr}
- *	bl	__gnu_mcount_nc
- *
- * With these compilers, frame pointers are not necessary.
- *
- * mcount can be thought of as a function called in the middle of a subroutine
- * call.  As such, it needs to be transparent for both the caller and the
- * callee: the original lr needs to be restored when leaving mcount, and no
- * registers should be clobbered.  (In the __gnu_mcount_nc implementation, we
- * clobber the ip register.  This is OK because the ARM calling convention
- * allows it to be clobbered in subroutines and doesn't use it to hold
- * parameters.)
- *
- * When using dynamic ftrace, we patch out the mcount call by a "mov r0, r0"
- * for the mcount case, and a "pop {lr}" for the __gnu_mcount_nc case (see
- * arch/arm/kernel/ftrace.c).
- */
-
-#ifndef CONFIG_OLD_MCOUNT
-#if (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 4))
-#error Ftrace requires CONFIG_FRAME_POINTER=y with GCC older than 4.4.0.
-#endif
-#endif
-
-.macro mcount_adjust_addr rd, rn
-	bic	\rd, \rn, #1		@ clear the Thumb bit if present
-	sub	\rd, \rd, #MCOUNT_INSN_SIZE
-.endm
-
-.macro __mcount suffix
-	mcount_enter
-	ldr	r0, =ftrace_trace_function
-	ldr	r2, [r0]
-	adr	r0, .Lftrace_stub
-	cmp	r0, r2
-	bne	1f
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-	ldr     r1, =ftrace_graph_return
-	ldr     r2, [r1]
-	cmp     r0, r2
-	bne     ftrace_graph_caller\suffix
-
-	ldr     r1, =ftrace_graph_entry
-	ldr     r2, [r1]
-	ldr     r0, =ftrace_graph_entry_stub
-	cmp     r0, r2
-	bne     ftrace_graph_caller\suffix
-#endif
-
-	mcount_exit
-
-1: 	mcount_get_lr	r1			@ lr of instrumented func
-	mcount_adjust_addr	r0, lr		@ instrumented function
-	adr	lr, BSYM(2f)
-	mov	pc, r2
-2:	mcount_exit
-.endm
-
-.macro __ftrace_caller suffix
-	mcount_enter
-
-	mcount_get_lr	r1			@ lr of instrumented func
-	mcount_adjust_addr	r0, lr		@ instrumented function
-
-	.globl ftrace_call\suffix
-ftrace_call\suffix:
-	bl	ftrace_stub
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-	.globl ftrace_graph_call\suffix
-ftrace_graph_call\suffix:
-	mov	r0, r0
-#endif
-
-	mcount_exit
-.endm
-
-.macro __ftrace_graph_caller
-	sub	r0, fp, #4		@ &lr of instrumented routine (&parent)
-#ifdef CONFIG_DYNAMIC_FTRACE
-	@ called from __ftrace_caller, saved in mcount_enter
-	ldr	r1, [sp, #16]		@ instrumented routine (func)
-	mcount_adjust_addr	r1, r1
-#else
-	@ called from __mcount, untouched in lr
-	mcount_adjust_addr	r1, lr	@ instrumented routine (func)
-#endif
-	mov	r2, fp			@ frame pointer
-	bl	prepare_ftrace_return
-	mcount_exit
-.endm
-
-#ifdef CONFIG_OLD_MCOUNT
-/*
- * mcount
- */
-
-.macro mcount_enter
-	stmdb	sp!, {r0-r3, lr}
-.endm
-
-.macro mcount_get_lr reg
-	ldr	\reg, [fp, #-4]
-.endm
-
-.macro mcount_exit
-	ldr	lr, [fp, #-4]
-	ldmia	sp!, {r0-r3, pc}
-.endm
-
-ENTRY(mcount)
-#ifdef CONFIG_DYNAMIC_FTRACE
-	stmdb	sp!, {lr}
-	ldr	lr, [fp, #-4]
-	ldmia	sp!, {pc}
-#else
-	__mcount _old
-#endif
-ENDPROC(mcount)
-
-#ifdef CONFIG_DYNAMIC_FTRACE
-ENTRY(ftrace_caller_old)
-	__ftrace_caller _old
-ENDPROC(ftrace_caller_old)
-#endif
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-ENTRY(ftrace_graph_caller_old)
-	__ftrace_graph_caller
-ENDPROC(ftrace_graph_caller_old)
-#endif
-
-.purgem mcount_enter
-.purgem mcount_get_lr
-.purgem mcount_exit
-#endif
-
-/*
- * __gnu_mcount_nc
- */
-
-.macro mcount_enter
-/*
- * This pad compensates for the push {lr} at the call site.  Note that we are
- * unable to unwind through a function which does not otherwise save its lr.
- */
- UNWIND(.pad	#4)
-	stmdb	sp!, {r0-r3, lr}
- UNWIND(.save	{r0-r3, lr})
-.endm
-
-.macro mcount_get_lr reg
-	ldr	\reg, [sp, #20]
-.endm
-
-.macro mcount_exit
-	ldmia	sp!, {r0-r3, ip, lr}
-	ret	ip
-.endm
-
-ENTRY(__gnu_mcount_nc)
-UNWIND(.fnstart)
-#ifdef CONFIG_DYNAMIC_FTRACE
-	mov	ip, lr
-	ldmia	sp!, {lr}
-	ret	ip
-#else
-	__mcount
-#endif
-UNWIND(.fnend)
-ENDPROC(__gnu_mcount_nc)
-
-#ifdef CONFIG_DYNAMIC_FTRACE
-ENTRY(ftrace_caller)
-UNWIND(.fnstart)
-	__ftrace_caller
-UNWIND(.fnend)
-ENDPROC(ftrace_caller)
-#endif
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-ENTRY(ftrace_graph_caller)
-UNWIND(.fnstart)
-	__ftrace_graph_caller
-UNWIND(.fnend)
-ENDPROC(ftrace_graph_caller)
-#endif
-
-.purgem mcount_enter
-.purgem mcount_get_lr
-.purgem mcount_exit
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-	.globl return_to_handler
-return_to_handler:
-	stmdb	sp!, {r0-r3}
-	mov	r0, fp			@ frame pointer
-	bl	ftrace_return_to_handler
-	mov	lr, r0			@ r0 has real ret addr
-	ldmia	sp!, {r0-r3}
-	ret	lr
-#endif
-
-ENTRY(ftrace_stub)
-.Lftrace_stub:
-	ret	lr
-ENDPROC(ftrace_stub)
-
-#endif /* CONFIG_FUNCTION_TRACER */
-
 /*=============================================================================
  * SWI handler
  *-----------------------------------------------------------------------------
diff --git a/arch/arm/kernel/entry-ftrace.S b/arch/arm/kernel/entry-ftrace.S
new file mode 100644
index 0000000..fe57c73
--- /dev/null
+++ b/arch/arm/kernel/entry-ftrace.S
@@ -0,0 +1,243 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/assembler.h>
+#include <asm/ftrace.h>
+#include <asm/unwind.h>
+
+#include "entry-header.S"
+
+/*
+ * When compiling with -pg, gcc inserts a call to the mcount routine at the
+ * start of every function.  In mcount, apart from the function's address (in
+ * lr), we need to get hold of the function's caller's address.
+ *
+ * Older GCCs (pre-4.4) inserted a call to a routine called mcount like this:
+ *
+ *	bl	mcount
+ *
+ * These versions have the limitation that in order for the mcount routine to
+ * be able to determine the function's caller's address, an APCS-style frame
+ * pointer (which is set up with something like the code below) is required.
+ *
+ *	mov     ip, sp
+ *	push    {fp, ip, lr, pc}
+ *	sub     fp, ip, #4
+ *
+ * With EABI, these frame pointers are not available unless -mapcs-frame is
+ * specified, and if building as Thumb-2, not even then.
+ *
+ * Newer GCCs (4.4+) solve this problem by introducing a new version of mcount,
+ * with call sites like:
+ *
+ *	push	{lr}
+ *	bl	__gnu_mcount_nc
+ *
+ * With these compilers, frame pointers are not necessary.
+ *
+ * mcount can be thought of as a function called in the middle of a subroutine
+ * call.  As such, it needs to be transparent for both the caller and the
+ * callee: the original lr needs to be restored when leaving mcount, and no
+ * registers should be clobbered.  (In the __gnu_mcount_nc implementation, we
+ * clobber the ip register.  This is OK because the ARM calling convention
+ * allows it to be clobbered in subroutines and doesn't use it to hold
+ * parameters.)
+ *
+ * When using dynamic ftrace, we patch out the mcount call by a "mov r0, r0"
+ * for the mcount case, and a "pop {lr}" for the __gnu_mcount_nc case (see
+ * arch/arm/kernel/ftrace.c).
+ */
+
+#ifndef CONFIG_OLD_MCOUNT
+#if (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 4))
+#error Ftrace requires CONFIG_FRAME_POINTER=y with GCC older than 4.4.0.
+#endif
+#endif
+
+.macro mcount_adjust_addr rd, rn
+	bic	\rd, \rn, #1		@ clear the Thumb bit if present
+	sub	\rd, \rd, #MCOUNT_INSN_SIZE
+.endm
+
+.macro __mcount suffix
+	mcount_enter
+	ldr	r0, =ftrace_trace_function
+	ldr	r2, [r0]
+	adr	r0, .Lftrace_stub
+	cmp	r0, r2
+	bne	1f
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	ldr     r1, =ftrace_graph_return
+	ldr     r2, [r1]
+	cmp     r0, r2
+	bne     ftrace_graph_caller\suffix
+
+	ldr     r1, =ftrace_graph_entry
+	ldr     r2, [r1]
+	ldr     r0, =ftrace_graph_entry_stub
+	cmp     r0, r2
+	bne     ftrace_graph_caller\suffix
+#endif
+
+	mcount_exit
+
+1: 	mcount_get_lr	r1			@ lr of instrumented func
+	mcount_adjust_addr	r0, lr		@ instrumented function
+	adr	lr, BSYM(2f)
+	mov	pc, r2
+2:	mcount_exit
+.endm
+
+.macro __ftrace_caller suffix
+	mcount_enter
+
+	mcount_get_lr	r1			@ lr of instrumented func
+	mcount_adjust_addr	r0, lr		@ instrumented function
+
+	.globl ftrace_call\suffix
+ftrace_call\suffix:
+	bl	ftrace_stub
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	.globl ftrace_graph_call\suffix
+ftrace_graph_call\suffix:
+	mov	r0, r0
+#endif
+
+	mcount_exit
+.endm
+
+.macro __ftrace_graph_caller
+	sub	r0, fp, #4		@ &lr of instrumented routine (&parent)
+#ifdef CONFIG_DYNAMIC_FTRACE
+	@ called from __ftrace_caller, saved in mcount_enter
+	ldr	r1, [sp, #16]		@ instrumented routine (func)
+	mcount_adjust_addr	r1, r1
+#else
+	@ called from __mcount, untouched in lr
+	mcount_adjust_addr	r1, lr	@ instrumented routine (func)
+#endif
+	mov	r2, fp			@ frame pointer
+	bl	prepare_ftrace_return
+	mcount_exit
+.endm
+
+#ifdef CONFIG_OLD_MCOUNT
+/*
+ * mcount
+ */
+
+.macro mcount_enter
+	stmdb	sp!, {r0-r3, lr}
+.endm
+
+.macro mcount_get_lr reg
+	ldr	\reg, [fp, #-4]
+.endm
+
+.macro mcount_exit
+	ldr	lr, [fp, #-4]
+	ldmia	sp!, {r0-r3, pc}
+.endm
+
+ENTRY(mcount)
+#ifdef CONFIG_DYNAMIC_FTRACE
+	stmdb	sp!, {lr}
+	ldr	lr, [fp, #-4]
+	ldmia	sp!, {pc}
+#else
+	__mcount _old
+#endif
+ENDPROC(mcount)
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+ENTRY(ftrace_caller_old)
+	__ftrace_caller _old
+ENDPROC(ftrace_caller_old)
+#endif
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ENTRY(ftrace_graph_caller_old)
+	__ftrace_graph_caller
+ENDPROC(ftrace_graph_caller_old)
+#endif
+
+.purgem mcount_enter
+.purgem mcount_get_lr
+.purgem mcount_exit
+#endif
+
+/*
+ * __gnu_mcount_nc
+ */
+
+.macro mcount_enter
+/*
+ * This pad compensates for the push {lr} at the call site.  Note that we are
+ * unable to unwind through a function which does not otherwise save its lr.
+ */
+ UNWIND(.pad	#4)
+	stmdb	sp!, {r0-r3, lr}
+ UNWIND(.save	{r0-r3, lr})
+.endm
+
+.macro mcount_get_lr reg
+	ldr	\reg, [sp, #20]
+.endm
+
+.macro mcount_exit
+	ldmia	sp!, {r0-r3, ip, lr}
+	ret	ip
+.endm
+
+ENTRY(__gnu_mcount_nc)
+UNWIND(.fnstart)
+#ifdef CONFIG_DYNAMIC_FTRACE
+	mov	ip, lr
+	ldmia	sp!, {lr}
+	ret	ip
+#else
+	__mcount
+#endif
+UNWIND(.fnend)
+ENDPROC(__gnu_mcount_nc)
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+ENTRY(ftrace_caller)
+UNWIND(.fnstart)
+	__ftrace_caller
+UNWIND(.fnend)
+ENDPROC(ftrace_caller)
+#endif
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ENTRY(ftrace_graph_caller)
+UNWIND(.fnstart)
+	__ftrace_graph_caller
+UNWIND(.fnend)
+ENDPROC(ftrace_graph_caller)
+#endif
+
+.purgem mcount_enter
+.purgem mcount_get_lr
+.purgem mcount_exit
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	.globl return_to_handler
+return_to_handler:
+	stmdb	sp!, {r0-r3}
+	mov	r0, fp			@ frame pointer
+	bl	ftrace_return_to_handler
+	mov	lr, r0			@ r0 has real ret addr
+	ldmia	sp!, {r0-r3}
+	ret	lr
+#endif
+
+ENTRY(ftrace_stub)
+.Lftrace_stub:
+	ret	lr
+ENDPROC(ftrace_stub)
diff --git a/arch/arm/kernel/etm.c b/arch/arm/kernel/etm.c
index 131a6ab..8b96972 100644
--- a/arch/arm/kernel/etm.c
+++ b/arch/arm/kernel/etm.c
@@ -213,7 +213,7 @@
 	int length;
 
 	if (!t->etb_regs) {
-		printk(KERN_INFO "No tracing hardware found\n");
+		pr_info("No tracing hardware found\n");
 		return;
 	}
 
@@ -229,11 +229,11 @@
 
 	etb_writel(t, first, ETBR_READADDR);
 
-	printk(KERN_INFO "Trace buffer contents length: %d\n", length);
-	printk(KERN_INFO "--- ETB buffer begin ---\n");
+	pr_info("Trace buffer contents length: %d\n", length);
+	pr_info("--- ETB buffer begin ---\n");
 	for (; length; length--)
 		printk("%08x", cpu_to_be32(etb_readl(t, ETBR_READMEM)));
-	printk(KERN_INFO "\n--- ETB buffer end ---\n");
+	pr_info("\n--- ETB buffer end ---\n");
 
 	/* deassert the overflow bit */
 	etb_writel(t, 1, ETBR_CTRL);
@@ -633,14 +633,14 @@
 
 	retval = amba_driver_register(&etb_driver);
 	if (retval) {
-		printk(KERN_ERR "Failed to register etb\n");
+		pr_err("Failed to register etb\n");
 		return retval;
 	}
 
 	retval = amba_driver_register(&etm_driver);
 	if (retval) {
 		amba_driver_unregister(&etb_driver);
-		printk(KERN_ERR "Failed to probe etm\n");
+		pr_err("Failed to probe etm\n");
 		return retval;
 	}
 
diff --git a/arch/arm/kernel/fiq.c b/arch/arm/kernel/fiq.c
index b37752a..059c3da 100644
--- a/arch/arm/kernel/fiq.c
+++ b/arch/arm/kernel/fiq.c
@@ -124,7 +124,7 @@
 void release_fiq(struct fiq_handler *f)
 {
 	if (current_fiq != f) {
-		printk(KERN_ERR "%s FIQ trying to release %s FIQ\n",
+		pr_err("%s FIQ trying to release %s FIQ\n",
 		       f->name, current_fiq->name);
 		dump_stack();
 		return;
diff --git a/arch/arm/kernel/io.c b/arch/arm/kernel/io.c
index 9203cf8..eedefe0 100644
--- a/arch/arm/kernel/io.c
+++ b/arch/arm/kernel/io.c
@@ -51,6 +51,7 @@
 		from++;
 	}
 }
+EXPORT_SYMBOL(_memcpy_fromio);
 
 /*
  * Copy data from "real" memory space to IO memory space.
@@ -66,6 +67,7 @@
 		to++;
 	}
 }
+EXPORT_SYMBOL(_memcpy_toio);
 
 /*
  * "memset" on IO memory space.
@@ -79,7 +81,4 @@
 		dst++;
 	}
 }
-
-EXPORT_SYMBOL(_memcpy_fromio);
-EXPORT_SYMBOL(_memcpy_toio);
 EXPORT_SYMBOL(_memset_io);
diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c
index 7c81ec4..ad857bad 100644
--- a/arch/arm/kernel/irq.c
+++ b/arch/arm/kernel/irq.c
@@ -31,6 +31,7 @@
 #include <linux/smp.h>
 #include <linux/init.h>
 #include <linux/seq_file.h>
+#include <linux/ratelimit.h>
 #include <linux/errno.h>
 #include <linux/list.h>
 #include <linux/kallsyms.h>
@@ -82,7 +83,7 @@
 	unsigned long clr = 0, set = IRQ_NOREQUEST | IRQ_NOPROBE | IRQ_NOAUTOEN;
 
 	if (irq >= nr_irqs) {
-		printk(KERN_ERR "Trying to set irq flags for IRQ%d\n", irq);
+		pr_err("Trying to set irq flags for IRQ%d\n", irq);
 		return;
 	}
 
@@ -135,7 +136,6 @@
 #endif
 
 #ifdef CONFIG_HOTPLUG_CPU
-
 static bool migrate_one_irq(struct irq_desc *desc)
 {
 	struct irq_data *d = irq_desc_get_irq_data(desc);
@@ -187,8 +187,8 @@
 		affinity_broken = migrate_one_irq(desc);
 		raw_spin_unlock(&desc->lock);
 
-		if (affinity_broken && printk_ratelimit())
-			pr_warn("IRQ%u no longer affine to CPU%u\n",
+		if (affinity_broken)
+			pr_warn_ratelimited("IRQ%u no longer affine to CPU%u\n",
 				i, smp_processor_id());
 	}
 
diff --git a/arch/arm/kernel/iwmmxt.S b/arch/arm/kernel/iwmmxt.S
index ad58e56..49fadbd 100644
--- a/arch/arm/kernel/iwmmxt.S
+++ b/arch/arm/kernel/iwmmxt.S
@@ -58,6 +58,7 @@
 #define MMX_SIZE		(0x98)
 
 	.text
+	.arm
 
 /*
  * Lazy switching of Concan coprocessor context
@@ -182,6 +183,8 @@
 	tmcr	wCon, r2
 	ret	lr
 
+ENDPROC(iwmmxt_task_enable)
+
 /*
  * Back up Concan regs to save area and disable access to them
  * (mainly for gdb or sleep mode usage)
@@ -232,6 +235,8 @@
 1:	msr	cpsr_c, ip			@ restore interrupt mode
 	ldmfd	sp!, {r4, pc}
 
+ENDPROC(iwmmxt_task_disable)
+
 /*
  * Copy Concan state to given memory address
  *
@@ -268,6 +273,8 @@
 	msr	cpsr_c, ip			@ restore interrupt mode
 	ret	r3
 
+ENDPROC(iwmmxt_task_copy)
+
 /*
  * Restore Concan state from given memory address
  *
@@ -304,6 +311,8 @@
 	msr	cpsr_c, ip			@ restore interrupt mode
 	ret	r3
 
+ENDPROC(iwmmxt_task_restore)
+
 /*
  * Concan handling on task switch
  *
@@ -335,6 +344,8 @@
 	mrc	p15, 0, r1, c2, c0, 0
 	sub	pc, lr, r1, lsr #32		@ cpwait and return
 
+ENDPROC(iwmmxt_task_switch)
+
 /*
  * Remove Concan ownership of given task
  *
@@ -353,6 +364,8 @@
 	msr	cpsr_c, r2			@ restore interrupts
 	ret	lr
 
+ENDPROC(iwmmxt_task_release)
+
 	.data
 concan_owner:
 	.word	0
diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c
index 4423a56..de2b085 100644
--- a/arch/arm/kernel/machine_kexec.c
+++ b/arch/arm/kernel/machine_kexec.c
@@ -127,12 +127,12 @@
 		msecs--;
 	}
 	if (atomic_read(&waiting_for_crash_ipi) > 0)
-		printk(KERN_WARNING "Non-crashing CPUs did not react to IPI\n");
+		pr_warn("Non-crashing CPUs did not react to IPI\n");
 
 	crash_save_cpu(regs, smp_processor_id());
 	machine_kexec_mask_interrupts();
 
-	printk(KERN_INFO "Loading crashdump kernel...\n");
+	pr_info("Loading crashdump kernel...\n");
 }
 
 /*
@@ -178,7 +178,7 @@
 	reboot_entry_phys = (unsigned long)reboot_entry +
 		(reboot_code_buffer_phys - (unsigned long)reboot_code_buffer);
 
-	printk(KERN_INFO "Bye!\n");
+	pr_info("Bye!\n");
 
 	if (kexec_reinit)
 		kexec_reinit();
diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c
index 6a4dffe..bea7db9 100644
--- a/arch/arm/kernel/module.c
+++ b/arch/arm/kernel/module.c
@@ -251,7 +251,7 @@
 #endif
 
 		default:
-			printk(KERN_ERR "%s: unknown relocation: %u\n",
+			pr_err("%s: unknown relocation: %u\n",
 			       module->name, ELF32_R_TYPE(rel->r_info));
 			return -ENOEXEC;
 		}
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index fe972a2..fdfa3a7 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -51,8 +51,8 @@
 static const char *processor_modes[] __maybe_unused = {
   "USER_26", "FIQ_26" , "IRQ_26" , "SVC_26" , "UK4_26" , "UK5_26" , "UK6_26" , "UK7_26" ,
   "UK8_26" , "UK9_26" , "UK10_26", "UK11_26", "UK12_26", "UK13_26", "UK14_26", "UK15_26",
-  "USER_32", "FIQ_32" , "IRQ_32" , "SVC_32" , "UK4_32" , "UK5_32" , "UK6_32" , "ABT_32" ,
-  "UK8_32" , "UK9_32" , "UK10_32", "UND_32" , "UK12_32", "UK13_32", "UK14_32", "SYS_32"
+  "USER_32", "FIQ_32" , "IRQ_32" , "SVC_32" , "UK4_32" , "UK5_32" , "MON_32" , "ABT_32" ,
+  "UK8_32" , "UK9_32" , "HYP_32", "UND_32" , "UK12_32", "UK13_32", "UK14_32", "SYS_32"
 };
 
 static const char *isa_modes[] __maybe_unused = {
diff --git a/arch/arm/kernel/return_address.c b/arch/arm/kernel/return_address.c
index 98ea4b7..24b4a04 100644
--- a/arch/arm/kernel/return_address.c
+++ b/arch/arm/kernel/return_address.c
@@ -39,13 +39,12 @@
 {
 	struct return_address_data data;
 	struct stackframe frame;
-	register unsigned long current_sp asm ("sp");
 
 	data.level = level + 2;
 	data.addr = NULL;
 
 	frame.fp = (unsigned long)__builtin_frame_address(0);
-	frame.sp = current_sp;
+	frame.sp = current_stack_pointer;
 	frame.lr = (unsigned long)__builtin_return_address(0);
 	frame.pc = (unsigned long)return_address;
 
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index c031063..8361652 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -900,6 +900,7 @@
 		mdesc = setup_machine_tags(__atags_pointer, __machine_arch_type);
 	machine_desc = mdesc;
 	machine_name = mdesc->name;
+	dump_stack_set_arch_desc("%s", mdesc->name);
 
 	if (mdesc->reboot_mode != REBOOT_HARD)
 		reboot_mode = mdesc->reboot_mode;
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index bd19834..8aa6f1b 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -592,7 +592,6 @@
 				}
 				syscall = 0;
 			} else if (thread_flags & _TIF_UPROBE) {
-				clear_thread_flag(TIF_UPROBE);
 				uprobe_notify_resume(regs);
 			} else {
 				clear_thread_flag(TIF_NOTIFY_RESUME);
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 13396d3..5e6052e 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -225,7 +225,7 @@
 		pr_err("CPU%u: cpu didn't die\n", cpu);
 		return;
 	}
-	printk(KERN_NOTICE "CPU%u: shutdown\n", cpu);
+	pr_notice("CPU%u: shutdown\n", cpu);
 
 	/*
 	 * platform_cpu_kill() is generally expected to do the powering off
@@ -235,7 +235,7 @@
 	 * the requesting CPU and the dying CPU actually losing power.
 	 */
 	if (!platform_cpu_kill(cpu))
-		printk("CPU%u: unable to kill\n", cpu);
+		pr_err("CPU%u: unable to kill\n", cpu);
 }
 
 /*
@@ -351,7 +351,7 @@
 
 	cpu_init();
 
-	printk("CPU%u: Booted secondary processor\n", cpu);
+	pr_debug("CPU%u: Booted secondary processor\n", cpu);
 
 	preempt_disable();
 	trace_hardirqs_off();
@@ -387,9 +387,6 @@
 
 void __init smp_cpus_done(unsigned int max_cpus)
 {
-	printk(KERN_INFO "SMP: Total of %d processors activated.\n",
-	       num_online_cpus());
-
 	hyp_mode_check();
 }
 
@@ -521,7 +518,7 @@
 	if (system_state == SYSTEM_BOOTING ||
 	    system_state == SYSTEM_RUNNING) {
 		raw_spin_lock(&stop_lock);
-		printk(KERN_CRIT "CPU%u: stopping\n", cpu);
+		pr_crit("CPU%u: stopping\n", cpu);
 		dump_stack();
 		raw_spin_unlock(&stop_lock);
 	}
@@ -615,8 +612,8 @@
 		break;
 
 	default:
-		printk(KERN_CRIT "CPU%u: Unknown IPI message 0x%x\n",
-		       cpu, ipinr);
+		pr_crit("CPU%u: Unknown IPI message 0x%x\n",
+		        cpu, ipinr);
 		break;
 	}
 
diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c
index 9309021..172c6a05 100644
--- a/arch/arm/kernel/smp_twd.c
+++ b/arch/arm/kernel/smp_twd.c
@@ -199,7 +199,7 @@
 	 * the timer ticks
 	 */
 	if (twd_timer_rate == 0) {
-		printk(KERN_INFO "Calibrating local timer... ");
+		pr_info("Calibrating local timer... ");
 
 		/* Wait for a tick to start */
 		waitjiffies = get_jiffies_64() + 1;
@@ -223,7 +223,7 @@
 
 		twd_timer_rate = (0xFFFFFFFFU - count) * (HZ / 5);
 
-		printk("%lu.%02luMHz.\n", twd_timer_rate / 1000000,
+		pr_cont("%lu.%02luMHz.\n", twd_timer_rate / 1000000,
 			(twd_timer_rate / 10000) % 100);
 	}
 }
diff --git a/arch/arm/kernel/stacktrace.c b/arch/arm/kernel/stacktrace.c
index f065eb05..92b7237 100644
--- a/arch/arm/kernel/stacktrace.c
+++ b/arch/arm/kernel/stacktrace.c
@@ -134,12 +134,10 @@
 		frame.pc = thread_saved_pc(tsk);
 #endif
 	} else {
-		register unsigned long current_sp asm ("sp");
-
 		/* We don't want this function nor the caller */
 		data.skip += 2;
 		frame.fp = (unsigned long)__builtin_frame_address(0);
-		frame.sp = current_sp;
+		frame.sp = current_stack_pointer;
 		frame.lr = (unsigned long)__builtin_return_address(0);
 		frame.pc = (unsigned long)__save_stack_trace;
 	}
diff --git a/arch/arm/kernel/swp_emulate.c b/arch/arm/kernel/swp_emulate.c
index 587fdfe..afdd51e 100644
--- a/arch/arm/kernel/swp_emulate.c
+++ b/arch/arm/kernel/swp_emulate.c
@@ -260,7 +260,7 @@
 		return -ENOMEM;
 #endif /* CONFIG_PROC_FS */
 
-	printk(KERN_NOTICE "Registering SWP/SWPB emulation handler\n");
+	pr_notice("Registering SWP/SWPB emulation handler\n");
 	register_undef_hook(&swp_hook);
 
 	return 0;
diff --git a/arch/arm/kernel/thumbee.c b/arch/arm/kernel/thumbee.c
index 80f0d69..8ff8dbf 100644
--- a/arch/arm/kernel/thumbee.c
+++ b/arch/arm/kernel/thumbee.c
@@ -72,7 +72,7 @@
 	if ((pfr0 & 0x0000f000) != 0x00001000)
 		return 0;
 
-	printk(KERN_INFO "ThumbEE CPU extension supported.\n");
+	pr_info("ThumbEE CPU extension supported.\n");
 	elf_hwcap |= HWCAP_THUMBEE;
 	thread_register_notifier(&thumbee_notifier_block);
 
diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c
index 89cfdd6..08b7847 100644
--- a/arch/arm/kernel/topology.c
+++ b/arch/arm/kernel/topology.c
@@ -165,7 +165,7 @@
 
 	set_capacity_scale(cpu, cpu_capacity(cpu) / middle_capacity);
 
-	printk(KERN_INFO "CPU%u: update cpu_capacity %lu\n",
+	pr_info("CPU%u: update cpu_capacity %lu\n",
 		cpu, arch_scale_cpu_capacity(NULL, cpu));
 }
 
@@ -269,7 +269,7 @@
 
 	update_cpu_capacity(cpuid);
 
-	printk(KERN_INFO "CPU%u: thread %d, cpu %d, socket %d, mpidr %x\n",
+	pr_info("CPU%u: thread %d, cpu %d, socket %d, mpidr %x\n",
 		cpuid, cpu_topology[cpuid].thread_id,
 		cpu_topology[cpuid].core_id,
 		cpu_topology[cpuid].socket_id, mpidr);
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 0c8b108..788e23f 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -198,14 +198,14 @@
 	}
 
 	if (!fp) {
-		printk("no frame pointer");
+		pr_cont("no frame pointer");
 		ok = 0;
 	} else if (verify_stack(fp)) {
-		printk("invalid frame pointer 0x%08x", fp);
+		pr_cont("invalid frame pointer 0x%08x", fp);
 		ok = 0;
 	} else if (fp < (unsigned long)end_of_stack(tsk))
-		printk("frame pointer underflow");
-	printk("\n");
+		pr_cont("frame pointer underflow");
+	pr_cont("\n");
 
 	if (ok)
 		c_backtrace(fp, mode);
@@ -240,8 +240,8 @@
 	static int die_counter;
 	int ret;
 
-	printk(KERN_EMERG "Internal error: %s: %x [#%d]" S_PREEMPT S_SMP
-	       S_ISA "\n", str, err, ++die_counter);
+	pr_emerg("Internal error: %s: %x [#%d]" S_PREEMPT S_SMP S_ISA "\n",
+	         str, err, ++die_counter);
 
 	/* trap and error numbers are mostly meaningless on ARM */
 	ret = notify_die(DIE_OOPS, str, regs, err, tsk->thread.trap_no, SIGSEGV);
@@ -250,8 +250,8 @@
 
 	print_modules();
 	__show_regs(regs);
-	printk(KERN_EMERG "Process %.*s (pid: %d, stack limit = 0x%p)\n",
-		TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk), end_of_stack(tsk));
+	pr_emerg("Process %.*s (pid: %d, stack limit = 0x%p)\n",
+		 TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk), end_of_stack(tsk));
 
 	if (!user_mode(regs) || in_interrupt()) {
 		dump_mem(KERN_EMERG, "Stack: ", regs->ARM_sp,
@@ -446,7 +446,7 @@
 die_sig:
 #ifdef CONFIG_DEBUG_USER
 	if (user_debug & UDBG_UNDEFINED) {
-		printk(KERN_INFO "%s (%d): undefined instruction: pc=%p\n",
+		pr_info("%s (%d): undefined instruction: pc=%p\n",
 			current->comm, task_pid_nr(current), pc);
 		__show_regs(regs);
 		dump_instr(KERN_INFO, regs);
@@ -496,7 +496,7 @@
 {
 	console_verbose();
 
-	printk(KERN_CRIT "Bad mode in %s handler detected\n", handler[reason]);
+	pr_crit("Bad mode in %s handler detected\n", handler[reason]);
 
 	die("Oops - bad mode", regs, 0);
 	local_irq_disable();
@@ -516,7 +516,7 @@
 
 #ifdef CONFIG_DEBUG_USER
 	if (user_debug & UDBG_SYSCALL) {
-		printk(KERN_ERR "[%d] %s: obsolete system call %08x.\n",
+		pr_err("[%d] %s: obsolete system call %08x.\n",
 			task_pid_nr(current), current->comm, n);
 		dump_instr(KERN_ERR, regs);
 	}
@@ -533,8 +533,6 @@
 	return regs->ARM_r0;
 }
 
-static long do_cache_op_restart(struct restart_block *);
-
 static inline int
 __do_cache_op(unsigned long start, unsigned long end)
 {
@@ -543,24 +541,8 @@
 	do {
 		unsigned long chunk = min(PAGE_SIZE, end - start);
 
-		if (signal_pending(current)) {
-			struct thread_info *ti = current_thread_info();
-
-			ti->restart_block = (struct restart_block) {
-				.fn	= do_cache_op_restart,
-			};
-
-			ti->arm_restart_block = (struct arm_restart_block) {
-				{
-					.cache = {
-						.start	= start,
-						.end	= end,
-					},
-				},
-			};
-
-			return -ERESTART_RESTARTBLOCK;
-		}
+		if (fatal_signal_pending(current))
+			return 0;
 
 		ret = flush_cache_user_range(start, start + chunk);
 		if (ret)
@@ -573,15 +555,6 @@
 	return 0;
 }
 
-static long do_cache_op_restart(struct restart_block *unused)
-{
-	struct arm_restart_block *restart_block;
-
-	restart_block = &current_thread_info()->arm_restart_block;
-	return __do_cache_op(restart_block->cache.start,
-			     restart_block->cache.end);
-}
-
 static inline int
 do_cache_op(unsigned long start, unsigned long end, int flags)
 {
@@ -721,7 +694,7 @@
 	 * something catastrophic has happened
 	 */
 	if (user_debug & UDBG_SYSCALL) {
-		printk("[%d] %s: arm syscall %d\n",
+		pr_err("[%d] %s: arm syscall %d\n",
 		       task_pid_nr(current), current->comm, no);
 		dump_instr("", regs);
 		if (user_mode(regs)) {
@@ -780,8 +753,8 @@
 
 void __bad_xchg(volatile void *ptr, int size)
 {
-	printk("xchg: bad data size: pc 0x%p, ptr 0x%p, size %d\n",
-		__builtin_return_address(0), ptr, size);
+	pr_err("xchg: bad data size: pc 0x%p, ptr 0x%p, size %d\n",
+	       __builtin_return_address(0), ptr, size);
 	BUG();
 }
 EXPORT_SYMBOL(__bad_xchg);
@@ -798,8 +771,8 @@
 
 #ifdef CONFIG_DEBUG_USER
 	if (user_debug & UDBG_BADABORT) {
-		printk(KERN_ERR "[%d] %s: bad data abort: code %d instr 0x%08lx\n",
-			task_pid_nr(current), current->comm, code, instr);
+		pr_err("[%d] %s: bad data abort: code %d instr 0x%08lx\n",
+		       task_pid_nr(current), current->comm, code, instr);
 		dump_instr(KERN_ERR, regs);
 		show_pte(current->mm, addr);
 	}
@@ -815,29 +788,29 @@
 
 void __readwrite_bug(const char *fn)
 {
-	printk("%s called, but not implemented\n", fn);
+	pr_err("%s called, but not implemented\n", fn);
 	BUG();
 }
 EXPORT_SYMBOL(__readwrite_bug);
 
 void __pte_error(const char *file, int line, pte_t pte)
 {
-	printk("%s:%d: bad pte %08llx.\n", file, line, (long long)pte_val(pte));
+	pr_err("%s:%d: bad pte %08llx.\n", file, line, (long long)pte_val(pte));
 }
 
 void __pmd_error(const char *file, int line, pmd_t pmd)
 {
-	printk("%s:%d: bad pmd %08llx.\n", file, line, (long long)pmd_val(pmd));
+	pr_err("%s:%d: bad pmd %08llx.\n", file, line, (long long)pmd_val(pmd));
 }
 
 void __pgd_error(const char *file, int line, pgd_t pgd)
 {
-	printk("%s:%d: bad pgd %08llx.\n", file, line, (long long)pgd_val(pgd));
+	pr_err("%s:%d: bad pgd %08llx.\n", file, line, (long long)pgd_val(pgd));
 }
 
 asmlinkage void __div0(void)
 {
-	printk("Division by zero in kernel.\n");
+	pr_err("Division by zero in kernel.\n");
 	dump_stack();
 }
 EXPORT_SYMBOL(__div0);
diff --git a/arch/arm/kernel/unwind.c b/arch/arm/kernel/unwind.c
index cbb85c5..0bee233 100644
--- a/arch/arm/kernel/unwind.c
+++ b/arch/arm/kernel/unwind.c
@@ -471,7 +471,6 @@
 void unwind_backtrace(struct pt_regs *regs, struct task_struct *tsk)
 {
 	struct stackframe frame;
-	register unsigned long current_sp asm ("sp");
 
 	pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk);
 
@@ -485,7 +484,7 @@
 			frame.pc = regs->ARM_lr;
 	} else if (tsk == current) {
 		frame.fp = (unsigned long)__builtin_frame_address(0);
-		frame.sp = current_sp;
+		frame.sp = current_stack_pointer;
 		frame.lr = (unsigned long)__builtin_return_address(0);
 		frame.pc = (unsigned long)unwind_backtrace;
 	} else {
diff --git a/arch/arm/kernel/xscale-cp0.c b/arch/arm/kernel/xscale-cp0.c
index e42adc6..bdbb8853 100644
--- a/arch/arm/kernel/xscale-cp0.c
+++ b/arch/arm/kernel/xscale-cp0.c
@@ -157,15 +157,14 @@
 
 	if (cpu_has_iwmmxt()) {
 #ifndef CONFIG_IWMMXT
-		printk(KERN_WARNING "CAUTION: XScale iWMMXt coprocessor "
-			"detected, but kernel support is missing.\n");
+		pr_warn("CAUTION: XScale iWMMXt coprocessor detected, but kernel support is missing.\n");
 #else
-		printk(KERN_INFO "XScale iWMMXt coprocessor detected.\n");
+		pr_info("XScale iWMMXt coprocessor detected.\n");
 		elf_hwcap |= HWCAP_IWMMXT;
 		thread_register_notifier(&iwmmxt_notifier_block);
 #endif
 	} else {
-		printk(KERN_INFO "XScale DSP coprocessor detected.\n");
+		pr_info("XScale DSP coprocessor detected.\n");
 		thread_register_notifier(&dsp_notifier_block);
 		cp_access |= 1;
 	}
diff --git a/arch/arm/lib/copy_from_user.S b/arch/arm/lib/copy_from_user.S
index 66a477a..7a235b9 100644
--- a/arch/arm/lib/copy_from_user.S
+++ b/arch/arm/lib/copy_from_user.S
@@ -12,6 +12,7 @@
 
 #include <linux/linkage.h>
 #include <asm/assembler.h>
+#include <asm/unwind.h>
 
 /*
  * Prototype:
@@ -77,6 +78,10 @@
 	stmdb	sp!, {r0, r2, r3, \reg1, \reg2}
 	.endm
 
+	.macro usave reg1 reg2
+	UNWIND(	.save {r0, r2, r3, \reg1, \reg2}	)
+	.endm
+
 	.macro exit reg1 reg2
 	add	sp, sp, #8
 	ldmfd	sp!, {r0, \reg1, \reg2}
diff --git a/arch/arm/lib/copy_template.S b/arch/arm/lib/copy_template.S
index 3bc8eb8..652e4d9 100644
--- a/arch/arm/lib/copy_template.S
+++ b/arch/arm/lib/copy_template.S
@@ -53,6 +53,12 @@
  *	data as needed by the implementation including this code. Called
  *	upon code entry.
  *
+ * usave reg1 reg2
+ *
+ *	Unwind annotation macro is corresponding for 'enter' macro.
+ *	It tell unwinder that preserved some provided registers on the stack
+ *	and additional data by a prior 'enter' macro.
+ *
  * exit reg1 reg2
  *
  *	Restore registers with the values previously saved with the
@@ -67,7 +73,12 @@
  */
 
 
+	UNWIND(	.fnstart			)
 		enter	r4, lr
+	UNWIND(	.fnend				)
+
+	UNWIND(	.fnstart			)
+		usave	r4, lr			  @ in first stmdb block
 
 		subs	r2, r2, #4
 		blt	8f
@@ -79,6 +90,11 @@
 
 1:		subs	r2, r2, #(28)
 		stmfd	sp!, {r5 - r8}
+	UNWIND(	.fnend				)
+
+	UNWIND(	.fnstart			)
+		usave	r4, lr
+	UNWIND(	.save	{r5 - r8}		) @ in second stmfd block
 		blt	5f
 
 	CALGN(	ands	ip, r0, #31		)
@@ -144,7 +160,10 @@
 	CALGN(	bcs	2b			)
 
 7:		ldmfd	sp!, {r5 - r8}
+	UNWIND(	.fnend				) @ end of second stmfd block
 
+	UNWIND(	.fnstart			)
+		usave	r4, lr			  @ still in first stmdb block
 8:		movs	r2, r2, lsl #31
 		ldr1b	r1, r3, ne, abort=21f
 		ldr1b	r1, r4, cs, abort=21f
@@ -173,10 +192,13 @@
 		ldr1w	r1, lr, abort=21f
 		beq	17f
 		bgt	18f
+	UNWIND(	.fnend				)
 
 
 		.macro	forward_copy_shift pull push
 
+	UNWIND(	.fnstart			)
+		usave	r4, lr			  @ still in first stmdb block
 		subs	r2, r2, #28
 		blt	14f
 
@@ -187,7 +209,11 @@
 	CALGN(	bcc	15f			)
 
 11:		stmfd	sp!, {r5 - r9}
+	UNWIND(	.fnend				)
 
+	UNWIND(	.fnstart			)
+		usave	r4, lr
+	UNWIND(	.save	{r5 - r9}		) @ in new second stmfd block
 	PLD(	pld	[r1, #0]		)
 	PLD(	subs	r2, r2, #96		)
 	PLD(	pld	[r1, #28]		)
@@ -221,7 +247,10 @@
 	PLD(	bge	13b			)
 
 		ldmfd	sp!, {r5 - r9}
+	UNWIND(	.fnend				) @ end of the second stmfd block
 
+	UNWIND(	.fnstart			)
+		usave	r4, lr			  @ still in first stmdb block
 14:		ands	ip, r2, #28
 		beq	16f
 
@@ -236,6 +265,7 @@
 
 16:		sub	r1, r1, #(\push / 8)
 		b	8b
+	UNWIND(	.fnend				)
 
 		.endm
 
diff --git a/arch/arm/lib/copy_to_user.S b/arch/arm/lib/copy_to_user.S
index d066df6..a9d3db1 100644
--- a/arch/arm/lib/copy_to_user.S
+++ b/arch/arm/lib/copy_to_user.S
@@ -12,6 +12,7 @@
 
 #include <linux/linkage.h>
 #include <asm/assembler.h>
+#include <asm/unwind.h>
 
 /*
  * Prototype:
@@ -80,6 +81,10 @@
 	stmdb	sp!, {r0, r2, r3, \reg1, \reg2}
 	.endm
 
+	.macro usave reg1 reg2
+	UNWIND(	.save {r0, r2, r3, \reg1, \reg2}	)
+	.endm
+
 	.macro exit reg1 reg2
 	add	sp, sp, #8
 	ldmfd	sp!, {r0, \reg1, \reg2}
diff --git a/arch/arm/lib/memcpy.S b/arch/arm/lib/memcpy.S
index a9b9e22..7797e81 100644
--- a/arch/arm/lib/memcpy.S
+++ b/arch/arm/lib/memcpy.S
@@ -12,6 +12,7 @@
 
 #include <linux/linkage.h>
 #include <asm/assembler.h>
+#include <asm/unwind.h>
 
 #define LDR1W_SHIFT	0
 #define STR1W_SHIFT	0
@@ -48,6 +49,10 @@
 	stmdb sp!, {r0, \reg1, \reg2}
 	.endm
 
+	.macro usave reg1 reg2
+	UNWIND(	.save	{r0, \reg1, \reg2}	)
+	.endm
+
 	.macro exit reg1 reg2
 	ldmfd sp!, {r0, \reg1, \reg2}
 	.endm
diff --git a/arch/arm/lib/memmove.S b/arch/arm/lib/memmove.S
index d1fc0c0..69a9d47 100644
--- a/arch/arm/lib/memmove.S
+++ b/arch/arm/lib/memmove.S
@@ -12,6 +12,7 @@
 
 #include <linux/linkage.h>
 #include <asm/assembler.h>
+#include <asm/unwind.h>
 
 		.text
 
@@ -27,12 +28,17 @@
  */
 
 ENTRY(memmove)
+	UNWIND(	.fnstart			)
 
 		subs	ip, r0, r1
 		cmphi	r2, ip
 		bls	memcpy
 
 		stmfd	sp!, {r0, r4, lr}
+	UNWIND(	.fnend				)
+
+	UNWIND(	.fnstart			)
+	UNWIND(	.save	{r0, r4, lr}		) @ in first stmfd block
 		add	r1, r1, r2
 		add	r0, r0, r2
 		subs	r2, r2, #4
@@ -45,6 +51,11 @@
 
 1:		subs	r2, r2, #(28)
 		stmfd	sp!, {r5 - r8}
+	UNWIND(	.fnend				)
+
+	UNWIND(	.fnstart			)
+	UNWIND(	.save	{r0, r4, lr}		)
+	UNWIND(	.save	{r5 - r8}		) @ in second stmfd block
 		blt	5f
 
 	CALGN(	ands	ip, r0, #31		)
@@ -97,6 +108,10 @@
 	CALGN(	bcs	2b			)
 
 7:		ldmfd	sp!, {r5 - r8}
+	UNWIND(	.fnend				) @ end of second stmfd block
+
+	UNWIND(	.fnstart			)
+	UNWIND(	.save	{r0, r4, lr}		) @ still in first stmfd block
 
 8:		movs	r2, r2, lsl #31
 		ldrneb	r3, [r1, #-1]!
@@ -124,10 +139,13 @@
 		ldr	r3, [r1, #0]
 		beq	17f
 		blt	18f
+	UNWIND(	.fnend				)
 
 
 		.macro	backward_copy_shift push pull
 
+	UNWIND(	.fnstart			)
+	UNWIND(	.save	{r0, r4, lr}		) @ still in first stmfd block
 		subs	r2, r2, #28
 		blt	14f
 
@@ -137,6 +155,11 @@
 	CALGN(	bcc	15f			)
 
 11:		stmfd	sp!, {r5 - r9}
+	UNWIND(	.fnend				)
+
+	UNWIND(	.fnstart			)
+	UNWIND(	.save	{r0, r4, lr}		)
+	UNWIND(	.save	{r5 - r9}		) @ in new second stmfd block
 
 	PLD(	pld	[r1, #-4]		)
 	PLD(	subs	r2, r2, #96		)
@@ -171,6 +194,10 @@
 	PLD(	bge	13b			)
 
 		ldmfd	sp!, {r5 - r9}
+	UNWIND(	.fnend				) @ end of the second stmfd block
+
+	UNWIND(	.fnstart			)
+	UNWIND(	.save {r0, r4, lr}		) @ still in first stmfd block
 
 14:		ands	ip, r2, #28
 		beq	16f
@@ -186,6 +213,7 @@
 
 16:		add	r1, r1, #(\pull / 8)
 		b	8b
+	UNWIND(	.fnend				)
 
 		.endm
 
diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S
index 671455c..a4ee97b 100644
--- a/arch/arm/lib/memset.S
+++ b/arch/arm/lib/memset.S
@@ -11,11 +11,13 @@
  */
 #include <linux/linkage.h>
 #include <asm/assembler.h>
+#include <asm/unwind.h>
 
 	.text
 	.align	5
 
 ENTRY(memset)
+UNWIND( .fnstart         )
 	ands	r3, r0, #3		@ 1 unaligned?
 	mov	ip, r0			@ preserve r0 as return value
 	bne	6f			@ 1
@@ -34,6 +36,9 @@
  * We need 2 extra registers for this loop - use r8 and the LR
  */
 	stmfd	sp!, {r8, lr}
+UNWIND( .fnend              )
+UNWIND( .fnstart            )
+UNWIND( .save {r8, lr}      )
 	mov	r8, r1
 	mov	lr, r1
 
@@ -53,6 +58,7 @@
 	tst	r2, #16
 	stmneia	ip!, {r1, r3, r8, lr}
 	ldmfd	sp!, {r8, lr}
+UNWIND( .fnend              )
 
 #else
 
@@ -62,6 +68,9 @@
  */
 
 	stmfd	sp!, {r4-r8, lr}
+UNWIND( .fnend                 )
+UNWIND( .fnstart               )
+UNWIND( .save {r4-r8, lr}      )
 	mov	r4, r1
 	mov	r5, r1
 	mov	r6, r1
@@ -94,9 +103,11 @@
 	tst	r2, #16
 	stmneia	ip!, {r4-r7}
 	ldmfd	sp!, {r4-r8, lr}
+UNWIND( .fnend                 )
 
 #endif
 
+UNWIND( .fnstart            )
 4:	tst	r2, #8
 	stmneia	ip!, {r1, r3}
 	tst	r2, #4
@@ -120,4 +131,5 @@
 	strb	r1, [ip], #1		@ 1
 	add	r2, r2, r3		@ 1 (r2 = r2 - (4 - r3))
 	b	1b
+UNWIND( .fnend   )
 ENDPROC(memset)
diff --git a/arch/arm/lib/memzero.S b/arch/arm/lib/memzero.S
index 385ccb3..0eded95 100644
--- a/arch/arm/lib/memzero.S
+++ b/arch/arm/lib/memzero.S
@@ -9,6 +9,7 @@
  */
 #include <linux/linkage.h>
 #include <asm/assembler.h>
+#include <asm/unwind.h>
 
 	.text
 	.align	5
@@ -18,6 +19,7 @@
  * mis-aligned by, and r1 is the number of bytes.  If r1 < 4, then we
  * don't bother; we use byte stores instead.
  */
+UNWIND(	.fnstart			)
 1:	subs	r1, r1, #4		@ 1 do we have enough
 	blt	5f			@ 1 bytes to align with?
 	cmp	r3, #2			@ 1
@@ -47,6 +49,9 @@
  * use the LR
  */
 	str	lr, [sp, #-4]!		@ 1
+UNWIND(	.fnend				)
+UNWIND(	.fnstart			)
+UNWIND(	.save 	{lr}			)
 	mov	ip, r2			@ 1
 	mov	lr, r2			@ 1
 
@@ -66,6 +71,7 @@
 	tst	r1, #16			@ 1 16 bytes or more?
 	stmneia	r0!, {r2, r3, ip, lr}	@ 4
 	ldr	lr, [sp], #4		@ 1
+UNWIND(	.fnend				)
 
 #else
 
@@ -75,6 +81,9 @@
  */
 
 	stmfd	sp!, {r4-r7, lr}
+UNWIND(	.fnend		       )
+UNWIND(	.fnstart	       )
+UNWIND(	.save 	{r4-r7, lr}    )
 	mov	r4, r2
 	mov	r5, r2
 	mov	r6, r2
@@ -105,9 +114,11 @@
 	tst	r1, #16
 	stmneia	r0!, {r4-r7}
 	ldmfd	sp!, {r4-r7, lr}
+UNWIND(	.fnend		       )
 
 #endif
 
+UNWIND(	.fnstart			)
 4:	tst	r1, #8			@ 1 8 bytes or more?
 	stmneia	r0!, {r2, r3}		@ 2
 	tst	r1, #4			@ 1 4 bytes or more?
@@ -122,4 +133,5 @@
 	tst	r1, #1			@ 1 a byte left over
 	strneb	r2, [r0], #1		@ 1
 	ret	lr			@ 1
+UNWIND(	.fnend				)
 ENDPROC(__memzero)
diff --git a/arch/arm/mach-sa1100/clock.c b/arch/arm/mach-sa1100/clock.c
index 9fa6a99..03c75a8 100644
--- a/arch/arm/mach-sa1100/clock.c
+++ b/arch/arm/mach-sa1100/clock.c
@@ -15,10 +15,12 @@
 #include <linux/clkdev.h>
 
 #include <mach/hardware.h>
+#include <mach/generic.h>
 
 struct clkops {
 	void			(*enable)(struct clk *);
 	void			(*disable)(struct clk *);
+	unsigned long		(*get_rate)(struct clk *);
 };
 
 struct clk {
@@ -33,13 +35,6 @@
 
 static DEFINE_SPINLOCK(clocks_lock);
 
-/* Dummy clk routine to build generic kernel parts that may be using them */
-unsigned long clk_get_rate(struct clk *clk)
-{
-	return 0;
-}
-EXPORT_SYMBOL(clk_get_rate);
-
 static void clk_gpio27_enable(struct clk *clk)
 {
 	/*
@@ -58,6 +53,19 @@
 	GAFR &= ~GPIO_32_768kHz;
 }
 
+static void clk_cpu_enable(struct clk *clk)
+{
+}
+
+static void clk_cpu_disable(struct clk *clk)
+{
+}
+
+static unsigned long clk_cpu_get_rate(struct clk *clk)
+{
+	return sa11x0_getspeed(0) * 1000;
+}
+
 int clk_enable(struct clk *clk)
 {
 	unsigned long flags;
@@ -87,16 +95,37 @@
 }
 EXPORT_SYMBOL(clk_disable);
 
+unsigned long clk_get_rate(struct clk *clk)
+{
+	if (clk && clk->ops && clk->ops->get_rate)
+		return clk->ops->get_rate(clk);
+
+	return 0;
+}
+EXPORT_SYMBOL(clk_get_rate);
+
 const struct clkops clk_gpio27_ops = {
 	.enable		= clk_gpio27_enable,
 	.disable	= clk_gpio27_disable,
 };
 
+const struct clkops clk_cpu_ops = {
+	.enable		= clk_cpu_enable,
+	.disable	= clk_cpu_disable,
+	.get_rate	= clk_cpu_get_rate,
+};
+
 static DEFINE_CLK(gpio27, &clk_gpio27_ops);
 
+static DEFINE_CLK(cpu, &clk_cpu_ops);
+
 static struct clk_lookup sa11xx_clkregs[] = {
 	CLKDEV_INIT("sa1111.0", NULL, &clk_gpio27),
 	CLKDEV_INIT("sa1100-rtc", NULL, NULL),
+	CLKDEV_INIT("sa11x0-fb", NULL, &clk_cpu),
+	CLKDEV_INIT("sa11x0-pcmcia", NULL, &clk_cpu),
+	/* sa1111 names devices using internal offsets, PCMCIA is at 0x1800 */
+	CLKDEV_INIT("1800", NULL, &clk_cpu),
 };
 
 static int __init sa11xx_clk_init(void)
diff --git a/arch/arm/mach-sa1100/collie.c b/arch/arm/mach-sa1100/collie.c
index 108939f..b90c7d8 100644
--- a/arch/arm/mach-sa1100/collie.c
+++ b/arch/arm/mach-sa1100/collie.c
@@ -30,7 +30,7 @@
 #include <linux/gpio_keys.h>
 #include <linux/input.h>
 #include <linux/gpio.h>
-#include <linux/pda_power.h>
+#include <linux/power/gpio-charger.h>
 
 #include <video/sa1100fb.h>
 
@@ -131,62 +131,24 @@
 /*
  * Collie AC IN
  */
-static int collie_power_init(struct device *dev)
-{
-	int ret = gpio_request(COLLIE_GPIO_AC_IN, "ac in");
-	if (ret)
-		goto err_gpio_req;
-
-	ret = gpio_direction_input(COLLIE_GPIO_AC_IN);
-	if (ret)
-		goto err_gpio_in;
-
-	return 0;
-
-err_gpio_in:
-	gpio_free(COLLIE_GPIO_AC_IN);
-err_gpio_req:
-	return ret;
-}
-
-static void collie_power_exit(struct device *dev)
-{
-	gpio_free(COLLIE_GPIO_AC_IN);
-}
-
-static int collie_power_ac_online(void)
-{
-	return gpio_get_value(COLLIE_GPIO_AC_IN) == 2;
-}
-
 static char *collie_ac_supplied_to[] = {
 	"main-battery",
 	"backup-battery",
 };
 
-static struct pda_power_pdata collie_power_data = {
-	.init			= collie_power_init,
-	.is_ac_online		= collie_power_ac_online,
-	.exit			= collie_power_exit,
+
+static struct gpio_charger_platform_data collie_power_data = {
+	.name			= "charger",
+	.type			= POWER_SUPPLY_TYPE_MAINS,
+	.gpio			= COLLIE_GPIO_AC_IN,
 	.supplied_to		= collie_ac_supplied_to,
 	.num_supplicants	= ARRAY_SIZE(collie_ac_supplied_to),
 };
 
-static struct resource collie_power_resource[] = {
-	{
-		.name		= "ac",
-		.flags		= IORESOURCE_IRQ |
-				  IORESOURCE_IRQ_HIGHEDGE |
-				  IORESOURCE_IRQ_LOWEDGE,
-	},
-};
-
 static struct platform_device collie_power_device = {
-	.name			= "pda-power",
+	.name			= "gpio-charger",
 	.id			= -1,
 	.dev.platform_data	= &collie_power_data,
-	.resource		= collie_power_resource,
-	.num_resources		= ARRAY_SIZE(collie_power_resource),
 };
 
 #ifdef CONFIG_SHARP_LOCOMO
@@ -420,9 +382,6 @@
 
 	GPSR |= _COLLIE_GPIO_UCB1x00_RESET;
 
-	collie_power_resource[0].start = gpio_to_irq(COLLIE_GPIO_AC_IN);
-	collie_power_resource[0].end = gpio_to_irq(COLLIE_GPIO_AC_IN);
-
 	sa11x0_ppc_configure_mcp();
 
 
diff --git a/arch/arm/mach-sa1100/include/mach/entry-macro.S b/arch/arm/mach-sa1100/include/mach/entry-macro.S
deleted file mode 100644
index 8cf7630..0000000
--- a/arch/arm/mach-sa1100/include/mach/entry-macro.S
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * arch/arm/mach-sa1100/include/mach/entry-macro.S
- *
- * Low-level IRQ helper macros for SA1100-based platforms
- *
- * This file is licensed under  the terms of the GNU General Public
- * License version 2. This program is licensed "as is" without any
- * warranty of any kind, whether express or implied.
- */
-
-		.macro  get_irqnr_preamble, base, tmp
-		mov	\base, #0xfa000000		@ ICIP = 0xfa050000
-		add	\base, \base, #0x00050000
-		.endm
-
-		.macro	get_irqnr_and_base, irqnr, irqstat, base, tmp
-		ldr	\irqstat, [\base]		@ get irqs
-		ldr	\irqnr, [\base, #4]		@ ICMR = 0xfa050004
-		ands	\irqstat, \irqstat, \irqnr
-		mov	\irqnr, #0
-		beq	1001f
-		tst	\irqstat, #0xff
-		moveq	\irqstat, \irqstat, lsr #8
-		addeq	\irqnr, \irqnr, #8
-		tsteq	\irqstat, #0xff
-		moveq	\irqstat, \irqstat, lsr #8
-		addeq	\irqnr, \irqnr, #8
-		tsteq	\irqstat, #0xff
-		moveq	\irqstat, \irqstat, lsr #8
-		addeq	\irqnr, \irqnr, #8
-		tst	\irqstat, #0x0f
-		moveq	\irqstat, \irqstat, lsr #4
-		addeq	\irqnr, \irqnr, #4
-		tst	\irqstat, #0x03
-		moveq	\irqstat, \irqstat, lsr #2
-		addeq	\irqnr, \irqnr, #2
-		tst	\irqstat, #0x01
-		addeqs	\irqnr, \irqnr, #1
-1001:
-		.endm
-
diff --git a/arch/arm/mach-sa1100/include/mach/irqs.h b/arch/arm/mach-sa1100/include/mach/irqs.h
index 3790298..de09834 100644
--- a/arch/arm/mach-sa1100/include/mach/irqs.h
+++ b/arch/arm/mach-sa1100/include/mach/irqs.h
@@ -8,56 +8,56 @@
  * 2001/11/14	RMK	Cleaned up and standardised a lot of the IRQs.
  */
 
-#define	IRQ_GPIO0		0
-#define	IRQ_GPIO1		1
-#define	IRQ_GPIO2		2
-#define	IRQ_GPIO3		3
-#define	IRQ_GPIO4		4
-#define	IRQ_GPIO5		5
-#define	IRQ_GPIO6		6
-#define	IRQ_GPIO7		7
-#define	IRQ_GPIO8		8
-#define	IRQ_GPIO9		9
-#define	IRQ_GPIO10		10
-#define	IRQ_GPIO11_27		11
-#define	IRQ_LCD  		12	/* LCD controller           */
-#define	IRQ_Ser0UDC		13	/* Ser. port 0 UDC          */
-#define	IRQ_Ser1SDLC		14	/* Ser. port 1 SDLC         */
-#define	IRQ_Ser1UART		15	/* Ser. port 1 UART         */
-#define	IRQ_Ser2ICP		16	/* Ser. port 2 ICP          */
-#define	IRQ_Ser3UART		17	/* Ser. port 3 UART         */
-#define	IRQ_Ser4MCP		18	/* Ser. port 4 MCP          */
-#define	IRQ_Ser4SSP		19	/* Ser. port 4 SSP          */
-#define	IRQ_DMA0 		20	/* DMA controller channel 0 */
-#define	IRQ_DMA1 		21	/* DMA controller channel 1 */
-#define	IRQ_DMA2 		22	/* DMA controller channel 2 */
-#define	IRQ_DMA3 		23	/* DMA controller channel 3 */
-#define	IRQ_DMA4 		24	/* DMA controller channel 4 */
-#define	IRQ_DMA5 		25	/* DMA controller channel 5 */
-#define	IRQ_OST0 		26	/* OS Timer match 0         */
-#define	IRQ_OST1 		27	/* OS Timer match 1         */
-#define	IRQ_OST2 		28	/* OS Timer match 2         */
-#define	IRQ_OST3 		29	/* OS Timer match 3         */
-#define	IRQ_RTC1Hz		30	/* RTC 1 Hz clock           */
-#define	IRQ_RTCAlrm		31	/* RTC Alarm                */
+#define	IRQ_GPIO0		1
+#define	IRQ_GPIO1		2
+#define	IRQ_GPIO2		3
+#define	IRQ_GPIO3		4
+#define	IRQ_GPIO4		5
+#define	IRQ_GPIO5		6
+#define	IRQ_GPIO6		7
+#define	IRQ_GPIO7		8
+#define	IRQ_GPIO8		9
+#define	IRQ_GPIO9		10
+#define	IRQ_GPIO10		11
+#define	IRQ_GPIO11_27		12
+#define	IRQ_LCD			13	/* LCD controller           */
+#define	IRQ_Ser0UDC		14	/* Ser. port 0 UDC          */
+#define	IRQ_Ser1SDLC		15	/* Ser. port 1 SDLC         */
+#define	IRQ_Ser1UART		16	/* Ser. port 1 UART         */
+#define	IRQ_Ser2ICP		17	/* Ser. port 2 ICP          */
+#define	IRQ_Ser3UART		18	/* Ser. port 3 UART         */
+#define	IRQ_Ser4MCP		19	/* Ser. port 4 MCP          */
+#define	IRQ_Ser4SSP		20	/* Ser. port 4 SSP          */
+#define	IRQ_DMA0		21	/* DMA controller channel 0 */
+#define	IRQ_DMA1		22	/* DMA controller channel 1 */
+#define	IRQ_DMA2		23	/* DMA controller channel 2 */
+#define	IRQ_DMA3		24	/* DMA controller channel 3 */
+#define	IRQ_DMA4		25	/* DMA controller channel 4 */
+#define	IRQ_DMA5		26	/* DMA controller channel 5 */
+#define	IRQ_OST0		27	/* OS Timer match 0         */
+#define	IRQ_OST1		28	/* OS Timer match 1         */
+#define	IRQ_OST2		29	/* OS Timer match 2         */
+#define	IRQ_OST3		30	/* OS Timer match 3         */
+#define	IRQ_RTC1Hz		31	/* RTC 1 Hz clock           */
+#define	IRQ_RTCAlrm		32	/* RTC Alarm                */
 
-#define	IRQ_GPIO11		32
-#define	IRQ_GPIO12		33
-#define	IRQ_GPIO13		34
-#define	IRQ_GPIO14		35
-#define	IRQ_GPIO15		36
-#define	IRQ_GPIO16		37
-#define	IRQ_GPIO17		38
-#define	IRQ_GPIO18		39
-#define	IRQ_GPIO19		40
-#define	IRQ_GPIO20		41
-#define	IRQ_GPIO21		42
-#define	IRQ_GPIO22		43
-#define	IRQ_GPIO23		44
-#define	IRQ_GPIO24		45
-#define	IRQ_GPIO25		46
-#define	IRQ_GPIO26		47
-#define	IRQ_GPIO27		48
+#define	IRQ_GPIO11		33
+#define	IRQ_GPIO12		34
+#define	IRQ_GPIO13		35
+#define	IRQ_GPIO14		36
+#define	IRQ_GPIO15		37
+#define	IRQ_GPIO16		38
+#define	IRQ_GPIO17		39
+#define	IRQ_GPIO18		40
+#define	IRQ_GPIO19		41
+#define	IRQ_GPIO20		42
+#define	IRQ_GPIO21		43
+#define	IRQ_GPIO22		44
+#define	IRQ_GPIO23		45
+#define	IRQ_GPIO24		46
+#define	IRQ_GPIO25		47
+#define	IRQ_GPIO26		48
+#define	IRQ_GPIO27		49
 
 /*
  * The next 16 interrupts are for board specific purposes.  Since
@@ -65,8 +65,8 @@
  * these.  If you need more, increase IRQ_BOARD_END, but keep it
  * within sensible limits.  IRQs 49 to 64 are available.
  */
-#define IRQ_BOARD_START		49
-#define IRQ_BOARD_END		65
+#define IRQ_BOARD_START		50
+#define IRQ_BOARD_END		66
 
 /*
  * Figure out the MAX IRQ number.
diff --git a/arch/arm/mach-sa1100/irq.c b/arch/arm/mach-sa1100/irq.c
index 2124f1fc..63e2901 100644
--- a/arch/arm/mach-sa1100/irq.c
+++ b/arch/arm/mach-sa1100/irq.c
@@ -14,17 +14,73 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/irq.h>
+#include <linux/irqdomain.h>
 #include <linux/ioport.h>
 #include <linux/syscore_ops.h>
 
 #include <mach/hardware.h>
 #include <mach/irqs.h>
 #include <asm/mach/irq.h>
+#include <asm/exception.h>
 
 #include "generic.h"
 
 
 /*
+ * We don't need to ACK IRQs on the SA1100 unless they're GPIOs
+ * this is for internal IRQs i.e. from IRQ LCD to RTCAlrm.
+ */
+static void sa1100_mask_irq(struct irq_data *d)
+{
+	ICMR &= ~BIT(d->hwirq);
+}
+
+static void sa1100_unmask_irq(struct irq_data *d)
+{
+	ICMR |= BIT(d->hwirq);
+}
+
+/*
+ * Apart form GPIOs, only the RTC alarm can be a wakeup event.
+ */
+static int sa1100_set_wake(struct irq_data *d, unsigned int on)
+{
+	if (BIT(d->hwirq) == IC_RTCAlrm) {
+		if (on)
+			PWER |= PWER_RTC;
+		else
+			PWER &= ~PWER_RTC;
+		return 0;
+	}
+	return -EINVAL;
+}
+
+static struct irq_chip sa1100_normal_chip = {
+	.name		= "SC",
+	.irq_ack	= sa1100_mask_irq,
+	.irq_mask	= sa1100_mask_irq,
+	.irq_unmask	= sa1100_unmask_irq,
+	.irq_set_wake	= sa1100_set_wake,
+};
+
+static int sa1100_normal_irqdomain_map(struct irq_domain *d,
+		unsigned int irq, irq_hw_number_t hwirq)
+{
+	irq_set_chip_and_handler(irq, &sa1100_normal_chip,
+				 handle_level_irq);
+	set_irq_flags(irq, IRQF_VALID);
+
+	return 0;
+}
+
+static struct irq_domain_ops sa1100_normal_irqdomain_ops = {
+	.map = sa1100_normal_irqdomain_map,
+	.xlate = irq_domain_xlate_onetwocell,
+};
+
+static struct irq_domain *sa1100_normal_irqdomain;
+
+/*
  * SA1100 GPIO edge detection for IRQs:
  * IRQs are generated on Falling-Edge, Rising-Edge, or both.
  * Use this instead of directly setting GRER/GFER.
@@ -33,20 +89,11 @@
 static int GPIO_IRQ_falling_edge;
 static int GPIO_IRQ_mask = (1 << 11) - 1;
 
-/*
- * To get the GPIO number from an IRQ number
- */
-#define GPIO_11_27_IRQ(i)	((i) - 21)
-#define GPIO11_27_MASK(irq)	(1 << GPIO_11_27_IRQ(irq))
-
 static int sa1100_gpio_type(struct irq_data *d, unsigned int type)
 {
 	unsigned int mask;
 
-	if (d->irq <= 10)
-		mask = 1 << d->irq;
-	else
-		mask = GPIO11_27_MASK(d->irq);
+	mask = BIT(d->hwirq);
 
 	if (type == IRQ_TYPE_PROBE) {
 		if ((GPIO_IRQ_rising_edge | GPIO_IRQ_falling_edge) & mask)
@@ -70,41 +117,51 @@
 }
 
 /*
- * GPIO IRQs must be acknowledged.  This is for IRQs from 0 to 10.
+ * GPIO IRQs must be acknowledged.
  */
-static void sa1100_low_gpio_ack(struct irq_data *d)
+static void sa1100_gpio_ack(struct irq_data *d)
 {
-	GEDR = (1 << d->irq);
+	GEDR = BIT(d->hwirq);
 }
 
-static void sa1100_low_gpio_mask(struct irq_data *d)
-{
-	ICMR &= ~(1 << d->irq);
-}
-
-static void sa1100_low_gpio_unmask(struct irq_data *d)
-{
-	ICMR |= 1 << d->irq;
-}
-
-static int sa1100_low_gpio_wake(struct irq_data *d, unsigned int on)
+static int sa1100_gpio_wake(struct irq_data *d, unsigned int on)
 {
 	if (on)
-		PWER |= 1 << d->irq;
+		PWER |= BIT(d->hwirq);
 	else
-		PWER &= ~(1 << d->irq);
+		PWER &= ~BIT(d->hwirq);
 	return 0;
 }
 
+/*
+ * This is for IRQs from 0 to 10.
+ */
 static struct irq_chip sa1100_low_gpio_chip = {
 	.name		= "GPIO-l",
-	.irq_ack	= sa1100_low_gpio_ack,
-	.irq_mask	= sa1100_low_gpio_mask,
-	.irq_unmask	= sa1100_low_gpio_unmask,
+	.irq_ack	= sa1100_gpio_ack,
+	.irq_mask	= sa1100_mask_irq,
+	.irq_unmask	= sa1100_unmask_irq,
 	.irq_set_type	= sa1100_gpio_type,
-	.irq_set_wake	= sa1100_low_gpio_wake,
+	.irq_set_wake	= sa1100_gpio_wake,
 };
 
+static int sa1100_low_gpio_irqdomain_map(struct irq_domain *d,
+		unsigned int irq, irq_hw_number_t hwirq)
+{
+	irq_set_chip_and_handler(irq, &sa1100_low_gpio_chip,
+				 handle_edge_irq);
+	set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
+
+	return 0;
+}
+
+static struct irq_domain_ops sa1100_low_gpio_irqdomain_ops = {
+	.map = sa1100_low_gpio_irqdomain_map,
+	.xlate = irq_domain_xlate_onetwocell,
+};
+
+static struct irq_domain *sa1100_low_gpio_irqdomain;
+
 /*
  * IRQ11 (GPIO11 through 27) handler.  We enter here with the
  * irq_controller_lock held, and IRQs disabled.  Decode the IRQ
@@ -141,16 +198,9 @@
  * In addition, the IRQs are all collected up into one bit in the
  * interrupt controller registers.
  */
-static void sa1100_high_gpio_ack(struct irq_data *d)
-{
-	unsigned int mask = GPIO11_27_MASK(d->irq);
-
-	GEDR = mask;
-}
-
 static void sa1100_high_gpio_mask(struct irq_data *d)
 {
-	unsigned int mask = GPIO11_27_MASK(d->irq);
+	unsigned int mask = BIT(d->hwirq);
 
 	GPIO_IRQ_mask &= ~mask;
 
@@ -160,7 +210,7 @@
 
 static void sa1100_high_gpio_unmask(struct irq_data *d)
 {
-	unsigned int mask = GPIO11_27_MASK(d->irq);
+	unsigned int mask = BIT(d->hwirq);
 
 	GPIO_IRQ_mask |= mask;
 
@@ -168,61 +218,32 @@
 	GFER = GPIO_IRQ_falling_edge & GPIO_IRQ_mask;
 }
 
-static int sa1100_high_gpio_wake(struct irq_data *d, unsigned int on)
-{
-	if (on)
-		PWER |= GPIO11_27_MASK(d->irq);
-	else
-		PWER &= ~GPIO11_27_MASK(d->irq);
-	return 0;
-}
-
 static struct irq_chip sa1100_high_gpio_chip = {
 	.name		= "GPIO-h",
-	.irq_ack	= sa1100_high_gpio_ack,
+	.irq_ack	= sa1100_gpio_ack,
 	.irq_mask	= sa1100_high_gpio_mask,
 	.irq_unmask	= sa1100_high_gpio_unmask,
 	.irq_set_type	= sa1100_gpio_type,
-	.irq_set_wake	= sa1100_high_gpio_wake,
+	.irq_set_wake	= sa1100_gpio_wake,
 };
 
-/*
- * We don't need to ACK IRQs on the SA1100 unless they're GPIOs
- * this is for internal IRQs i.e. from 11 to 31.
- */
-static void sa1100_mask_irq(struct irq_data *d)
+static int sa1100_high_gpio_irqdomain_map(struct irq_domain *d,
+		unsigned int irq, irq_hw_number_t hwirq)
 {
-	ICMR &= ~(1 << d->irq);
+	irq_set_chip_and_handler(irq, &sa1100_high_gpio_chip,
+				 handle_edge_irq);
+	set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
+
+	return 0;
 }
 
-static void sa1100_unmask_irq(struct irq_data *d)
-{
-	ICMR |= (1 << d->irq);
-}
-
-/*
- * Apart form GPIOs, only the RTC alarm can be a wakeup event.
- */
-static int sa1100_set_wake(struct irq_data *d, unsigned int on)
-{
-	if (d->irq == IRQ_RTCAlrm) {
-		if (on)
-			PWER |= PWER_RTC;
-		else
-			PWER &= ~PWER_RTC;
-		return 0;
-	}
-	return -EINVAL;
-}
-
-static struct irq_chip sa1100_normal_chip = {
-	.name		= "SC",
-	.irq_ack	= sa1100_mask_irq,
-	.irq_mask	= sa1100_mask_irq,
-	.irq_unmask	= sa1100_unmask_irq,
-	.irq_set_wake	= sa1100_set_wake,
+static struct irq_domain_ops sa1100_high_gpio_irqdomain_ops = {
+	.map = sa1100_high_gpio_irqdomain_map,
+	.xlate = irq_domain_xlate_onetwocell,
 };
 
+static struct irq_domain *sa1100_high_gpio_irqdomain;
+
 static struct resource irq_resource =
 	DEFINE_RES_MEM_NAMED(0x90050000, SZ_64K, "irqs");
 
@@ -291,10 +312,25 @@
 
 device_initcall(sa1100irq_init_devicefs);
 
+static asmlinkage void __exception_irq_entry
+sa1100_handle_irq(struct pt_regs *regs)
+{
+	uint32_t icip, icmr, mask;
+
+	do {
+		icip = (ICIP);
+		icmr = (ICMR);
+		mask = icip & icmr;
+
+		if (mask == 0)
+			break;
+
+		handle_IRQ(ffs(mask) - 1 + IRQ_GPIO0, regs);
+	} while (1);
+}
+
 void __init sa1100_init_irq(void)
 {
-	unsigned int irq;
-
 	request_resource(&iomem_resource, &irq_resource);
 
 	/* disable all IRQs */
@@ -314,29 +350,24 @@
 	 */
 	ICCR = 1;
 
-	for (irq = 0; irq <= 10; irq++) {
-		irq_set_chip_and_handler(irq, &sa1100_low_gpio_chip,
-					 handle_edge_irq);
-		set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
-	}
+	sa1100_low_gpio_irqdomain = irq_domain_add_legacy(NULL,
+			11, IRQ_GPIO0, 0,
+			&sa1100_low_gpio_irqdomain_ops, NULL);
 
-	for (irq = 12; irq <= 31; irq++) {
-		irq_set_chip_and_handler(irq, &sa1100_normal_chip,
-					 handle_level_irq);
-		set_irq_flags(irq, IRQF_VALID);
-	}
+	sa1100_normal_irqdomain = irq_domain_add_legacy(NULL,
+			21, IRQ_GPIO11_27, 11,
+			&sa1100_normal_irqdomain_ops, NULL);
 
-	for (irq = 32; irq <= 48; irq++) {
-		irq_set_chip_and_handler(irq, &sa1100_high_gpio_chip,
-					 handle_edge_irq);
-		set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
-	}
+	sa1100_high_gpio_irqdomain = irq_domain_add_legacy(NULL,
+			17, IRQ_GPIO11, 11,
+			&sa1100_high_gpio_irqdomain_ops, NULL);
 
 	/*
 	 * Install handler for GPIO 11-27 edge detect interrupts
 	 */
-	irq_set_chip(IRQ_GPIO11_27, &sa1100_normal_chip);
 	irq_set_chained_handler(IRQ_GPIO11_27, sa1100_high_gpio_handler);
 
+	set_handle_irq(sa1100_handle_irq);
+
 	sa1100_init_gpio();
 }
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index c9cd9c5..bc219b3 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -798,6 +798,7 @@
 
 config KUSER_HELPERS
 	bool "Enable kuser helpers in vector page" if !NEED_KUSER_HELPERS
+	depends on MMU
 	default y
 	help
 	  Warning: disabling this option may break user programs.
diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile
index 91da64d..d3afdf9 100644
--- a/arch/arm/mm/Makefile
+++ b/arch/arm/mm/Makefile
@@ -6,7 +6,7 @@
 				   iomap.o
 
 obj-$(CONFIG_MMU)		+= fault-armv.o flush.o idmap.o ioremap.o \
-				   mmap.o pgd.o mmu.o
+				   mmap.o pgd.o mmu.o pageattr.o
 
 ifneq ($(CONFIG_MMU),y)
 obj-y				+= nommu.o
diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c
index 83792f4..2c0c541 100644
--- a/arch/arm/mm/alignment.c
+++ b/arch/arm/mm/alignment.c
@@ -113,7 +113,7 @@
 		new_usermode |= UM_FIXUP;
 
 		if (warn)
-			printk(KERN_WARNING "alignment: ignoring faults is unsafe on this CPU.  Defaulting to fixup mode.\n");
+			pr_warn("alignment: ignoring faults is unsafe on this CPU.  Defaulting to fixup mode.\n");
 	}
 
 	return new_usermode;
@@ -523,7 +523,7 @@
 	 * processor for us.
 	 */
 	if (addr != eaddr) {
-		printk(KERN_ERR "LDMSTM: PC = %08lx, instr = %08lx, "
+		pr_err("LDMSTM: PC = %08lx, instr = %08lx, "
 			"addr = %08lx, eaddr = %08lx\n",
 			 instruction_pointer(regs), instr, addr, eaddr);
 		show_regs(regs);
@@ -567,7 +567,7 @@
 	return TYPE_FAULT;
 
 bad:
-	printk(KERN_ERR "Alignment trap: not handling ldm with s-bit set\n");
+	pr_err("Alignment trap: not handling ldm with s-bit set\n");
 	return TYPE_ERROR;
 }
 
@@ -899,13 +899,13 @@
 	return 0;
 
  swp:
-	printk(KERN_ERR "Alignment trap: not handling swp instruction\n");
+	pr_err("Alignment trap: not handling swp instruction\n");
 
  bad:
 	/*
 	 * Oops, we didn't handle the instruction.
 	 */
-	printk(KERN_ERR "Alignment trap: not handling instruction "
+	pr_err("Alignment trap: not handling instruction "
 		"%0*lx at [<%08lx>]\n",
 		isize << 1,
 		isize == 2 ? tinstr : instr, instrptr);
diff --git a/arch/arm/mm/cache-feroceon-l2.c b/arch/arm/mm/cache-feroceon-l2.c
index e028a7f..097181e 100644
--- a/arch/arm/mm/cache-feroceon-l2.c
+++ b/arch/arm/mm/cache-feroceon-l2.c
@@ -313,7 +313,7 @@
 	 */
 	u = read_extra_features();
 	if (!(u & 0x01000000)) {
-		printk(KERN_INFO "Feroceon L2: Disabling L2 prefetch.\n");
+		pr_info("Feroceon L2: Disabling L2 prefetch.\n");
 		write_extra_features(u | 0x01000000);
 	}
 }
@@ -326,7 +326,7 @@
 	if (!(u & 0x00400000)) {
 		int i, d;
 
-		printk(KERN_INFO "Feroceon L2: Enabling L2\n");
+		pr_info("Feroceon L2: Enabling L2\n");
 
 		d = flush_and_disable_dcache();
 		i = invalidate_and_disable_icache();
@@ -353,7 +353,7 @@
 
 	enable_l2();
 
-	printk(KERN_INFO "Feroceon L2: Cache support initialised%s.\n",
+	pr_info("Feroceon L2: Cache support initialised%s.\n",
 			 l2_wt_override ? ", in WT override mode" : "");
 }
 #ifdef CONFIG_OF
diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index 55f9d6e..5e65ca8 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -956,7 +956,7 @@
  * @associativity: variable to return the calculated associativity in
  * @max_way_size: the maximum size in bytes for the cache ways
  */
-static void __init l2x0_cache_size_of_parse(const struct device_node *np,
+static int __init l2x0_cache_size_of_parse(const struct device_node *np,
 					    u32 *aux_val, u32 *aux_mask,
 					    u32 *associativity,
 					    u32 max_way_size)
@@ -974,7 +974,7 @@
 	of_property_read_u32(np, "cache-line-size", &line_size);
 
 	if (!cache_size || !sets)
-		return;
+		return -ENODEV;
 
 	/* All these l2 caches have the same line = block size actually */
 	if (!line_size) {
@@ -1009,7 +1009,7 @@
 
 	if (way_size > max_way_size) {
 		pr_err("L2C OF: set size %dKB is too large\n", way_size);
-		return;
+		return -EINVAL;
 	}
 
 	pr_info("L2C OF: override cache size: %d bytes (%dKB)\n",
@@ -1027,7 +1027,7 @@
 	if (way_size_bits < 1 || way_size_bits > 6) {
 		pr_err("L2C OF: cache way size illegal: %dKB is not mapped\n",
 		       way_size);
-		return;
+		return -EINVAL;
 	}
 
 	mask |= L2C_AUX_CTRL_WAY_SIZE_MASK;
@@ -1036,6 +1036,8 @@
 	*aux_val &= ~mask;
 	*aux_val |= val;
 	*aux_mask &= ~mask;
+
+	return 0;
 }
 
 static void __init l2x0_of_parse(const struct device_node *np,
@@ -1046,6 +1048,7 @@
 	u32 dirty = 0;
 	u32 val = 0, mask = 0;
 	u32 assoc;
+	int ret;
 
 	of_property_read_u32(np, "arm,tag-latency", &tag);
 	if (tag) {
@@ -1068,7 +1071,10 @@
 		val |= (dirty - 1) << L2X0_AUX_CTRL_DIRTY_LATENCY_SHIFT;
 	}
 
-	l2x0_cache_size_of_parse(np, aux_val, aux_mask, &assoc, SZ_256K);
+	ret = l2x0_cache_size_of_parse(np, aux_val, aux_mask, &assoc, SZ_256K);
+	if (ret)
+		return;
+
 	if (assoc > 8) {
 		pr_err("l2x0 of: cache setting yield too high associativity\n");
 		pr_err("l2x0 of: %d calculated, max 8\n", assoc);
@@ -1125,6 +1131,7 @@
 	u32 tag[3] = { 0, 0, 0 };
 	u32 filter[2] = { 0, 0 };
 	u32 assoc;
+	int ret;
 
 	of_property_read_u32_array(np, "arm,tag-latency", tag, ARRAY_SIZE(tag));
 	if (tag[0] && tag[1] && tag[2])
@@ -1152,7 +1159,10 @@
 			       l2x0_base + L310_ADDR_FILTER_START);
 	}
 
-	l2x0_cache_size_of_parse(np, aux_val, aux_mask, &assoc, SZ_512K);
+	ret = l2x0_cache_size_of_parse(np, aux_val, aux_mask, &assoc, SZ_512K);
+	if (ret)
+		return;
+
 	switch (assoc) {
 	case 16:
 		*aux_val &= ~L2X0_AUX_CTRL_ASSOC_MASK;
@@ -1164,8 +1174,8 @@
 		*aux_mask &= ~L2X0_AUX_CTRL_ASSOC_MASK;
 		break;
 	default:
-		pr_err("PL310 OF: cache setting yield illegal associativity\n");
-		pr_err("PL310 OF: %d calculated, only 8 and 16 legal\n", assoc);
+		pr_err("L2C-310 OF cache associativity %d invalid, only 8 or 16 permitted\n",
+		       assoc);
 		break;
 	}
 }
diff --git a/arch/arm/mm/cache-tauros2.c b/arch/arm/mm/cache-tauros2.c
index b273739..1e373d2 100644
--- a/arch/arm/mm/cache-tauros2.c
+++ b/arch/arm/mm/cache-tauros2.c
@@ -185,7 +185,7 @@
 		u &= ~0x01000000;
 	else
 		u |= 0x01000000;
-	printk(KERN_INFO "Tauros2: %s L2 prefetch.\n",
+	pr_info("Tauros2: %s L2 prefetch.\n",
 			(features & CACHE_TAUROS2_PREFETCH_ON)
 			? "Enabling" : "Disabling");
 
@@ -193,7 +193,7 @@
 		u |= 0x00100000;
 	else
 		u &= ~0x00100000;
-	printk(KERN_INFO "Tauros2: %s line fill burt8.\n",
+	pr_info("Tauros2: %s line fill burt8.\n",
 			(features & CACHE_TAUROS2_LINEFILL_BURST8)
 			? "Enabling" : "Disabling");
 
@@ -216,7 +216,7 @@
 		 */
 		feat = read_extra_features();
 		if (!(feat & 0x00400000)) {
-			printk(KERN_INFO "Tauros2: Enabling L2 cache.\n");
+			pr_info("Tauros2: Enabling L2 cache.\n");
 			write_extra_features(feat | 0x00400000);
 		}
 
@@ -253,7 +253,7 @@
 		 */
 		actlr = read_actlr();
 		if (!(actlr & 0x00000002)) {
-			printk(KERN_INFO "Tauros2: Enabling L2 cache.\n");
+			pr_info("Tauros2: Enabling L2 cache.\n");
 			write_actlr(actlr | 0x00000002);
 		}
 
@@ -262,11 +262,11 @@
 #endif
 
 	if (mode == NULL) {
-		printk(KERN_CRIT "Tauros2: Unable to detect CPU mode.\n");
+		pr_crit("Tauros2: Unable to detect CPU mode.\n");
 		return;
 	}
 
-	printk(KERN_INFO "Tauros2: L2 cache support initialised "
+	pr_info("Tauros2: L2 cache support initialised "
 			 "in %s mode.\n", mode);
 }
 
diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c
index 6eb97b3..9189256 100644
--- a/arch/arm/mm/context.c
+++ b/arch/arm/mm/context.c
@@ -184,36 +184,46 @@
 	u64 asid = atomic64_read(&mm->context.id);
 	u64 generation = atomic64_read(&asid_generation);
 
-	if (asid != 0 && is_reserved_asid(asid)) {
+	if (asid != 0) {
 		/*
-		 * Our current ASID was active during a rollover, we can
-		 * continue to use it and this was just a false alarm.
+		 * If our current ASID was active during a rollover, we
+		 * can continue to use it and this was just a false alarm.
 		 */
-		asid = generation | (asid & ~ASID_MASK);
-	} else {
+		if (is_reserved_asid(asid))
+			return generation | (asid & ~ASID_MASK);
+
 		/*
-		 * Allocate a free ASID. If we can't find one, take a
-		 * note of the currently active ASIDs and mark the TLBs
-		 * as requiring flushes. We always count from ASID #1,
-		 * as we reserve ASID #0 to switch via TTBR0 and to
-		 * avoid speculative page table walks from hitting in
-		 * any partial walk caches, which could be populated
-		 * from overlapping level-1 descriptors used to map both
-		 * the module area and the userspace stack.
+		 * We had a valid ASID in a previous life, so try to re-use
+		 * it if possible.,
 		 */
-		asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, cur_idx);
-		if (asid == NUM_USER_ASIDS) {
-			generation = atomic64_add_return(ASID_FIRST_VERSION,
-							 &asid_generation);
-			flush_context(cpu);
-			asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1);
-		}
-		__set_bit(asid, asid_map);
-		cur_idx = asid;
-		asid |= generation;
-		cpumask_clear(mm_cpumask(mm));
+		asid &= ~ASID_MASK;
+		if (!__test_and_set_bit(asid, asid_map))
+			goto bump_gen;
 	}
 
+	/*
+	 * Allocate a free ASID. If we can't find one, take a note of the
+	 * currently active ASIDs and mark the TLBs as requiring flushes.
+	 * We always count from ASID #1, as we reserve ASID #0 to switch
+	 * via TTBR0 and to avoid speculative page table walks from hitting
+	 * in any partial walk caches, which could be populated from
+	 * overlapping level-1 descriptors used to map both the module
+	 * area and the userspace stack.
+	 */
+	asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, cur_idx);
+	if (asid == NUM_USER_ASIDS) {
+		generation = atomic64_add_return(ASID_FIRST_VERSION,
+						 &asid_generation);
+		flush_context(cpu);
+		asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1);
+	}
+
+	__set_bit(asid, asid_map);
+	cur_idx = asid;
+
+bump_gen:
+	asid |= generation;
+	cpumask_clear(mm_cpumask(mm));
 	return asid;
 }
 
diff --git a/arch/arm/mm/copypage-v6.c b/arch/arm/mm/copypage-v6.c
index b9bcc9d..7042334 100644
--- a/arch/arm/mm/copypage-v6.c
+++ b/arch/arm/mm/copypage-v6.c
@@ -62,7 +62,7 @@
 	__asm__("mcrr	p15, 0, %1, %0, c6	@ 0xec401f06"
 	   :
 	   : "r" (kto),
-	     "r" ((unsigned long)kto + PAGE_SIZE - L1_CACHE_BYTES)
+	     "r" ((unsigned long)kto + PAGE_SIZE - 1)
 	   : "cc");
 }
 
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index c245d90..e890711 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -1198,7 +1198,6 @@
 {
 	return dma_common_pages_remap(pages, size,
 			VM_ARM_DMA_CONSISTENT | VM_USERMAP, prot, caller);
-	return NULL;
 }
 
 /*
diff --git a/arch/arm/mm/fault-armv.c b/arch/arm/mm/fault-armv.c
index ff379ac..d9e0d00 100644
--- a/arch/arm/mm/fault-armv.c
+++ b/arch/arm/mm/fault-armv.c
@@ -235,7 +235,7 @@
 	const char *reason;
 	unsigned long v = 1;
 
-	printk(KERN_INFO "CPU: Testing write buffer coherency: ");
+	pr_info("CPU: Testing write buffer coherency: ");
 
 	page = alloc_page(GFP_KERNEL);
 	if (page) {
@@ -261,9 +261,9 @@
 	}
 
 	if (v) {
-		printk("failed, %s\n", reason);
+		pr_cont("failed, %s\n", reason);
 		shared_pte_mask = L_PTE_MT_UNCACHED;
 	} else {
-		printk("ok\n");
+		pr_cont("ok\n");
 	}
 }
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index eb8830a..a982dc3 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -63,9 +63,9 @@
 	if (!mm)
 		mm = &init_mm;
 
-	printk(KERN_ALERT "pgd = %p\n", mm->pgd);
+	pr_alert("pgd = %p\n", mm->pgd);
 	pgd = pgd_offset(mm, addr);
-	printk(KERN_ALERT "[%08lx] *pgd=%08llx",
+	pr_alert("[%08lx] *pgd=%08llx",
 			addr, (long long)pgd_val(*pgd));
 
 	do {
@@ -77,31 +77,31 @@
 			break;
 
 		if (pgd_bad(*pgd)) {
-			printk("(bad)");
+			pr_cont("(bad)");
 			break;
 		}
 
 		pud = pud_offset(pgd, addr);
 		if (PTRS_PER_PUD != 1)
-			printk(", *pud=%08llx", (long long)pud_val(*pud));
+			pr_cont(", *pud=%08llx", (long long)pud_val(*pud));
 
 		if (pud_none(*pud))
 			break;
 
 		if (pud_bad(*pud)) {
-			printk("(bad)");
+			pr_cont("(bad)");
 			break;
 		}
 
 		pmd = pmd_offset(pud, addr);
 		if (PTRS_PER_PMD != 1)
-			printk(", *pmd=%08llx", (long long)pmd_val(*pmd));
+			pr_cont(", *pmd=%08llx", (long long)pmd_val(*pmd));
 
 		if (pmd_none(*pmd))
 			break;
 
 		if (pmd_bad(*pmd)) {
-			printk("(bad)");
+			pr_cont("(bad)");
 			break;
 		}
 
@@ -110,15 +110,15 @@
 			break;
 
 		pte = pte_offset_map(pmd, addr);
-		printk(", *pte=%08llx", (long long)pte_val(*pte));
+		pr_cont(", *pte=%08llx", (long long)pte_val(*pte));
 #ifndef CONFIG_ARM_LPAE
-		printk(", *ppte=%08llx",
+		pr_cont(", *ppte=%08llx",
 		       (long long)pte_val(pte[PTE_HWTABLE_PTRS]));
 #endif
 		pte_unmap(pte);
 	} while(0);
 
-	printk("\n");
+	pr_cont("\n");
 }
 #else					/* CONFIG_MMU */
 void show_pte(struct mm_struct *mm, unsigned long addr)
@@ -142,10 +142,9 @@
 	 * No handler, we'll have to terminate things with extreme prejudice.
 	 */
 	bust_spinlocks(1);
-	printk(KERN_ALERT
-		"Unable to handle kernel %s at virtual address %08lx\n",
-		(addr < PAGE_SIZE) ? "NULL pointer dereference" :
-		"paging request", addr);
+	pr_alert("Unable to handle kernel %s at virtual address %08lx\n",
+		 (addr < PAGE_SIZE) ? "NULL pointer dereference" :
+		 "paging request", addr);
 
 	show_pte(mm, addr);
 	die("Oops", regs, fsr);
@@ -551,7 +550,7 @@
 	if (!inf->fn(addr, fsr & ~FSR_LNX_PF, regs))
 		return;
 
-	printk(KERN_ALERT "Unhandled fault: %s (0x%03x) at 0x%08lx\n",
+	pr_alert("Unhandled fault: %s (0x%03x) at 0x%08lx\n",
 		inf->name, fsr, addr);
 
 	info.si_signo = inf->sig;
@@ -583,7 +582,7 @@
 	if (!inf->fn(addr, ifsr | FSR_LNX_PF, regs))
 		return;
 
-	printk(KERN_ALERT "Unhandled prefetch abort: %s (0x%03x) at 0x%08lx\n",
+	pr_alert("Unhandled prefetch abort: %s (0x%03x) at 0x%08lx\n",
 		inf->name, ifsr, addr);
 
 	info.si_signo = inf->sig;
diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c
index 265b836..34b66af 100644
--- a/arch/arm/mm/flush.c
+++ b/arch/arm/mm/flush.c
@@ -33,7 +33,7 @@
 	asm(	"mcrr	p15, 0, %1, %0, c14\n"
 	"	mcr	p15, 0, %2, c7, c10, 4"
 	    :
-	    : "r" (to), "r" (to + PAGE_SIZE - L1_CACHE_BYTES), "r" (zero)
+	    : "r" (to), "r" (to + PAGE_SIZE - 1), "r" (zero)
 	    : "cc");
 }
 
diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c
index 8106198..b98895d 100644
--- a/arch/arm/mm/highmem.c
+++ b/arch/arm/mm/highmem.c
@@ -128,8 +128,11 @@
 {
 	unsigned long vaddr;
 	int idx, type;
+	struct page *page = pfn_to_page(pfn);
 
 	pagefault_disable();
+	if (!PageHighMem(page))
+		return page_address(page);
 
 	type = kmap_atomic_idx_push();
 	idx = type + KM_TYPE_NR * smp_processor_id();
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 67b1542..ba87b1b 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -68,7 +68,7 @@
 
 static int __init parse_tag_initrd(const struct tag *tag)
 {
-	printk(KERN_WARNING "ATAG_INITRD is deprecated; "
+	pr_warn("ATAG_INITRD is deprecated; "
 		"please update your bootloader.\n");
 	phys_initrd_start = __virt_to_phys(tag->u.initrd.start);
 	phys_initrd_size = tag->u.initrd.size;
@@ -545,7 +545,7 @@
 #define MLM(b, t) b, t, ((t) - (b)) >> 20
 #define MLK_ROUNDUP(b, t) b, t, DIV_ROUND_UP(((t) - (b)), SZ_1K)
 
-	printk(KERN_NOTICE "Virtual kernel memory layout:\n"
+	pr_notice("Virtual kernel memory layout:\n"
 			"    vector  : 0x%08lx - 0x%08lx   (%4ld kB)\n"
 #ifdef CONFIG_HAVE_TCM
 			"    DTCM    : 0x%08lx - 0x%08lx   (%4ld kB)\n"
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index a7b12cb..cda7c40 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -53,6 +53,8 @@
  */
 pmd_t *top_pmd;
 
+pmdval_t user_pmd_table = _PAGE_USER_TABLE;
+
 #define CPOLICY_UNCACHED	0
 #define CPOLICY_BUFFERED	1
 #define CPOLICY_WRITETHROUGH	2
@@ -193,7 +195,7 @@
 static int __init early_nocache(char *__unused)
 {
 	char *p = "buffered";
-	printk(KERN_WARNING "nocache is deprecated; use cachepolicy=%s\n", p);
+	pr_warn("nocache is deprecated; use cachepolicy=%s\n", p);
 	early_cachepolicy(p);
 	return 0;
 }
@@ -202,7 +204,7 @@
 static int __init early_nowrite(char *__unused)
 {
 	char *p = "uncached";
-	printk(KERN_WARNING "nowb is deprecated; use cachepolicy=%s\n", p);
+	pr_warn("nowb is deprecated; use cachepolicy=%s\n", p);
 	early_cachepolicy(p);
 	return 0;
 }
@@ -355,44 +357,6 @@
 }
 EXPORT_SYMBOL(get_mem_type);
 
-#define PTE_SET_FN(_name, pteop) \
-static int pte_set_##_name(pte_t *ptep, pgtable_t token, unsigned long addr, \
-			void *data) \
-{ \
-	pte_t pte = pteop(*ptep); \
-\
-	set_pte_ext(ptep, pte, 0); \
-	return 0; \
-} \
-
-#define SET_MEMORY_FN(_name, callback) \
-int set_memory_##_name(unsigned long addr, int numpages) \
-{ \
-	unsigned long start = addr; \
-	unsigned long size = PAGE_SIZE*numpages; \
-	unsigned end = start + size; \
-\
-	if (start < MODULES_VADDR || start >= MODULES_END) \
-		return -EINVAL;\
-\
-	if (end < MODULES_VADDR || end >= MODULES_END) \
-		return -EINVAL; \
-\
-	apply_to_page_range(&init_mm, start, size, callback, NULL); \
-	flush_tlb_kernel_range(start, end); \
-	return 0;\
-}
-
-PTE_SET_FN(ro, pte_wrprotect)
-PTE_SET_FN(rw, pte_mkwrite)
-PTE_SET_FN(x, pte_mkexec)
-PTE_SET_FN(nx, pte_mknexec)
-
-SET_MEMORY_FN(ro, pte_set_ro)
-SET_MEMORY_FN(rw, pte_set_rw)
-SET_MEMORY_FN(x, pte_set_x)
-SET_MEMORY_FN(nx, pte_set_nx)
-
 /*
  * To avoid TLB flush broadcasts, this uses local_flush_tlb_kernel_range().
  * As a result, this can only be called with preemption disabled, as under
@@ -552,14 +516,23 @@
 	hyp_device_pgprot = mem_types[MT_DEVICE].prot_pte;
 	s2_device_pgprot = mem_types[MT_DEVICE].prot_pte_s2;
 
+#ifndef CONFIG_ARM_LPAE
 	/*
 	 * We don't use domains on ARMv6 (since this causes problems with
 	 * v6/v7 kernels), so we must use a separate memory type for user
 	 * r/o, kernel r/w to map the vectors page.
 	 */
-#ifndef CONFIG_ARM_LPAE
 	if (cpu_arch == CPU_ARCH_ARMv6)
 		vecs_pgprot |= L_PTE_MT_VECTORS;
+
+	/*
+	 * Check is it with support for the PXN bit
+	 * in the Short-descriptor translation table format descriptors.
+	 */
+	if (cpu_arch == CPU_ARCH_ARMv7 &&
+		(read_cpuid_ext(CPUID_EXT_MMFR0) & 0xF) == 4) {
+		user_pmd_table |= PMD_PXNTABLE;
+	}
 #endif
 
 	/*
@@ -629,6 +602,11 @@
 	}
 	kern_pgprot |= PTE_EXT_AF;
 	vecs_pgprot |= PTE_EXT_AF;
+
+	/*
+	 * Set PXN for user mappings
+	 */
+	user_pgprot |= PTE_EXT_PXN;
 #endif
 
 	for (i = 0; i < 16; i++) {
@@ -810,8 +788,7 @@
 	length = PAGE_ALIGN(md->length);
 
 	if (!(cpu_architecture() >= CPU_ARCH_ARMv6 || cpu_is_xsc3())) {
-		printk(KERN_ERR "MM: CPU does not support supersection "
-		       "mapping for 0x%08llx at 0x%08lx\n",
+		pr_err("MM: CPU does not support supersection mapping for 0x%08llx at 0x%08lx\n",
 		       (long long)__pfn_to_phys((u64)md->pfn), addr);
 		return;
 	}
@@ -823,15 +800,13 @@
 	 *	of the actual domain assignments in use.
 	 */
 	if (type->domain) {
-		printk(KERN_ERR "MM: invalid domain in supersection "
-		       "mapping for 0x%08llx at 0x%08lx\n",
+		pr_err("MM: invalid domain in supersection mapping for 0x%08llx at 0x%08lx\n",
 		       (long long)__pfn_to_phys((u64)md->pfn), addr);
 		return;
 	}
 
 	if ((addr | length | __pfn_to_phys(md->pfn)) & ~SUPERSECTION_MASK) {
-		printk(KERN_ERR "MM: cannot create mapping for 0x%08llx"
-		       " at 0x%08lx invalid alignment\n",
+		pr_err("MM: cannot create mapping for 0x%08llx at 0x%08lx invalid alignment\n",
 		       (long long)__pfn_to_phys((u64)md->pfn), addr);
 		return;
 	}
@@ -874,18 +849,16 @@
 	pgd_t *pgd;
 
 	if (md->virtual != vectors_base() && md->virtual < TASK_SIZE) {
-		printk(KERN_WARNING "BUG: not creating mapping for 0x%08llx"
-		       " at 0x%08lx in user region\n",
-		       (long long)__pfn_to_phys((u64)md->pfn), md->virtual);
+		pr_warn("BUG: not creating mapping for 0x%08llx at 0x%08lx in user region\n",
+			(long long)__pfn_to_phys((u64)md->pfn), md->virtual);
 		return;
 	}
 
 	if ((md->type == MT_DEVICE || md->type == MT_ROM) &&
 	    md->virtual >= PAGE_OFFSET &&
 	    (md->virtual < VMALLOC_START || md->virtual >= VMALLOC_END)) {
-		printk(KERN_WARNING "BUG: mapping for 0x%08llx"
-		       " at 0x%08lx out of vmalloc space\n",
-		       (long long)__pfn_to_phys((u64)md->pfn), md->virtual);
+		pr_warn("BUG: mapping for 0x%08llx at 0x%08lx out of vmalloc space\n",
+			(long long)__pfn_to_phys((u64)md->pfn), md->virtual);
 	}
 
 	type = &mem_types[md->type];
@@ -905,9 +878,8 @@
 	length = PAGE_ALIGN(md->length + (md->virtual & ~PAGE_MASK));
 
 	if (type->prot_l1 == 0 && ((addr | phys | length) & ~SECTION_MASK)) {
-		printk(KERN_WARNING "BUG: map for 0x%08llx at 0x%08lx can not "
-		       "be mapped using pages, ignoring.\n",
-		       (long long)__pfn_to_phys(md->pfn), addr);
+		pr_warn("BUG: map for 0x%08llx at 0x%08lx can not be mapped using pages, ignoring.\n",
+			(long long)__pfn_to_phys(md->pfn), addr);
 		return;
 	}
 
@@ -1077,15 +1049,13 @@
 
 	if (vmalloc_reserve < SZ_16M) {
 		vmalloc_reserve = SZ_16M;
-		printk(KERN_WARNING
-			"vmalloc area too small, limiting to %luMB\n",
+		pr_warn("vmalloc area too small, limiting to %luMB\n",
 			vmalloc_reserve >> 20);
 	}
 
 	if (vmalloc_reserve > VMALLOC_END - (PAGE_OFFSET + SZ_32M)) {
 		vmalloc_reserve = VMALLOC_END - (PAGE_OFFSET + SZ_32M);
-		printk(KERN_WARNING
-			"vmalloc area is too big, limiting to %luMB\n",
+		pr_warn("vmalloc area is too big, limiting to %luMB\n",
 			vmalloc_reserve >> 20);
 	}
 
@@ -1118,7 +1088,7 @@
 
 			if (highmem) {
 				pr_notice("Ignoring RAM at %pa-%pa (!CONFIG_HIGHMEM)\n",
-					&block_start, &block_end);
+					  &block_start, &block_end);
 				memblock_remove(reg->base, reg->size);
 				continue;
 			}
@@ -1127,7 +1097,7 @@
 				phys_addr_t overlap_size = reg->size - size_limit;
 
 				pr_notice("Truncating RAM at %pa-%pa to -%pa",
-				      &block_start, &block_end, &vmalloc_limit);
+					  &block_start, &block_end, &vmalloc_limit);
 				memblock_remove(vmalloc_limit, overlap_size);
 				block_end = vmalloc_limit;
 			}
diff --git a/arch/arm/mm/pageattr.c b/arch/arm/mm/pageattr.c
new file mode 100644
index 0000000..004e35c
--- /dev/null
+++ b/arch/arm/mm/pageattr.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2014, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#include <linux/mm.h>
+#include <linux/module.h>
+
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+
+struct page_change_data {
+	pgprot_t set_mask;
+	pgprot_t clear_mask;
+};
+
+static int change_page_range(pte_t *ptep, pgtable_t token, unsigned long addr,
+			void *data)
+{
+	struct page_change_data *cdata = data;
+	pte_t pte = *ptep;
+
+	pte = clear_pte_bit(pte, cdata->clear_mask);
+	pte = set_pte_bit(pte, cdata->set_mask);
+
+	set_pte_ext(ptep, pte, 0);
+	return 0;
+}
+
+static int change_memory_common(unsigned long addr, int numpages,
+				pgprot_t set_mask, pgprot_t clear_mask)
+{
+	unsigned long start = addr;
+	unsigned long size = PAGE_SIZE*numpages;
+	unsigned long end = start + size;
+	int ret;
+	struct page_change_data data;
+
+	if (!IS_ALIGNED(addr, PAGE_SIZE)) {
+		start &= PAGE_MASK;
+		end = start + size;
+		WARN_ON_ONCE(1);
+	}
+
+	if (!is_module_address(start) || !is_module_address(end - 1))
+		return -EINVAL;
+
+	data.set_mask = set_mask;
+	data.clear_mask = clear_mask;
+
+	ret = apply_to_page_range(&init_mm, start, size, change_page_range,
+					&data);
+
+	flush_tlb_kernel_range(start, end);
+	return ret;
+}
+
+int set_memory_ro(unsigned long addr, int numpages)
+{
+	return change_memory_common(addr, numpages,
+					__pgprot(L_PTE_RDONLY),
+					__pgprot(0));
+}
+
+int set_memory_rw(unsigned long addr, int numpages)
+{
+	return change_memory_common(addr, numpages,
+					__pgprot(0),
+					__pgprot(L_PTE_RDONLY));
+}
+
+int set_memory_nx(unsigned long addr, int numpages)
+{
+	return change_memory_common(addr, numpages,
+					__pgprot(L_PTE_XN),
+					__pgprot(0));
+}
+
+int set_memory_x(unsigned long addr, int numpages)
+{
+	return change_memory_common(addr, numpages,
+					__pgprot(0),
+					__pgprot(L_PTE_XN));
+}
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index b3a9478..8b4ee5e 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -270,7 +270,6 @@
 /* Auxiliary Debug Modes Control 1 Register */
 #define PJ4B_STATIC_BP (1 << 2) /* Enable Static BP */
 #define PJ4B_INTER_PARITY (1 << 8) /* Disable Internal Parity Handling */
-#define PJ4B_BCK_OFF_STREX (1 << 5) /* Enable the back off of STREX instr */
 #define PJ4B_CLEAN_LINE (1 << 16) /* Disable data transfer for clean line */
 
 /* Auxiliary Debug Modes Control 2 Register */
@@ -293,7 +292,6 @@
 	/* Auxiliary Debug Modes Control 1 Register */
 	mrc	p15, 1,	r0, c15, c1, 1
 	orr     r0, r0, #PJ4B_CLEAN_LINE
-	orr     r0, r0, #PJ4B_BCK_OFF_STREX
 	orr     r0, r0, #PJ4B_INTER_PARITY
 	bic	r0, r0, #PJ4B_STATIC_BP
 	mcr	p15, 1,	r0, c15, c1, 1
@@ -593,9 +591,10 @@
 	/*
 	 * Some Krait processors don't indicate support for SDIV and UDIV
 	 * instructions in the ARM instruction set, even though they actually
-	 * do support them.
+	 * do support them. They also don't indicate support for fused multiply
+	 * instructions even though they actually do support them.
 	 */
-	__v7_proc __v7_setup, hwcaps = HWCAP_IDIV
+	__v7_proc __v7_setup, hwcaps = HWCAP_IDIV | HWCAP_VFPv4
 	.size	__krait_proc_info, . - __krait_proc_info
 
 	/*
diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S
index 23259f10..afa2b3c 100644
--- a/arch/arm/mm/proc-xscale.S
+++ b/arch/arm/mm/proc-xscale.S
@@ -535,7 +535,7 @@
 	mrc	p15, 0, r5, c15, c1, 0	@ CP access reg
 	mrc	p15, 0, r6, c13, c0, 0	@ PID
 	mrc	p15, 0, r7, c3, c0, 0	@ domain ID
-	mrc	p15, 0, r8, c1, c1, 0	@ auxiliary control reg
+	mrc	p15, 0, r8, c1, c0, 1	@ auxiliary control reg
 	mrc	p15, 0, r9, c1, c0, 0	@ control reg
 	bic	r4, r4, #2		@ clear frequency change bit
 	stmia	r0, {r4 - r9}		@ store cp regs
@@ -552,7 +552,7 @@
 	mcr	p15, 0, r6, c13, c0, 0	@ PID
 	mcr	p15, 0, r7, c3, c0, 0	@ domain ID
 	mcr	p15, 0, r1, c2, c0, 0	@ translation table base addr
-	mcr	p15, 0, r8, c1, c1, 0	@ auxiliary control reg
+	mcr	p15, 0, r8, c1, c0, 1	@ auxiliary control reg
 	mov	r0, r9			@ control register
 	b	cpu_resume_mmu
 ENDPROC(cpu_xscale_do_resume)
diff --git a/arch/arm/nwfpe/fpmodule.c b/arch/arm/nwfpe/fpmodule.c
index 4e729f0..ec717c1 100644
--- a/arch/arm/nwfpe/fpmodule.c
+++ b/arch/arm/nwfpe/fpmodule.c
@@ -86,20 +86,20 @@
 static int __init fpe_init(void)
 {
 	if (sizeof(FPA11) > sizeof(union fp_state)) {
-		printk(KERN_ERR "nwfpe: bad structure size\n");
+		pr_err("nwfpe: bad structure size\n");
 		return -EINVAL;
 	}
 
 	if (sizeof(FPREG) != 12) {
-		printk(KERN_ERR "nwfpe: bad register size\n");
+		pr_err("nwfpe: bad register size\n");
 		return -EINVAL;
 	}
 	if (fpe_type[0] && strcmp(fpe_type, "nwfpe"))
 		return 0;
 
 	/* Display title, version and copyright information. */
-	printk(KERN_WARNING "NetWinder Floating Point Emulator V0.97 ("
-	       NWFPE_BITS " precision)\n");
+	pr_info("NetWinder Floating Point Emulator V0.97 ("
+	        NWFPE_BITS " precision)\n");
 
 	thread_register_notifier(&nwfpe_notifier_block);
 
diff --git a/arch/arm/vfp/vfphw.S b/arch/arm/vfp/vfphw.S
index cda654c..f74a8f7 100644
--- a/arch/arm/vfp/vfphw.S
+++ b/arch/arm/vfp/vfphw.S
@@ -197,6 +197,12 @@
 	tst	r5, #FPSCR_IXE
 	bne	process_exception
 
+	tst	r5, #FPSCR_LENGTH_MASK
+	beq	skip
+	orr	r1, r1, #FPEXC_DEX
+	b	process_exception
+skip:
+
 	@ Fall into hand on to next handler - appropriate coproc instr
 	@ not recognised by VFP
 
diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c
index 2f37e1d..f6e4d56 100644
--- a/arch/arm/vfp/vfpmodule.c
+++ b/arch/arm/vfp/vfpmodule.c
@@ -738,63 +738,73 @@
 	vfp_vector = vfp_null_entry;
 
 	pr_info("VFP support v0.3: ");
-	if (VFP_arch)
+	if (VFP_arch) {
 		pr_cont("not present\n");
-	else if (vfpsid & FPSID_NODOUBLE) {
-		pr_cont("no double precision support\n");
-	} else {
-		hotcpu_notifier(vfp_hotplug, 0);
-
-		VFP_arch = (vfpsid & FPSID_ARCH_MASK) >> FPSID_ARCH_BIT;  /* Extract the architecture version */
-		pr_cont("implementor %02x architecture %d part %02x variant %x rev %x\n",
-			(vfpsid & FPSID_IMPLEMENTER_MASK) >> FPSID_IMPLEMENTER_BIT,
-			(vfpsid & FPSID_ARCH_MASK) >> FPSID_ARCH_BIT,
-			(vfpsid & FPSID_PART_MASK) >> FPSID_PART_BIT,
-			(vfpsid & FPSID_VARIANT_MASK) >> FPSID_VARIANT_BIT,
-			(vfpsid & FPSID_REV_MASK) >> FPSID_REV_BIT);
-
-		vfp_vector = vfp_support_entry;
-
-		thread_register_notifier(&vfp_notifier_block);
-		vfp_pm_init();
-
-		/*
-		 * We detected VFP, and the support code is
-		 * in place; report VFP support to userspace.
-		 */
-		elf_hwcap |= HWCAP_VFP;
-#ifdef CONFIG_VFPv3
-		if (VFP_arch >= 2) {
-			elf_hwcap |= HWCAP_VFPv3;
-
-			/*
-			 * Check for VFPv3 D16 and VFPv4 D16.  CPUs in
-			 * this configuration only have 16 x 64bit
-			 * registers.
-			 */
-			if (((fmrx(MVFR0) & MVFR0_A_SIMD_MASK)) == 1)
-				elf_hwcap |= HWCAP_VFPv3D16; /* also v4-D16 */
-			else
-				elf_hwcap |= HWCAP_VFPD32;
-		}
-#endif
+		return 0;
+	/* Extract the architecture on CPUID scheme */
+	} else if ((read_cpuid_id() & 0x000f0000) == 0x000f0000) {
+		VFP_arch = vfpsid & FPSID_CPUID_ARCH_MASK;
+		VFP_arch >>= FPSID_ARCH_BIT;
 		/*
 		 * Check for the presence of the Advanced SIMD
 		 * load/store instructions, integer and single
 		 * precision floating point operations. Only check
 		 * for NEON if the hardware has the MVFR registers.
 		 */
-		if ((read_cpuid_id() & 0x000f0000) == 0x000f0000) {
-#ifdef CONFIG_NEON
-			if ((fmrx(MVFR1) & 0x000fff00) == 0x00011100)
-				elf_hwcap |= HWCAP_NEON;
-#endif
-#ifdef CONFIG_VFPv3
+		if (IS_ENABLED(CONFIG_NEON) &&
+		   (fmrx(MVFR1) & 0x000fff00) == 0x00011100)
+			elf_hwcap |= HWCAP_NEON;
+
+		if (IS_ENABLED(CONFIG_VFPv3)) {
+			u32 mvfr0 = fmrx(MVFR0);
+			if (((mvfr0 & MVFR0_DP_MASK) >> MVFR0_DP_BIT) == 0x2 ||
+			    ((mvfr0 & MVFR0_SP_MASK) >> MVFR0_SP_BIT) == 0x2) {
+				elf_hwcap |= HWCAP_VFPv3;
+				/*
+				 * Check for VFPv3 D16 and VFPv4 D16.  CPUs in
+				 * this configuration only have 16 x 64bit
+				 * registers.
+				 */
+				if ((mvfr0 & MVFR0_A_SIMD_MASK) == 1)
+					/* also v4-D16 */
+					elf_hwcap |= HWCAP_VFPv3D16;
+				else
+					elf_hwcap |= HWCAP_VFPD32;
+			}
+
 			if ((fmrx(MVFR1) & 0xf0000000) == 0x10000000)
 				elf_hwcap |= HWCAP_VFPv4;
-#endif
 		}
+	/* Extract the architecture version on pre-cpuid scheme */
+	} else {
+		if (vfpsid & FPSID_NODOUBLE) {
+			pr_cont("no double precision support\n");
+			return 0;
+		}
+
+		VFP_arch = (vfpsid & FPSID_ARCH_MASK) >> FPSID_ARCH_BIT;
 	}
+
+	hotcpu_notifier(vfp_hotplug, 0);
+
+	vfp_vector = vfp_support_entry;
+
+	thread_register_notifier(&vfp_notifier_block);
+	vfp_pm_init();
+
+	/*
+	 * We detected VFP, and the support code is
+	 * in place; report VFP support to userspace.
+	 */
+	elf_hwcap |= HWCAP_VFP;
+
+	pr_cont("implementor %02x architecture %d part %02x variant %x rev %x\n",
+		(vfpsid & FPSID_IMPLEMENTER_MASK) >> FPSID_IMPLEMENTER_BIT,
+		VFP_arch,
+		(vfpsid & FPSID_PART_MASK) >> FPSID_PART_BIT,
+		(vfpsid & FPSID_VARIANT_MASK) >> FPSID_VARIANT_BIT,
+		(vfpsid & FPSID_REV_MASK) >> FPSID_REV_BIT);
+
 	return 0;
 }
 
diff --git a/arch/arm/vfp/vfpsingle.c b/arch/arm/vfp/vfpsingle.c
index 4f96c16..f0465ba 100644
--- a/arch/arm/vfp/vfpsingle.c
+++ b/arch/arm/vfp/vfpsingle.c
@@ -290,7 +290,7 @@
 	u32 z, a;
 
 	if ((significand & 0xc0000000) != 0x40000000) {
-		printk(KERN_WARNING "VFP: estimate_sqrt: invalid significand\n");
+		pr_warn("VFP: estimate_sqrt: invalid significand\n");
 	}
 
 	a = significand << 1;
diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c
index 47bbdc1..f8e3bb4 100644
--- a/drivers/amba/bus.c
+++ b/drivers/amba/bus.c
@@ -95,8 +95,12 @@
 	struct amba_device *pcdev = to_amba_device(dev);
 	int ret = pm_generic_runtime_suspend(dev);
 
-	if (ret == 0 && dev->driver)
-		clk_disable_unprepare(pcdev->pclk);
+	if (ret == 0 && dev->driver) {
+		if (pm_runtime_is_irq_safe(dev))
+			clk_disable(pcdev->pclk);
+		else
+			clk_disable_unprepare(pcdev->pclk);
+	}
 
 	return ret;
 }
@@ -107,7 +111,10 @@
 	int ret;
 
 	if (dev->driver) {
-		ret = clk_prepare_enable(pcdev->pclk);
+		if (pm_runtime_is_irq_safe(dev))
+			ret = clk_enable(pcdev->pclk);
+		else
+			ret = clk_prepare_enable(pcdev->pclk);
 		/* Failure is probably fatal to the system, but... */
 		if (ret)
 			return ret;
@@ -115,7 +122,7 @@
 
 	return pm_generic_runtime_resume(dev);
 }
-#endif
+#endif /* CONFIG_PM */
 
 static const struct dev_pm_ops amba_pm = {
 	.suspend	= pm_generic_suspend,
diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index 4839bfa..2d324f7 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -27,6 +27,7 @@
 #include <linux/of.h>
 #include <linux/of_dma.h>
 #include <linux/err.h>
+#include <linux/pm_runtime.h>
 
 #include "dmaengine.h"
 #define PL330_MAX_CHAN		8
@@ -265,6 +266,9 @@
 
 #define NR_DEFAULT_DESC	16
 
+/* Delay for runtime PM autosuspend, ms */
+#define PL330_AUTOSUSPEND_DELAY 20
+
 /* Populated by the PL330 core driver for DMA API driver's info */
 struct pl330_config {
 	u32	periph_id;
@@ -1958,6 +1962,7 @@
 	struct dma_pl330_chan *pch = (struct dma_pl330_chan *)data;
 	struct dma_pl330_desc *desc, *_dt;
 	unsigned long flags;
+	bool power_down = false;
 
 	spin_lock_irqsave(&pch->lock, flags);
 
@@ -1972,10 +1977,17 @@
 	/* Try to submit a req imm. next to the last completed cookie */
 	fill_queue(pch);
 
-	/* Make sure the PL330 Channel thread is active */
-	spin_lock(&pch->thread->dmac->lock);
-	_start(pch->thread);
-	spin_unlock(&pch->thread->dmac->lock);
+	if (list_empty(&pch->work_list)) {
+		spin_lock(&pch->thread->dmac->lock);
+		_stop(pch->thread);
+		spin_unlock(&pch->thread->dmac->lock);
+		power_down = true;
+	} else {
+		/* Make sure the PL330 Channel thread is active */
+		spin_lock(&pch->thread->dmac->lock);
+		_start(pch->thread);
+		spin_unlock(&pch->thread->dmac->lock);
+	}
 
 	while (!list_empty(&pch->completed_list)) {
 		dma_async_tx_callback callback;
@@ -1990,6 +2002,12 @@
 		if (pch->cyclic) {
 			desc->status = PREP;
 			list_move_tail(&desc->node, &pch->work_list);
+			if (power_down) {
+				spin_lock(&pch->thread->dmac->lock);
+				_start(pch->thread);
+				spin_unlock(&pch->thread->dmac->lock);
+				power_down = false;
+			}
 		} else {
 			desc->status = FREE;
 			list_move_tail(&desc->node, &pch->dmac->desc_pool);
@@ -2004,6 +2022,12 @@
 		}
 	}
 	spin_unlock_irqrestore(&pch->lock, flags);
+
+	/* If work list empty, power down */
+	if (power_down) {
+		pm_runtime_mark_last_busy(pch->dmac->ddma.dev);
+		pm_runtime_put_autosuspend(pch->dmac->ddma.dev);
+	}
 }
 
 bool pl330_filter(struct dma_chan *chan, void *param)
@@ -2073,6 +2097,7 @@
 
 	switch (cmd) {
 	case DMA_TERMINATE_ALL:
+		pm_runtime_get_sync(pl330->ddma.dev);
 		spin_lock_irqsave(&pch->lock, flags);
 
 		spin_lock(&pl330->lock);
@@ -2099,10 +2124,15 @@
 			dma_cookie_complete(&desc->txd);
 		}
 
+		if (!list_empty(&pch->work_list))
+			pm_runtime_put(pl330->ddma.dev);
+
 		list_splice_tail_init(&pch->submitted_list, &pl330->desc_pool);
 		list_splice_tail_init(&pch->work_list, &pl330->desc_pool);
 		list_splice_tail_init(&pch->completed_list, &pl330->desc_pool);
 		spin_unlock_irqrestore(&pch->lock, flags);
+		pm_runtime_mark_last_busy(pl330->ddma.dev);
+		pm_runtime_put_autosuspend(pl330->ddma.dev);
 		break;
 	case DMA_SLAVE_CONFIG:
 		slave_config = (struct dma_slave_config *)arg;
@@ -2138,6 +2168,7 @@
 
 	tasklet_kill(&pch->task);
 
+	pm_runtime_get_sync(pch->dmac->ddma.dev);
 	spin_lock_irqsave(&pch->lock, flags);
 
 	pl330_release_channel(pch->thread);
@@ -2147,6 +2178,8 @@
 		list_splice_tail_init(&pch->work_list, &pch->dmac->desc_pool);
 
 	spin_unlock_irqrestore(&pch->lock, flags);
+	pm_runtime_mark_last_busy(pch->dmac->ddma.dev);
+	pm_runtime_put_autosuspend(pch->dmac->ddma.dev);
 }
 
 static enum dma_status
@@ -2162,6 +2195,15 @@
 	unsigned long flags;
 
 	spin_lock_irqsave(&pch->lock, flags);
+	if (list_empty(&pch->work_list)) {
+		/*
+		 * Warn on nothing pending. Empty submitted_list may
+		 * break our pm_runtime usage counter as it is
+		 * updated on work_list emptiness status.
+		 */
+		WARN_ON(list_empty(&pch->submitted_list));
+		pm_runtime_get_sync(pch->dmac->ddma.dev);
+	}
 	list_splice_tail_init(&pch->submitted_list, &pch->work_list);
 	spin_unlock_irqrestore(&pch->lock, flags);
 
@@ -2585,6 +2627,46 @@
 	return 0;
 }
 
+/*
+ * Runtime PM callbacks are provided by amba/bus.c driver.
+ *
+ * It is assumed here that IRQ safe runtime PM is chosen in probe and amba
+ * bus driver will only disable/enable the clock in runtime PM callbacks.
+ */
+static int __maybe_unused pl330_suspend(struct device *dev)
+{
+	struct amba_device *pcdev = to_amba_device(dev);
+
+	pm_runtime_disable(dev);
+
+	if (!pm_runtime_status_suspended(dev)) {
+		/* amba did not disable the clock */
+		amba_pclk_disable(pcdev);
+	}
+	amba_pclk_unprepare(pcdev);
+
+	return 0;
+}
+
+static int __maybe_unused pl330_resume(struct device *dev)
+{
+	struct amba_device *pcdev = to_amba_device(dev);
+	int ret;
+
+	ret = amba_pclk_prepare(pcdev);
+	if (ret)
+		return ret;
+
+	if (!pm_runtime_status_suspended(dev))
+		ret = amba_pclk_enable(pcdev);
+
+	pm_runtime_enable(dev);
+
+	return ret;
+}
+
+static SIMPLE_DEV_PM_OPS(pl330_pm, pl330_suspend, pl330_resume);
+
 static int
 pl330_probe(struct amba_device *adev, const struct amba_id *id)
 {
@@ -2738,6 +2820,12 @@
 		pcfg->data_buf_dep, pcfg->data_bus_width / 8, pcfg->num_chan,
 		pcfg->num_peri, pcfg->num_events);
 
+	pm_runtime_irq_safe(&adev->dev);
+	pm_runtime_use_autosuspend(&adev->dev);
+	pm_runtime_set_autosuspend_delay(&adev->dev, PL330_AUTOSUSPEND_DELAY);
+	pm_runtime_mark_last_busy(&adev->dev);
+	pm_runtime_put_autosuspend(&adev->dev);
+
 	return 0;
 probe_err3:
 	/* Idle the DMAC */
@@ -2764,6 +2852,8 @@
 	struct pl330_dmac *pl330 = amba_get_drvdata(adev);
 	struct dma_pl330_chan *pch, *_p;
 
+	pm_runtime_get_noresume(pl330->ddma.dev);
+
 	if (adev->dev.of_node)
 		of_dma_controller_free(adev->dev.of_node);
 
@@ -2802,6 +2892,7 @@
 	.drv = {
 		.owner = THIS_MODULE,
 		.name = "dma-pl330",
+		.pm = &pl330_pm,
 	},
 	.id_table = pl330_ids,
 	.probe = pl330_probe,
diff --git a/drivers/pcmcia/sa1100_generic.c b/drivers/pcmcia/sa1100_generic.c
index ff8a027..d2ab060 100644
--- a/drivers/pcmcia/sa1100_generic.c
+++ b/drivers/pcmcia/sa1100_generic.c
@@ -93,6 +93,7 @@
 	for (i = 0; i < sinfo->nskt; i++)
 		soc_pcmcia_remove_one(&sinfo->skt[i]);
 
+	clk_put(sinfo->clk);
 	kfree(sinfo);
 	return 0;
 }
diff --git a/drivers/pcmcia/sa1111_generic.c b/drivers/pcmcia/sa1111_generic.c
index 65b02c3..7bae7e5 100644
--- a/drivers/pcmcia/sa1111_generic.c
+++ b/drivers/pcmcia/sa1111_generic.c
@@ -145,6 +145,12 @@
 			return -ENOMEM;
 
 		s->soc.nr = ops->first + i;
+		s->soc.clk = clk_get(&dev->dev, NULL);
+		if (IS_ERR(s->soc.clk)) {
+			ret = PTR_ERR(s->soc.clk);
+			kfree(s);
+			return ret;
+		}
 		soc_pcmcia_init_one(&s->soc, ops, &dev->dev);
 		s->dev = dev;
 		if (s->soc.nr) {
@@ -220,6 +226,7 @@
 	for (; s; s = next) {
 		next = s->next;
 		soc_pcmcia_remove_one(&s->soc);
+		clk_put(s->soc.clk);
 		kfree(s);
 	}
 
diff --git a/drivers/pcmcia/sa11xx_base.c b/drivers/pcmcia/sa11xx_base.c
index 54d3089..cf6de2c 100644
--- a/drivers/pcmcia/sa11xx_base.c
+++ b/drivers/pcmcia/sa11xx_base.c
@@ -135,14 +135,16 @@
 static int
 sa1100_pcmcia_set_timing(struct soc_pcmcia_socket *skt)
 {
-	return sa1100_pcmcia_set_mecr(skt, cpufreq_get(0));
+	unsigned long clk = clk_get_rate(skt->clk);
+
+	return sa1100_pcmcia_set_mecr(skt, clk / 1000);
 }
 
 static int
 sa1100_pcmcia_show_timing(struct soc_pcmcia_socket *skt, char *buf)
 {
 	struct soc_pcmcia_timing timing;
-	unsigned int clock = cpufreq_get(0);
+	unsigned int clock = clk_get_rate(skt->clk);
 	unsigned long mecr = MECR;
 	char *p = buf;
 
@@ -218,6 +220,11 @@
 	struct skt_dev_info *sinfo;
 	struct soc_pcmcia_socket *skt;
 	int i, ret = 0;
+	struct clk *clk;
+
+	clk = clk_get(dev, NULL);
+	if (IS_ERR(clk))
+		return PTR_ERR(clk);
 
 	sa11xx_drv_pcmcia_ops(ops);
 
@@ -226,12 +233,14 @@
 		return -ENOMEM;
 
 	sinfo->nskt = nr;
+	sinfo->clk = clk;
 
 	/* Initialize processor specific parameters */
 	for (i = 0; i < nr; i++) {
 		skt = &sinfo->skt[i];
 
 		skt->nr = first + i;
+		skt->clk = clk;
 		soc_pcmcia_init_one(skt, ops, dev);
 
 		ret = sa11xx_drv_pcmcia_add_one(skt);
@@ -242,6 +251,7 @@
 	if (ret) {
 		while (--i >= 0)
 			soc_pcmcia_remove_one(&sinfo->skt[i]);
+		clk_put(clk);
 		kfree(sinfo);
 	} else {
 		dev_set_drvdata(dev, sinfo);
diff --git a/drivers/pcmcia/soc_common.c b/drivers/pcmcia/soc_common.c
index a2bc6ee..933f465 100644
--- a/drivers/pcmcia/soc_common.c
+++ b/drivers/pcmcia/soc_common.c
@@ -120,6 +120,8 @@
 
 	if (skt->ops->hw_shutdown)
 		skt->ops->hw_shutdown(skt);
+
+	clk_disable_unprepare(skt->clk);
 }
 
 static void soc_pcmcia_hw_shutdown(struct soc_pcmcia_socket *skt)
@@ -131,6 +133,8 @@
 {
 	int ret = 0, i;
 
+	clk_prepare_enable(skt->clk);
+
 	if (skt->ops->hw_init) {
 		ret = skt->ops->hw_init(skt);
 		if (ret)
diff --git a/drivers/video/fbdev/sa1100fb.c b/drivers/video/fbdev/sa1100fb.c
index 9690216..c0abe27 100644
--- a/drivers/video/fbdev/sa1100fb.c
+++ b/drivers/video/fbdev/sa1100fb.c
@@ -178,6 +178,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/mutex.h>
 #include <linux/io.h>
+#include <linux/clk.h>
 
 #include <video/sa1100fb.h>
 
@@ -416,9 +417,9 @@
 		var->transp.offset);
 
 #ifdef CONFIG_CPU_FREQ
-	dev_dbg(fbi->dev, "dma period = %d ps, clock = %d kHz\n",
+	dev_dbg(fbi->dev, "dma period = %d ps, clock = %ld kHz\n",
 		sa1100fb_display_dma_period(var),
-		cpufreq_get(smp_processor_id()));
+		clk_get_rate(fbi->clk) / 1000);
 #endif
 
 	return 0;
@@ -592,9 +593,10 @@
  * Calculate the PCD value from the clock rate (in picoseconds).
  * We take account of the PPCR clock setting.
  */
-static inline unsigned int get_pcd(unsigned int pixclock, unsigned int cpuclock)
+static inline unsigned int get_pcd(struct sa1100fb_info *fbi,
+		unsigned int pixclock)
 {
-	unsigned int pcd = cpuclock / 100;
+	unsigned int pcd = clk_get_rate(fbi->clk) / 100 / 1000;
 
 	pcd *= pixclock;
 	pcd /= 10000000;
@@ -673,7 +675,7 @@
 		LCCR2_BegFrmDel(var->upper_margin) +
 		LCCR2_EndFrmDel(var->lower_margin);
 
-	pcd = get_pcd(var->pixclock, cpufreq_get(0));
+	pcd = get_pcd(fbi, var->pixclock);
 	new_regs.lccr3 = LCCR3_PixClkDiv(pcd) | fbi->inf->lccr3 |
 		(var->sync & FB_SYNC_HOR_HIGH_ACT ? LCCR3_HorSnchH : LCCR3_HorSnchL) |
 		(var->sync & FB_SYNC_VERT_HIGH_ACT ? LCCR3_VrtSnchH : LCCR3_VrtSnchL);
@@ -787,6 +789,9 @@
 	fbi->palette_cpu[0] &= 0xcfff;
 	fbi->palette_cpu[0] |= palette_pbs(&fbi->fb.var);
 
+	/* enable LCD controller clock */
+	clk_prepare_enable(fbi->clk);
+
 	/* Sequence from 11.7.10 */
 	writel_relaxed(fbi->reg_lccr3, fbi->base + LCCR3);
 	writel_relaxed(fbi->reg_lccr2, fbi->base + LCCR2);
@@ -831,6 +836,9 @@
 
 	schedule_timeout(20 * HZ / 1000);
 	remove_wait_queue(&fbi->ctrlr_wait, &wait);
+
+	/* disable LCD controller clock */
+	clk_disable_unprepare(fbi->clk);
 }
 
 /*
@@ -1009,7 +1017,6 @@
 			 void *data)
 {
 	struct sa1100fb_info *fbi = TO_INF(nb, freq_transition);
-	struct cpufreq_freqs *f = data;
 	u_int pcd;
 
 	switch (val) {
@@ -1018,7 +1025,7 @@
 		break;
 
 	case CPUFREQ_POSTCHANGE:
-		pcd = get_pcd(fbi->fb.var.pixclock, f->new);
+		pcd = get_pcd(fbi, fbi->fb.var.pixclock);
 		fbi->reg_lccr3 = (fbi->reg_lccr3 & ~0xff) | LCCR3_PixClkDiv(pcd);
 		set_ctrlr_state(fbi, C_ENABLE_CLKCHANGE);
 		break;
@@ -1225,6 +1232,13 @@
 	if (!fbi)
 		goto failed;
 
+	fbi->clk = clk_get(&pdev->dev, NULL);
+	if (IS_ERR(fbi->clk)) {
+		ret = PTR_ERR(fbi->clk);
+		fbi->clk = NULL;
+		goto failed;
+	}
+
 	fbi->base = ioremap(res->start, resource_size(res));
 	if (!fbi->base)
 		goto failed;
@@ -1277,6 +1291,8 @@
  failed:
 	if (fbi)
 		iounmap(fbi->base);
+	if (fbi->clk)
+		clk_put(fbi->clk);
 	kfree(fbi);
 	release_mem_region(res->start, resource_size(res));
 	return ret;
diff --git a/drivers/video/fbdev/sa1100fb.h b/drivers/video/fbdev/sa1100fb.h
index fc5d429..0139d13 100644
--- a/drivers/video/fbdev/sa1100fb.h
+++ b/drivers/video/fbdev/sa1100fb.h
@@ -68,6 +68,7 @@
 #endif
 
 	const struct sa1100fb_mach_info *inf;
+	struct clk *clk;
 };
 
 #define TO_INF(ptr,member)	container_of(ptr,struct sa1100fb_info,member)
diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h
index c324f57..ac02f9b 100644
--- a/include/linux/amba/bus.h
+++ b/include/linux/amba/bus.h
@@ -97,6 +97,16 @@
 #define amba_pclk_disable(d)	\
 	do { if (!IS_ERR((d)->pclk)) clk_disable((d)->pclk); } while (0)
 
+static inline int amba_pclk_prepare(struct amba_device *dev)
+{
+	return clk_prepare(dev->pclk);
+}
+
+static inline void amba_pclk_unprepare(struct amba_device *dev)
+{
+	clk_unprepare(dev->pclk);
+}
+
 /* Some drivers don't use the struct amba_device */
 #define AMBA_CONFIG_BITS(a) (((a) >> 24) & 0xff)
 #define AMBA_REV_BITS(a) (((a) >> 20) & 0x0f)
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index 367f49b..44d74f0 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -128,6 +128,11 @@
 	ACCESS_ONCE(dev->power.last_busy) = jiffies;
 }
 
+static inline bool pm_runtime_is_irq_safe(struct device *dev)
+{
+	return dev->power.irq_safe;
+}
+
 #else /* !CONFIG_PM_RUNTIME */
 
 static inline int __pm_runtime_idle(struct device *dev, int rpmflags)
@@ -167,6 +172,7 @@
 
 static inline void pm_runtime_no_callbacks(struct device *dev) {}
 static inline void pm_runtime_irq_safe(struct device *dev) {}
+static inline bool pm_runtime_is_irq_safe(struct device *dev) { return false; }
 
 static inline bool pm_runtime_callbacks_present(struct device *dev) { return false; }
 static inline void pm_runtime_mark_last_busy(struct device *dev) {}