Merge branch 'for-linus' of git://git.armlinux.org.uk/~rmk/linux-arm

Pull ARM updates from Russell King:

 - add support for ELF fdpic binaries on both MMU and noMMU platforms

 - linker script cleanups

 - support for compressed .data section for XIP images

 - discard memblock arrays when possible

 - various cleanups

 - atomic DMA pool updates

 - better diagnostics of missing/corrupt device tree

 - export information to allow userspace kexec tool to place images more
   inteligently, so that the device tree isn't overwritten by the
   booting kernel

 - make early_printk more efficient on semihosted systems

 - noMMU cleanups

 - SA1111 PCMCIA update in preparation for further cleanups

* 'for-linus' of git://git.armlinux.org.uk/~rmk/linux-arm: (38 commits)
  ARM: 8719/1: NOMMU: work around maybe-uninitialized warning
  ARM: 8717/2: debug printch/printascii: translate '\n' to "\r\n" not "\n\r"
  ARM: 8713/1: NOMMU: Support MPU in XIP configuration
  ARM: 8712/1: NOMMU: Use more MPU regions to cover memory
  ARM: 8711/1: V7M: Add support for MPU to M-class
  ARM: 8710/1: Kconfig: Kill CONFIG_VECTORS_BASE
  ARM: 8709/1: NOMMU: Disallow MPU for XIP
  ARM: 8708/1: NOMMU: Rework MPU to be mostly done in C
  ARM: 8707/1: NOMMU: Update MPU accessors to use cp15 helpers
  ARM: 8706/1: NOMMU: Move out MPU setup in separate module
  ARM: 8702/1: head-common.S: Clear lr before jumping to start_kernel()
  ARM: 8705/1: early_printk: use printascii() rather than printch()
  ARM: 8703/1: debug.S: move hexbuf to a writable section
  ARM: add additional table to compressed kernel
  ARM: decompressor: fix BSS size calculation
  pcmcia: sa1111: remove special sa1111 mmio accessors
  pcmcia: sa1111: use sa1111_get_irq() to obtain IRQ resources
  ARM: better diagnostics with missing/corrupt dtb
  ARM: 8699/1: dma-mapping: Remove init_dma_coherent_pool_size()
  ARM: 8698/1: dma-mapping: Mark atomic_pool as __ro_after_init
  ..
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index d1346a1..4f3ee0e 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -3,6 +3,7 @@
 	bool
 	default y
 	select ARCH_CLOCKSOURCE_DATA
+	select ARCH_DISCARD_MEMBLOCK if !HAVE_ARCH_PFN_VALID
 	select ARCH_HAS_DEBUG_VIRTUAL
 	select ARCH_HAS_DEVMEM_IS_ALLOWED
 	select ARCH_HAS_ELF_RANDOMIZE
@@ -240,15 +241,6 @@
 config ARCH_MTD_XIP
 	bool
 
-config VECTORS_BASE
-	hex
-	default 0xffff0000 if MMU || CPU_HIGH_VECTOR
-	default DRAM_BASE if REMAP_VECTORS_TO_RAM
-	default 0x00000000
-	help
-	  The base address of exception vectors.  This must be two pages
-	  in size.
-
 config ARM_PATCH_PHYS_VIRT
 	bool "Patch physical to virtual translations at runtime" if EMBEDDED
 	default y
@@ -2006,6 +1998,17 @@
 	  be linked for and stored to.  This address is dependent on your
 	  own flash usage.
 
+config XIP_DEFLATED_DATA
+	bool "Store kernel .data section compressed in ROM"
+	depends on XIP_KERNEL
+	select ZLIB_INFLATE
+	help
+	  Before the kernel is actually executed, its .data section has to be
+	  copied to RAM from ROM. This option allows for storing that data
+	  in compressed form and decompressed to RAM rather than merely being
+	  copied, saving some precious ROM space. A possible drawback is a
+	  slightly longer boot delay.
+
 config KEXEC
 	bool "Kexec system call (EXPERIMENTAL)"
 	depends on (!SMP || PM_SLEEP_SMP)
diff --git a/arch/arm/Kconfig-nommu b/arch/arm/Kconfig-nommu
index 22f34c4..1168a03 100644
--- a/arch/arm/Kconfig-nommu
+++ b/arch/arm/Kconfig-nommu
@@ -53,8 +53,8 @@
 
 config ARM_MPU
        bool 'Use the ARM v7 PMSA Compliant MPU'
-       depends on CPU_V7
-       default y
+       depends on CPU_V7 || CPU_V7M
+       default y if CPU_V7
        help
          Some ARM systems without an MMU have instead a Memory Protection
          Unit (MPU) that defines the type and permissions for regions of
diff --git a/arch/arm/boot/Makefile b/arch/arm/boot/Makefile
index 50f8d1b..a3af4dc 100644
--- a/arch/arm/boot/Makefile
+++ b/arch/arm/boot/Makefile
@@ -31,8 +31,19 @@
 
 ifeq ($(CONFIG_XIP_KERNEL),y)
 
+cmd_deflate_xip_data = $(CONFIG_SHELL) -c \
+	'$(srctree)/$(src)/deflate_xip_data.sh $< $@ || { rm -f $@; false; }'
+
+ifeq ($(CONFIG_XIP_DEFLATED_DATA),y)
+quiet_cmd_mkxip = XIPZ    $@
+cmd_mkxip = $(cmd_objcopy) && $(cmd_deflate_xip_data)
+else
+quiet_cmd_mkxip = $(quiet_cmd_objcopy)
+cmd_mkxip = $(cmd_objcopy)
+endif
+
 $(obj)/xipImage: vmlinux FORCE
-	$(call if_changed,objcopy)
+	$(call if_changed,mkxip)
 	@$(kecho) '  Physical Address of xipImage: $(CONFIG_XIP_PHYS_ADDR)'
 
 $(obj)/Image $(obj)/zImage: FORCE
diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
index a588923..45a6b9b 100644
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -117,8 +117,11 @@
 asflags-y := -DZIMAGE
 
 # Supply kernel BSS size to the decompressor via a linker symbol.
-KBSS_SZ = $(shell $(CROSS_COMPILE)size $(obj)/../../../../vmlinux | \
-		awk 'END{print $$3}')
+KBSS_SZ = $(shell $(CROSS_COMPILE)nm $(obj)/../../../../vmlinux | \
+		perl -e 'while (<>) { \
+			$$bss_start=hex($$1) if /^([[:xdigit:]]+) B __bss_start$$/; \
+			$$bss_end=hex($$1) if /^([[:xdigit:]]+) B __bss_stop$$/; \
+		}; printf "%d\n", $$bss_end - $$bss_start;')
 LDFLAGS_vmlinux = --defsym _kernel_bss_size=$(KBSS_SZ)
 # Supply ZRELADDR to the decompressor via a linker symbol.
 ifneq ($(CONFIG_AUTO_ZRELADDR),y)
diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
index 8a75687..45c8823 100644
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -143,6 +143,8 @@
 		.word	_magic_start	@ absolute load/run zImage address
 		.word	_magic_end	@ zImage end address
 		.word	0x04030201	@ endianness flag
+		.word	0x45454545	@ another magic number to indicate
+		.word	_magic_table	@ additional data table
 
 		__EFI_HEADER
 1:
diff --git a/arch/arm/boot/compressed/vmlinux.lds.S b/arch/arm/boot/compressed/vmlinux.lds.S
index 7d06aa1..e6bf677 100644
--- a/arch/arm/boot/compressed/vmlinux.lds.S
+++ b/arch/arm/boot/compressed/vmlinux.lds.S
@@ -44,12 +44,22 @@
     *(.glue_7t)
     *(.glue_7)
   }
+  .table : ALIGN(4) {
+    _table_start = .;
+    LONG(ZIMAGE_MAGIC(2))
+    LONG(ZIMAGE_MAGIC(0x5a534c4b))
+    LONG(ZIMAGE_MAGIC(__piggy_size_addr - _start))
+    LONG(ZIMAGE_MAGIC(_kernel_bss_size))
+    LONG(0)
+    _table_end = .;
+  }
   .rodata : {
     *(.rodata)
     *(.rodata.*)
   }
   .piggydata : {
     *(.piggydata)
+    __piggy_size_addr = . - 4;
   }
 
   . = ALIGN(4);
@@ -97,6 +107,7 @@
   _magic_sig = ZIMAGE_MAGIC(0x016f2818);
   _magic_start = ZIMAGE_MAGIC(_start);
   _magic_end = ZIMAGE_MAGIC(_edata);
+  _magic_table = ZIMAGE_MAGIC(_table_start - _start);
 
   . = BSS_START;
   __bss_start = .;
diff --git a/arch/arm/boot/deflate_xip_data.sh b/arch/arm/boot/deflate_xip_data.sh
new file mode 100755
index 0000000..1189598
--- /dev/null
+++ b/arch/arm/boot/deflate_xip_data.sh
@@ -0,0 +1,64 @@
+#!/bin/sh
+
+# XIP kernel .data segment compressor
+#
+# Created by:	Nicolas Pitre, August 2017
+# Copyright:	(C) 2017  Linaro Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+# This script locates the start of the .data section in xipImage and
+# substitutes it with a compressed version. The needed offsets are obtained
+# from symbol addresses in vmlinux. It is expected that .data extends to
+# the end of xipImage.
+
+set -e
+
+VMLINUX="$1"
+XIPIMAGE="$2"
+
+DD="dd status=none"
+
+# Use "make V=1" to debug this script.
+case "$KBUILD_VERBOSE" in
+*1*)
+	set -x
+	;;
+esac
+
+sym_val() {
+	# extract hex value for symbol in $1
+	local val=$($NM "$VMLINUX" | sed -n "/ $1$/{s/ .*$//p;q}")
+	[ "$val" ] || { echo "can't find $1 in $VMLINUX" 1>&2; exit 1; }
+	# convert from hex to decimal
+	echo $((0x$val))
+}
+
+__data_loc=$(sym_val __data_loc)
+_edata_loc=$(sym_val _edata_loc)
+base_offset=$(sym_val _xiprom)
+
+# convert to file based offsets
+data_start=$(($__data_loc - $base_offset))
+data_end=$(($_edata_loc - $base_offset))
+
+# Make sure data occupies the last part of the file.
+file_end=$(stat -c "%s" "$XIPIMAGE")
+if [ "$file_end" != "$data_end" ]; then
+	printf "end of xipImage doesn't match with _edata_loc (%#x vs %#x)\n" \
+	       $(($file_end + $base_offset)) $_edata_loc 2>&1
+	exit 1;
+fi
+
+# be ready to clean up
+trap 'rm -f "$XIPIMAGE.tmp"' 0 1 2 3
+
+# substitute the data section by a compressed version
+$DD if="$XIPIMAGE" count=$data_start iflag=count_bytes of="$XIPIMAGE.tmp"
+$DD if="$XIPIMAGE"  skip=$data_start iflag=skip_bytes |
+gzip -9 >> "$XIPIMAGE.tmp"
+
+# replace kernel binary
+mv -f "$XIPIMAGE.tmp" "$XIPIMAGE"
diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h
index 4419333..cb54642 100644
--- a/arch/arm/include/asm/cputype.h
+++ b/arch/arm/include/asm/cputype.h
@@ -174,6 +174,11 @@
 	return read_cpuid(CPUID_CACHETYPE);
 }
 
+static inline unsigned int __attribute_const__ read_cpuid_mputype(void)
+{
+	return read_cpuid(CPUID_MPUIR);
+}
+
 #elif defined(CONFIG_CPU_V7M)
 
 static inline unsigned int __attribute_const__ read_cpuid_id(void)
@@ -186,6 +191,11 @@
 	return readl(BASEADDR_V7M_SCB + V7M_SCB_CTR);
 }
 
+static inline unsigned int __attribute_const__ read_cpuid_mputype(void)
+{
+	return readl(BASEADDR_V7M_SCB + MPU_TYPE);
+}
+
 #else /* ifdef CONFIG_CPU_CP15 / elif defined(CONFIG_CPU_V7M) */
 
 static inline unsigned int __attribute_const__ read_cpuid_id(void)
diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h
index 3ca1199..daf8374 100644
--- a/arch/arm/include/asm/dma-mapping.h
+++ b/arch/arm/include/asm/dma-mapping.h
@@ -191,13 +191,6 @@
 			unsigned long attrs);
 
 /*
- * This can be called during early boot to increase the size of the atomic
- * coherent DMA pool above the default value of 256KiB. It must be called
- * before postcore_initcall.
- */
-extern void __init init_dma_coherent_pool_size(unsigned long size);
-
-/*
  * For SA-1111, IXP425, and ADI systems  the dma-mapping functions are "magic"
  * and utilize bounce buffers as needed to work around limited DMA windows.
  *
diff --git a/arch/arm/include/asm/elf.h b/arch/arm/include/asm/elf.h
index 8c5ca92..b078d99 100644
--- a/arch/arm/include/asm/elf.h
+++ b/arch/arm/include/asm/elf.h
@@ -101,10 +101,15 @@
 extern int elf_check_arch(const struct elf32_hdr *);
 #define elf_check_arch elf_check_arch
 
+#define ELFOSABI_ARM_FDPIC  65	/* ARM FDPIC platform */
+#define elf_check_fdpic(x)  ((x)->e_ident[EI_OSABI] == ELFOSABI_ARM_FDPIC)
+#define elf_check_const_displacement(x)  ((x)->e_flags & EF_ARM_PIC)
+#define ELF_FDPIC_CORE_EFLAGS  0
+
 #define vmcore_elf64_check_arch(x) (0)
 
-extern int arm_elf_read_implies_exec(const struct elf32_hdr *, int);
-#define elf_read_implies_exec(ex,stk) arm_elf_read_implies_exec(&(ex), stk)
+extern int arm_elf_read_implies_exec(int);
+#define elf_read_implies_exec(ex,stk) arm_elf_read_implies_exec(stk)
 
 struct task_struct;
 int dump_task_regs(struct task_struct *t, elf_gregset_t *elfregs);
@@ -121,6 +126,13 @@
    have no such handler.  */
 #define ELF_PLAT_INIT(_r, load_addr)	(_r)->ARM_r0 = 0
 
+#define ELF_FDPIC_PLAT_INIT(_r, _exec_map_addr, _interp_map_addr, dynamic_addr) \
+	do { \
+		(_r)->ARM_r7 = _exec_map_addr; \
+		(_r)->ARM_r8 = _interp_map_addr; \
+		(_r)->ARM_r9 = dynamic_addr; \
+	} while(0)
+
 extern void elf_set_personality(const struct elf32_hdr *);
 #define SET_PERSONALITY(ex)	elf_set_personality(&(ex))
 
diff --git a/arch/arm/include/asm/highmem.h b/arch/arm/include/asm/highmem.h
index b03d3fa..eb4e420 100644
--- a/arch/arm/include/asm/highmem.h
+++ b/arch/arm/include/asm/highmem.h
@@ -19,7 +19,6 @@
 	} while (0)
 
 extern pte_t *pkmap_page_table;
-extern pte_t *fixmap_page_table;
 
 extern void *kmap_high(struct page *page);
 extern void kunmap_high(struct page *page);
diff --git a/arch/arm/include/asm/mmu.h b/arch/arm/include/asm/mmu.h
index 65669b9..1592a42 100644
--- a/arch/arm/include/asm/mmu.h
+++ b/arch/arm/include/asm/mmu.h
@@ -15,6 +15,10 @@
 #ifdef CONFIG_VDSO
 	unsigned long	vdso;
 #endif
+#ifdef CONFIG_BINFMT_ELF_FDPIC
+	unsigned long	exec_fdpic_loadmap;
+	unsigned long	interp_fdpic_loadmap;
+#endif
 } mm_context_t;
 
 #ifdef CONFIG_CPU_HAS_ASID
@@ -34,6 +38,10 @@
  */
 typedef struct {
 	unsigned long	end_brk;
+#ifdef CONFIG_BINFMT_ELF_FDPIC
+	unsigned long	exec_fdpic_loadmap;
+	unsigned long	interp_fdpic_loadmap;
+#endif
 } mm_context_t;
 
 #endif
diff --git a/arch/arm/include/asm/mpu.h b/arch/arm/include/asm/mpu.h
index 0c3f774..6d1491c 100644
--- a/arch/arm/include/asm/mpu.h
+++ b/arch/arm/include/asm/mpu.h
@@ -2,8 +2,6 @@
 #ifndef __ARM_MPU_H
 #define __ARM_MPU_H
 
-#ifdef CONFIG_ARM_MPU
-
 /* MPUIR layout */
 #define MPUIR_nU		1
 #define MPUIR_DREGION		8
@@ -18,6 +16,11 @@
 /* MPU D/I Size Register fields */
 #define MPU_RSR_SZ		1
 #define MPU_RSR_EN		0
+#define MPU_RSR_SD		8
+
+/* Number of subregions (SD) */
+#define MPU_NR_SUBREGS		8
+#define MPU_MIN_SUBREG_SIZE	256
 
 /* The D/I RSR value for an enabled region spanning the whole of memory */
 #define MPU_RSR_ALL_MEM		63
@@ -39,6 +42,7 @@
 #endif
 
 /* Access permission bits of ACR (only define those that we use)*/
+#define MPU_AP_PL1RO_PL0NA	(0x5 << 8)
 #define MPU_AP_PL1RW_PL0RW	(0x3 << 8)
 #define MPU_AP_PL1RW_PL0R0	(0x2 << 8)
 #define MPU_AP_PL1RW_PL0NA	(0x1 << 8)
@@ -47,7 +51,7 @@
 #define MPU_PROBE_REGION	0
 #define MPU_BG_REGION		1
 #define MPU_RAM_REGION		2
-#define MPU_VECTORS_REGION	3
+#define MPU_ROM_REGION		3
 
 /* Maximum number of regions Linux is interested in */
 #define MPU_MAX_REGIONS		16
@@ -65,13 +69,23 @@
 };
 
 struct mpu_rgn_info {
-	u32 mpuir;
+	unsigned int used;
 	struct mpu_rgn rgns[MPU_MAX_REGIONS];
 };
 extern struct mpu_rgn_info mpu_rgn_info;
 
-#endif /* __ASSEMBLY__ */
+#ifdef CONFIG_ARM_MPU
 
-#endif /* CONFIG_ARM_MPU */
+extern void __init adjust_lowmem_bounds_mpu(void);
+extern void __init mpu_setup(void);
+
+#else
+
+static inline void adjust_lowmem_bounds_mpu(void) {}
+static inline void mpu_setup(void) {}
+
+#endif /* !CONFIG_ARM_MPU */
+
+#endif /* __ASSEMBLY__ */
 
 #endif
diff --git a/arch/arm/include/asm/processor.h b/arch/arm/include/asm/processor.h
index c3d5fc1..338cbe0 100644
--- a/arch/arm/include/asm/processor.h
+++ b/arch/arm/include/asm/processor.h
@@ -47,15 +47,24 @@
 
 #define INIT_THREAD  {	}
 
-#ifdef CONFIG_MMU
-#define nommu_start_thread(regs) do { } while (0)
-#else
-#define nommu_start_thread(regs) regs->ARM_r10 = current->mm->start_data
-#endif
-
 #define start_thread(regs,pc,sp)					\
 ({									\
+	unsigned long r7, r8, r9;					\
+									\
+	if (IS_ENABLED(CONFIG_BINFMT_ELF_FDPIC)) {			\
+		r7 = regs->ARM_r7;					\
+		r8 = regs->ARM_r8;					\
+		r9 = regs->ARM_r9;					\
+	}								\
 	memset(regs->uregs, 0, sizeof(regs->uregs));			\
+	if (IS_ENABLED(CONFIG_BINFMT_ELF_FDPIC) &&			\
+	    current->personality & FDPIC_FUNCPTRS) {			\
+		regs->ARM_r7 = r7;					\
+		regs->ARM_r8 = r8;					\
+		regs->ARM_r9 = r9;					\
+		regs->ARM_r10 = current->mm->start_data;		\
+	} else if (!IS_ENABLED(CONFIG_MMU))				\
+		regs->ARM_r10 = current->mm->start_data;		\
 	if (current->personality & ADDR_LIMIT_32BIT)			\
 		regs->ARM_cpsr = USR_MODE;				\
 	else								\
@@ -65,7 +74,6 @@
 	regs->ARM_cpsr |= PSR_ENDSTATE;					\
 	regs->ARM_pc = pc & ~1;		/* pc */			\
 	regs->ARM_sp = sp;		/* sp */			\
-	nommu_start_thread(regs);					\
 })
 
 /* Forward declaration, a strange C thing */
diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h
index 3d6dc8b..709a559 100644
--- a/arch/arm/include/asm/smp.h
+++ b/arch/arm/include/asm/smp.h
@@ -60,7 +60,7 @@
  */
 struct secondary_data {
 	union {
-		unsigned long mpu_rgn_szr;
+		struct mpu_rgn_info *mpu_rgn_info;
 		u64 pgdir;
 	};
 	unsigned long swapper_pg_dir;
diff --git a/arch/arm/include/asm/ucontext.h b/arch/arm/include/asm/ucontext.h
index 3f0d95a..5c5e62c 100644
--- a/arch/arm/include/asm/ucontext.h
+++ b/arch/arm/include/asm/ucontext.h
@@ -3,6 +3,7 @@
 #define _ASMARM_UCONTEXT_H
 
 #include <asm/fpstate.h>
+#include <asm/user.h>
 
 /*
  * struct sigcontext only has room for the basic registers, but struct
diff --git a/arch/arm/include/asm/v7m.h b/arch/arm/include/asm/v7m.h
index e6d9e29..634e771 100644
--- a/arch/arm/include/asm/v7m.h
+++ b/arch/arm/include/asm/v7m.h
@@ -58,6 +58,16 @@
 #define	V7M_SCB_CCSIDR		0x80	/* Cache size ID register */
 #define	V7M_SCB_CSSELR		0x84	/* Cache size selection register */
 
+/* Memory-mapped MPU registers for M-class */
+#define MPU_TYPE		0x90
+#define MPU_CTRL		0x94
+#define MPU_CTRL_ENABLE		1
+#define MPU_CTRL_PRIVDEFENA	(1 << 2)
+
+#define MPU_RNR			0x98
+#define MPU_RBAR		0x9c
+#define MPU_RASR		0xa0
+
 /* Cache opeartions */
 #define	V7M_SCB_ICIALLU		0x250	/* I-cache invalidate all to PoU */
 #define	V7M_SCB_ICIMVAU		0x258	/* I-cache invalidate by MVA to PoU */
diff --git a/arch/arm/include/uapi/asm/ptrace.h b/arch/arm/include/uapi/asm/ptrace.h
index b67cda5..3287d79 100644
--- a/arch/arm/include/uapi/asm/ptrace.h
+++ b/arch/arm/include/uapi/asm/ptrace.h
@@ -32,6 +32,10 @@
 #define PTRACE_SETVFPREGS	28
 #define PTRACE_GETHBPREGS	29
 #define PTRACE_SETHBPREGS	30
+#define PTRACE_GETFDPIC		31
+
+#define PTRACE_GETFDPIC_EXEC	0
+#define PTRACE_GETFDPIC_INTERP	1
 
 /*
  * PSR bits
diff --git a/arch/arm/include/uapi/asm/unistd.h b/arch/arm/include/uapi/asm/unistd.h
index 39b2ad9..93ecf8a 100644
--- a/arch/arm/include/uapi/asm/unistd.h
+++ b/arch/arm/include/uapi/asm/unistd.h
@@ -36,5 +36,6 @@
 #define __ARM_NR_usr26			(__ARM_NR_BASE+3)
 #define __ARM_NR_usr32			(__ARM_NR_BASE+4)
 #define __ARM_NR_set_tls		(__ARM_NR_BASE+5)
+#define __ARM_NR_get_tls		(__ARM_NR_BASE+6)
 
 #endif /* _UAPI__ASM_ARM_UNISTD_H */
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 499f978..b59ac4b 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -88,6 +88,11 @@
 obj-$(CONFIG_DEBUG_LL)	+= debug.o
 obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
 
+# This is executed very early using a temporary stack when no memory allocator
+# nor global data is available. Everything has to be allocated on the stack.
+CFLAGS_head-inflate-data.o := $(call cc-option,-Wframe-larger-than=10240)
+obj-$(CONFIG_XIP_DEFLATED_DATA) += head-inflate-data.o
+
 obj-$(CONFIG_ARM_VIRT_EXT)	+= hyp-stub.o
 AFLAGS_hyp-stub.o		:=-Wa,-march=armv7-a
 ifeq ($(CONFIG_ARM_PSCI),y)
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index 6080082..f369ece 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -23,11 +23,13 @@
 #include <asm/mach/arch.h>
 #include <asm/thread_info.h>
 #include <asm/memory.h>
+#include <asm/mpu.h>
 #include <asm/procinfo.h>
 #include <asm/suspend.h>
 #include <asm/vdso_datapage.h>
 #include <asm/hardware/cache-l2x0.h>
 #include <linux/kbuild.h>
+#include "signal.h"
 
 /*
  * Make sure that the compiler and target are compatible.
@@ -112,6 +114,9 @@
   DEFINE(SVC_ADDR_LIMIT,	offsetof(struct svc_pt_regs, addr_limit));
   DEFINE(SVC_REGS_SIZE,		sizeof(struct svc_pt_regs));
   BLANK();
+  DEFINE(SIGFRAME_RC3_OFFSET,	offsetof(struct sigframe, retcode[3]));
+  DEFINE(RT_SIGFRAME_RC3_OFFSET, offsetof(struct rt_sigframe, sig.retcode[3]));
+  BLANK();
 #ifdef CONFIG_CACHE_L2X0
   DEFINE(L2X0_R_PHY_BASE,	offsetof(struct l2x0_regs, phy_base));
   DEFINE(L2X0_R_AUX_CTRL,	offsetof(struct l2x0_regs, aux_ctrl));
@@ -183,5 +188,15 @@
 #ifdef CONFIG_VDSO
   DEFINE(VDSO_DATA_SIZE,	sizeof(union vdso_data_store));
 #endif
+  BLANK();
+#ifdef CONFIG_ARM_MPU
+  DEFINE(MPU_RNG_INFO_RNGS,	offsetof(struct mpu_rgn_info, rgns));
+  DEFINE(MPU_RNG_INFO_USED,	offsetof(struct mpu_rgn_info, used));
+
+  DEFINE(MPU_RNG_SIZE,		sizeof(struct mpu_rgn));
+  DEFINE(MPU_RGN_DRBAR,		offsetof(struct mpu_rgn, drbar));
+  DEFINE(MPU_RGN_DRSR,		offsetof(struct mpu_rgn, drsr));
+  DEFINE(MPU_RGN_DRACR,		offsetof(struct mpu_rgn, dracr));
+#endif
   return 0; 
 }
diff --git a/arch/arm/kernel/atags_parse.c b/arch/arm/kernel/atags_parse.c
index 98fbfd2..c10a3e8 100644
--- a/arch/arm/kernel/atags_parse.c
+++ b/arch/arm/kernel/atags_parse.c
@@ -196,11 +196,8 @@
 			break;
 		}
 
-	if (!mdesc) {
-		early_print("\nError: unrecognized/unsupported machine ID"
-			    " (r1 = 0x%08x).\n\n", machine_nr);
-		dump_machine_table(); /* does not return */
-	}
+	if (!mdesc)
+		return NULL;
 
 	if (__atags_pointer)
 		tags = phys_to_virt(__atags_pointer);
diff --git a/arch/arm/kernel/debug.S b/arch/arm/kernel/debug.S
index 0a498cb..b795dc2 100644
--- a/arch/arm/kernel/debug.S
+++ b/arch/arm/kernel/debug.S
@@ -55,7 +55,9 @@
 
 ENTRY(printhex2)
 		mov	r1, #2
-printhex:	adr	r2, hexbuf
+printhex:	adr	r2, hexbuf_rel
+		ldr	r3, [r2]
+		add	r2, r2, r3
 		add	r3, r2, r1
 		mov	r1, #0
 		strb	r1, [r3]
@@ -71,7 +73,11 @@
 		b	printascii
 ENDPROC(printhex2)
 
-hexbuf:		.space 16
+		.pushsection .bss
+hexbuf_addr:	.space 16
+		.popsection
+		.align
+hexbuf_rel:	.long	hexbuf_addr - .
 
 		.ltorg
 
@@ -79,25 +85,28 @@
 
 ENTRY(printascii)
 		addruart_current r3, r1, r2
-		b	2f
-1:		waituart r2, r3
-		senduart r1, r3
-		busyuart r2, r3
-		teq	r1, #'\n'
-		moveq	r1, #'\r'
-		beq	1b
-2:		teq	r0, #0
+1:		teq	r0, #0
 		ldrneb	r1, [r0], #1
 		teqne	r1, #0
-		bne	1b
-		ret	lr
+		reteq	lr
+2:		teq     r1, #'\n'
+		bne	3f
+		mov	r1, #'\r'
+		waituart r2, r3
+		senduart r1, r3
+		busyuart r2, r3
+		mov	r1, #'\n'
+3:		waituart r2, r3
+		senduart r1, r3
+		busyuart r2, r3
+		b	1b
 ENDPROC(printascii)
 
 ENTRY(printch)
 		addruart_current r3, r1, r2
 		mov	r1, r0
 		mov	r0, #0
-		b	1b
+		b	2b
 ENDPROC(printch)
 
 #ifdef CONFIG_MMU
@@ -124,7 +133,9 @@
 ENDPROC(printascii)
 
 ENTRY(printch)
-		adr	r1, hexbuf
+		adr	r1, hexbuf_rel
+		ldr	r2, [r1]
+		add	r1, r1, r2
 		strb	r0, [r1]
 		mov	r0, #0x03		@ SYS_WRITEC
 	ARM(	svc	#0x123456	)
diff --git a/arch/arm/kernel/early_printk.c b/arch/arm/kernel/early_printk.c
index 4307653..9257736 100644
--- a/arch/arm/kernel/early_printk.c
+++ b/arch/arm/kernel/early_printk.c
@@ -11,16 +11,20 @@
 #include <linux/kernel.h>
 #include <linux/console.h>
 #include <linux/init.h>
+#include <linux/string.h>
 
-extern void printch(int);
+extern void printascii(const char *);
 
 static void early_write(const char *s, unsigned n)
 {
-	while (n-- > 0) {
-		if (*s == '\n')
-			printch('\r');
-		printch(*s);
-		s++;
+	char buf[128];
+	while (n) {
+		unsigned l = min(n, sizeof(buf)-1);
+		memcpy(buf, s, l);
+		buf[l] = 0;
+		s += l;
+		n -= l;
+		printascii(buf);
 	}
 }
 
diff --git a/arch/arm/kernel/elf.c b/arch/arm/kernel/elf.c
index 846dda2..1824229 100644
--- a/arch/arm/kernel/elf.c
+++ b/arch/arm/kernel/elf.c
@@ -4,6 +4,7 @@
 #include <linux/personality.h>
 #include <linux/binfmts.h>
 #include <linux/elf.h>
+#include <linux/elf-fdpic.h>
 #include <asm/system_info.h>
 
 int elf_check_arch(const struct elf32_hdr *x)
@@ -81,7 +82,7 @@
  *  - the binary requires an executable stack
  *  - we're running on a CPU which doesn't support NX.
  */
-int arm_elf_read_implies_exec(const struct elf32_hdr *x, int executable_stack)
+int arm_elf_read_implies_exec(int executable_stack)
 {
 	if (executable_stack != EXSTACK_DISABLE_X)
 		return 1;
@@ -90,3 +91,24 @@
 	return 0;
 }
 EXPORT_SYMBOL(arm_elf_read_implies_exec);
+
+#if defined(CONFIG_MMU) && defined(CONFIG_BINFMT_ELF_FDPIC)
+
+void elf_fdpic_arch_lay_out_mm(struct elf_fdpic_params *exec_params,
+			       struct elf_fdpic_params *interp_params,
+			       unsigned long *start_stack,
+			       unsigned long *start_brk)
+{
+	elf_set_personality(&exec_params->hdr);
+
+	exec_params->load_addr = 0x8000;
+	interp_params->load_addr = ELF_ET_DYN_BASE;
+	*start_stack = TASK_SIZE - SZ_16M;
+
+	if ((exec_params->flags & ELF_FDPIC_FLAG_ARRANGEMENT) == ELF_FDPIC_FLAG_INDEPENDENT) {
+		exec_params->flags &= ~ELF_FDPIC_FLAG_ARRANGEMENT;
+		exec_params->flags |= ELF_FDPIC_FLAG_CONSTDISP;
+	}
+}
+
+#endif
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index 99c9082..e655dcd 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -400,17 +400,8 @@
  * offset, we return EINVAL.
  */
 sys_mmap2:
-#if PAGE_SHIFT > 12
-		tst	r5, #PGOFF_MASK
-		moveq	r5, r5, lsr #PAGE_SHIFT - 12
-		streq	r5, [sp, #4]
-		beq	sys_mmap_pgoff
-		mov	r0, #-EINVAL
-		ret	lr
-#else
 		str	r5, [sp, #4]
 		b	sys_mmap_pgoff
-#endif
 ENDPROC(sys_mmap2)
 
 #ifdef CONFIG_OABI_COMPAT
diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S
index 8733012..21dde77 100644
--- a/arch/arm/kernel/head-common.S
+++ b/arch/arm/kernel/head-common.S
@@ -79,47 +79,69 @@
  */
 	__INIT
 __mmap_switched:
-	adr	r3, __mmap_switched_data
 
-	ldmia	r3!, {r4, r5, r6, r7}
-	cmp	r4, r5				@ Copy data segment if needed
-1:	cmpne	r5, r6
-	ldrne	fp, [r4], #4
-	strne	fp, [r5], #4
-	bne	1b
+	mov	r7, r1
+	mov	r8, r2
+	mov	r10, r0
 
-	mov	fp, #0				@ Clear BSS (and zero fp)
-1:	cmp	r6, r7
-	strcc	fp, [r6],#4
-	bcc	1b
+	adr	r4, __mmap_switched_data
+	mov	fp, #0
 
- ARM(	ldmia	r3, {r4, r5, r6, r7, sp})
- THUMB(	ldmia	r3, {r4, r5, r6, r7}	)
- THUMB(	ldr	sp, [r3, #16]		)
-	str	r9, [r4]			@ Save processor ID
-	str	r1, [r5]			@ Save machine type
-	str	r2, [r6]			@ Save atags pointer
-	cmp	r7, #0
-	strne	r0, [r7]			@ Save control register values
+#if defined(CONFIG_XIP_DEFLATED_DATA)
+   ARM(	ldr	sp, [r4], #4 )
+ THUMB(	ldr	sp, [r4] )
+ THUMB(	add	r4, #4 )
+	bl	__inflate_kernel_data		@ decompress .data to RAM
+	teq	r0, #0
+	bne	__error
+#elif defined(CONFIG_XIP_KERNEL)
+   ARM(	ldmia	r4!, {r0, r1, r2, sp} )
+ THUMB(	ldmia	r4!, {r0, r1, r2, r3} )
+ THUMB(	mov	sp, r3 )
+	sub	r2, r2, r1
+	bl	memcpy				@ copy .data to RAM
+#endif
+
+   ARM(	ldmia	r4!, {r0, r1, sp} )
+ THUMB(	ldmia	r4!, {r0, r1, r3} )
+ THUMB(	mov	sp, r3 )
+	sub	r1, r1, r0
+	bl	__memzero			@ clear .bss
+
+	ldmia	r4, {r0, r1, r2, r3}
+	str	r9, [r0]			@ Save processor ID
+	str	r7, [r1]			@ Save machine type
+	str	r8, [r2]			@ Save atags pointer
+	cmp	r3, #0
+	strne	r10, [r3]			@ Save control register values
+	mov	lr, #0
 	b	start_kernel
 ENDPROC(__mmap_switched)
 
 	.align	2
 	.type	__mmap_switched_data, %object
 __mmap_switched_data:
-	.long	__data_loc			@ r4
-	.long	_sdata				@ r5
-	.long	__bss_start			@ r6
-	.long	_end				@ r7
-	.long	processor_id			@ r4
-	.long	__machine_arch_type		@ r5
-	.long	__atags_pointer			@ r6
-#ifdef CONFIG_CPU_CP15
-	.long	cr_alignment			@ r7
-#else
-	.long	0				@ r7
+#ifdef CONFIG_XIP_KERNEL
+#ifndef CONFIG_XIP_DEFLATED_DATA
+	.long	_sdata				@ r0
+	.long	__data_loc			@ r1
+	.long	_edata_loc			@ r2
 #endif
+	.long	__bss_stop			@ sp (temporary stack in .bss)
+#endif
+
+	.long	__bss_start			@ r0
+	.long	__bss_stop			@ r1
 	.long	init_thread_union + THREAD_START_SP @ sp
+
+	.long	processor_id			@ r0
+	.long	__machine_arch_type		@ r1
+	.long	__atags_pointer			@ r2
+#ifdef CONFIG_CPU_CP15
+	.long	cr_alignment			@ r3
+#else
+	.long	0				@ r3
+#endif
 	.size	__mmap_switched_data, . - __mmap_switched_data
 
 /*
diff --git a/arch/arm/kernel/head-inflate-data.c b/arch/arm/kernel/head-inflate-data.c
new file mode 100644
index 0000000..6dd0ce5
--- /dev/null
+++ b/arch/arm/kernel/head-inflate-data.c
@@ -0,0 +1,62 @@
+/*
+ * XIP kernel .data segment decompressor
+ *
+ * Created by:	Nicolas Pitre, August 2017
+ * Copyright:	(C) 2017  Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/zutil.h>
+
+/* for struct inflate_state */
+#include "../../../lib/zlib_inflate/inftrees.h"
+#include "../../../lib/zlib_inflate/inflate.h"
+#include "../../../lib/zlib_inflate/infutil.h"
+
+extern char __data_loc[];
+extern char _edata_loc[];
+extern char _sdata[];
+
+/*
+ * This code is called very early during the boot process to decompress
+ * the .data segment stored compressed in ROM. Therefore none of the global
+ * variables are valid yet, hence no kernel services such as memory
+ * allocation is available. Everything must be allocated on the stack and
+ * we must avoid any global data access. We use a temporary stack located
+ * in the .bss area. The linker script makes sure the .bss is big enough
+ * to hold our stack frame plus some room for called functions.
+ *
+ * We mimic the code in lib/decompress_inflate.c to use the smallest work
+ * area possible. And because everything is statically allocated on the
+ * stack then there is no need to clean up before returning.
+ */
+
+int __init __inflate_kernel_data(void)
+{
+	struct z_stream_s stream, *strm = &stream;
+	struct inflate_state state;
+	char *in = __data_loc;
+	int rc;
+
+	/* Check and skip gzip header (assume no filename) */
+	if (in[0] != 0x1f || in[1] != 0x8b || in[2] != 0x08 || in[3] & ~3)
+		return -1;
+	in += 10;
+
+	strm->workspace = &state;
+	strm->next_in = in;
+	strm->avail_in = _edata_loc - __data_loc;  /* upper bound */
+	strm->next_out = _sdata;
+	strm->avail_out = _edata_loc - __data_loc;
+	zlib_inflateInit2(strm, -MAX_WBITS);
+	WS(strm)->inflate_state.wsize = 0;
+	WS(strm)->inflate_state.window = NULL;
+	rc = zlib_inflate(strm, Z_FINISH);
+	if (rc == Z_OK || rc == Z_STREAM_END)
+		rc = strm->avail_out;  /* should be 0 */
+	return rc;
+}
diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S
index 2e21e08..2e38f85 100644
--- a/arch/arm/kernel/head-nommu.S
+++ b/arch/arm/kernel/head-nommu.S
@@ -13,6 +13,7 @@
  */
 #include <linux/linkage.h>
 #include <linux/init.h>
+#include <linux/errno.h>
 
 #include <asm/assembler.h>
 #include <asm/ptrace.h>
@@ -110,8 +111,8 @@
 
 #ifdef CONFIG_ARM_MPU
 	/* Use MPU region info supplied by __cpu_up */
-	ldr	r6, [r7]			@ get secondary_data.mpu_szr
-	bl      __setup_mpu			@ Initialize the MPU
+	ldr	r6, [r7]			@ get secondary_data.mpu_rgn_info
+	bl      __secondary_setup_mpu		@ Initialize the MPU
 #endif
 
 	badr	lr, 1f				@ return (PIC) address
@@ -175,19 +176,33 @@
 #ifdef CONFIG_ARM_MPU
 
 
+#ifndef CONFIG_CPU_V7M
 /* Set which MPU region should be programmed */
-.macro set_region_nr tmp, rgnr
+.macro set_region_nr tmp, rgnr, unused
 	mov	\tmp, \rgnr			@ Use static region numbers
 	mcr	p15, 0, \tmp, c6, c2, 0		@ Write RGNR
 .endm
 
 /* Setup a single MPU region, either D or I side (D-side for unified) */
-.macro setup_region bar, acr, sr, side = MPU_DATA_SIDE
+.macro setup_region bar, acr, sr, side = MPU_DATA_SIDE, unused
 	mcr	p15, 0, \bar, c6, c1, (0 + \side)	@ I/DRBAR
 	mcr	p15, 0, \acr, c6, c1, (4 + \side)	@ I/DRACR
 	mcr	p15, 0, \sr, c6, c1, (2 + \side)		@ I/DRSR
 .endm
+#else
+.macro set_region_nr tmp, rgnr, base
+	mov	\tmp, \rgnr
+	str     \tmp, [\base, #MPU_RNR]
+.endm
 
+.macro setup_region bar, acr, sr, unused, base
+	lsl     \acr, \acr, #16
+	orr     \acr, \acr, \sr
+	str     \bar, [\base, #MPU_RBAR]
+	str     \acr, [\base, #MPU_RASR]
+.endm
+
+#endif
 /*
  * Setup the MPU and initial MPU Regions. We create the following regions:
  * Region 0: Use this for probing the MPU details, so leave disabled.
@@ -201,64 +216,137 @@
 ENTRY(__setup_mpu)
 
 	/* Probe for v7 PMSA compliance */
-	mrc	p15, 0, r0, c0, c1, 4		@ Read ID_MMFR0
+M_CLASS(movw	r12, #:lower16:BASEADDR_V7M_SCB)
+M_CLASS(movt	r12, #:upper16:BASEADDR_V7M_SCB)
+
+AR_CLASS(mrc	p15, 0, r0, c0, c1, 4)		@ Read ID_MMFR0
+M_CLASS(ldr	r0, [r12, 0x50])
 	and	r0, r0, #(MMFR0_PMSA)		@ PMSA field
 	teq	r0, #(MMFR0_PMSAv7)		@ PMSA v7
-	bne	__error_p			@ Fail: ARM_MPU on NOT v7 PMSA
+	bxne	lr
 
 	/* Determine whether the D/I-side memory map is unified. We set the
 	 * flags here and continue to use them for the rest of this function */
-	mrc	p15, 0, r0, c0, c0, 4		@ MPUIR
+AR_CLASS(mrc	p15, 0, r0, c0, c0, 4)		@ MPUIR
+M_CLASS(ldr    r0, [r12, #MPU_TYPE])
 	ands	r5, r0, #MPUIR_DREGION_SZMASK	@ 0 size d region => No MPU
-	beq	__error_p			@ Fail: ARM_MPU and no MPU
+	bxeq	lr
 	tst	r0, #MPUIR_nU			@ MPUIR_nU = 0 for unified
 
 	/* Setup second region first to free up r6 */
-	set_region_nr r0, #MPU_RAM_REGION
+	set_region_nr r0, #MPU_RAM_REGION, r12
 	isb
 	/* Full access from PL0, PL1, shared for CONFIG_SMP, cacheable */
 	ldr	r0, =PLAT_PHYS_OFFSET		@ RAM starts at PHYS_OFFSET
 	ldr	r5,=(MPU_AP_PL1RW_PL0RW | MPU_RGN_NORMAL)
 
-	setup_region r0, r5, r6, MPU_DATA_SIDE	@ PHYS_OFFSET, shared, enabled
-	beq	1f				@ Memory-map not unified
-	setup_region r0, r5, r6, MPU_INSTR_SIDE @ PHYS_OFFSET, shared, enabled
+	setup_region r0, r5, r6, MPU_DATA_SIDE, r12	@ PHYS_OFFSET, shared, enabled
+	beq	1f					@ Memory-map not unified
+	setup_region r0, r5, r6, MPU_INSTR_SIDE, r12	@ PHYS_OFFSET, shared, enabled
 1:	isb
 
 	/* First/background region */
-	set_region_nr r0, #MPU_BG_REGION
+	set_region_nr r0, #MPU_BG_REGION, r12
 	isb
 	/* Execute Never,  strongly ordered, inaccessible to PL0, rw PL1  */
 	mov	r0, #0				@ BG region starts at 0x0
 	ldr	r5,=(MPU_ACR_XN | MPU_RGN_STRONGLY_ORDERED | MPU_AP_PL1RW_PL0NA)
 	mov	r6, #MPU_RSR_ALL_MEM		@ 4GB region, enabled
 
-	setup_region r0, r5, r6, MPU_DATA_SIDE	@ 0x0, BG region, enabled
-	beq	2f				@ Memory-map not unified
-	setup_region r0, r5, r6, MPU_INSTR_SIDE @ 0x0, BG region, enabled
+	setup_region r0, r5, r6, MPU_DATA_SIDE, r12	@ 0x0, BG region, enabled
+	beq	2f					@ Memory-map not unified
+	setup_region r0, r5, r6, MPU_INSTR_SIDE r12	@ 0x0, BG region, enabled
 2:	isb
 
-	/* Vectors region */
-	set_region_nr r0, #MPU_VECTORS_REGION
+#ifdef CONFIG_XIP_KERNEL
+	set_region_nr r0, #MPU_ROM_REGION, r12
 	isb
-	/* Shared, inaccessible to PL0, rw PL1 */
-	mov	r0, #CONFIG_VECTORS_BASE	@ Cover from VECTORS_BASE
-	ldr	r5,=(MPU_AP_PL1RW_PL0NA | MPU_RGN_NORMAL)
-	/* Writing N to bits 5:1 (RSR_SZ) --> region size 2^N+1 */
-	mov	r6, #(((2 * PAGE_SHIFT - 1) << MPU_RSR_SZ) | 1 << MPU_RSR_EN)
 
-	setup_region r0, r5, r6, MPU_DATA_SIDE	@ VECTORS_BASE, PL0 NA, enabled
-	beq	3f				@ Memory-map not unified
-	setup_region r0, r5, r6, MPU_INSTR_SIDE	@ VECTORS_BASE, PL0 NA, enabled
+	ldr	r5,=(MPU_AP_PL1RO_PL0NA | MPU_RGN_NORMAL)
+
+	ldr	r0, =CONFIG_XIP_PHYS_ADDR		@ ROM start
+	ldr     r6, =(_exiprom)				@ ROM end
+	sub	r6, r6, r0				@ Minimum size of region to map
+	clz	r6, r6					@ Region size must be 2^N...
+	rsb	r6, r6, #31				@ ...so round up region size
+	lsl	r6, r6, #MPU_RSR_SZ			@ Put size in right field
+	orr	r6, r6, #(1 << MPU_RSR_EN)		@ Set region enabled bit
+
+	setup_region r0, r5, r6, MPU_DATA_SIDE, r12	@ XIP_PHYS_ADDR, shared, enabled
+	beq	3f					@ Memory-map not unified
+	setup_region r0, r5, r6, MPU_INSTR_SIDE, r12	@ XIP_PHYS_ADDR, shared, enabled
 3:	isb
+#endif
+
+	/* Enable the MPU */
+AR_CLASS(mrc	p15, 0, r0, c1, c0, 0)		@ Read SCTLR
+AR_CLASS(bic	r0, r0, #CR_BR)			@ Disable the 'default mem-map'
+AR_CLASS(orr	r0, r0, #CR_M)			@ Set SCTRL.M (MPU on)
+AR_CLASS(mcr	p15, 0, r0, c1, c0, 0)		@ Enable MPU
+
+M_CLASS(ldr	r0, [r12, #MPU_CTRL])
+M_CLASS(bic	r0, #MPU_CTRL_PRIVDEFENA)
+M_CLASS(orr	r0, #MPU_CTRL_ENABLE)
+M_CLASS(str	r0, [r12, #MPU_CTRL])
+	isb
+
+	ret	lr
+ENDPROC(__setup_mpu)
+
+#ifdef CONFIG_SMP
+/*
+ * r6: pointer at mpu_rgn_info
+ */
+
+ENTRY(__secondary_setup_mpu)
+	/* Probe for v7 PMSA compliance */
+	mrc	p15, 0, r0, c0, c1, 4		@ Read ID_MMFR0
+	and	r0, r0, #(MMFR0_PMSA)		@ PMSA field
+	teq	r0, #(MMFR0_PMSAv7)		@ PMSA v7
+	bne	__error_p
+
+	/* Determine whether the D/I-side memory map is unified. We set the
+	 * flags here and continue to use them for the rest of this function */
+	mrc	p15, 0, r0, c0, c0, 4		@ MPUIR
+	ands	r5, r0, #MPUIR_DREGION_SZMASK	@ 0 size d region => No MPU
+	beq	__error_p
+
+	ldr	r4, [r6, #MPU_RNG_INFO_USED]
+	mov	r5, #MPU_RNG_SIZE
+	add	r3, r6, #MPU_RNG_INFO_RNGS
+	mla	r3, r4, r5, r3
+
+1:
+	tst	r0, #MPUIR_nU			@ MPUIR_nU = 0 for unified
+	sub	r3, r3, #MPU_RNG_SIZE
+	sub	r4, r4, #1
+
+	set_region_nr r0, r4
+	isb
+
+	ldr	r0, [r3, #MPU_RGN_DRBAR]
+	ldr	r6, [r3, #MPU_RGN_DRSR]
+	ldr	r5, [r3, #MPU_RGN_DRACR]
+
+	setup_region r0, r5, r6, MPU_DATA_SIDE
+	beq	2f
+	setup_region r0, r5, r6, MPU_INSTR_SIDE
+2:	isb
+
+	mrc	p15, 0, r0, c0, c0, 4		@ Reevaluate the MPUIR
+	cmp	r4, #0
+	bgt	1b
 
 	/* Enable the MPU */
 	mrc	p15, 0, r0, c1, c0, 0		@ Read SCTLR
-	bic     r0, r0, #CR_BR			@ Disable the 'default mem-map'
+	bic	r0, r0, #CR_BR			@ Disable the 'default mem-map'
 	orr	r0, r0, #CR_M			@ Set SCTRL.M (MPU on)
 	mcr	p15, 0, r0, c1, c0, 0		@ Enable MPU
 	isb
+
 	ret	lr
-ENDPROC(__setup_mpu)
-#endif
+ENDPROC(__secondary_setup_mpu)
+
+#endif /* CONFIG_SMP */
+#endif /* CONFIG_ARM_MPU */
 #include "head-common.S"
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 8e9a3e4..fc40a2b 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -1069,6 +1069,16 @@
 	mdesc = setup_machine_fdt(__atags_pointer);
 	if (!mdesc)
 		mdesc = setup_machine_tags(__atags_pointer, __machine_arch_type);
+	if (!mdesc) {
+		early_print("\nError: invalid dtb and unrecognized/unsupported machine ID\n");
+		early_print("  r1=0x%08x, r2=0x%08x\n", __machine_arch_type,
+			    __atags_pointer);
+		if (__atags_pointer)
+			early_print("  r2[]=%*ph\n", 16,
+				    phys_to_virt(__atags_pointer));
+		dump_machine_table();
+	}
+
 	machine_desc = mdesc;
 	machine_name = mdesc->name;
 	dump_stack_set_arch_desc("%s", mdesc->name);
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index b67ae12..bd8810d 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -19,11 +19,12 @@
 #include <asm/elf.h>
 #include <asm/cacheflush.h>
 #include <asm/traps.h>
-#include <asm/ucontext.h>
 #include <asm/unistd.h>
 #include <asm/vfp.h>
 
-extern const unsigned long sigreturn_codes[7];
+#include "signal.h"
+
+extern const unsigned long sigreturn_codes[17];
 
 static unsigned long signal_return_offset;
 
@@ -172,15 +173,6 @@
 /*
  * Do a signal return; undo the signal stack.  These are aligned to 64-bit.
  */
-struct sigframe {
-	struct ucontext uc;
-	unsigned long retcode[2];
-};
-
-struct rt_sigframe {
-	struct siginfo info;
-	struct sigframe sig;
-};
 
 static int restore_sigframe(struct pt_regs *regs, struct sigframe __user *sf)
 {
@@ -366,9 +358,20 @@
 	     unsigned long __user *rc, void __user *frame)
 {
 	unsigned long handler = (unsigned long)ksig->ka.sa.sa_handler;
+	unsigned long handler_fdpic_GOT = 0;
 	unsigned long retcode;
-	int thumb = 0;
+	unsigned int idx, thumb = 0;
 	unsigned long cpsr = regs->ARM_cpsr & ~(PSR_f | PSR_E_BIT);
+	bool fdpic = IS_ENABLED(CONFIG_BINFMT_ELF_FDPIC) &&
+		     (current->personality & FDPIC_FUNCPTRS);
+
+	if (fdpic) {
+		unsigned long __user *fdpic_func_desc =
+					(unsigned long __user *)handler;
+		if (__get_user(handler, &fdpic_func_desc[0]) ||
+		    __get_user(handler_fdpic_GOT, &fdpic_func_desc[1]))
+			return 1;
+	}
 
 	cpsr |= PSR_ENDSTATE;
 
@@ -408,9 +411,26 @@
 
 	if (ksig->ka.sa.sa_flags & SA_RESTORER) {
 		retcode = (unsigned long)ksig->ka.sa.sa_restorer;
+		if (fdpic) {
+			/*
+			 * We need code to load the function descriptor.
+			 * That code follows the standard sigreturn code
+			 * (6 words), and is made of 3 + 2 words for each
+			 * variant. The 4th copied word is the actual FD
+			 * address that the assembly code expects.
+			 */
+			idx = 6 + thumb * 3;
+			if (ksig->ka.sa.sa_flags & SA_SIGINFO)
+				idx += 5;
+			if (__put_user(sigreturn_codes[idx],   rc  ) ||
+			    __put_user(sigreturn_codes[idx+1], rc+1) ||
+			    __put_user(sigreturn_codes[idx+2], rc+2) ||
+			    __put_user(retcode,                rc+3))
+				return 1;
+			goto rc_finish;
+		}
 	} else {
-		unsigned int idx = thumb << 1;
-
+		idx = thumb << 1;
 		if (ksig->ka.sa.sa_flags & SA_SIGINFO)
 			idx += 3;
 
@@ -422,6 +442,7 @@
 		    __put_user(sigreturn_codes[idx+1], rc+1))
 			return 1;
 
+rc_finish:
 #ifdef CONFIG_MMU
 		if (cpsr & MODE32_BIT) {
 			struct mm_struct *mm = current->mm;
@@ -441,7 +462,7 @@
 			 * the return code written onto the stack.
 			 */
 			flush_icache_range((unsigned long)rc,
-					   (unsigned long)(rc + 2));
+					   (unsigned long)(rc + 3));
 
 			retcode = ((unsigned long)rc) + thumb;
 		}
@@ -451,6 +472,8 @@
 	regs->ARM_sp = (unsigned long)frame;
 	regs->ARM_lr = retcode;
 	regs->ARM_pc = handler;
+	if (fdpic)
+		regs->ARM_r9 = handler_fdpic_GOT;
 	regs->ARM_cpsr = cpsr;
 
 	return 0;
diff --git a/arch/arm/kernel/signal.h b/arch/arm/kernel/signal.h
new file mode 100644
index 0000000..b7b838b
--- /dev/null
+++ b/arch/arm/kernel/signal.h
@@ -0,0 +1,11 @@
+#include <asm/ucontext.h>
+
+struct sigframe {
+	struct ucontext uc;
+	unsigned long retcode[4];
+};
+
+struct rt_sigframe {
+	struct siginfo info;
+	struct sigframe sig;
+};
diff --git a/arch/arm/kernel/sigreturn_codes.S b/arch/arm/kernel/sigreturn_codes.S
index b84d0cb..2c7b22e 100644
--- a/arch/arm/kernel/sigreturn_codes.S
+++ b/arch/arm/kernel/sigreturn_codes.S
@@ -14,6 +14,8 @@
  * GNU General Public License for more details.
  */
 
+#include <asm/assembler.h>
+#include <asm/asm-offsets.h>
 #include <asm/unistd.h>
 
 /*
@@ -51,6 +53,17 @@
 	.thumb
 	.endm
 
+	.macro arm_fdpic_slot n
+	.org	sigreturn_codes + 24 + 20 * (\n)
+ARM_OK(	.arm	)
+	.endm
+
+	.macro thumb_fdpic_slot n
+	.org	sigreturn_codes + 24 + 20 * (\n) + 12
+	.thumb
+	.endm
+
+
 #if __LINUX_ARM_ARCH__ <= 4
 	/*
 	 * Note we manually set minimally required arch that supports
@@ -90,13 +103,46 @@
 	movs	r7, #(__NR_rt_sigreturn - __NR_SYSCALL_BASE)
 	swi	#0
 
+	/* ARM sigreturn restorer FDPIC bounce code snippet */
+	arm_fdpic_slot 0
+ARM_OK(	ldr	r3, [sp, #SIGFRAME_RC3_OFFSET] )
+ARM_OK(	ldmia	r3, {r3, r9} )
+#ifdef CONFIG_ARM_THUMB
+ARM_OK(	bx	r3 )
+#else
+ARM_OK(	ret	r3 )
+#endif
+
+	/* Thumb sigreturn restorer FDPIC bounce code snippet */
+	thumb_fdpic_slot 0
+	ldr	r3, [sp, #SIGFRAME_RC3_OFFSET]
+	ldmia	r3, {r2, r3}
+	mov	r9, r3
+	bx	r2
+
+	/* ARM sigreturn_rt restorer FDPIC bounce code snippet */
+	arm_fdpic_slot 1
+ARM_OK(	ldr	r3, [sp, #RT_SIGFRAME_RC3_OFFSET] )
+ARM_OK(	ldmia	r3, {r3, r9} )
+#ifdef CONFIG_ARM_THUMB
+ARM_OK(	bx	r3 )
+#else
+ARM_OK(	ret	r3 )
+#endif
+
+	/* Thumb sigreturn_rt restorer FDPIC bounce code snippet */
+	thumb_fdpic_slot 1
+	ldr	r3, [sp, #RT_SIGFRAME_RC3_OFFSET]
+	ldmia	r3, {r2, r3}
+	mov	r9, r3
+	bx	r2
+
 	/*
-	 * Note on addtional space: setup_return in signal.c
-	 * algorithm uses two words copy regardless whether
-	 * it is thumb case or not, so we need additional
-	 * word after real last entry.
+	 * Note on additional space: setup_return in signal.c
+	 * always copies the same number of words regardless whether
+	 * it is thumb case or not, so we need one additional padding
+	 * word after the last entry.
 	 */
-	arm_slot 2
 	.space	4
 
 	.size	sigreturn_codes, . - sigreturn_codes
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index c9a0a52..b4fbf00 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -114,7 +114,7 @@
 	 */
 	secondary_data.stack = task_stack_page(idle) + THREAD_START_SP;
 #ifdef CONFIG_ARM_MPU
-	secondary_data.mpu_rgn_szr = mpu_rgn_info.rgns[MPU_RAM_REGION].drsr;
+	secondary_data.mpu_rgn_info = &mpu_rgn_info;
 #endif
 
 #ifdef CONFIG_MMU
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 0fcd82f..5cf0488 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -655,6 +655,9 @@
 		set_tls(regs->ARM_r0);
 		return 0;
 
+	case NR(get_tls):
+		return current_thread_info()->tp_value[0];
+
 	default:
 		/* Calls 9f00xx..9f07ff are defined to return -ENOSYS
 		   if not implemented, rather than raising SIGILL.  This
diff --git a/arch/arm/kernel/vmlinux-xip.lds.S b/arch/arm/kernel/vmlinux-xip.lds.S
index 0951df9..ec4b3f9 100644
--- a/arch/arm/kernel/vmlinux-xip.lds.S
+++ b/arch/arm/kernel/vmlinux-xip.lds.S
@@ -7,6 +7,8 @@
 /* No __ro_after_init data in the .rodata section - which will always be ro */
 #define RO_AFTER_INIT_DATA
 
+#include <linux/sizes.h>
+
 #include <asm-generic/vmlinux.lds.h>
 #include <asm/cache.h>
 #include <asm/thread_info.h>
@@ -78,9 +80,7 @@
 		*(.text.fixup)
 		*(__ex_table)
 #endif
-#ifndef CONFIG_SMP_ON_UP
 		*(.alt.smp.init)
-#endif
 		*(.discard)
 		*(.discard.*)
 	}
@@ -182,19 +182,7 @@
 		*(.taglist.init)
 		__tagtable_end = .;
 	}
-#ifdef CONFIG_SMP_ON_UP
-	.init.smpalt : {
-		__smpalt_begin = .;
-		*(.alt.smp.init)
-		__smpalt_end = .;
-	}
-#endif
-	.init.pv_table : {
-		__pv_table_begin = .;
-		*(.pv_table)
-		__pv_table_end = .;
-	}
-	.init.data : {
+	.init.rodata : {
 		INIT_SETUP(16)
 		INIT_CALLS
 		CON_INITCALL
@@ -202,48 +190,49 @@
 		INIT_RAM_FS
 	}
 
+#ifdef CONFIG_ARM_MPU
+	. = ALIGN(SZ_128K);
+#endif
+	_exiprom = .;			/* End of XIP ROM area */
+
+/*
+ * From this point, stuff is considered writable and will be copied to RAM
+ */
+	__data_loc = ALIGN(4);		/* location in file */
+	. = PAGE_OFFSET + TEXT_OFFSET;	/* location in memory */
+#undef LOAD_OFFSET
+#define LOAD_OFFSET (PAGE_OFFSET + TEXT_OFFSET - __data_loc)
+
+	. = ALIGN(THREAD_SIZE);
+	_sdata = .;
+	RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
+	.data.ro_after_init : AT(ADDR(.data.ro_after_init) - LOAD_OFFSET) {
+		*(.data..ro_after_init)
+	}
+	_edata = .;
+
+	. = ALIGN(PAGE_SIZE);
+	__init_begin = .;
+	.init.data : AT(ADDR(.init.data) - LOAD_OFFSET) {
+		INIT_DATA
+	}
+	.exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) {
+		ARM_EXIT_KEEP(EXIT_DATA)
+	}
 #ifdef CONFIG_SMP
 	PERCPU_SECTION(L1_CACHE_BYTES)
 #endif
 
-	_exiprom = .;			/* End of XIP ROM area */
-	__data_loc = ALIGN(4);		/* location in binary */
-	. = PAGE_OFFSET + TEXT_OFFSET;
+	/*
+	 * End of copied data. We need a dummy section to get its LMA.
+	 * Also located before final ALIGN() as trailing padding is not stored
+	 * in the resulting binary file and useless to copy.
+	 */
+	.data.endmark : AT(ADDR(.data.endmark) - LOAD_OFFSET) { }
+	_edata_loc = LOADADDR(.data.endmark);
 
-	.data : AT(__data_loc) {
-		_data = .;		/* address in memory */
-		_sdata = .;
-
-		/*
-		 * first, the init task union, aligned
-		 * to an 8192 byte boundary.
-		 */
-		INIT_TASK_DATA(THREAD_SIZE)
-
-		. = ALIGN(PAGE_SIZE);
-		__init_begin = .;
-		INIT_DATA
-		ARM_EXIT_KEEP(EXIT_DATA)
-		. = ALIGN(PAGE_SIZE);
-		__init_end = .;
-
-		*(.data..ro_after_init)
-
-		NOSAVE_DATA
-		CACHELINE_ALIGNED_DATA(L1_CACHE_BYTES)
-		READ_MOSTLY_DATA(L1_CACHE_BYTES)
-
-		/*
-		 * and the usual data section
-		 */
-		DATA_DATA
-		CONSTRUCTORS
-
-		_edata = .;
-	}
-	_edata_loc = __data_loc + SIZEOF(.data);
-
-	BUG_TABLE
+	. = ALIGN(PAGE_SIZE);
+	__init_end = .;
 
 #ifdef CONFIG_HAVE_TCM
         /*
@@ -302,7 +291,7 @@
 	}
 #endif
 
-	BSS_SECTION(0, 0, 0)
+	BSS_SECTION(0, 0, 8)
 	_end = .;
 
 	STABS_DEBUG
@@ -323,3 +312,29 @@
  */
 ASSERT(__hyp_idmap_text_end - (__hyp_idmap_text_start & PAGE_MASK) <= PAGE_SIZE,
 	"HYP init code too big or misaligned")
+
+#ifdef CONFIG_XIP_DEFLATED_DATA
+/*
+ * The .bss is used as a stack area for __inflate_kernel_data() whose stack
+ * frame is 9568 bytes. Make sure it has extra room left.
+ */
+ASSERT((_end - __bss_start) >= 12288, ".bss too small for CONFIG_XIP_DEFLATED_DATA")
+#endif
+
+#ifdef CONFIG_ARM_MPU
+/*
+ * Due to PMSAv7 restriction on base address and size we have to
+ * enforce minimal alignment restrictions. It was seen that weaker
+ * alignment restriction on _xiprom will likely force XIP address
+ * space spawns multiple MPU regions thus it is likely we run in
+ * situation when we are reprogramming MPU region we run on with
+ * something which doesn't cover reprogramming code itself, so as soon
+ * as we update MPU settings we'd immediately try to execute straight
+ * from background region which is XN.
+ * It seem that alignment in 1M should suit most users.
+ * _exiprom is aligned as 1/8 of 1M so can be covered by subregion
+ * disable
+ */
+ASSERT(!(_xiprom & (SZ_1M - 1)), "XIP start address may cause MPU programming issues")
+ASSERT(!(_exiprom & (SZ_128K - 1)), "XIP end address may cause MPU programming issues")
+#endif
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index 1845a5a..ee53f65 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -215,14 +215,9 @@
 		*(.pv_table)
 		__pv_table_end = .;
 	}
-	.init.data : {
-		INIT_DATA
-		INIT_SETUP(16)
-		INIT_CALLS
-		CON_INITCALL
-		SECURITY_INITCALL
-		INIT_RAM_FS
-	}
+
+	INIT_DATA_SECTION(16)
+
 	.exit.data : {
 		ARM_EXIT_KEEP(EXIT_DATA)
 	}
@@ -237,33 +232,10 @@
 	. = ALIGN(THREAD_SIZE);
 #endif
 	__init_end = .;
-	__data_loc = .;
 
-	.data : AT(__data_loc) {
-		_data = .;		/* address in memory */
-		_sdata = .;
-
-		/*
-		 * first, the init task union, aligned
-		 * to an 8192 byte boundary.
-		 */
-		INIT_TASK_DATA(THREAD_SIZE)
-
-		NOSAVE_DATA
-		CACHELINE_ALIGNED_DATA(L1_CACHE_BYTES)
-		READ_MOSTLY_DATA(L1_CACHE_BYTES)
-
-		/*
-		 * and the usual data section
-		 */
-		DATA_DATA
-		CONSTRUCTORS
-
-		_edata = .;
-	}
-	_edata_loc = __data_loc + SIZEOF(.data);
-
-	BUG_TABLE
+	_sdata = .;
+	RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
+	_edata = .;
 
 #ifdef CONFIG_HAVE_TCM
         /*
diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile
index f353ee5..01bcc33 100644
--- a/arch/arm/mm/Makefile
+++ b/arch/arm/mm/Makefile
@@ -10,6 +10,7 @@
 
 ifneq ($(CONFIG_MMU),y)
 obj-y				+= nommu.o
+obj-$(CONFIG_ARM_MPU)		+= pmsa-v7.o
 endif
 
 obj-$(CONFIG_ARM_PTDUMP)	+= dump.o
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index fcf1473..ada8eb2 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -382,9 +382,9 @@
 }
 
 #define DEFAULT_DMA_COHERENT_POOL_SIZE	SZ_256K
-static struct gen_pool *atomic_pool;
+static struct gen_pool *atomic_pool __ro_after_init;
 
-static size_t atomic_pool_size = DEFAULT_DMA_COHERENT_POOL_SIZE;
+static size_t atomic_pool_size __initdata = DEFAULT_DMA_COHERENT_POOL_SIZE;
 
 static int __init early_coherent_pool(char *p)
 {
@@ -393,21 +393,6 @@
 }
 early_param("coherent_pool", early_coherent_pool);
 
-void __init init_dma_coherent_pool_size(unsigned long size)
-{
-	/*
-	 * Catch any attempt to set the pool size too late.
-	 */
-	BUG_ON(atomic_pool);
-
-	/*
-	 * Set architecture specific coherent pool size only if
-	 * it has not been changed by kernel command line parameter.
-	 */
-	if (atomic_pool_size == DEFAULT_DMA_COHERENT_POOL_SIZE)
-		atomic_pool_size = size;
-}
-
 /*
  * Initialise the coherent pool for atomic allocations.
  */
@@ -443,7 +428,7 @@
 
 		gen_pool_set_algo(atomic_pool,
 				gen_pool_first_fit_order_align,
-				(void *)PAGE_SHIFT);
+				NULL);
 		pr_info("DMA: preallocated %zu KiB pool for atomic coherent allocations\n",
 		       atomic_pool_size / 1024);
 		return 0;
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index ad80548..81d4482 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -580,16 +580,6 @@
 	BUILD_BUG_ON(PKMAP_BASE + LAST_PKMAP * PAGE_SIZE > PAGE_OFFSET);
 	BUG_ON(PKMAP_BASE + LAST_PKMAP * PAGE_SIZE	> PAGE_OFFSET);
 #endif
-
-	if (PAGE_SIZE >= 16384 && get_num_physpages() <= 128) {
-		extern int sysctl_overcommit_memory;
-		/*
-		 * On a machine this small we won't get
-		 * anywhere without overcommit, so turn
-		 * it on by default.
-		 */
-		sysctl_overcommit_memory = OVERCOMMIT_ALWAYS;
-	}
 }
 
 #ifdef CONFIG_STRICT_KERNEL_RWX
diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c
index 91537d9..e437081 100644
--- a/arch/arm/mm/nommu.c
+++ b/arch/arm/mm/nommu.c
@@ -27,259 +27,7 @@
 
 #ifdef CONFIG_ARM_MPU
 struct mpu_rgn_info mpu_rgn_info;
-
-/* Region number */
-static void rgnr_write(u32 v)
-{
-	asm("mcr        p15, 0, %0, c6, c2, 0" : : "r" (v));
-}
-
-/* Data-side / unified region attributes */
-
-/* Region access control register */
-static void dracr_write(u32 v)
-{
-	asm("mcr        p15, 0, %0, c6, c1, 4" : : "r" (v));
-}
-
-/* Region size register */
-static void drsr_write(u32 v)
-{
-	asm("mcr        p15, 0, %0, c6, c1, 2" : : "r" (v));
-}
-
-/* Region base address register */
-static void drbar_write(u32 v)
-{
-	asm("mcr        p15, 0, %0, c6, c1, 0" : : "r" (v));
-}
-
-static u32 drbar_read(void)
-{
-	u32 v;
-	asm("mrc        p15, 0, %0, c6, c1, 0" : "=r" (v));
-	return v;
-}
-/* Optional instruction-side region attributes */
-
-/* I-side Region access control register */
-static void iracr_write(u32 v)
-{
-	asm("mcr        p15, 0, %0, c6, c1, 5" : : "r" (v));
-}
-
-/* I-side Region size register */
-static void irsr_write(u32 v)
-{
-	asm("mcr        p15, 0, %0, c6, c1, 3" : : "r" (v));
-}
-
-/* I-side Region base address register */
-static void irbar_write(u32 v)
-{
-	asm("mcr        p15, 0, %0, c6, c1, 1" : : "r" (v));
-}
-
-static unsigned long irbar_read(void)
-{
-	unsigned long v;
-	asm("mrc        p15, 0, %0, c6, c1, 1" : "=r" (v));
-	return v;
-}
-
-/* MPU initialisation functions */
-void __init adjust_lowmem_bounds_mpu(void)
-{
-	phys_addr_t phys_offset = PHYS_OFFSET;
-	phys_addr_t aligned_region_size, specified_mem_size, rounded_mem_size;
-	struct memblock_region *reg;
-	bool first = true;
-	phys_addr_t mem_start;
-	phys_addr_t mem_end;
-
-	for_each_memblock(memory, reg) {
-		if (first) {
-			/*
-			 * Initially only use memory continuous from
-			 * PHYS_OFFSET */
-			if (reg->base != phys_offset)
-				panic("First memory bank must be contiguous from PHYS_OFFSET");
-
-			mem_start = reg->base;
-			mem_end = reg->base + reg->size;
-			specified_mem_size = reg->size;
-			first = false;
-		} else {
-			/*
-			 * memblock auto merges contiguous blocks, remove
-			 * all blocks afterwards in one go (we can't remove
-			 * blocks separately while iterating)
-			 */
-			pr_notice("Ignoring RAM after %pa, memory at %pa ignored\n",
-				  &mem_end, &reg->base);
-			memblock_remove(reg->base, 0 - reg->base);
-			break;
-		}
-	}
-
-	/*
-	 * MPU has curious alignment requirements: Size must be power of 2, and
-	 * region start must be aligned to the region size
-	 */
-	if (phys_offset != 0)
-		pr_info("PHYS_OFFSET != 0 => MPU Region size constrained by alignment requirements\n");
-
-	/*
-	 * Maximum aligned region might overflow phys_addr_t if phys_offset is
-	 * 0. Hence we keep everything below 4G until we take the smaller of
-	 * the aligned_region_size and rounded_mem_size, one of which is
-	 * guaranteed to be smaller than the maximum physical address.
-	 */
-	aligned_region_size = (phys_offset - 1) ^ (phys_offset);
-	/* Find the max power-of-two sized region that fits inside our bank */
-	rounded_mem_size = (1 <<  __fls(specified_mem_size)) - 1;
-
-	/* The actual region size is the smaller of the two */
-	aligned_region_size = aligned_region_size < rounded_mem_size
-				? aligned_region_size + 1
-				: rounded_mem_size + 1;
-
-	if (aligned_region_size != specified_mem_size) {
-		pr_warn("Truncating memory from %pa to %pa (MPU region constraints)",
-				&specified_mem_size, &aligned_region_size);
-		memblock_remove(mem_start + aligned_region_size,
-				specified_mem_size - aligned_region_size);
-
-		mem_end = mem_start + aligned_region_size;
-	}
-
-	pr_debug("MPU Region from %pa size %pa (end %pa))\n",
-		&phys_offset, &aligned_region_size, &mem_end);
-
-}
-
-static int mpu_present(void)
-{
-	return ((read_cpuid_ext(CPUID_EXT_MMFR0) & MMFR0_PMSA) == MMFR0_PMSAv7);
-}
-
-static int mpu_max_regions(void)
-{
-	/*
-	 * We don't support a different number of I/D side regions so if we
-	 * have separate instruction and data memory maps then return
-	 * whichever side has a smaller number of supported regions.
-	 */
-	u32 dregions, iregions, mpuir;
-	mpuir = read_cpuid(CPUID_MPUIR);
-
-	dregions = iregions = (mpuir & MPUIR_DREGION_SZMASK) >> MPUIR_DREGION;
-
-	/* Check for separate d-side and i-side memory maps */
-	if (mpuir & MPUIR_nU)
-		iregions = (mpuir & MPUIR_IREGION_SZMASK) >> MPUIR_IREGION;
-
-	/* Use the smallest of the two maxima */
-	return min(dregions, iregions);
-}
-
-static int mpu_iside_independent(void)
-{
-	/* MPUIR.nU specifies whether there is *not* a unified memory map */
-	return read_cpuid(CPUID_MPUIR) & MPUIR_nU;
-}
-
-static int mpu_min_region_order(void)
-{
-	u32 drbar_result, irbar_result;
-	/* We've kept a region free for this probing */
-	rgnr_write(MPU_PROBE_REGION);
-	isb();
-	/*
-	 * As per ARM ARM, write 0xFFFFFFFC to DRBAR to find the minimum
-	 * region order
-	*/
-	drbar_write(0xFFFFFFFC);
-	drbar_result = irbar_result = drbar_read();
-	drbar_write(0x0);
-	/* If the MPU is non-unified, we use the larger of the two minima*/
-	if (mpu_iside_independent()) {
-		irbar_write(0xFFFFFFFC);
-		irbar_result = irbar_read();
-		irbar_write(0x0);
-	}
-	isb(); /* Ensure that MPU region operations have completed */
-	/* Return whichever result is larger */
-	return __ffs(max(drbar_result, irbar_result));
-}
-
-static int mpu_setup_region(unsigned int number, phys_addr_t start,
-			unsigned int size_order, unsigned int properties)
-{
-	u32 size_data;
-
-	/* We kept a region free for probing resolution of MPU regions*/
-	if (number > mpu_max_regions() || number == MPU_PROBE_REGION)
-		return -ENOENT;
-
-	if (size_order > 32)
-		return -ENOMEM;
-
-	if (size_order < mpu_min_region_order())
-		return -ENOMEM;
-
-	/* Writing N to bits 5:1 (RSR_SZ)  specifies region size 2^N+1 */
-	size_data = ((size_order - 1) << MPU_RSR_SZ) | 1 << MPU_RSR_EN;
-
-	dsb(); /* Ensure all previous data accesses occur with old mappings */
-	rgnr_write(number);
-	isb();
-	drbar_write(start);
-	dracr_write(properties);
-	isb(); /* Propagate properties before enabling region */
-	drsr_write(size_data);
-
-	/* Check for independent I-side registers */
-	if (mpu_iside_independent()) {
-		irbar_write(start);
-		iracr_write(properties);
-		isb();
-		irsr_write(size_data);
-	}
-	isb();
-
-	/* Store region info (we treat i/d side the same, so only store d) */
-	mpu_rgn_info.rgns[number].dracr = properties;
-	mpu_rgn_info.rgns[number].drbar = start;
-	mpu_rgn_info.rgns[number].drsr = size_data;
-	return 0;
-}
-
-/*
-* Set up default MPU regions, doing nothing if there is no MPU
-*/
-void __init mpu_setup(void)
-{
-	int region_err;
-	if (!mpu_present())
-		return;
-
-	region_err = mpu_setup_region(MPU_RAM_REGION, PHYS_OFFSET,
-					ilog2(memblock.memory.regions[0].size),
-					MPU_AP_PL1RW_PL0RW | MPU_RGN_NORMAL);
-	if (region_err) {
-		panic("MPU region initialization failure! %d", region_err);
-	} else {
-		pr_info("Using ARMv7 PMSA Compliant MPU. "
-			 "Region independence: %s, Max regions: %d\n",
-			mpu_iside_independent() ? "Yes" : "No",
-			mpu_max_regions());
-	}
-}
-#else
-static void adjust_lowmem_bounds_mpu(void) {}
-static void __init mpu_setup(void) {}
-#endif /* CONFIG_ARM_MPU */
+#endif
 
 #ifdef CONFIG_CPU_CP15
 #ifdef CONFIG_CPU_HIGH_VECTOR
diff --git a/arch/arm/mm/pmsa-v7.c b/arch/arm/mm/pmsa-v7.c
new file mode 100644
index 0000000..976df60
--- /dev/null
+++ b/arch/arm/mm/pmsa-v7.c
@@ -0,0 +1,484 @@
+/*
+ * Based on linux/arch/arm/mm/nommu.c
+ *
+ * ARM PMSAv7 supporting functions.
+ */
+
+#include <linux/bitops.h>
+#include <linux/memblock.h>
+
+#include <asm/cacheflush.h>
+#include <asm/cp15.h>
+#include <asm/cputype.h>
+#include <asm/mpu.h>
+#include <asm/sections.h>
+
+#include "mm.h"
+
+struct region {
+	phys_addr_t base;
+	phys_addr_t size;
+	unsigned long subreg;
+};
+
+static struct region __initdata mem[MPU_MAX_REGIONS];
+#ifdef CONFIG_XIP_KERNEL
+static struct region __initdata xip[MPU_MAX_REGIONS];
+#endif
+
+static unsigned int __initdata mpu_min_region_order;
+static unsigned int __initdata mpu_max_regions;
+
+static int __init __mpu_min_region_order(void);
+static int __init __mpu_max_regions(void);
+
+#ifndef CONFIG_CPU_V7M
+
+#define DRBAR	__ACCESS_CP15(c6, 0, c1, 0)
+#define IRBAR	__ACCESS_CP15(c6, 0, c1, 1)
+#define DRSR	__ACCESS_CP15(c6, 0, c1, 2)
+#define IRSR	__ACCESS_CP15(c6, 0, c1, 3)
+#define DRACR	__ACCESS_CP15(c6, 0, c1, 4)
+#define IRACR	__ACCESS_CP15(c6, 0, c1, 5)
+#define RNGNR	__ACCESS_CP15(c6, 0, c2, 0)
+
+/* Region number */
+static inline void rgnr_write(u32 v)
+{
+	write_sysreg(v, RNGNR);
+}
+
+/* Data-side / unified region attributes */
+
+/* Region access control register */
+static inline void dracr_write(u32 v)
+{
+	write_sysreg(v, DRACR);
+}
+
+/* Region size register */
+static inline void drsr_write(u32 v)
+{
+	write_sysreg(v, DRSR);
+}
+
+/* Region base address register */
+static inline void drbar_write(u32 v)
+{
+	write_sysreg(v, DRBAR);
+}
+
+static inline u32 drbar_read(void)
+{
+	return read_sysreg(DRBAR);
+}
+/* Optional instruction-side region attributes */
+
+/* I-side Region access control register */
+static inline void iracr_write(u32 v)
+{
+	write_sysreg(v, IRACR);
+}
+
+/* I-side Region size register */
+static inline void irsr_write(u32 v)
+{
+	write_sysreg(v, IRSR);
+}
+
+/* I-side Region base address register */
+static inline void irbar_write(u32 v)
+{
+	write_sysreg(v, IRBAR);
+}
+
+static inline u32 irbar_read(void)
+{
+	return read_sysreg(IRBAR);
+}
+
+#else
+
+static inline void rgnr_write(u32 v)
+{
+	writel_relaxed(v, BASEADDR_V7M_SCB + MPU_RNR);
+}
+
+/* Data-side / unified region attributes */
+
+/* Region access control register */
+static inline void dracr_write(u32 v)
+{
+	u32 rsr = readl_relaxed(BASEADDR_V7M_SCB + MPU_RASR) & GENMASK(15, 0);
+
+	writel_relaxed((v << 16) | rsr, BASEADDR_V7M_SCB + MPU_RASR);
+}
+
+/* Region size register */
+static inline void drsr_write(u32 v)
+{
+	u32 racr = readl_relaxed(BASEADDR_V7M_SCB + MPU_RASR) & GENMASK(31, 16);
+
+	writel_relaxed(v | racr, BASEADDR_V7M_SCB + MPU_RASR);
+}
+
+/* Region base address register */
+static inline void drbar_write(u32 v)
+{
+	writel_relaxed(v, BASEADDR_V7M_SCB + MPU_RBAR);
+}
+
+static inline u32 drbar_read(void)
+{
+	return readl_relaxed(BASEADDR_V7M_SCB + MPU_RBAR);
+}
+
+/* ARMv7-M only supports a unified MPU, so I-side operations are nop */
+
+static inline void iracr_write(u32 v) {}
+static inline void irsr_write(u32 v) {}
+static inline void irbar_write(u32 v) {}
+static inline unsigned long irbar_read(void) {return 0;}
+
+#endif
+
+static int __init mpu_present(void)
+{
+	return ((read_cpuid_ext(CPUID_EXT_MMFR0) & MMFR0_PMSA) == MMFR0_PMSAv7);
+}
+
+static bool __init try_split_region(phys_addr_t base, phys_addr_t size, struct region *region)
+{
+	unsigned long  subreg, bslots, sslots;
+	phys_addr_t abase = base & ~(size - 1);
+	phys_addr_t asize = base + size - abase;
+	phys_addr_t p2size = 1 << __fls(asize);
+	phys_addr_t bdiff, sdiff;
+
+	if (p2size != asize)
+		p2size *= 2;
+
+	bdiff = base - abase;
+	sdiff = p2size - asize;
+	subreg = p2size / MPU_NR_SUBREGS;
+
+	if ((bdiff % subreg) || (sdiff % subreg))
+		return false;
+
+	bslots = bdiff / subreg;
+	sslots = sdiff / subreg;
+
+	if (bslots || sslots) {
+		int i;
+
+		if (subreg < MPU_MIN_SUBREG_SIZE)
+			return false;
+
+		if (bslots + sslots > MPU_NR_SUBREGS)
+			return false;
+
+		for (i = 0; i < bslots; i++)
+			_set_bit(i, &region->subreg);
+
+		for (i = 1; i <= sslots; i++)
+			_set_bit(MPU_NR_SUBREGS - i, &region->subreg);
+	}
+
+	region->base = abase;
+	region->size = p2size;
+
+	return true;
+}
+
+static int __init allocate_region(phys_addr_t base, phys_addr_t size,
+				  unsigned int limit, struct region *regions)
+{
+	int count = 0;
+	phys_addr_t diff = size;
+	int attempts = MPU_MAX_REGIONS;
+
+	while (diff) {
+		/* Try cover region as is (maybe with help of subregions) */
+		if (try_split_region(base, size, &regions[count])) {
+			count++;
+			base += size;
+			diff -= size;
+			size = diff;
+		} else {
+			/*
+			 * Maximum aligned region might overflow phys_addr_t
+			 * if "base" is 0. Hence we keep everything below 4G
+			 * until we take the smaller of the aligned region
+			 * size ("asize") and rounded region size ("p2size"),
+			 * one of which is guaranteed to be smaller than the
+			 * maximum physical address.
+			 */
+			phys_addr_t asize = (base - 1) ^ base;
+			phys_addr_t p2size = (1 <<  __fls(diff)) - 1;
+
+			size = asize < p2size ? asize + 1 : p2size + 1;
+		}
+
+		if (count > limit)
+			break;
+
+		if (!attempts)
+			break;
+
+		attempts--;
+	}
+
+	return count;
+}
+
+/* MPU initialisation functions */
+void __init adjust_lowmem_bounds_mpu(void)
+{
+	phys_addr_t  specified_mem_size = 0, total_mem_size = 0;
+	struct memblock_region *reg;
+	bool first = true;
+	phys_addr_t mem_start;
+	phys_addr_t mem_end;
+	unsigned int mem_max_regions;
+	int num, i;
+
+	if (!mpu_present())
+		return;
+
+	/* Free-up MPU_PROBE_REGION */
+	mpu_min_region_order = __mpu_min_region_order();
+
+	/* How many regions are supported */
+	mpu_max_regions = __mpu_max_regions();
+
+	mem_max_regions = min((unsigned int)MPU_MAX_REGIONS, mpu_max_regions);
+
+	/* We need to keep one slot for background region */
+	mem_max_regions--;
+
+#ifndef CONFIG_CPU_V7M
+	/* ... and one for vectors */
+	mem_max_regions--;
+#endif
+
+#ifdef CONFIG_XIP_KERNEL
+	/* plus some regions to cover XIP ROM */
+	num = allocate_region(CONFIG_XIP_PHYS_ADDR, __pa(_exiprom) - CONFIG_XIP_PHYS_ADDR,
+			      mem_max_regions, xip);
+
+	mem_max_regions -= num;
+#endif
+
+	for_each_memblock(memory, reg) {
+		if (first) {
+			phys_addr_t phys_offset = PHYS_OFFSET;
+
+			/*
+			 * Initially only use memory continuous from
+			 * PHYS_OFFSET */
+			if (reg->base != phys_offset)
+				panic("First memory bank must be contiguous from PHYS_OFFSET");
+
+			mem_start = reg->base;
+			mem_end = reg->base + reg->size;
+			specified_mem_size = reg->size;
+			first = false;
+		} else {
+			/*
+			 * memblock auto merges contiguous blocks, remove
+			 * all blocks afterwards in one go (we can't remove
+			 * blocks separately while iterating)
+			 */
+			pr_notice("Ignoring RAM after %pa, memory at %pa ignored\n",
+				  &mem_end, &reg->base);
+			memblock_remove(reg->base, 0 - reg->base);
+			break;
+		}
+	}
+
+	num = allocate_region(mem_start, specified_mem_size, mem_max_regions, mem);
+
+	for (i = 0; i < num; i++) {
+		unsigned long  subreg = mem[i].size / MPU_NR_SUBREGS;
+
+		total_mem_size += mem[i].size - subreg * hweight_long(mem[i].subreg);
+
+		pr_debug("MPU: base %pa size %pa disable subregions: %*pbl\n",
+			 &mem[i].base, &mem[i].size, MPU_NR_SUBREGS, &mem[i].subreg);
+	}
+
+	if (total_mem_size != specified_mem_size) {
+		pr_warn("Truncating memory from %pa to %pa (MPU region constraints)",
+				&specified_mem_size, &total_mem_size);
+		memblock_remove(mem_start + total_mem_size,
+				specified_mem_size - total_mem_size);
+	}
+}
+
+static int __init __mpu_max_regions(void)
+{
+	/*
+	 * We don't support a different number of I/D side regions so if we
+	 * have separate instruction and data memory maps then return
+	 * whichever side has a smaller number of supported regions.
+	 */
+	u32 dregions, iregions, mpuir;
+
+	mpuir = read_cpuid_mputype();
+
+	dregions = iregions = (mpuir & MPUIR_DREGION_SZMASK) >> MPUIR_DREGION;
+
+	/* Check for separate d-side and i-side memory maps */
+	if (mpuir & MPUIR_nU)
+		iregions = (mpuir & MPUIR_IREGION_SZMASK) >> MPUIR_IREGION;
+
+	/* Use the smallest of the two maxima */
+	return min(dregions, iregions);
+}
+
+static int __init mpu_iside_independent(void)
+{
+	/* MPUIR.nU specifies whether there is *not* a unified memory map */
+	return read_cpuid_mputype() & MPUIR_nU;
+}
+
+static int __init __mpu_min_region_order(void)
+{
+	u32 drbar_result, irbar_result;
+
+	/* We've kept a region free for this probing */
+	rgnr_write(MPU_PROBE_REGION);
+	isb();
+	/*
+	 * As per ARM ARM, write 0xFFFFFFFC to DRBAR to find the minimum
+	 * region order
+	*/
+	drbar_write(0xFFFFFFFC);
+	drbar_result = irbar_result = drbar_read();
+	drbar_write(0x0);
+	/* If the MPU is non-unified, we use the larger of the two minima*/
+	if (mpu_iside_independent()) {
+		irbar_write(0xFFFFFFFC);
+		irbar_result = irbar_read();
+		irbar_write(0x0);
+	}
+	isb(); /* Ensure that MPU region operations have completed */
+	/* Return whichever result is larger */
+
+	return __ffs(max(drbar_result, irbar_result));
+}
+
+static int __init mpu_setup_region(unsigned int number, phys_addr_t start,
+				   unsigned int size_order, unsigned int properties,
+				   unsigned int subregions, bool need_flush)
+{
+	u32 size_data;
+
+	/* We kept a region free for probing resolution of MPU regions*/
+	if (number > mpu_max_regions
+	    || number >= MPU_MAX_REGIONS)
+		return -ENOENT;
+
+	if (size_order > 32)
+		return -ENOMEM;
+
+	if (size_order < mpu_min_region_order)
+		return -ENOMEM;
+
+	/* Writing N to bits 5:1 (RSR_SZ)  specifies region size 2^N+1 */
+	size_data = ((size_order - 1) << MPU_RSR_SZ) | 1 << MPU_RSR_EN;
+	size_data |= subregions << MPU_RSR_SD;
+
+	if (need_flush)
+		flush_cache_all();
+
+	dsb(); /* Ensure all previous data accesses occur with old mappings */
+	rgnr_write(number);
+	isb();
+	drbar_write(start);
+	dracr_write(properties);
+	isb(); /* Propagate properties before enabling region */
+	drsr_write(size_data);
+
+	/* Check for independent I-side registers */
+	if (mpu_iside_independent()) {
+		irbar_write(start);
+		iracr_write(properties);
+		isb();
+		irsr_write(size_data);
+	}
+	isb();
+
+	/* Store region info (we treat i/d side the same, so only store d) */
+	mpu_rgn_info.rgns[number].dracr = properties;
+	mpu_rgn_info.rgns[number].drbar = start;
+	mpu_rgn_info.rgns[number].drsr = size_data;
+
+	mpu_rgn_info.used++;
+
+	return 0;
+}
+
+/*
+* Set up default MPU regions, doing nothing if there is no MPU
+*/
+void __init mpu_setup(void)
+{
+	int i, region = 0, err = 0;
+
+	if (!mpu_present())
+		return;
+
+	/* Setup MPU (order is important) */
+
+	/* Background */
+	err |= mpu_setup_region(region++, 0, 32,
+				MPU_ACR_XN | MPU_RGN_STRONGLY_ORDERED | MPU_AP_PL1RW_PL0NA,
+				0, false);
+
+#ifdef CONFIG_XIP_KERNEL
+	/* ROM */
+	for (i = 0; i < ARRAY_SIZE(xip); i++) {
+		/*
+                 * In case we overwrite RAM region we set earlier in
+                 * head-nommu.S (which is cachable) all subsequent
+                 * data access till we setup RAM bellow would be done
+                 * with BG region (which is uncachable), thus we need
+                 * to clean and invalidate cache.
+		 */
+		bool need_flush = region == MPU_RAM_REGION;
+
+		if (!xip[i].size)
+			continue;
+
+		err |= mpu_setup_region(region++, xip[i].base, ilog2(xip[i].size),
+					MPU_AP_PL1RO_PL0NA | MPU_RGN_NORMAL,
+					xip[i].subreg, need_flush);
+	}
+#endif
+
+	/* RAM */
+	for (i = 0; i < ARRAY_SIZE(mem); i++) {
+		if (!mem[i].size)
+			continue;
+
+		err |= mpu_setup_region(region++, mem[i].base, ilog2(mem[i].size),
+					MPU_AP_PL1RW_PL0RW | MPU_RGN_NORMAL,
+					mem[i].subreg, false);
+	}
+
+	/* Vectors */
+#ifndef CONFIG_CPU_V7M
+	err |= mpu_setup_region(region++, vectors_base, ilog2(2 * PAGE_SIZE),
+				MPU_AP_PL1RW_PL0NA | MPU_RGN_NORMAL,
+				0, false);
+#endif
+	if (err) {
+		panic("MPU region initialization failure! %d", err);
+	} else {
+		pr_info("Using ARMv7 PMSA Compliant MPU. "
+			 "Region independence: %s, Used %d of %d regions\n",
+			mpu_iside_independent() ? "Yes" : "No",
+			mpu_rgn_info.used, mpu_max_regions);
+	}
+}
diff --git a/drivers/pcmcia/sa1111_generic.c b/drivers/pcmcia/sa1111_generic.c
index 3d95dff..5ef351f 100644
--- a/drivers/pcmcia/sa1111_generic.c
+++ b/drivers/pcmcia/sa1111_generic.c
@@ -63,11 +63,12 @@
 #define IDX_IRQ_S1_READY_NINT	(3)
 #define IDX_IRQ_S1_CD_VALID	(4)
 #define IDX_IRQ_S1_BVD1_STSCHG	(5)
+#define NUM_IRQS		(6)
 
 void sa1111_pcmcia_socket_state(struct soc_pcmcia_socket *skt, struct pcmcia_state *state)
 {
 	struct sa1111_pcmcia_socket *s = to_skt(skt);
-	unsigned long status = sa1111_readl(s->dev->mapbase + PCSR);
+	u32 status = readl_relaxed(s->dev->mapbase + PCSR);
 
 	switch (skt->nr) {
 	case 0:
@@ -95,7 +96,7 @@
 int sa1111_pcmcia_configure_socket(struct soc_pcmcia_socket *skt, const socket_state_t *state)
 {
 	struct sa1111_pcmcia_socket *s = to_skt(skt);
-	unsigned int pccr_skt_mask, pccr_set_mask, val;
+	u32 pccr_skt_mask, pccr_set_mask, val;
 	unsigned long flags;
 
 	switch (skt->nr) {
@@ -123,10 +124,10 @@
 		pccr_set_mask |= PCCR_S0_FLT|PCCR_S1_FLT;
 
 	local_irq_save(flags);
-	val = sa1111_readl(s->dev->mapbase + PCCR);
+	val = readl_relaxed(s->dev->mapbase + PCCR);
 	val &= ~pccr_skt_mask;
 	val |= pccr_set_mask & pccr_skt_mask;
-	sa1111_writel(val, s->dev->mapbase + PCCR);
+	writel_relaxed(val, s->dev->mapbase + PCCR);
 	local_irq_restore(flags);
 
 	return 0;
@@ -137,12 +138,18 @@
 {
 	struct sa1111_pcmcia_socket *s;
 	struct clk *clk;
-	int i, ret = 0;
+	int i, ret = 0, irqs[NUM_IRQS];
 
 	clk = devm_clk_get(&dev->dev, NULL);
 	if (IS_ERR(clk))
 		return PTR_ERR(clk);
 
+	for (i = 0; i < NUM_IRQS; i++) {
+		irqs[i] = sa1111_get_irq(dev, i);
+		if (irqs[i] <= 0)
+			return irqs[i] ? : -ENXIO;
+	}
+
 	ops->socket_state = sa1111_pcmcia_socket_state;
 
 	for (i = 0; i < ops->nr; i++) {
@@ -156,16 +163,16 @@
 		soc_pcmcia_init_one(&s->soc, ops, &dev->dev);
 		s->dev = dev;
 		if (s->soc.nr) {
-			s->soc.socket.pci_irq = dev->irq[IDX_IRQ_S1_READY_NINT];
-			s->soc.stat[SOC_STAT_CD].irq = dev->irq[IDX_IRQ_S1_CD_VALID];
+			s->soc.socket.pci_irq = irqs[IDX_IRQ_S1_READY_NINT];
+			s->soc.stat[SOC_STAT_CD].irq = irqs[IDX_IRQ_S1_CD_VALID];
 			s->soc.stat[SOC_STAT_CD].name = "SA1111 CF card detect";
-			s->soc.stat[SOC_STAT_BVD1].irq = dev->irq[IDX_IRQ_S1_BVD1_STSCHG];
+			s->soc.stat[SOC_STAT_BVD1].irq = irqs[IDX_IRQ_S1_BVD1_STSCHG];
 			s->soc.stat[SOC_STAT_BVD1].name = "SA1111 CF BVD1";
 		} else {
-			s->soc.socket.pci_irq = dev->irq[IDX_IRQ_S0_READY_NINT];
-			s->soc.stat[SOC_STAT_CD].irq = dev->irq[IDX_IRQ_S0_CD_VALID];
+			s->soc.socket.pci_irq = irqs[IDX_IRQ_S0_READY_NINT];
+			s->soc.stat[SOC_STAT_CD].irq = irqs[IDX_IRQ_S0_CD_VALID];
 			s->soc.stat[SOC_STAT_CD].name = "SA1111 PCMCIA card detect";
-			s->soc.stat[SOC_STAT_BVD1].irq = dev->irq[IDX_IRQ_S0_BVD1_STSCHG];
+			s->soc.stat[SOC_STAT_BVD1].irq = irqs[IDX_IRQ_S0_BVD1_STSCHG];
 			s->soc.stat[SOC_STAT_BVD1].name = "SA1111 PCMCIA BVD1";
 		}
 
@@ -201,8 +208,8 @@
 	/*
 	 * Initialise the suspend state.
 	 */
-	sa1111_writel(PCSSR_S0_SLEEP | PCSSR_S1_SLEEP, base + PCSSR);
-	sa1111_writel(PCCR_S0_FLT | PCCR_S1_FLT, base + PCCR);
+	writel_relaxed(PCSSR_S0_SLEEP | PCSSR_S1_SLEEP, base + PCSSR);
+	writel_relaxed(PCCR_S0_FLT | PCCR_S1_FLT, base + PCCR);
 
 	ret = -ENODEV;
 #ifdef CONFIG_SA1100_BADGE4
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index b2f82cf..58c2bbd 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -34,8 +34,8 @@
 
 config BINFMT_ELF_FDPIC
 	bool "Kernel support for FDPIC ELF binaries"
-	default y
-	depends on (FRV || BLACKFIN || (SUPERH32 && !MMU) || C6X)
+	default y if !BINFMT_ELF
+	depends on (ARM || FRV || BLACKFIN || (SUPERH32 && !MMU) || C6X)
 	select ELFCORE
 	help
 	  ELF FDPIC binaries are based on ELF, but allow the individual load
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index c697882..83732fe 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -51,6 +51,11 @@
 #define user_siginfo_t siginfo_t
 #endif
 
+/* That's for binfmt_elf_fdpic to deal with */
+#ifndef elf_check_fdpic
+#define elf_check_fdpic(ex) false
+#endif
+
 static int load_elf_binary(struct linux_binprm *bprm);
 static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
 				int, int, unsigned long);
@@ -541,7 +546,8 @@
 	if (interp_elf_ex->e_type != ET_EXEC &&
 	    interp_elf_ex->e_type != ET_DYN)
 		goto out;
-	if (!elf_check_arch(interp_elf_ex))
+	if (!elf_check_arch(interp_elf_ex) ||
+	    elf_check_fdpic(interp_elf_ex))
 		goto out;
 	if (!interpreter->f_op->mmap)
 		goto out;
@@ -718,6 +724,8 @@
 		goto out;
 	if (!elf_check_arch(&loc->elf_ex))
 		goto out;
+	if (elf_check_fdpic(&loc->elf_ex))
+		goto out;
 	if (!bprm->file->f_op->mmap)
 		goto out;
 
@@ -817,7 +825,8 @@
 		if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
 			goto out_free_dentry;
 		/* Verify the interpreter has a valid arch */
-		if (!elf_check_arch(&loc->interp_elf_ex))
+		if (!elf_check_arch(&loc->interp_elf_ex) ||
+		    elf_check_fdpic(&loc->interp_elf_ex))
 			goto out_free_dentry;
 
 		/* Load the interpreter program headers */
@@ -1190,6 +1199,8 @@
 	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
 	    !elf_check_arch(&elf_ex) || !file->f_op->mmap)
 		goto out;
+	if (elf_check_fdpic(&elf_ex))
+		goto out;
 
 	/* Now read in all of the header information */
 
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index e70c039..5429b03 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -378,6 +378,11 @@
 				 executable_stack);
 	if (retval < 0)
 		goto error;
+#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
+	retval = arch_setup_additional_pages(bprm, !!interpreter_name);
+	if (retval < 0)
+		goto error;
+#endif
 #endif
 
 	/* load the executable and interpreter into memory */
@@ -831,6 +836,9 @@
 			if (phdr->p_vaddr >= seg->p_vaddr &&
 			    phdr->p_vaddr + phdr->p_memsz <=
 			    seg->p_vaddr + seg->p_memsz) {
+				Elf32_Dyn __user *dyn;
+				Elf32_Sword d_tag;
+
 				params->dynamic_addr =
 					(phdr->p_vaddr - seg->p_vaddr) +
 					seg->addr;
@@ -843,8 +851,9 @@
 					goto dynamic_error;
 
 				tmp = phdr->p_memsz / sizeof(Elf32_Dyn);
-				if (((Elf32_Dyn *)
-				     params->dynamic_addr)[tmp - 1].d_tag != 0)
+				dyn = (Elf32_Dyn __user *)params->dynamic_addr;
+				__get_user(d_tag, &dyn[tmp - 1].d_tag);
+				if (d_tag != 0)
 					goto dynamic_error;
 				break;
 			}