ARM: P2V: extend to 16-bit translation offsets
MSM's memory is aligned to 2MB, which is more than we can do with our
existing method as we're limited to the upper 8 bits. Extend this by
using two instructions to 16 bits, automatically selected when MSM is
enabled.
Acked-by: Tony Lindgren <tony@atomide.com>
Reviewed-by: Nicolas Pitre <nicolas.pitre@linaro.org>
Tested-by: Nicolas Pitre <nicolas.pitre@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 4147f76..b357c29 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -195,7 +195,6 @@
bool "Patch physical to virtual translations at runtime (EXPERIMENTAL)"
depends on EXPERIMENTAL
depends on !XIP_KERNEL && !THUMB2_KERNEL && MMU
- depends on !ARCH_MSM
depends on !ARCH_REALVIEW || !SPARSEMEM
help
Patch phys-to-virt translation functions at runtime according to
@@ -204,6 +203,10 @@
This can only be used with non-XIP, non-Thumb2, MMU kernels where
the base of physical memory is at a 16MB boundary.
+config ARM_PATCH_PHYS_VIRT_16BIT
+ def_bool y
+ depends on ARM_PATCH_PHYS_VIRT && ARCH_MSM
+
source "init/Kconfig"
source "kernel/Kconfig.freezer"
diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index 7197879..2398b3f 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -154,29 +154,42 @@
#ifndef __virt_to_phys
#ifdef CONFIG_ARM_PATCH_PHYS_VIRT
+/*
+ * Constants used to force the right instruction encodings and shifts
+ * so that all we need to do is modify the 8-bit constant field.
+ */
+#define __PV_BITS_31_24 0x81000000
+#define __PV_BITS_23_16 0x00810000
+
extern unsigned long __pv_phys_offset;
#define PHYS_OFFSET __pv_phys_offset
-#define __pv_stub(from,to,instr) \
+#define __pv_stub(from,to,instr,type) \
__asm__("@ __pv_stub\n" \
"1: " instr " %0, %1, %2\n" \
" .pushsection .pv_table,\"a\"\n" \
" .long 1b\n" \
" .popsection\n" \
: "=r" (to) \
- : "r" (from), "I" (0x81000000))
+ : "r" (from), "I" (type))
static inline unsigned long __virt_to_phys(unsigned long x)
{
unsigned long t;
- __pv_stub(x, t, "add");
+ __pv_stub(x, t, "add", __PV_BITS_31_24);
+#ifdef CONFIG_ARM_PATCH_PHYS_VIRT_16BIT
+ __pv_stub(t, t, "add", __PV_BITS_23_16);
+#endif
return t;
}
static inline unsigned long __phys_to_virt(unsigned long x)
{
unsigned long t;
- __pv_stub(x, t, "sub");
+ __pv_stub(x, t, "sub", __PV_BITS_31_24);
+#ifdef CONFIG_ARM_PATCH_PHYS_VIRT_16BIT
+ __pv_stub(t, t, "sub", __PV_BITS_23_16);
+#endif
return t;
}
#else
diff --git a/arch/arm/include/asm/module.h b/arch/arm/include/asm/module.h
index d072c21..a2b775b 100644
--- a/arch/arm/include/asm/module.h
+++ b/arch/arm/include/asm/module.h
@@ -31,7 +31,11 @@
/* Add __virt_to_phys patching state as well */
#ifdef CONFIG_ARM_PATCH_PHYS_VIRT
+#ifdef CONFIG_ARM_PATCH_PHYS_VIRT_16BIT
+#define MODULE_ARCH_VERMAGIC_P2V "p2v16 "
+#else
#define MODULE_ARCH_VERMAGIC_P2V "p2v8 "
+#endif
#else
#define MODULE_ARCH_VERMAGIC_P2V ""
#endif
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 1db8ead..a94dd99 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -456,8 +456,13 @@
add r4, r4, r3 @ adjust table start address
add r5, r5, r3 @ adjust table end address
str r8, [r7, r3]! @ save computed PHYS_OFFSET to __pv_phys_offset
+#ifndef CONFIG_ARM_PATCH_PHYS_VIRT_16BIT
mov r6, r3, lsr #24 @ constant for add/sub instructions
teq r3, r6, lsl #24 @ must be 16MiB aligned
+#else
+ mov r6, r3, lsr #16 @ constant for add/sub instructions
+ teq r3, r6, lsl #16 @ must be 64kiB aligned
+#endif
bne __error
str r6, [r7, #4] @ save to __pv_offset
b __fixup_a_pv_table
@@ -471,10 +476,18 @@
.text
__fixup_a_pv_table:
+#ifdef CONFIG_ARM_PATCH_PHYS_VIRT_16BIT
+ and r0, r6, #255 @ offset bits 23-16
+ mov r6, r6, lsr #8 @ offset bits 31-24
+#else
+ mov r0, #0 @ just in case...
+#endif
b 3f
2: ldr ip, [r7, r3]
bic ip, ip, #0x000000ff
- orr ip, ip, r6
+ tst ip, #0x400 @ rotate shift tells us LS or MS byte
+ orrne ip, ip, r6 @ mask in offset bits 31-24
+ orreq ip, ip, r0 @ mask in offset bits 23-16
str ip, [r7, r3]
3: cmp r4, r5
ldrcc r7, [r4], #4 @ use branch for delay slot