Merge branch 'zImage_fixes' of git://git.linaro.org/people/nico/linux into devel-stable
diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
index 8ebbb51..23aad07 100644
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -74,7 +74,7 @@
 ZBSSADDR	:= $(CONFIG_ZBOOT_ROM_BSS)
 else
 ZTEXTADDR	:= 0
-ZBSSADDR	:= ALIGN(4)
+ZBSSADDR	:= ALIGN(8)
 endif
 
 SEDFLAGS	= s/TEXT_START/$(ZTEXTADDR)/;s/BSS_START/$(ZBSSADDR)/
@@ -98,8 +98,6 @@
 ccflags-y := -fpic -fno-builtin
 asflags-y := -Wa,-march=all
 
-# Provide size of uncompressed kernel to the decompressor via a linker symbol.
-LDFLAGS_vmlinux = --defsym _image_size=$(shell stat -c "%s" $(obj)/../Image)
 # Supply ZRELADDR to the decompressor via a linker symbol.
 ifneq ($(CONFIG_AUTO_ZRELADDR),y)
 LDFLAGS_vmlinux += --defsym zreladdr=$(ZRELADDR)
@@ -122,10 +120,23 @@
 $(obj)/lib1funcs.S: $(srctree)/arch/$(SRCARCH)/lib/lib1funcs.S FORCE
 	$(call cmd,shipped)
 
+# We need to prevent any GOTOFF relocs being used with references
+# to symbols in the .bss section since we cannot relocate them
+# independently from the rest at run time.  This can be achieved by
+# ensuring that no private .bss symbols exist, as global symbols
+# always have a GOT entry which is what we need.
+# The .data section is already discarded by the linker script so no need
+# to bother about it here.
+check_for_bad_syms = \
+bad_syms=$$($(CROSS_COMPILE)nm $@ | sed -n 's/^.\{8\} [bc] \(.*\)/\1/p') && \
+[ -z "$$bad_syms" ] || \
+  ( echo "following symbols must have non local/private scope:" >&2; \
+    echo "$$bad_syms" >&2; rm -f $@; false )
+
 $(obj)/vmlinux: $(obj)/vmlinux.lds $(obj)/$(HEAD) $(obj)/piggy.$(suffix_y).o \
 	 	$(addprefix $(obj)/, $(OBJS)) $(lib1funcs) FORCE
 	$(call if_changed,ld)
-	@:
+	@$(check_for_bad_syms)
 
 $(obj)/piggy.$(suffix_y): $(obj)/../Image FORCE
 	$(call if_changed,$(suffix_y))
diff --git a/arch/arm/boot/compressed/decompress.c b/arch/arm/boot/compressed/decompress.c
index 4c72a97..07be5a2 100644
--- a/arch/arm/boot/compressed/decompress.c
+++ b/arch/arm/boot/compressed/decompress.c
@@ -44,7 +44,7 @@
 #include "../../../../lib/decompress_unlzma.c"
 #endif
 
-void do_decompress(u8 *input, int len, u8 *output, void (*error)(char *x))
+int do_decompress(u8 *input, int len, u8 *output, void (*error)(char *x))
 {
-	decompress(input, len, NULL, NULL, output, NULL, error);
+	return decompress(input, len, NULL, NULL, output, NULL, error);
 }
diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
index a36f452..c363458 100644
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -179,16 +179,29 @@
 		bl	cache_on
 
 restart:	adr	r0, LC0
-		ldmia	r0, {r1, r2, r3, r5, r6, r9, r11, r12}
-		ldr	sp, [r0, #32]
+		ldmia	r0, {r1, r2, r3, r6, r10, r11, r12}
+		ldr	sp, [r0, #28]
 
 		/*
 		 * We might be running at a different address.  We need
 		 * to fix up various pointers.
 		 */
 		sub	r0, r0, r1		@ calculate the delta offset
-		add	r5, r5, r0		@ _start
 		add	r6, r6, r0		@ _edata
+		add	r10, r10, r0		@ inflated kernel size location
+
+		/*
+		 * The kernel build system appends the size of the
+		 * decompressed kernel at the end of the compressed data
+		 * in little-endian form.
+		 */
+		ldrb	r9, [r10, #0]
+		ldrb	lr, [r10, #1]
+		orr	r9, r9, lr, lsl #8
+		ldrb	lr, [r10, #2]
+		ldrb	r10, [r10, #3]
+		orr	r9, r9, lr, lsl #16
+		orr	r9, r9, r10, lsl #24
 
 #ifndef CONFIG_ZBOOT_ROM
 		/* malloc space is above the relocated stack (64k max) */
@@ -206,31 +219,40 @@
 /*
  * Check to see if we will overwrite ourselves.
  *   r4  = final kernel address
- *   r5  = start of this image
  *   r9  = size of decompressed image
  *   r10 = end of this image, including  bss/stack/malloc space if non XIP
  * We basically want:
- *   r4 >= r10 -> OK
- *   r4 + image length <= r5 -> OK
+ *   r4 - 16k page directory >= r10 -> OK
+ *   r4 + image length <= current position (pc) -> OK
  */
+		add	r10, r10, #16384
 		cmp	r4, r10
 		bhs	wont_overwrite
 		add	r10, r4, r9
-		cmp	r10, r5
+   ARM(		cmp	r10, pc		)
+ THUMB(		mov	lr, pc		)
+ THUMB(		cmp	r10, lr		)
 		bls	wont_overwrite
 
 /*
  * Relocate ourselves past the end of the decompressed kernel.
- *   r5  = start of this image
  *   r6  = _edata
  *   r10 = end of the decompressed kernel
  * Because we always copy ahead, we need to do it from the end and go
  * backward in case the source and destination overlap.
  */
-		/* Round up to next 256-byte boundary. */
-		add	r10, r10, #256
+		/*
+		 * Bump to the next 256-byte boundary with the size of
+		 * the relocation code added. This avoids overwriting
+		 * ourself when the offset is small.
+		 */
+		add	r10, r10, #((reloc_code_end - restart + 256) & ~255)
 		bic	r10, r10, #255
 
+		/* Get start of code we want to copy and align it down. */
+		adr	r5, restart
+		bic	r5, r5, #31
+
 		sub	r9, r6, r5		@ size to copy
 		add	r9, r9, #31		@ rounded up to a multiple
 		bic	r9, r9, #31		@ ... of 32 bytes
@@ -245,6 +267,11 @@
 		/* Preserve offset to relocated code. */
 		sub	r6, r9, r6
 
+#ifndef CONFIG_ZBOOT_ROM
+		/* cache_clean_flush may use the stack, so relocate it */
+		add	sp, sp, r6
+#endif
+
 		bl	cache_clean_flush
 
 		adr	r0, BSYM(restart)
@@ -333,12 +360,11 @@
 LC0:		.word	LC0			@ r1
 		.word	__bss_start		@ r2
 		.word	_end			@ r3
-		.word	_start			@ r5
 		.word	_edata			@ r6
-		.word	_image_size		@ r9
+		.word	input_data_end - 4	@ r10 (inflated size location)
 		.word	_got_start		@ r11
 		.word	_got_end		@ ip
-		.word	user_stack_end		@ sp
+		.word	.L_user_stack_end	@ sp
 		.size	LC0, . - LC0
 
 #ifdef CONFIG_ARCH_RPC
@@ -1056,8 +1082,9 @@
 #endif
 
 		.ltorg
+reloc_code_end:
 
 		.align
 		.section ".stack", "aw", %nobits
-user_stack:	.space	4096
-user_stack_end:
+.L_user_stack:	.space	4096
+.L_user_stack_end:
diff --git a/arch/arm/boot/compressed/misc.c b/arch/arm/boot/compressed/misc.c
index 2df3826..832d372 100644
--- a/arch/arm/boot/compressed/misc.c
+++ b/arch/arm/boot/compressed/misc.c
@@ -26,8 +26,6 @@
 #include <linux/linkage.h>
 #include <asm/string.h>
 
-#include <asm/unaligned.h>
-
 
 static void putstr(const char *ptr);
 extern void error(char *x);
@@ -139,13 +137,12 @@
 }
 
 /*
- * gzip delarations
+ * gzip declarations
  */
 extern char input_data[];
 extern char input_data_end[];
 
 unsigned char *output_data;
-unsigned long output_ptr;
 
 unsigned long free_mem_ptr;
 unsigned long free_mem_end_ptr;
@@ -170,15 +167,15 @@
 	error("Attempting division by 0!");
 }
 
-extern void do_decompress(u8 *input, int len, u8 *output, void (*error)(char *x));
+extern int do_decompress(u8 *input, int len, u8 *output, void (*error)(char *x));
 
 
-unsigned long
+void
 decompress_kernel(unsigned long output_start, unsigned long free_mem_ptr_p,
 		unsigned long free_mem_ptr_end_p,
 		int arch_id)
 {
-	unsigned char *tmp;
+	int ret;
 
 	output_data		= (unsigned char *)output_start;
 	free_mem_ptr		= free_mem_ptr_p;
@@ -187,12 +184,11 @@
 
 	arch_decomp_setup();
 
-	tmp = (unsigned char *) (((unsigned long)input_data_end) - 4);
-	output_ptr = get_unaligned_le32(tmp);
-
 	putstr("Uncompressing Linux...");
-	do_decompress(input_data, input_data_end - input_data,
-			output_data, error);
-	putstr(" done, booting the kernel.\n");
-	return output_ptr;
+	ret = do_decompress(input_data, input_data_end - input_data,
+			    output_data, error);
+	if (ret)
+		error("decompressor returned an error");
+	else
+		putstr(" done, booting the kernel.\n");
 }
diff --git a/arch/arm/boot/compressed/vmlinux.lds.in b/arch/arm/boot/compressed/vmlinux.lds.in
index 5309909..ea80abe 100644
--- a/arch/arm/boot/compressed/vmlinux.lds.in
+++ b/arch/arm/boot/compressed/vmlinux.lds.in
@@ -54,6 +54,7 @@
   .bss			: { *(.bss) }
   _end = .;
 
+  . = ALIGN(8);		/* the stack must be 64-bit aligned */
   .stack		: { *(.stack) }
 
   .stab 0		: { *(.stab) }
diff --git a/arch/arm/mach-davinci/include/mach/uncompress.h b/arch/arm/mach-davinci/include/mach/uncompress.h
index 47723e8..78d8068 100644
--- a/arch/arm/mach-davinci/include/mach/uncompress.h
+++ b/arch/arm/mach-davinci/include/mach/uncompress.h
@@ -25,8 +25,7 @@
 
 #include <mach/serial.h>
 
-static u32 *uart;
-static u32 *uart_info = (u32 *)(DAVINCI_UART_INFO);
+u32 *uart;
 
 /* PORT_16C550A, in polled non-fifo mode */
 static void putc(char c)
@@ -44,6 +43,8 @@
 
 static inline void set_uart_info(u32 phys, void * __iomem virt)
 {
+	u32 *uart_info = (u32 *)(DAVINCI_UART_INFO);
+
 	uart = (u32 *)phys;
 	uart_info[0] = phys;
 	uart_info[1] = (u32)virt;
diff --git a/arch/arm/mach-gemini/include/mach/uncompress.h b/arch/arm/mach-gemini/include/mach/uncompress.h
index 5483f61..0efa262 100644
--- a/arch/arm/mach-gemini/include/mach/uncompress.h
+++ b/arch/arm/mach-gemini/include/mach/uncompress.h
@@ -16,7 +16,7 @@
 #include <linux/serial_reg.h>
 #include <mach/hardware.h>
 
-static volatile unsigned long *UART = (unsigned long *)GEMINI_UART_BASE;
+static volatile unsigned long * const UART = (unsigned long *)GEMINI_UART_BASE;
 
 /*
  * The following code assumes the serial port has already been
diff --git a/arch/arm/mach-iop32x/include/mach/uncompress.h b/arch/arm/mach-iop32x/include/mach/uncompress.h
index b247551..4fd7154 100644
--- a/arch/arm/mach-iop32x/include/mach/uncompress.h
+++ b/arch/arm/mach-iop32x/include/mach/uncompress.h
@@ -7,7 +7,7 @@
 #include <linux/serial_reg.h>
 #include <mach/hardware.h>
 
-static volatile u8 *uart_base;
+volatile u8 *uart_base;
 
 #define TX_DONE		(UART_LSR_TEMT | UART_LSR_THRE)
 
diff --git a/arch/arm/mach-iop33x/include/mach/uncompress.h b/arch/arm/mach-iop33x/include/mach/uncompress.h
index b42423f..f99bb84 100644
--- a/arch/arm/mach-iop33x/include/mach/uncompress.h
+++ b/arch/arm/mach-iop33x/include/mach/uncompress.h
@@ -7,7 +7,7 @@
 #include <linux/serial_reg.h>
 #include <mach/hardware.h>
 
-static volatile u32 *uart_base;
+volatile u32 *uart_base;
 
 #define TX_DONE		(UART_LSR_TEMT | UART_LSR_THRE)
 
diff --git a/arch/arm/mach-ixp4xx/include/mach/uncompress.h b/arch/arm/mach-ixp4xx/include/mach/uncompress.h
index 2db0078..219d7c1 100644
--- a/arch/arm/mach-ixp4xx/include/mach/uncompress.h
+++ b/arch/arm/mach-ixp4xx/include/mach/uncompress.h
@@ -19,7 +19,7 @@
 
 #define TX_DONE (UART_LSR_TEMT|UART_LSR_THRE)
 
-static volatile u32* uart_base;
+volatile u32* uart_base;
 
 static inline void putc(int c)
 {
diff --git a/arch/arm/mach-mmp/include/mach/uncompress.h b/arch/arm/mach-mmp/include/mach/uncompress.h
index 85bd8a2..d6daeb7 100644
--- a/arch/arm/mach-mmp/include/mach/uncompress.h
+++ b/arch/arm/mach-mmp/include/mach/uncompress.h
@@ -14,7 +14,7 @@
 #define UART2_BASE	(APB_PHYS_BASE + 0x17000)
 #define UART3_BASE	(APB_PHYS_BASE + 0x18000)
 
-static volatile unsigned long *UART;
+volatile unsigned long *UART;
 
 static inline void putc(char c)
 {
diff --git a/arch/arm/mach-mxs/include/mach/uncompress.h b/arch/arm/mach-mxs/include/mach/uncompress.h
index f12a173..7f8bf65 100644
--- a/arch/arm/mach-mxs/include/mach/uncompress.h
+++ b/arch/arm/mach-mxs/include/mach/uncompress.h
@@ -20,7 +20,7 @@
 
 #include <asm/mach-types.h>
 
-static unsigned long mxs_duart_base;
+unsigned long mxs_duart_base;
 
 #define MXS_DUART(x)	(*(volatile unsigned long *)(mxs_duart_base + (x)))
 
diff --git a/arch/arm/mach-ns9xxx/include/mach/uncompress.h b/arch/arm/mach-ns9xxx/include/mach/uncompress.h
index 770a68c..00ef4a6 100644
--- a/arch/arm/mach-ns9xxx/include/mach/uncompress.h
+++ b/arch/arm/mach-ns9xxx/include/mach/uncompress.h
@@ -20,7 +20,7 @@
 	/* nothing */
 }
 
-static int timeout;
+int timeout;
 
 static void putc_ns9360(char c, void __iomem *base)
 {
diff --git a/arch/arm/mach-nuc93x/include/mach/uncompress.h b/arch/arm/mach-nuc93x/include/mach/uncompress.h
index 73082cd..381cb9b 100644
--- a/arch/arm/mach-nuc93x/include/mach/uncompress.h
+++ b/arch/arm/mach-nuc93x/include/mach/uncompress.h
@@ -27,7 +27,7 @@
 #define arch_decomp_wdog()
 
 #define TX_DONE	(UART_LSR_TEMT | UART_LSR_THRE)
-static u32 * uart_base = (u32 *)UART0_PA;
+static u32 * const uart_base = (u32 *)UART0_PA;
 
 static void putc(int ch)
 {
diff --git a/arch/arm/mach-pxa/include/mach/uncompress.h b/arch/arm/mach-pxa/include/mach/uncompress.h
index 759b851..5519a34 100644
--- a/arch/arm/mach-pxa/include/mach/uncompress.h
+++ b/arch/arm/mach-pxa/include/mach/uncompress.h
@@ -16,9 +16,9 @@
 #define BTUART_BASE	(0x40200000)
 #define STUART_BASE	(0x40700000)
 
-static unsigned long uart_base;
-static unsigned int uart_shift;
-static unsigned int uart_is_pxa;
+unsigned long uart_base;
+unsigned int uart_shift;
+unsigned int uart_is_pxa;
 
 static inline unsigned char uart_read(int offset)
 {
diff --git a/arch/arm/mach-rpc/include/mach/uncompress.h b/arch/arm/mach-rpc/include/mach/uncompress.h
index 8c9e2c7..9cd9bcd 100644
--- a/arch/arm/mach-rpc/include/mach/uncompress.h
+++ b/arch/arm/mach-rpc/include/mach/uncompress.h
@@ -66,12 +66,12 @@
 #define params (params())
 
 #ifndef STANDALONE_DEBUG 
-static unsigned long video_num_cols;
-static unsigned long video_num_rows;
-static unsigned long video_x;
-static unsigned long video_y;
-static unsigned char bytes_per_char_v;
-static int white;
+unsigned long video_num_cols;
+unsigned long video_num_rows;
+unsigned long video_x;
+unsigned long video_y;
+unsigned char bytes_per_char_v;
+int white;
 
 /*
  * This does not append a newline
diff --git a/arch/arm/mach-s5p64x0/include/mach/uncompress.h b/arch/arm/mach-s5p64x0/include/mach/uncompress.h
index c65b229..1608faf 100644
--- a/arch/arm/mach-s5p64x0/include/mach/uncompress.h
+++ b/arch/arm/mach-s5p64x0/include/mach/uncompress.h
@@ -24,8 +24,8 @@
 
 /* uart setup */
 
-static unsigned int fifo_mask;
-static unsigned int fifo_max;
+unsigned int fifo_mask;
+unsigned int fifo_max;
 
 /* forward declerations */
 
@@ -43,7 +43,7 @@
 /* how many bytes we allow into the FIFO at a time in FIFO mode */
 #define FIFO_MAX	 (14)
 
-static unsigned long uart_base;
+unsigned long uart_base;
 
 static __inline__ void get_uart_base(void)
 {
diff --git a/arch/arm/mach-ux500/include/mach/uncompress.h b/arch/arm/mach-ux500/include/mach/uncompress.h
index ab0fe14..088b550 100644
--- a/arch/arm/mach-ux500/include/mach/uncompress.h
+++ b/arch/arm/mach-ux500/include/mach/uncompress.h
@@ -24,7 +24,7 @@
 #include <linux/amba/serial.h>
 #include <mach/hardware.h>
 
-static u32 ux500_uart_base;
+u32 ux500_uart_base;
 
 static void putc(const char c)
 {
diff --git a/arch/arm/mach-w90x900/include/mach/uncompress.h b/arch/arm/mach-w90x900/include/mach/uncompress.h
index 56f1a74..0313021 100644
--- a/arch/arm/mach-w90x900/include/mach/uncompress.h
+++ b/arch/arm/mach-w90x900/include/mach/uncompress.h
@@ -27,7 +27,7 @@
 #define arch_decomp_wdog()
 
 #define TX_DONE	(UART_LSR_TEMT | UART_LSR_THRE)
-static volatile u32 * uart_base = (u32 *)UART0_PA;
+static volatile u32 * const uart_base = (u32 *)UART0_PA;
 
 static void putc(int ch)
 {
diff --git a/arch/arm/plat-mxc/include/mach/uncompress.h b/arch/arm/plat-mxc/include/mach/uncompress.h
index 4864b0a..d85e2d1 100644
--- a/arch/arm/plat-mxc/include/mach/uncompress.h
+++ b/arch/arm/plat-mxc/include/mach/uncompress.h
@@ -21,7 +21,7 @@
 
 #include <asm/mach-types.h>
 
-static unsigned long uart_base;
+unsigned long uart_base;
 
 #define UART(x) (*(volatile unsigned long *)(uart_base + (x)))
 
diff --git a/arch/arm/plat-omap/include/plat/uncompress.h b/arch/arm/plat-omap/include/plat/uncompress.h
index 30b891c4..565d266 100644
--- a/arch/arm/plat-omap/include/plat/uncompress.h
+++ b/arch/arm/plat-omap/include/plat/uncompress.h
@@ -27,8 +27,8 @@
 
 #define MDR1_MODE_MASK			0x07
 
-static volatile u8 *uart_base;
-static int uart_shift;
+volatile u8 *uart_base;
+int uart_shift;
 
 /*
  * Store the DEBUG_LL uart number into memory.
diff --git a/arch/arm/plat-samsung/include/plat/uncompress.h b/arch/arm/plat-samsung/include/plat/uncompress.h
index 7d6ed72..ee48e12 100644
--- a/arch/arm/plat-samsung/include/plat/uncompress.h
+++ b/arch/arm/plat-samsung/include/plat/uncompress.h
@@ -18,8 +18,8 @@
 
 /* uart setup */
 
-static unsigned int fifo_mask;
-static unsigned int fifo_max;
+unsigned int fifo_mask;
+unsigned int fifo_max;
 
 /* forward declerations */