Blackfin: initial XIP support

Signed-off-by: Barry Song <barry.song@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig
index c0d6d96..f46db59 100644
--- a/arch/blackfin/Kconfig
+++ b/arch/blackfin/Kconfig
@@ -26,9 +26,9 @@
 	select HAVE_FUNCTION_GRAPH_TRACER
 	select HAVE_FUNCTION_TRACER
 	select HAVE_IDE
-	select HAVE_KERNEL_GZIP
-	select HAVE_KERNEL_BZIP2
-	select HAVE_KERNEL_LZMA
+	select HAVE_KERNEL_GZIP if RAMKERNEL
+	select HAVE_KERNEL_BZIP2 if RAMKERNEL
+	select HAVE_KERNEL_LZMA if RAMKERNEL
 	select HAVE_OPROFILE
 	select ARCH_WANT_OPTIONAL_GPIOLIB
 
@@ -407,10 +407,18 @@
 config ROM_BASE
 	hex "Kernel ROM Base"
 	depends on ROMKERNEL
-	default "0x20040000"
+	default "0x20040040"
 	range 0x20000000 0x20400000 if !(BF54x || BF561)
 	range 0x20000000 0x30000000 if (BF54x || BF561)
 	help
+	  Make sure your ROM base does not include any file-header
+	  information that is prepended to the kernel.
+
+	  For example, the bootable U-Boot format (created with
+	  mkimage) has a 64-byte header (0x40).  So while the image
+	  you write to flash might start at, say, 0x20080000, you have
+	  to add 0x40 to get the kernel's ROM base (0x20080040), as
+	  the kernel comes after the header.
 
 comment "Clock/PLL Setup"
 
diff --git a/arch/blackfin/Makefile b/arch/blackfin/Makefile
index d4c7177..ba84206 100644
--- a/arch/blackfin/Makefile
+++ b/arch/blackfin/Makefile
@@ -14,6 +14,9 @@
 GZFLAGS          := -9
 
 KBUILD_CFLAGS           += $(call cc-option,-mno-fdpic)
+ifeq ($(CONFIG_ROMKERNEL),y)
+KBUILD_CFLAGS           += -mlong-calls
+endif
 KBUILD_AFLAGS           += $(call cc-option,-mno-fdpic)
 CFLAGS_MODULE    += -mlong-calls
 LDFLAGS_MODULE   += -m elf32bfin
@@ -138,7 +141,7 @@
 
 INSTALL_PATH ?= /tftpboot
 boot := arch/$(ARCH)/boot
-BOOT_TARGETS = vmImage vmImage.bin vmImage.bz2 vmImage.gz vmImage.lzma
+BOOT_TARGETS = vmImage vmImage.bin vmImage.bz2 vmImage.gz vmImage.lzma vmImage.xip
 PHONY += $(BOOT_TARGETS) install
 KBUILD_IMAGE := $(boot)/vmImage
 
@@ -156,6 +159,7 @@
   echo  '  vmImage.bz2     - Kernel-only image for U-Boot (arch/$(ARCH)/boot/vmImage.bz2)'
   echo  '* vmImage.gz      - Kernel-only image for U-Boot (arch/$(ARCH)/boot/vmImage.gz)'
   echo  '  vmImage.lzma    - Kernel-only image for U-Boot (arch/$(ARCH)/boot/vmImage.lzma)'
+  echo  '  vmImage.xip     - XIP Kernel-only image for U-Boot (arch/$(ARCH)/boot/vmImage.xip)'
   echo  '  install         - Install kernel using'
   echo  '                     (your) ~/bin/$(INSTALLKERNEL) or'
   echo  '                     (distribution) PATH: $(INSTALLKERNEL) or'
diff --git a/arch/blackfin/boot/Makefile b/arch/blackfin/boot/Makefile
index e9c48c6..d1b3d60 100644
--- a/arch/blackfin/boot/Makefile
+++ b/arch/blackfin/boot/Makefile
@@ -8,14 +8,18 @@
 
 MKIMAGE := $(srctree)/scripts/mkuboot.sh
 
-targets := vmImage vmImage.bin vmImage.bz2 vmImage.gz vmImage.lzma
-extra-y += vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma
+targets := vmImage vmImage.bin vmImage.bz2 vmImage.gz vmImage.lzma vmImage.xip
+extra-y += vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma vmlinux.bin.xip
+
+UIMAGE_OPTS-y :=
+UIMAGE_OPTS-$(CONFIG_RAMKERNEL) += -a $(CONFIG_BOOT_LOAD)
+UIMAGE_OPTS-$(CONFIG_ROMKERNEL) += -a $(CONFIG_ROM_BASE) -x
 
 quiet_cmd_uimage = UIMAGE  $@
       cmd_uimage = $(CONFIG_SHELL) $(MKIMAGE) -A $(ARCH) -O linux -T kernel \
-                   -C $(2) -n '$(MACHINE)-$(KERNELRELEASE)' -a $(CONFIG_BOOT_LOAD) \
+                   -C $(2) -n '$(MACHINE)-$(KERNELRELEASE)' \
                    -e $(shell $(NM) vmlinux | awk '$$NF == "__start" {print $$1}') \
-                   -d $< $@
+                   $(UIMAGE_OPTS-y) -d $< $@
 
 $(obj)/vmlinux.bin: vmlinux FORCE
 	$(call if_changed,objcopy)
@@ -29,6 +33,12 @@
 $(obj)/vmlinux.bin.lzma: $(obj)/vmlinux.bin FORCE
 	$(call if_changed,lzma)
 
+# The mkimage tool wants 64 bytes (0xff filler here) prepended to the image
+quiet_cmd_mk_bin_xip = BIN     $@
+      cmd_mk_bin_xip = ( printf '%64s' | tr ' ' '\377' ; cat $< ) > $@
+$(obj)/vmlinux.bin.xip: $(obj)/vmlinux.bin FORCE
+	$(call if_changed,mk_bin_xip)
+
 $(obj)/vmImage.bin: $(obj)/vmlinux.bin
 	$(call if_changed,uimage,none)
 
@@ -41,10 +51,15 @@
 $(obj)/vmImage.lzma: $(obj)/vmlinux.bin.lzma
 	$(call if_changed,uimage,lzma)
 
+$(obj)/vmImage.xip: $(obj)/vmlinux.bin.xip
+	$(call if_changed,uimage,none)
+
 suffix-y                      := bin
 suffix-$(CONFIG_KERNEL_GZIP)  := gz
 suffix-$(CONFIG_KERNEL_BZIP2) := bz2
 suffix-$(CONFIG_KERNEL_LZMA)  := lzma
+suffix-$(CONFIG_ROMKERNEL)    := xip
+
 $(obj)/vmImage: $(obj)/vmImage.$(suffix-y)
 	@ln -sf $(notdir $<) $@
 
diff --git a/arch/blackfin/include/asm/context.S b/arch/blackfin/include/asm/context.S
index 5dffaf5..fada8e9 100644
--- a/arch/blackfin/include/asm/context.S
+++ b/arch/blackfin/include/asm/context.S
@@ -374,3 +374,13 @@
 
 	(R7:0, P5:0) = [SP++];
 .endm
+
+.macro pseudo_long_call func:req, scratch:req	/* call func; on ROMKERNEL the scratch register is overwritten */
+#ifdef CONFIG_ROMKERNEL	/* NOTE(review): presumably func may be out of direct-call range on XIP - confirm */
+	\scratch\().l = \func;	/* low half of the address of func */
+	\scratch\().h = \func;	/* high half of the address of func */
+	call (\scratch);	/* indirect call through scratch */
+#else
+	call \func;	/* direct call; scratch is not touched */
+#endif
+.endm
diff --git a/arch/blackfin/include/asm/sections.h b/arch/blackfin/include/asm/sections.h
index 42f6c53..14a3e66 100644
--- a/arch/blackfin/include/asm/sections.h
+++ b/arch/blackfin/include/asm/sections.h
@@ -21,6 +21,9 @@
 extern char _stext_l1[], _etext_l1[], _text_l1_lma[], __weak _text_l1_len[];
 extern char _sdata_l1[], _edata_l1[], _sbss_l1[], _ebss_l1[],
 	_data_l1_lma[], __weak _data_l1_len[];
+#ifdef CONFIG_ROMKERNEL
+extern char _data_lma[], _data_len[], _sinitdata[], _einitdata[], _init_data_lma[], _init_data_len[];
+#endif
 extern char _sdata_b_l1[], _edata_b_l1[], _sbss_b_l1[], _ebss_b_l1[],
 	_data_b_l1_lma[], __weak _data_b_l1_len[];
 extern char _stext_l2[], _etext_l2[], _sdata_l2[], _edata_l2[],
diff --git a/arch/blackfin/kernel/cplb-mpu/cplbinit.c b/arch/blackfin/kernel/cplb-mpu/cplbinit.c
index 8d42b9e..30fd641 100644
--- a/arch/blackfin/kernel/cplb-mpu/cplbinit.c
+++ b/arch/blackfin/kernel/cplb-mpu/cplbinit.c
@@ -64,6 +64,15 @@
 		icplb_tbl[cpu][i_i++].data = i_data | (addr == 0 ? CPLB_USER_RD : 0);
 	}
 
+#ifdef CONFIG_ROMKERNEL
+	/* Cover kernel XIP flash area, starting at ROM_BASE rounded down to a 4MB boundary */
+	addr = CONFIG_ROM_BASE & ~(4 * 1024 * 1024 - 1);
+	dcplb_tbl[cpu][i_d].addr = addr;
+	dcplb_tbl[cpu][i_d++].data = d_data | CPLB_USER_RD;
+	icplb_tbl[cpu][i_i].addr = addr;
+	icplb_tbl[cpu][i_i++].data = i_data | CPLB_USER_RD;
+#endif
+
 	/* Cover L1 memory.  One 4M area for code and data each is enough.  */
 #if L1_DATA_A_LENGTH > 0 || L1_DATA_B_LENGTH > 0
 	dcplb_tbl[cpu][i_d].addr = get_l1_data_a_start_cpu(cpu);
diff --git a/arch/blackfin/kernel/cplb-nompu/cplbinit.c b/arch/blackfin/kernel/cplb-nompu/cplbinit.c
index 282a791..bfe75af 100644
--- a/arch/blackfin/kernel/cplb-nompu/cplbinit.c
+++ b/arch/blackfin/kernel/cplb-nompu/cplbinit.c
@@ -56,6 +56,15 @@
 		i_tbl[i_i++].data = SDRAM_IGENERIC | PAGE_SIZE_4MB;
 	}
 
+#ifdef CONFIG_ROMKERNEL
+	/* Cover kernel XIP flash area: the one 4MB page that contains ROM_BASE */
+	addr = CONFIG_ROM_BASE & ~(4 * 1024 * 1024 - 1);
+	d_tbl[i_d].addr = addr;
+	d_tbl[i_d++].data = SDRAM_DGENERIC | PAGE_SIZE_4MB;
+	i_tbl[i_i].addr = addr;
+	i_tbl[i_i++].data = SDRAM_IGENERIC | PAGE_SIZE_4MB;
+#endif
+
 	/* Cover L1 memory.  One 4M area for code and data each is enough.  */
 	if (cpu == 0) {
 		if (L1_DATA_A_LENGTH || L1_DATA_B_LENGTH) {
diff --git a/arch/blackfin/kernel/entry.S b/arch/blackfin/kernel/entry.S
index f27dc22..686478f 100644
--- a/arch/blackfin/kernel/entry.S
+++ b/arch/blackfin/kernel/entry.S
@@ -44,7 +44,7 @@
 	sti r4;
 #endif /* CONFIG_IPIPE */
 	SP += -12;
-	call _schedule_tail;
+	pseudo_long_call _schedule_tail, p5;
 	SP += 12;
 	r0 = [sp + PT_IPEND];
 	cc = bittst(r0,1);
@@ -79,7 +79,7 @@
 	r0 += 24;
 	[--sp] = rets;
 	SP += -12;
-	call _bfin_vfork;
+	pseudo_long_call _bfin_vfork, p2;
 	SP += 12;
 	rets = [sp++];
 	rts;
@@ -90,7 +90,7 @@
 	r0 += 24;
 	[--sp] = rets;
 	SP += -12;
-	call _bfin_clone;
+	pseudo_long_call _bfin_clone, p2;
 	SP += 12;
 	rets = [sp++];
 	rts;
@@ -101,7 +101,7 @@
 	r0 += 24;
 	[--sp] = rets;
 	SP += -12;
-	call _do_rt_sigreturn;
+	pseudo_long_call _do_rt_sigreturn, p2;
 	SP += 12;
 	rets = [sp++];
 	rts;
diff --git a/arch/blackfin/kernel/setup.c b/arch/blackfin/kernel/setup.c
index a0bc7d3..b54ba45 100644
--- a/arch/blackfin/kernel/setup.c
+++ b/arch/blackfin/kernel/setup.c
@@ -220,6 +220,16 @@
 		memcpy(_stext_l2, _l2_lma, l2_len);
 }
 
+#ifdef CONFIG_ROMKERNEL
+void __init bfin_relocate_xip_data(void) /* copy .data and .init.data from load (LMA) to run addresses */
+{
+	early_shadow_stamp();
+	/* NOTE(review): .data copy stops THREAD_SIZE - sizeof(struct thread_info) short of _data_len; confirm the section layout this relies on */
+	memcpy(_sdata, _data_lma, (unsigned long)_data_len - THREAD_SIZE + sizeof(struct thread_info));
+	memcpy(_sinitdata, _init_data_lma, (unsigned long)_init_data_len);
+}
+#endif /* CONFIG_ROMKERNEL */
+
 /* add_memory_region to memmap */
 static void __init add_memory_region(unsigned long long start,
 			      unsigned long long size, int type)
@@ -504,7 +514,7 @@
 #endif
 	unsigned long max_mem;
 
-	_rambase = (unsigned long)_stext;
+	_rambase = CONFIG_BOOT_LOAD;
 	_ramstart = (unsigned long)_end;
 
 	if (DMA_UNCACHED_REGION > (_ramend - _ramstart)) {
@@ -1261,8 +1271,8 @@
 	seq_printf(m, "board memory\t: %ld kB (0x%p -> 0x%p)\n",
 		 physical_mem_end >> 10, (void *)0, (void *)physical_mem_end);
 	seq_printf(m, "kernel memory\t: %d kB (0x%p -> 0x%p)\n",
-		((int)memory_end - (int)_stext) >> 10,
-		_stext,
+		((int)memory_end - (int)_rambase) >> 10,
+		(void *)_rambase,
 		(void *)memory_end);
 	seq_printf(m, "\n");
 
diff --git a/arch/blackfin/kernel/vmlinux.lds.S b/arch/blackfin/kernel/vmlinux.lds.S
index be4b1bb..984c781 100644
--- a/arch/blackfin/kernel/vmlinux.lds.S
+++ b/arch/blackfin/kernel/vmlinux.lds.S
@@ -15,7 +15,12 @@
 
 SECTIONS
 {
+#ifdef CONFIG_RAMKERNEL
 	. = CONFIG_BOOT_LOAD;
+#else
+	. = CONFIG_ROM_BASE;
+#endif
+
 	/* Neither the text, ro_data or bss section need to be aligned
 	 * So pack them back to back
 	 */
@@ -31,6 +36,12 @@
 		LOCK_TEXT
 		IRQENTRY_TEXT
 		KPROBES_TEXT
+#ifdef CONFIG_ROMKERNEL
+		__sinittext = .;
+		INIT_TEXT
+		__einittext = .;
+		EXIT_TEXT
+#endif
 		*(.text.*)
 		*(.fixup)
 
@@ -50,8 +61,14 @@
 
 	/* Just in case the first read only is a 32-bit access */
 	RO_DATA(4)
+	__rodata_end = .;
 
+#ifdef CONFIG_ROMKERNEL
+	. = CONFIG_BOOT_LOAD;
+	.bss : AT(__rodata_end)
+#else
 	.bss :
+#endif
 	{
 		. = ALIGN(4);
 		___bss_start = .;
@@ -67,7 +84,11 @@
 		___bss_stop = .;
 	}
 
+#if defined(CONFIG_ROMKERNEL)
+	.data : AT(LOADADDR(.bss) + SIZEOF(.bss))
+#else
 	.data :
+#endif
 	{
 		__sdata = .;
 		/* This gets done first, so the glob doesn't suck it in */
@@ -94,6 +115,8 @@
 
 		__edata = .;
 	}
+	__data_lma = LOADADDR(.data);
+	__data_len = SIZEOF(.data);
 
 	/* The init section should be last, so when we free it, it goes into
 	 * the general memory pool, and (hopefully) will decrease fragmentation
@@ -103,6 +126,7 @@
 	. = ALIGN(PAGE_SIZE);
 	___init_begin = .;
 
+#ifdef CONFIG_RAMKERNEL
 	INIT_TEXT_SECTION(PAGE_SIZE)
 
 	/* We have to discard exit text and such at runtime, not link time, to
@@ -125,6 +149,35 @@
 	}
 
 	.text_l1 L1_CODE_START : AT(LOADADDR(.exit.data) + SIZEOF(.exit.data))
+#else
+	.init.data : AT(__data_lma + __data_len)
+	{
+		__sinitdata = .;
+		INIT_DATA
+		INIT_SETUP(16)
+		INIT_CALLS
+		CON_INITCALL
+		SECURITY_INITCALL
+		INIT_RAM_FS
+
+		. = ALIGN(4);
+		___per_cpu_load = .;
+		___per_cpu_start = .;
+		*(.data.percpu.first)
+		*(.data.percpu.page_aligned)
+		*(.data.percpu)
+		*(.data.percpu.shared_aligned)
+		___per_cpu_end = .;
+
+		EXIT_DATA
+		__einitdata = .;
+	}
+	__init_data_lma = LOADADDR(.init.data);
+	__init_data_len = SIZEOF(.init.data);
+	__init_data_end = .;
+
+	.text_l1 L1_CODE_START : AT(__init_data_lma + __init_data_len)
+#endif
 	{
 		. = ALIGN(4);
 		__stext_l1 = .;
@@ -205,7 +258,11 @@
 	/* Force trailing alignment of our init section so that when we
 	 * free our init memory, we don't leave behind a partial page.
 	 */
+#ifdef CONFIG_RAMKERNEL
 	. = __l2_lma + __l2_len;
+#else
+	. = __init_data_end;
+#endif
 	. = ALIGN(PAGE_SIZE);
 	___init_end = .;
 
diff --git a/arch/blackfin/mach-common/entry.S b/arch/blackfin/mach-common/entry.S
index 0df7ef2..ccfa7c4 100644
--- a/arch/blackfin/mach-common/entry.S
+++ b/arch/blackfin/mach-common/entry.S
@@ -405,7 +405,7 @@
 
 	r0 = sp;        /* stack frame pt_regs pointer argument ==> r0 */
 	SP += -12;
-	call _double_fault_c;
+	pseudo_long_call _double_fault_c, p5;
 	SP += 12;
 .L_double_fault_panic:
         JUMP .L_double_fault_panic
@@ -447,7 +447,7 @@
 
 	r0 = sp; 	/* stack frame pt_regs pointer argument ==> r0 */
 	SP += -12;
-	call _trap_c;
+	pseudo_long_call _trap_c, p4;
 	SP += 12;
 
 	/* If interrupts were off during the exception (IPEND[4] = 1), turn them off
@@ -551,7 +551,7 @@
 	p0 = sp;
 	sp += -16;
 	[sp + 12] = p0;
-	call _do_execve;
+	pseudo_long_call _do_execve, p5;
 	SP += 16;
 	cc = r0 == 0;
 	if ! cc jump .Lexecve_failed;
@@ -704,7 +704,7 @@
 	sp += 4;
 
 	SP += -12;
-	call _schedule;
+	pseudo_long_call _schedule, p4;
 	SP += 12;
 
 	jump .Lresume_userspace_1;
@@ -723,7 +723,7 @@
 
 	r0 = sp;
 	SP += -12;
-	call _do_notify_resume;
+	pseudo_long_call _do_notify_resume, p5;
 	SP += 12;
 
 .Lsyscall_really_exit:
@@ -736,7 +736,7 @@
  * this symbol need not be global anyways, so ...
  */
 _sys_trace:
-	call _syscall_trace;
+	pseudo_long_call _syscall_trace, p5;
 
 	/* Execute the appropriate system call */
 
@@ -760,7 +760,7 @@
 	SP += 24;
 	[sp + PT_R0] = r0;
 
-	call _syscall_trace;
+	pseudo_long_call _syscall_trace, p5;
 	jump .Lresume_userspace;
 ENDPROC(_sys_trace)
 
@@ -1007,7 +1007,8 @@
 
 	r0 = sp;
 	sp += -12;
-	call _finish_atomic_sections;
+
+	pseudo_long_call _finish_atomic_sections, p5;
 	sp += 12;
 	jump.s .Lresume_userspace;
 ENDPROC(_schedule_and_signal_from_int)
diff --git a/arch/blackfin/mach-common/head.S b/arch/blackfin/mach-common/head.S
index cf03778..4391621 100644
--- a/arch/blackfin/mach-common/head.S
+++ b/arch/blackfin/mach-common/head.S
@@ -186,6 +186,11 @@
 
 	/* Put The Code for PLL Programming and SDRAM Programming in L1 ISRAM */
 	call _bfin_relocate_l1_mem;
+
+#ifdef CONFIG_ROMKERNEL
+	call _bfin_relocate_xip_data;
+#endif
+
 #ifdef CONFIG_BFIN_KERNEL_CLOCK
 	/* Only use on-chip scratch space for stack when absolutely required
 	 * to avoid Anomaly 05000227 ... we know the init_clocks() func only
diff --git a/arch/blackfin/mach-common/interrupt.S b/arch/blackfin/mach-common/interrupt.S
index 8085ff1..df98496 100644
--- a/arch/blackfin/mach-common/interrupt.S
+++ b/arch/blackfin/mach-common/interrupt.S
@@ -109,10 +109,10 @@
 	cc = r0 == 0;
 	if cc jump .Lcommon_restore_context;
 #else /* CONFIG_IPIPE */
-	call _do_irq;
+	pseudo_long_call _do_irq, p2;
 	SP += 12;
 #endif /* CONFIG_IPIPE */
-	call _return_from_int;
+	pseudo_long_call _return_from_int, p2;
 .Lcommon_restore_context:
 	RESTORE_CONTEXT
 	rti;
@@ -168,7 +168,7 @@
 
 	r0 = sp;        /* stack frame pt_regs pointer argument ==> r0 */
 	SP += -12;
-	call _trap_c;
+	pseudo_long_call _trap_c, p5;
 	SP += 12;
 
 #ifdef EBIU_ERRMST
@@ -179,7 +179,7 @@
 	w[p0] = r0.l;
 #endif
 
-	call _ret_from_exception;
+	pseudo_long_call _ret_from_exception, p2;
 
 .Lcommon_restore_all_sys:
 	RESTORE_ALL_SYS
@@ -223,7 +223,7 @@
 #ifdef CONFIG_FRAME_POINTER
 	fp = 0;
 #endif
-	call _system_call;
+	pseudo_long_call _system_call, p2;
 	jump .Lcommon_restore_context;
 ENDPROC(_evt_system_call)