[ARM] 5227/1: Add the ENDPROC declarations to the .S files

This declaration specifies the "function" type and size for various
assembly functions. It is mainly needed so that the linker can generate
the correct branch instructions (ARM/Thumb interworking) when the kernel
is built for Thumb-2.
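
For reference, ENDPROC expands to .type and .size directives. A sketch
of the definition, based on the generic fallback in <linux/linkage.h>
but with the type spelled %function as on ARM, where '@' starts an
assembler comment:

	#ifndef END
	#define END(name) \
		.size name, .-name
	#endif

	#ifndef ENDPROC
	#define ENDPROC(name) \
		.type name, %function; \
		END(name)
	#endif

Marking a symbol as a function is what allows the linker to fix up
branches to it for ARM/Thumb interworking, e.g. emitting BLX or setting
the Thumb bit in the target address.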

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
index d42f89b..1f1729b 100644
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -421,6 +421,7 @@
 		add	r1, r1, #1048576
 		str	r1, [r0]
 		mov	pc, lr
+ENDPROC(__setup_mmu)
 
 __armv4_mmu_cache_on:
 		mov	r12, lr
diff --git a/arch/arm/kernel/debug.S b/arch/arm/kernel/debug.S
index 9550ff0..f53c582 100644
--- a/arch/arm/kernel/debug.S
+++ b/arch/arm/kernel/debug.S
@@ -89,10 +89,12 @@
 ENTRY(printhex8)
 		mov	r1, #8
 		b	printhex
+ENDPROC(printhex8)
 
 ENTRY(printhex4)
 		mov	r1, #4
 		b	printhex
+ENDPROC(printhex4)
 
 ENTRY(printhex2)
 		mov	r1, #2
@@ -110,6 +112,7 @@
 		bne	1b
 		mov	r0, r2
 		b	printascii
+ENDPROC(printhex2)
 
 		.ltorg
 
@@ -127,11 +130,13 @@
 		teqne	r1, #0
 		bne	1b
 		mov	pc, lr
+ENDPROC(printascii)
 
 ENTRY(printch)
 		addruart r3
 		mov	r1, r0
 		mov	r0, #0
 		b	1b
+ENDPROC(printch)
 
 hexbuf:		.space 16
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 617e509..77b0474 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -76,14 +76,17 @@
 __pabt_invalid:
 	inv_entry BAD_PREFETCH
 	b	common_invalid
+ENDPROC(__pabt_invalid)
 
 __dabt_invalid:
 	inv_entry BAD_DATA
 	b	common_invalid
+ENDPROC(__dabt_invalid)
 
 __irq_invalid:
 	inv_entry BAD_IRQ
 	b	common_invalid
+ENDPROC(__irq_invalid)
 
 __und_invalid:
 	inv_entry BAD_UNDEFINSTR
@@ -107,6 +110,7 @@
 
 	mov	r0, sp
 	b	bad_mode
+ENDPROC(__und_invalid)
 
 /*
  * SVC mode handlers
@@ -192,6 +196,7 @@
 	ldr	r0, [sp, #S_PSR]
 	msr	spsr_cxsf, r0
 	ldmia	sp, {r0 - pc}^			@ load r0 - pc, cpsr
+ENDPROC(__dabt_svc)
 
 	.align	5
 __irq_svc:
@@ -223,6 +228,7 @@
 	bleq	trace_hardirqs_on
 #endif
 	ldmia	sp, {r0 - pc}^			@ load r0 - pc, cpsr
+ENDPROC(__irq_svc)
 
 	.ltorg
 
@@ -272,6 +278,7 @@
 	ldr	lr, [sp, #S_PSR]		@ Get SVC cpsr
 	msr	spsr_cxsf, lr
 	ldmia	sp, {r0 - pc}^			@ Restore SVC registers
+ENDPROC(__und_svc)
 
 	.align	5
 __pabt_svc:
@@ -313,6 +320,7 @@
 	ldr	r0, [sp, #S_PSR]
 	msr	spsr_cxsf, r0
 	ldmia	sp, {r0 - pc}^			@ load r0 - pc, cpsr
+ENDPROC(__pabt_svc)
 
 	.align	5
 .LCcralign:
@@ -412,6 +420,7 @@
 	mov	r2, sp
 	adr	lr, ret_from_exception
 	b	do_DataAbort
+ENDPROC(__dabt_usr)
 
 	.align	5
 __irq_usr:
@@ -441,6 +450,7 @@
 
 	mov	why, #0
 	b	ret_to_user
+ENDPROC(__irq_usr)
 
 	.ltorg
 
@@ -474,6 +484,7 @@
 #else
 	b	__und_usr_unknown
 #endif
+ENDPROC(__und_usr)
 
 	@
 	@ fallthrough to call_fpe
@@ -642,6 +653,7 @@
 	mov	r0, sp
 	adr	lr, ret_from_exception
 	b	do_undefinstr
+ENDPROC(__und_usr_unknown)
 
 	.align	5
 __pabt_usr:
@@ -666,6 +678,8 @@
 	get_thread_info tsk
 	mov	why, #0
 	b	ret_to_user
+ENDPROC(__pabt_usr)
+ENDPROC(ret_from_exception)
 
 /*
  * Register switch for ARMv3 and ARMv4 processors
@@ -702,6 +716,7 @@
 	bl	atomic_notifier_call_chain
 	mov	r0, r5
 	ldmia	r4, {r4 - sl, fp, sp, pc}	@ Load all regs saved previously
+ENDPROC(__switch_to)
 
 	__INIT
 
@@ -1029,6 +1044,7 @@
 	mov	r0, sp
 	ldr	lr, [pc, lr, lsl #2]
 	movs	pc, lr			@ branch to handler in SVC mode
+ENDPROC(vector_\name)
 	.endm
 
 	.globl	__stubs_start
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index 060d7e2..3aa14dc 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -77,6 +77,7 @@
 	mov	r0, r0
 	add	sp, sp, #S_FRAME_SIZE - S_PC
 	movs	pc, lr				@ return & move spsr_svc into cpsr
+ENDPROC(ret_to_user)
 
 /*
  * This is how we return from a fork.
@@ -92,7 +93,7 @@
 	mov	r0, #1				@ trace exit [IP = 1]
 	bl	syscall_trace
 	b	ret_slow_syscall
-	
+ENDPROC(ret_from_fork)
 
 	.equ NR_syscalls,0
 #define CALL(x) .equ NR_syscalls,NR_syscalls+1
@@ -269,6 +270,7 @@
 	eor	r0, scno, #__NR_SYSCALL_BASE	@ put OS number back
 	bcs	arm_syscall	
 	b	sys_ni_syscall			@ not private func
+ENDPROC(vector_swi)
 
 	/*
 	 * This is the really slow path.  We're going to be doing
@@ -326,7 +328,6 @@
  */
 @ r0 = syscall number
 @ r8 = syscall table
-		.type	sys_syscall, #function
 sys_syscall:
 		bic	scno, r0, #__NR_OABI_SYSCALL_BASE
 		cmp	scno, #__NR_syscall - __NR_SYSCALL_BASE
@@ -338,53 +339,65 @@
 		movlo	r3, r4
 		ldrlo	pc, [tbl, scno, lsl #2]
 		b	sys_ni_syscall
+ENDPROC(sys_syscall)
 
 sys_fork_wrapper:
 		add	r0, sp, #S_OFF
 		b	sys_fork
+ENDPROC(sys_fork_wrapper)
 
 sys_vfork_wrapper:
 		add	r0, sp, #S_OFF
 		b	sys_vfork
+ENDPROC(sys_vfork_wrapper)
 
 sys_execve_wrapper:
 		add	r3, sp, #S_OFF
 		b	sys_execve
+ENDPROC(sys_execve_wrapper)
 
 sys_clone_wrapper:
 		add	ip, sp, #S_OFF
 		str	ip, [sp, #4]
 		b	sys_clone
+ENDPROC(sys_clone_wrapper)
 
 sys_sigsuspend_wrapper:
 		add	r3, sp, #S_OFF
 		b	sys_sigsuspend
+ENDPROC(sys_sigsuspend_wrapper)
 
 sys_rt_sigsuspend_wrapper:
 		add	r2, sp, #S_OFF
 		b	sys_rt_sigsuspend
+ENDPROC(sys_rt_sigsuspend_wrapper)
 
 sys_sigreturn_wrapper:
 		add	r0, sp, #S_OFF
 		b	sys_sigreturn
+ENDPROC(sys_sigreturn_wrapper)
 
 sys_rt_sigreturn_wrapper:
 		add	r0, sp, #S_OFF
 		b	sys_rt_sigreturn
+ENDPROC(sys_rt_sigreturn_wrapper)
 
 sys_sigaltstack_wrapper:
 		ldr	r2, [sp, #S_OFF + S_SP]
 		b	do_sigaltstack
+ENDPROC(sys_sigaltstack_wrapper)
 
 sys_statfs64_wrapper:
 		teq	r1, #88
 		moveq	r1, #84
 		b	sys_statfs64
+ENDPROC(sys_statfs64_wrapper)
 
 sys_fstatfs64_wrapper:
 		teq	r1, #88
 		moveq	r1, #84
 		b	sys_fstatfs64
+ENDPROC(sys_fstatfs64_wrapper)
 
 /*
  * Note: off_4k (r5) is always units of 4K.  If we can't do the requested
@@ -402,11 +415,14 @@
 		str	r5, [sp, #4]
 		b	do_mmap2
 #endif
+ENDPROC(sys_mmap2)
 
 ENTRY(pabort_ifar)
 		mrc	p15, 0, r0, cr6, cr0, 2
 ENTRY(pabort_noifar)
 		mov	pc, lr
+ENDPROC(pabort_ifar)
+ENDPROC(pabort_noifar)
 
 #ifdef CONFIG_OABI_COMPAT
 
@@ -417,26 +433,31 @@
 sys_oabi_pread64:
 		stmia	sp, {r3, r4}
 		b	sys_pread64
+ENDPROC(sys_oabi_pread64)
 
 sys_oabi_pwrite64:
 		stmia	sp, {r3, r4}
 		b	sys_pwrite64
+ENDPROC(sys_oabi_pwrite64)
 
 sys_oabi_truncate64:
 		mov	r3, r2
 		mov	r2, r1
 		b	sys_truncate64
+ENDPROC(sys_oabi_truncate64)
 
 sys_oabi_ftruncate64:
 		mov	r3, r2
 		mov	r2, r1
 		b	sys_ftruncate64
+ENDPROC(sys_oabi_ftruncate64)
 
 sys_oabi_readahead:
 		str	r3, [sp]
 		mov	r3, r2
 		mov	r2, r1
 		b	sys_readahead
+ENDPROC(sys_oabi_readahead)
 
 /*
  * Let's declare a second syscall table for old ABI binaries
diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S
index 1c3c6ea..bde52df 100644
--- a/arch/arm/kernel/head-common.S
+++ b/arch/arm/kernel/head-common.S
@@ -36,7 +36,6 @@
  *  r2  = atags pointer
  *  r9  = processor ID
  */
-	.type	__mmap_switched, %function
 __mmap_switched:
 	adr	r3, __switch_data + 4
 
@@ -59,6 +58,7 @@
 	bic	r4, r0, #CR_A			@ Clear 'A' bit
 	stmia	r7, {r0, r4}			@ Save control register values
 	b	start_kernel
+ENDPROC(__mmap_switched)
 
 /*
  * Exception handling.  Something went wrong and we can't proceed.  We
@@ -69,8 +69,6 @@
  * and hope for the best (useful if bootloader fails to pass a proper
  * machine ID for example).
  */
-
-	.type	__error_p, %function
 __error_p:
 #ifdef CONFIG_DEBUG_LL
 	adr	r0, str_p1
@@ -84,8 +82,8 @@
 str_p2:	.asciz	").\n"
 	.align
 #endif
+ENDPROC(__error_p)
 
-	.type	__error_a, %function
 __error_a:
 #ifdef CONFIG_DEBUG_LL
 	mov	r4, r1				@ preserve machine ID
@@ -115,13 +113,14 @@
 	adr	r0, str_a3
 	bl	printascii
 	b	__error
+ENDPROC(__error_a)
+
 str_a1:	.asciz	"\nError: unrecognized/unsupported machine ID (r1 = 0x"
 str_a2:	.asciz	").\n\nAvailable machine support:\n\nID (hex)\tNAME\n"
 str_a3:	.asciz	"\nPlease check your kernel config and/or bootloader.\n"
 	.align
 #endif
 
-	.type	__error, %function
 __error:
 #ifdef CONFIG_ARCH_RPC
 /*
@@ -138,6 +137,7 @@
 #endif
 1:	mov	r0, r0
 	b	1b
+ENDPROC(__error)
 
 
 /*
@@ -153,7 +153,6 @@
  *	r5 = proc_info pointer in physical address space
  *	r9 = cpuid (preserved)
  */
-	.type	__lookup_processor_type, %function
 __lookup_processor_type:
 	adr	r3, 3f
 	ldmda	r3, {r5 - r7}
@@ -169,6 +168,7 @@
 	blo	1b
 	mov	r5, #0				@ unknown processor
 2:	mov	pc, lr
+ENDPROC(__lookup_processor_type)
 
 /*
  * This provides a C-API version of the above function.
@@ -179,6 +179,7 @@
 	bl	__lookup_processor_type
 	mov	r0, r5
 	ldmfd	sp!, {r4 - r7, r9, pc}
+ENDPROC(lookup_processor_type)
 
 /*
  * Look in <asm/procinfo.h> and arch/arm/kernel/arch.[ch] for
@@ -201,7 +202,6 @@
  *  r3, r4, r6 corrupted
  *  r5 = mach_info pointer in physical address space
  */
-	.type	__lookup_machine_type, %function
 __lookup_machine_type:
 	adr	r3, 3b
 	ldmia	r3, {r4, r5, r6}
@@ -216,6 +216,7 @@
 	blo	1b
 	mov	r5, #0				@ unknown machine
 2:	mov	pc, lr
+ENDPROC(__lookup_machine_type)
 
 /*
  * This provides a C-API version of the above function.
@@ -226,6 +227,7 @@
 	bl	__lookup_machine_type
 	mov	r0, r5
 	ldmfd	sp!, {r4 - r6, pc}
+ENDPROC(lookup_machine_type)
 
 /* Determine validity of the r2 atags pointer.  The heuristic requires
  * that the pointer be aligned, in the first 16k of physical RAM and
@@ -239,8 +241,6 @@
  *  r2 either valid atags pointer, or zero
  *  r5, r6 corrupted
  */
-
-	.type	__vet_atags, %function
 __vet_atags:
 	tst	r2, #0x3			@ aligned?
 	bne	1f
@@ -257,3 +257,4 @@
 
 1:	mov	r2, #0
 	mov	pc, lr
+ENDPROC(__vet_atags)
diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S
index 27329bd..cc87e17 100644
--- a/arch/arm/kernel/head-nommu.S
+++ b/arch/arm/kernel/head-nommu.S
@@ -33,7 +33,6 @@
  *
  */
 	.section ".text.head", "ax"
-	.type	stext, %function
 ENTRY(stext)
 	msr	cpsr_c, #PSR_F_BIT | PSR_I_BIT | SVC_MODE @ ensure svc mode
 						@ and irqs disabled
@@ -53,11 +52,11 @@
 						@ the initialization is done
 	adr	lr, __after_proc_init		@ return (PIC) address
 	add	pc, r10, #PROCINFO_INITFUNC
+ENDPROC(stext)
 
 /*
  * Set the Control Register and Read the process ID.
  */
-	.type	__after_proc_init, %function
 __after_proc_init:
 #ifdef CONFIG_CPU_CP15
 	mrc	p15, 0, r0, c1, c0, 0		@ read control reg
@@ -85,6 +84,7 @@
 
 	mov	pc, r13				@ clear the BSS and jump
 						@ to start_kernel
+ENDPROC(__after_proc_init)
 	.ltorg
 
 #include "head-common.S"
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index bff4c6e..21e17dc 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -75,7 +75,6 @@
  * circumstances, zImage) is for.
  */
 	.section ".text.head", "ax"
-	.type	stext, %function
 ENTRY(stext)
 	msr	cpsr_c, #PSR_F_BIT | PSR_I_BIT | SVC_MODE @ ensure svc mode
 						@ and irqs disabled
@@ -100,9 +99,9 @@
 						@ mmu has been enabled
 	adr	lr, __enable_mmu		@ return (PIC) address
 	add	pc, r10, #PROCINFO_INITFUNC
+ENDPROC(stext)
 
 #if defined(CONFIG_SMP)
-	.type   secondary_startup, #function
 ENTRY(secondary_startup)
 	/*
 	 * Common entry point for secondary CPUs.
@@ -128,6 +127,7 @@
 	adr	lr, __enable_mmu		@ return address
 	add	pc, r10, #PROCINFO_INITFUNC	@ initialise processor
 						@ (return control reg)
+ENDPROC(secondary_startup)
 
 	/*
 	 * r6  = &secondary_data
@@ -136,6 +136,7 @@
 	ldr	sp, [r7, #4]			@ get secondary_data.stack
 	mov	fp, #0
 	b	secondary_start_kernel
+ENDPROC(__secondary_switched)
 
 	.type	__secondary_data, %object
 __secondary_data:
@@ -151,7 +152,6 @@
  * this is just loading the page table pointer and domain access
  * registers.
  */
-	.type	__enable_mmu, %function
 __enable_mmu:
 #ifdef CONFIG_ALIGNMENT_TRAP
 	orr	r0, r0, #CR_A
@@ -174,6 +174,7 @@
 	mcr	p15, 0, r5, c3, c0, 0		@ load domain access register
 	mcr	p15, 0, r4, c2, c0, 0		@ load page table pointer
 	b	__turn_mmu_on
+ENDPROC(__enable_mmu)
 
 /*
  * Enable the MMU.  This completely changes the structure of the visible
@@ -187,7 +188,6 @@
  * other registers depend on the function called upon completion
  */
 	.align	5
-	.type	__turn_mmu_on, %function
 __turn_mmu_on:
 	mov	r0, r0
 	mcr	p15, 0, r0, c1, c0, 0		@ write control reg
@@ -195,7 +195,7 @@
 	mov	r3, r3
 	mov	r3, r3
 	mov	pc, r13
-
+ENDPROC(__turn_mmu_on)
 
 
 /*
@@ -211,7 +211,6 @@
  *  r0, r3, r6, r7 corrupted
  *  r4 = physical page table address
  */
-	.type	__create_page_tables, %function
 __create_page_tables:
 	pgtbl	r4				@ page table address
 
@@ -325,6 +324,7 @@
 #endif
 #endif
 	mov	pc, lr
+ENDPROC(__create_page_tables)
 	.ltorg
 
 #include "head-common.S"
diff --git a/arch/arm/lib/ashldi3.S b/arch/arm/lib/ashldi3.S
index 55e57a1..1154d92 100644
--- a/arch/arm/lib/ashldi3.S
+++ b/arch/arm/lib/ashldi3.S
@@ -47,3 +47,5 @@
 	mov	al, al, lsl r2
 	mov	pc, lr
 
+ENDPROC(__ashldi3)
+ENDPROC(__aeabi_llsl)
diff --git a/arch/arm/lib/ashrdi3.S b/arch/arm/lib/ashrdi3.S
index 0b31398..9f8b355 100644
--- a/arch/arm/lib/ashrdi3.S
+++ b/arch/arm/lib/ashrdi3.S
@@ -47,3 +47,5 @@
 	mov	ah, ah, asr r2
 	mov	pc, lr
 
+ENDPROC(__ashrdi3)
+ENDPROC(__aeabi_lasr)
diff --git a/arch/arm/lib/backtrace.S b/arch/arm/lib/backtrace.S
index 84dc890..b0951d0 100644
--- a/arch/arm/lib/backtrace.S
+++ b/arch/arm/lib/backtrace.S
@@ -30,6 +30,8 @@
 
 #if !defined(CONFIG_FRAME_POINTER) || !defined(CONFIG_PRINTK)
 		mov	pc, lr
+ENDPROC(__backtrace)
+ENDPROC(c_backtrace)
 #else
 		stmfd	sp!, {r4 - r8, lr}	@ Save an extra register so we have a location...
 		movs	frame, r0		@ if frame pointer is zero
@@ -103,6 +105,8 @@
 		mov	r1, frame
 		bl	printk
 no_frame:	ldmfd	sp!, {r4 - r8, pc}
+ENDPROC(__backtrace)
+ENDPROC(c_backtrace)
 		
 		.section __ex_table,"a"
 		.align	3
diff --git a/arch/arm/lib/changebit.S b/arch/arm/lib/changebit.S
index 389567c..80f3115 100644
--- a/arch/arm/lib/changebit.S
+++ b/arch/arm/lib/changebit.S
@@ -19,3 +19,5 @@
 		eor	r0, r0, #0x18		@ big endian byte ordering
 ENTRY(_change_bit_le)
 	bitop	eor
+ENDPROC(_change_bit_be)
+ENDPROC(_change_bit_le)
diff --git a/arch/arm/lib/clear_user.S b/arch/arm/lib/clear_user.S
index ecb28dc..81041a3 100644
--- a/arch/arm/lib/clear_user.S
+++ b/arch/arm/lib/clear_user.S
@@ -44,6 +44,7 @@
 USER(		strnebt	r2, [r0], #1)
 		mov	r0, #0
 		ldmfd	sp!, {r1, pc}
+ENDPROC(__clear_user)
 
 		.section .fixup,"ax"
 		.align	0
diff --git a/arch/arm/lib/clearbit.S b/arch/arm/lib/clearbit.S
index 3475165..1a63e43 100644
--- a/arch/arm/lib/clearbit.S
+++ b/arch/arm/lib/clearbit.S
@@ -20,3 +20,5 @@
 		eor	r0, r0, #0x18		@ big endian byte ordering
 ENTRY(_clear_bit_le)
 	bitop	bic
+ENDPROC(_clear_bit_be)
+ENDPROC(_clear_bit_le)
diff --git a/arch/arm/lib/copy_from_user.S b/arch/arm/lib/copy_from_user.S
index 6b7363c..56799a1 100644
--- a/arch/arm/lib/copy_from_user.S
+++ b/arch/arm/lib/copy_from_user.S
@@ -87,6 +87,8 @@
 
 #include "copy_template.S"
 
+ENDPROC(__copy_from_user)
+
 	.section .fixup,"ax"
 	.align 0
 	copy_abort_preamble
diff --git a/arch/arm/lib/copy_page.S b/arch/arm/lib/copy_page.S
index 666c99c..6ae04db 100644
--- a/arch/arm/lib/copy_page.S
+++ b/arch/arm/lib/copy_page.S
@@ -44,3 +44,4 @@
 	PLD(	ldmeqia r1!, {r3, r4, ip, lr}	)
 	PLD(	beq	2b			)
 		ldmfd	sp!, {r4, pc}			@	3
+ENDPROC(copy_page)
diff --git a/arch/arm/lib/copy_to_user.S b/arch/arm/lib/copy_to_user.S
index 5224d94..22f968b 100644
--- a/arch/arm/lib/copy_to_user.S
+++ b/arch/arm/lib/copy_to_user.S
@@ -90,6 +90,8 @@
 
 #include "copy_template.S"
 
+ENDPROC(__copy_to_user)
+
 	.section .fixup,"ax"
 	.align 0
 	copy_abort_preamble
diff --git a/arch/arm/lib/csumipv6.S b/arch/arm/lib/csumipv6.S
index 9621469..3ac6ef0 100644
--- a/arch/arm/lib/csumipv6.S
+++ b/arch/arm/lib/csumipv6.S
@@ -29,4 +29,5 @@
 		adcs	r0, r0, r2
 		adcs	r0, r0, #0
 		ldmfd	sp!, {pc}
+ENDPROC(__csum_ipv6_magic)
 
diff --git a/arch/arm/lib/csumpartial.S b/arch/arm/lib/csumpartial.S
index a78dae5..31d3cb3 100644
--- a/arch/arm/lib/csumpartial.S
+++ b/arch/arm/lib/csumpartial.S
@@ -139,3 +139,4 @@
 		tst	len, #0x1c
 		bne	4b
 		b	.Lless4
+ENDPROC(csum_partial)
diff --git a/arch/arm/lib/csumpartialcopy.S b/arch/arm/lib/csumpartialcopy.S
index 21effe0..80aa2c7 100644
--- a/arch/arm/lib/csumpartialcopy.S
+++ b/arch/arm/lib/csumpartialcopy.S
@@ -50,5 +50,6 @@
 		.endm
 
 #define FN_ENTRY	ENTRY(csum_partial_copy_nocheck)
+#define FN_EXIT		ENDPROC(csum_partial_copy_nocheck)
 
 #include "csumpartialcopygeneric.S"
diff --git a/arch/arm/lib/csumpartialcopygeneric.S b/arch/arm/lib/csumpartialcopygeneric.S
index c50e8f5..d620a5f 100644
--- a/arch/arm/lib/csumpartialcopygeneric.S
+++ b/arch/arm/lib/csumpartialcopygeneric.S
@@ -329,3 +329,4 @@
 		adcs	sum, sum, r4, push #24
 		mov	r5, r4, get_byte_1
 		b	.Lexit
+FN_EXIT
diff --git a/arch/arm/lib/csumpartialcopyuser.S b/arch/arm/lib/csumpartialcopyuser.S
index c3b93e2..e8b9c24 100644
--- a/arch/arm/lib/csumpartialcopyuser.S
+++ b/arch/arm/lib/csumpartialcopyuser.S
@@ -82,6 +82,7 @@
  */
 
 #define FN_ENTRY	ENTRY(csum_partial_copy_from_user)
+#define FN_EXIT		ENDPROC(csum_partial_copy_from_user)
 
 #include "csumpartialcopygeneric.S"
 
diff --git a/arch/arm/lib/delay.S b/arch/arm/lib/delay.S
index 930a702..8d6a876 100644
--- a/arch/arm/lib/delay.S
+++ b/arch/arm/lib/delay.S
@@ -60,3 +60,6 @@
 #endif
 		bhi	__delay
 		mov	pc, lr
+ENDPROC(__udelay)
+ENDPROC(__const_udelay)
+ENDPROC(__delay)
diff --git a/arch/arm/lib/div64.S b/arch/arm/lib/div64.S
index 58eef66..1425e78 100644
--- a/arch/arm/lib/div64.S
+++ b/arch/arm/lib/div64.S
@@ -198,3 +198,4 @@
 	mov	xh, #0
 	ldr	pc, [sp], #8
 
+ENDPROC(__do_div64)
diff --git a/arch/arm/lib/findbit.S b/arch/arm/lib/findbit.S
index a5ca024..8c4defc 100644
--- a/arch/arm/lib/findbit.S
+++ b/arch/arm/lib/findbit.S
@@ -33,6 +33,7 @@
 		blo	1b
 3:		mov	r0, r1			@ no free bits
 		mov	pc, lr
+ENDPROC(_find_first_zero_bit_le)
 
 /*
  * Purpose  : Find next 'zero' bit
@@ -50,6 +51,7 @@
 		orr	r2, r2, #7		@ if zero, then no bits here
 		add	r2, r2, #1		@ align bit pointer
 		b	2b			@ loop for next bit
+ENDPROC(_find_next_zero_bit_le)
 
 /*
  * Purpose  : Find a 'one' bit
@@ -67,6 +69,7 @@
 		blo	1b
 3:		mov	r0, r1			@ no free bits
 		mov	pc, lr
+ENDPROC(_find_first_bit_le)
 
 /*
  * Purpose  : Find next 'one' bit
@@ -83,6 +86,7 @@
 		orr	r2, r2, #7		@ if zero, then no bits here
 		add	r2, r2, #1		@ align bit pointer
 		b	2b			@ loop for next bit
+ENDPROC(_find_next_bit_le)
 
 #ifdef __ARMEB__
 
@@ -99,6 +103,7 @@
 		blo	1b
 3:		mov	r0, r1			@ no free bits
 		mov	pc, lr
+ENDPROC(_find_first_zero_bit_be)
 
 ENTRY(_find_next_zero_bit_be)
 		teq	r1, #0
@@ -113,6 +118,7 @@
 		orr	r2, r2, #7		@ if zero, then no bits here
 		add	r2, r2, #1		@ align bit pointer
 		b	2b			@ loop for next bit
+ENDPROC(_find_next_zero_bit_be)
 
 ENTRY(_find_first_bit_be)
 		teq	r1, #0
@@ -127,6 +133,7 @@
 		blo	1b
 3:		mov	r0, r1			@ no free bits
 		mov	pc, lr
+ENDPROC(_find_first_bit_be)
 
 ENTRY(_find_next_bit_be)
 		teq	r1, #0
@@ -140,6 +147,7 @@
 		orr	r2, r2, #7		@ if zero, then no bits here
 		add	r2, r2, #1		@ align bit pointer
 		b	2b			@ loop for next bit
+ENDPROC(_find_next_bit_be)
 
 #endif
 
diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S
index 2034d4d..6763088 100644
--- a/arch/arm/lib/getuser.S
+++ b/arch/arm/lib/getuser.S
@@ -26,16 +26,16 @@
  * Note that ADDR_LIMIT is either 0 or 0xc0000000.
  * Note also that it is intended that __get_user_bad is not global.
  */
+#include <linux/linkage.h>
 #include <asm/errno.h>
 
-	.global	__get_user_1
-__get_user_1:
+ENTRY(__get_user_1)
 1:	ldrbt	r2, [r0]
 	mov	r0, #0
 	mov	pc, lr
+ENDPROC(__get_user_1)
 
-	.global	__get_user_2
-__get_user_2:
+ENTRY(__get_user_2)
 2:	ldrbt	r2, [r0], #1
 3:	ldrbt	r3, [r0]
 #ifndef __ARMEB__
@@ -45,17 +45,19 @@
 #endif
 	mov	r0, #0
 	mov	pc, lr
+ENDPROC(__get_user_2)
 
-	.global	__get_user_4
-__get_user_4:
+ENTRY(__get_user_4)
 4:	ldrt	r2, [r0]
 	mov	r0, #0
 	mov	pc, lr
+ENDPROC(__get_user_4)
 
 __get_user_bad:
 	mov	r2, #0
 	mov	r0, #-EFAULT
 	mov	pc, lr
+ENDPROC(__get_user_bad)
 
 .section __ex_table, "a"
 	.long	1b, __get_user_bad
diff --git a/arch/arm/lib/io-readsb.S b/arch/arm/lib/io-readsb.S
index fb966ad..9f42389 100644
--- a/arch/arm/lib/io-readsb.S
+++ b/arch/arm/lib/io-readsb.S
@@ -120,3 +120,4 @@
 		strgtb	r3, [r1]
 
 		ldmfd	sp!, {r4 - r6, pc}
+ENDPROC(__raw_readsb)
diff --git a/arch/arm/lib/io-readsl.S b/arch/arm/lib/io-readsl.S
index 75a9121..5fb97e7 100644
--- a/arch/arm/lib/io-readsl.S
+++ b/arch/arm/lib/io-readsl.S
@@ -76,3 +76,4 @@
 8:		mov	r3, ip, get_byte_0
 		strb	r3, [r1, #0]
 		mov	pc, lr
+ENDPROC(__raw_readsl)
diff --git a/arch/arm/lib/io-readsw-armv4.S b/arch/arm/lib/io-readsw-armv4.S
index 4db1c5f..1f393d4 100644
--- a/arch/arm/lib/io-readsw-armv4.S
+++ b/arch/arm/lib/io-readsw-armv4.S
@@ -128,3 +128,4 @@
    _BE_ONLY_(	movne	ip, ip, lsr #24		)
 		strneb	ip, [r1]
 		ldmfd	sp!, {r4, pc}
+ENDPROC(__raw_readsw)
diff --git a/arch/arm/lib/io-writesb.S b/arch/arm/lib/io-writesb.S
index 7eba2b6..68b92f4 100644
--- a/arch/arm/lib/io-writesb.S
+++ b/arch/arm/lib/io-writesb.S
@@ -91,3 +91,4 @@
 		strgtb	r3, [r0]
 
 		ldmfd	sp!, {r4, r5, pc}
+ENDPROC(__raw_writesb)
diff --git a/arch/arm/lib/io-writesl.S b/arch/arm/lib/io-writesl.S
index f8f14dd..8d3b781 100644
--- a/arch/arm/lib/io-writesl.S
+++ b/arch/arm/lib/io-writesl.S
@@ -64,3 +64,4 @@
 		str	ip, [r0]
 		bne	6b
 		mov	pc, lr
+ENDPROC(__raw_writesl)
diff --git a/arch/arm/lib/io-writesw-armv4.S b/arch/arm/lib/io-writesw-armv4.S
index c8e85bd..d658561 100644
--- a/arch/arm/lib/io-writesw-armv4.S
+++ b/arch/arm/lib/io-writesw-armv4.S
@@ -94,3 +94,4 @@
 3:		movne	ip, r3, lsr #8
 		strneh	ip, [r0]
 		mov	pc, lr
+ENDPROC(__raw_writesw)
diff --git a/arch/arm/lib/lib1funcs.S b/arch/arm/lib/lib1funcs.S
index 4e492f4..67964bc 100644
--- a/arch/arm/lib/lib1funcs.S
+++ b/arch/arm/lib/lib1funcs.S
@@ -230,6 +230,8 @@
 	mov	r0, r0, lsr r2
 	mov	pc, lr
 
+ENDPROC(__udivsi3)
+ENDPROC(__aeabi_uidiv)
 
 ENTRY(__umodsi3)
 
@@ -245,6 +247,7 @@
 
 	mov	pc, lr
 
+ENDPROC(__umodsi3)
 
 ENTRY(__divsi3)
 ENTRY(__aeabi_idiv)
@@ -284,6 +287,8 @@
 	rsbmi	r0, r0, #0
 	mov	pc, lr
 
+ENDPROC(__divsi3)
+ENDPROC(__aeabi_idiv)
 
 ENTRY(__modsi3)
 
@@ -305,6 +310,8 @@
 	rsbmi	r0, r0, #0
 	mov	pc, lr
 
+ENDPROC(__modsi3)
+
 #ifdef CONFIG_AEABI
 
 ENTRY(__aeabi_uidivmod)
@@ -316,6 +323,8 @@
 	sub	r1, r1, r3
 	mov	pc, lr
 
+ENDPROC(__aeabi_uidivmod)
+
 ENTRY(__aeabi_idivmod)
 
 	stmfd	sp!, {r0, r1, ip, lr}
@@ -325,6 +334,8 @@
 	sub	r1, r1, r3
 	mov	pc, lr
 
+ENDPROC(__aeabi_idivmod)
+
 #endif
 
 Ldiv0:
diff --git a/arch/arm/lib/lshrdi3.S b/arch/arm/lib/lshrdi3.S
index a86dbdd..99ea338 100644
--- a/arch/arm/lib/lshrdi3.S
+++ b/arch/arm/lib/lshrdi3.S
@@ -47,3 +47,5 @@
 	mov	ah, ah, lsr r2
 	mov	pc, lr
 
+ENDPROC(__lshrdi3)
+ENDPROC(__aeabi_llsr)
diff --git a/arch/arm/lib/memchr.S b/arch/arm/lib/memchr.S
index e7ab1ea..1da8699 100644
--- a/arch/arm/lib/memchr.S
+++ b/arch/arm/lib/memchr.S
@@ -23,3 +23,4 @@
 	sub	r0, r0, #1
 2:	movne	r0, #0
 	mov	pc, lr
+ENDPROC(memchr)
diff --git a/arch/arm/lib/memcpy.S b/arch/arm/lib/memcpy.S
index 7e71d67..e0d0026 100644
--- a/arch/arm/lib/memcpy.S
+++ b/arch/arm/lib/memcpy.S
@@ -57,3 +57,4 @@
 
 #include "copy_template.S"
 
+ENDPROC(memcpy)
diff --git a/arch/arm/lib/memmove.S b/arch/arm/lib/memmove.S
index 2e301b7..1254918 100644
--- a/arch/arm/lib/memmove.S
+++ b/arch/arm/lib/memmove.S
@@ -196,3 +196,4 @@
 
 18:		backward_copy_shift	push=24	pull=8
 
+ENDPROC(memmove)
diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S
index b477d4a..761eefa 100644
--- a/arch/arm/lib/memset.S
+++ b/arch/arm/lib/memset.S
@@ -124,3 +124,4 @@
 	tst	r2, #1
 	strneb	r1, [r0], #1
 	mov	pc, lr
+ENDPROC(memset)
diff --git a/arch/arm/lib/memzero.S b/arch/arm/lib/memzero.S
index b8f79d8..3fbdef5 100644
--- a/arch/arm/lib/memzero.S
+++ b/arch/arm/lib/memzero.S
@@ -122,3 +122,4 @@
 	tst	r1, #1			@ 1 a byte left over
 	strneb	r2, [r0], #1		@ 1
 	mov	pc, lr			@ 1
+ENDPROC(__memzero)
diff --git a/arch/arm/lib/muldi3.S b/arch/arm/lib/muldi3.S
index d89c606..36c91b4 100644
--- a/arch/arm/lib/muldi3.S
+++ b/arch/arm/lib/muldi3.S
@@ -43,3 +43,5 @@
 	adc	xh, xh, ip, lsr #16
 	mov	pc, lr
 
+ENDPROC(__muldi3)
+ENDPROC(__aeabi_lmul)
diff --git a/arch/arm/lib/putuser.S b/arch/arm/lib/putuser.S
index 08ec7df..864f3c1 100644
--- a/arch/arm/lib/putuser.S
+++ b/arch/arm/lib/putuser.S
@@ -26,16 +26,16 @@
  * Note that ADDR_LIMIT is either 0 or 0xc0000000
  * Note also that it is intended that __put_user_bad is not global.
  */
+#include <linux/linkage.h>
 #include <asm/errno.h>
 
-	.global	__put_user_1
-__put_user_1:
+ENTRY(__put_user_1)
 1:	strbt	r2, [r0]
 	mov	r0, #0
 	mov	pc, lr
+ENDPROC(__put_user_1)
 
-	.global	__put_user_2
-__put_user_2:
+ENTRY(__put_user_2)
 	mov	ip, r2, lsr #8
 #ifndef __ARMEB__
 2:	strbt	r2, [r0], #1
@@ -46,23 +46,25 @@
 #endif
 	mov	r0, #0
 	mov	pc, lr
+ENDPROC(__put_user_2)
 
-	.global	__put_user_4
-__put_user_4:
+ENTRY(__put_user_4)
 4:	strt	r2, [r0]
 	mov	r0, #0
 	mov	pc, lr
+ENDPROC(__put_user_4)
 
-	.global	__put_user_8
-__put_user_8:
+ENTRY(__put_user_8)
 5:	strt	r2, [r0], #4
 6:	strt	r3, [r0]
 	mov	r0, #0
 	mov	pc, lr
+ENDPROC(__put_user_8)
 
 __put_user_bad:
 	mov	r0, #-EFAULT
 	mov	pc, lr
+ENDPROC(__put_user_bad)
 
 .section __ex_table, "a"
 	.long	1b, __put_user_bad
diff --git a/arch/arm/lib/setbit.S b/arch/arm/lib/setbit.S
index 83bc23d..1dd7176 100644
--- a/arch/arm/lib/setbit.S
+++ b/arch/arm/lib/setbit.S
@@ -20,3 +20,5 @@
 		eor	r0, r0, #0x18		@ big endian byte ordering
 ENTRY(_set_bit_le)
 	bitop	orr
+ENDPROC(_set_bit_be)
+ENDPROC(_set_bit_le)
diff --git a/arch/arm/lib/sha1.S b/arch/arm/lib/sha1.S
index ff6ece4..8a1c67f 100644
--- a/arch/arm/lib/sha1.S
+++ b/arch/arm/lib/sha1.S
@@ -185,6 +185,8 @@
 
 	ldmfd	sp!, {r4 - r8, pc}
 
+ENDPROC(sha_transform)
+
 .L_sha_K:
 	.word	0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6
 
@@ -204,3 +206,4 @@
 	stmia	r0, {r1, r2, r3, ip, lr}
 	ldr	pc, [sp], #4
 
+ENDPROC(sha_init)
diff --git a/arch/arm/lib/strchr.S b/arch/arm/lib/strchr.S
index 9f18d6f..d8f2a1c 100644
--- a/arch/arm/lib/strchr.S
+++ b/arch/arm/lib/strchr.S
@@ -24,3 +24,4 @@
 		movne	r0, #0
 		subeq	r0, r0, #1
 		mov	pc, lr
+ENDPROC(strchr)
diff --git a/arch/arm/lib/strncpy_from_user.S b/arch/arm/lib/strncpy_from_user.S
index 36e3741..330373c 100644
--- a/arch/arm/lib/strncpy_from_user.S
+++ b/arch/arm/lib/strncpy_from_user.S
@@ -31,6 +31,7 @@
 	sub	r1, r1, #1	@ take NUL character out of count
 2:	sub	r0, r1, ip
 	mov	pc, lr
+ENDPROC(__strncpy_from_user)
 
 	.section .fixup,"ax"
 	.align	0
diff --git a/arch/arm/lib/strnlen_user.S b/arch/arm/lib/strnlen_user.S
index 18d8fa4..90bb9d0 100644
--- a/arch/arm/lib/strnlen_user.S
+++ b/arch/arm/lib/strnlen_user.S
@@ -31,6 +31,7 @@
 	add	r0, r0, #1
 2:	sub	r0, r0, r2
 	mov	pc, lr
+ENDPROC(__strnlen_user)
 
 	.section .fixup,"ax"
 	.align	0
diff --git a/arch/arm/lib/strrchr.S b/arch/arm/lib/strrchr.S
index 538df22..302f20c 100644
--- a/arch/arm/lib/strrchr.S
+++ b/arch/arm/lib/strrchr.S
@@ -23,3 +23,4 @@
 		bne	1b
 		mov	r0, r3
 		mov	pc, lr
+ENDPROC(strrchr)
diff --git a/arch/arm/lib/testchangebit.S b/arch/arm/lib/testchangebit.S
index b25dcd2..5c98dc5 100644
--- a/arch/arm/lib/testchangebit.S
+++ b/arch/arm/lib/testchangebit.S
@@ -16,3 +16,5 @@
 		eor	r0, r0, #0x18		@ big endian byte ordering
 ENTRY(_test_and_change_bit_le)
 	testop	eor, strb
+ENDPROC(_test_and_change_bit_be)
+ENDPROC(_test_and_change_bit_le)
diff --git a/arch/arm/lib/testclearbit.S b/arch/arm/lib/testclearbit.S
index 2dcc4b1..543d709 100644
--- a/arch/arm/lib/testclearbit.S
+++ b/arch/arm/lib/testclearbit.S
@@ -16,3 +16,5 @@
 		eor	r0, r0, #0x18		@ big endian byte ordering
 ENTRY(_test_and_clear_bit_le)
 	testop	bicne, strneb
+ENDPROC(_test_and_clear_bit_be)
+ENDPROC(_test_and_clear_bit_le)
diff --git a/arch/arm/lib/testsetbit.S b/arch/arm/lib/testsetbit.S
index 9011c96..0b3f390 100644
--- a/arch/arm/lib/testsetbit.S
+++ b/arch/arm/lib/testsetbit.S
@@ -16,3 +16,5 @@
 		eor	r0, r0, #0x18		@ big endian byte ordering
 ENTRY(_test_and_set_bit_le)
 	testop	orreq, streqb
+ENDPROC(_test_and_set_bit_be)
+ENDPROC(_test_and_set_bit_le)
diff --git a/arch/arm/lib/uaccess.S b/arch/arm/lib/uaccess.S
index b48bd6d..ffdd274 100644
--- a/arch/arm/lib/uaccess.S
+++ b/arch/arm/lib/uaccess.S
@@ -277,6 +277,7 @@
 		ldrgtb	r3, [r1], #0
 USER(		strgtbt	r3, [r0], #1)			@ May fault
 		b	.Lc2u_finished
+ENDPROC(__copy_to_user)
 
 		.section .fixup,"ax"
 		.align	0
@@ -542,6 +543,7 @@
 USER(		ldrgtbt	r3, [r1], #1)			@ May fault
 		strgtb	r3, [r0], #1
 		b	.Lcfu_finished
+ENDPROC(__copy_from_user)
 
 		.section .fixup,"ax"
 		.align	0
diff --git a/arch/arm/lib/ucmpdi2.S b/arch/arm/lib/ucmpdi2.S
index f76de07..f0df6a91 100644
--- a/arch/arm/lib/ucmpdi2.S
+++ b/arch/arm/lib/ucmpdi2.S
@@ -33,6 +33,8 @@
 	movhi	r0, #2
 	mov	pc, lr
 
+ENDPROC(__ucmpdi2)
+
 #ifdef CONFIG_AEABI
 
 ENTRY(__aeabi_ulcmp)
@@ -44,5 +46,7 @@
 	movhi	r0, #1
 	mov	pc, lr
 
+ENDPROC(__aeabi_ulcmp)
+
 #endif
 
diff --git a/arch/arm/mm/abort-ev7.S b/arch/arm/mm/abort-ev7.S
index eb90bce..2e6dc04 100644
--- a/arch/arm/mm/abort-ev7.S
+++ b/arch/arm/mm/abort-ev7.S
@@ -30,3 +30,4 @@
 	 * New designs should not need to patch up faults.
 	 */
 	mov	pc, lr
+ENDPROC(v7_early_abort)
diff --git a/arch/arm/mm/abort-nommu.S b/arch/arm/mm/abort-nommu.S
index a7cc7f9..625e580 100644
--- a/arch/arm/mm/abort-nommu.S
+++ b/arch/arm/mm/abort-nommu.S
@@ -17,3 +17,4 @@
 	mov	r0, #0				@ clear r0, r1 (no FSR/FAR)
 	mov	r1, #0
 	mov	pc, lr
+ENDPROC(nommu_early_abort)
diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S
index 35ffc4d..d19c2be 100644
--- a/arch/arm/mm/cache-v7.S
+++ b/arch/arm/mm/cache-v7.S
@@ -66,6 +66,7 @@
 	mcr	p15, 2, r10, c0, c0, 0		@ select current cache level in cssr
 	isb
 	mov	pc, lr
+ENDPROC(v7_flush_dcache_all)
 
 /*
  *	v7_flush_cache_all()
@@ -85,6 +86,7 @@
 	mcr	p15, 0, r0, c7, c5, 0		@ I+BTB cache invalidate
 	ldmfd	sp!, {r4-r5, r7, r9-r11, lr}
 	mov	pc, lr
+ENDPROC(v7_flush_kern_cache_all)
 
 /*
  *	v7_flush_cache_all()
@@ -110,6 +112,8 @@
  */
 ENTRY(v7_flush_user_cache_range)
 	mov	pc, lr
+ENDPROC(v7_flush_user_cache_all)
+ENDPROC(v7_flush_user_cache_range)
 
 /*
  *	v7_coherent_kern_range(start,end)
@@ -155,6 +159,8 @@
 	dsb
 	isb
 	mov	pc, lr
+ENDPROC(v7_coherent_kern_range)
+ENDPROC(v7_coherent_user_range)
 
 /*
  *	v7_flush_kern_dcache_page(kaddr)
@@ -174,6 +180,7 @@
 	blo	1b
 	dsb
 	mov	pc, lr
+ENDPROC(v7_flush_kern_dcache_page)
 
 /*
  *	v7_dma_inv_range(start,end)
@@ -202,6 +209,7 @@
 	blo	1b
 	dsb
 	mov	pc, lr
+ENDPROC(v7_dma_inv_range)
 
 /*
  *	v7_dma_clean_range(start,end)
@@ -219,6 +227,7 @@
 	blo	1b
 	dsb
 	mov	pc, lr
+ENDPROC(v7_dma_clean_range)
 
 /*
  *	v7_dma_flush_range(start,end)
@@ -236,6 +245,7 @@
 	blo	1b
 	dsb
 	mov	pc, lr
+ENDPROC(v7_dma_flush_range)
 
 	__INITDATA
 
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index b49f9a4..dff9677 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -25,9 +25,11 @@
 
 ENTRY(cpu_v7_proc_init)
 	mov	pc, lr
+ENDPROC(cpu_v7_proc_init)
 
 ENTRY(cpu_v7_proc_fin)
 	mov	pc, lr
+ENDPROC(cpu_v7_proc_fin)
 
 /*
  *	cpu_v7_reset(loc)
@@ -43,6 +45,7 @@
 	.align	5
 ENTRY(cpu_v7_reset)
 	mov	pc, r0
+ENDPROC(cpu_v7_reset)
 
 /*
  *	cpu_v7_do_idle()
@@ -54,6 +57,7 @@
 ENTRY(cpu_v7_do_idle)
 	.long	0xe320f003			@ ARM V7 WFI instruction
 	mov	pc, lr
+ENDPROC(cpu_v7_do_idle)
 
 ENTRY(cpu_v7_dcache_clean_area)
 #ifndef TLB_CAN_READ_FROM_L1_CACHE
@@ -65,6 +69,7 @@
 	dsb
 #endif
 	mov	pc, lr
+ENDPROC(cpu_v7_dcache_clean_area)
 
 /*
  *	cpu_v7_switch_mm(pgd_phys, tsk)
@@ -89,6 +94,7 @@
 	isb
 #endif
 	mov	pc, lr
+ENDPROC(cpu_v7_switch_mm)
 
 /*
  *	cpu_v7_set_pte_ext(ptep, pte)
@@ -141,6 +147,7 @@
 	mcr	p15, 0, r0, c7, c10, 1		@ flush_pte
 #endif
 	mov	pc, lr
+ENDPROC(cpu_v7_set_pte_ext)
 
 cpu_v7_name:
 	.ascii	"ARMv7 Processor"
@@ -188,6 +195,7 @@
 	bic	r0, r0, r5			@ clear bits them
 	orr	r0, r0, r6			@ set them
 	mov	pc, lr				@ return to head.S:__ret
+ENDPROC(__v7_setup)
 
 	/*
 	 *         V X F   I D LR
diff --git a/arch/arm/mm/tlb-v7.S b/arch/arm/mm/tlb-v7.S
index b56dda8..24ba510 100644
--- a/arch/arm/mm/tlb-v7.S
+++ b/arch/arm/mm/tlb-v7.S
@@ -51,6 +51,7 @@
 	mcr	p15, 0, ip, c7, c5, 6		@ flush BTAC/BTB
 	dsb
 	mov	pc, lr
+ENDPROC(v7wbi_flush_user_tlb_range)
 
 /*
  *	v7wbi_flush_kern_tlb_range(start,end)
@@ -77,6 +78,7 @@
 	dsb
 	isb
 	mov	pc, lr
+ENDPROC(v7wbi_flush_kern_tlb_range)
 
 	.section ".text.init", #alloc, #execinstr
 
diff --git a/arch/arm/vfp/entry.S b/arch/arm/vfp/entry.S
index 806ce26..ba592a9 100644
--- a/arch/arm/vfp/entry.S
+++ b/arch/arm/vfp/entry.S
@@ -21,13 +21,13 @@
 #include <asm/assembler.h>
 #include <asm/vfpmacros.h>
 
-	.globl	do_vfp
-do_vfp:
+ENTRY(do_vfp)
 	enable_irq
  	ldr	r4, .LCvfp
 	ldr	r11, [r10, #TI_CPU]	@ CPU number
 	add	r10, r10, #TI_VFPSTATE	@ r10 = workspace
 	ldr	pc, [r4]		@ call VFP entry point
+ENDPROC(do_vfp)
 
 ENTRY(vfp_null_entry)
 	mov	pc, lr
@@ -40,11 +40,11 @@
 @ failure to the VFP initialisation code.
 
 	__INIT
-	.globl	vfp_testing_entry
-vfp_testing_entry:
+ENTRY(vfp_testing_entry)
 	ldr	r0, VFP_arch_address
 	str	r5, [r0]		@ known non-zero value
 	mov	pc, r9			@ we have handled the fault
+ENDPROC(vfp_testing_entry)
 
 VFP_arch_address:
 	.word	VFP_arch
diff --git a/arch/arm/vfp/vfphw.S b/arch/arm/vfp/vfphw.S
index 353f9e5..a62dcf7 100644
--- a/arch/arm/vfp/vfphw.S
+++ b/arch/arm/vfp/vfphw.S
@@ -68,8 +68,7 @@
 @  r11 = CPU number
 @  lr  = failure return
 
-	.globl	vfp_support_entry
-vfp_support_entry:
+ENTRY(vfp_support_entry)
 	DBGSTR3	"instr %08x pc %08x state %p", r0, r2, r10
 
 	VFPFMRX	r1, FPEXC		@ Is the VFP enabled?
@@ -165,11 +164,10 @@
 					@ code will raise an exception if
 					@ required. If not, the user code will
 					@ retry the faulted instruction
+ENDPROC(vfp_support_entry)
 
 #ifdef CONFIG_SMP
-	.globl	vfp_save_state
-	.type	vfp_save_state, %function
-vfp_save_state:
+ENTRY(vfp_save_state)
 	@ Save the current VFP state
 	@ r0 - save location
 	@ r1 - FPEXC
@@ -182,13 +180,13 @@
 	VFPFMRX	r12, FPINST2, NE	@ FPINST2 if needed (and present)
 	stmia	r0, {r1, r2, r3, r12}	@ save FPEXC, FPSCR, FPINST, FPINST2
 	mov	pc, lr
+ENDPROC(vfp_save_state)
 #endif
 
 last_VFP_context_address:
 	.word	last_VFP_context
 
-	.globl	vfp_get_float
-vfp_get_float:
+ENTRY(vfp_get_float)
 	add	pc, pc, r0, lsl #3
 	mov	r0, r0
 	.irp	dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
@@ -197,9 +195,9 @@
 	mrc	p10, 0, r0, c\dr, c0, 4	@ fmrs	r0, s1
 	mov	pc, lr
 	.endr
+ENDPROC(vfp_get_float)
 
-	.globl	vfp_put_float
-vfp_put_float:
+ENTRY(vfp_put_float)
 	add	pc, pc, r1, lsl #3
 	mov	r0, r0
 	.irp	dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
@@ -208,9 +206,9 @@
 	mcr	p10, 0, r0, c\dr, c0, 4	@ fmsr	r0, s1
 	mov	pc, lr
 	.endr
+ENDPROC(vfp_put_float)
 
-	.globl	vfp_get_double
-vfp_get_double:
+ENTRY(vfp_get_double)
 	add	pc, pc, r0, lsl #3
 	mov	r0, r0
 	.irp	dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
@@ -229,9 +227,9 @@
 	mov	r0, #0
 	mov	r1, #0
 	mov	pc, lr
+ENDPROC(vfp_get_double)
 
-	.globl	vfp_put_double
-vfp_put_double:
+ENTRY(vfp_put_double)
 	add	pc, pc, r2, lsl #3
 	mov	r0, r0
 	.irp	dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
@@ -245,3 +243,4 @@
 	mov	pc, lr
 	.endr
 #endif
+ENDPROC(vfp_put_double)