[MIPS] Eliminate local symbols from the symbol table.

These symbols appear in oprofile output, stacktraces and similar places but
only make the output harder to read.  Many identical symbol names such as
"both_aligned" were also being used in multiple source files, making it
impossible to tell which file was actually meant.  So let's get rid of them.
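
The convention relied on here is the GNU/ELF assembler rule that labels
beginning with ".L" are local to the assembler and are never emitted into
the object file's symbol table.  A minimal sketch with a hypothetical
"count_loop" label (not taken from this patch) illustrating the difference:

	.set	noreorder

	# Before: a normal label.  Even without .globl it is emitted as a
	# local symbol in the object file, so it shows up in oprofile
	# output, stacktraces and "nm vmlinux".
	count_loop:
		addiu	t0, t0, -1
		bnez	t0, count_loop
		 nop				# branch delay slot

	# After: the same loop with a ".L" label.  The assembler resolves
	# it internally and never writes it to the symbol table.
	.Lcount_loop:
		addiu	t0, t0, -1
		bnez	t0, .Lcount_loop
		 nop				# branch delay slot

The patch below does nothing more than this renaming throughout the MIPS
lib/ assembler files.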

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S
index 957a824..8d77841 100644
--- a/arch/mips/lib/csum_partial.S
+++ b/arch/mips/lib/csum_partial.S
@@ -96,13 +96,13 @@
 	move	t7, zero
 
 	sltiu	t8, a1, 0x8
-	bnez	t8, small_csumcpy		/* < 8 bytes to copy */
+	bnez	t8, .Lsmall_csumcpy		/* < 8 bytes to copy */
 	 move	t2, a1
 
 	andi	t7, src, 0x1			/* odd buffer? */
 
-hword_align:
-	beqz	t7, word_align
+.Lhword_align:
+	beqz	t7, .Lword_align
 	 andi	t8, src, 0x2
 
 	lbu	t0, (src)
@@ -114,8 +114,8 @@
 	PTR_ADDU	src, src, 0x1
 	andi	t8, src, 0x2
 
-word_align:
-	beqz	t8, dword_align
+.Lword_align:
+	beqz	t8, .Ldword_align
 	 sltiu	t8, a1, 56
 
 	lhu	t0, (src)
@@ -124,12 +124,12 @@
 	sltiu	t8, a1, 56
 	PTR_ADDU	src, src, 0x2
 
-dword_align:
-	bnez	t8, do_end_words
+.Ldword_align:
+	bnez	t8, .Ldo_end_words
 	 move	t8, a1
 
 	andi	t8, src, 0x4
-	beqz	t8, qword_align
+	beqz	t8, .Lqword_align
 	 andi	t8, src, 0x8
 
 	lw	t0, 0x00(src)
@@ -138,8 +138,8 @@
 	PTR_ADDU	src, src, 0x4
 	andi	t8, src, 0x8
 
-qword_align:
-	beqz	t8, oword_align
+.Lqword_align:
+	beqz	t8, .Loword_align
 	 andi	t8, src, 0x10
 
 #ifdef USE_DOUBLE
@@ -156,8 +156,8 @@
 	PTR_ADDU	src, src, 0x8
 	andi	t8, src, 0x10
 
-oword_align:
-	beqz	t8, begin_movement
+.Loword_align:
+	beqz	t8, .Lbegin_movement
 	 LONG_SRL	t8, a1, 0x7
 
 #ifdef USE_DOUBLE
@@ -172,11 +172,11 @@
 	PTR_ADDU	src, src, 0x10
 	LONG_SRL	t8, a1, 0x7
 
-begin_movement:
+.Lbegin_movement:
 	beqz	t8, 1f
 	 andi	t2, a1, 0x40
 
-move_128bytes:
+.Lmove_128bytes:
 	CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
 	CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
 	CSUM_BIGCHUNK(src, 0x40, sum, t0, t1, t3, t4)
@@ -184,43 +184,43 @@
 	LONG_SUBU	t8, t8, 0x01
 	.set	reorder				/* DADDI_WAR */
 	PTR_ADDU	src, src, 0x80
-	bnez	t8, move_128bytes
+	bnez	t8, .Lmove_128bytes
 	.set	noreorder
 
 1:
 	beqz	t2, 1f
 	 andi	t2, a1, 0x20
 
-move_64bytes:
+.Lmove_64bytes:
 	CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
 	CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
 	PTR_ADDU	src, src, 0x40
 
 1:
-	beqz	t2, do_end_words
+	beqz	t2, .Ldo_end_words
 	 andi	t8, a1, 0x1c
 
-move_32bytes:
+.Lmove_32bytes:
 	CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
 	andi	t8, a1, 0x1c
 	PTR_ADDU	src, src, 0x20
 
-do_end_words:
-	beqz	t8, small_csumcpy
+.Ldo_end_words:
+	beqz	t8, .Lsmall_csumcpy
 	 andi	t2, a1, 0x3
 	LONG_SRL	t8, t8, 0x2
 
-end_words:
+.Lend_words:
 	lw	t0, (src)
 	LONG_SUBU	t8, t8, 0x1
 	ADDC(sum, t0)
 	.set	reorder				/* DADDI_WAR */
 	PTR_ADDU	src, src, 0x4
-	bnez	t8, end_words
+	bnez	t8, .Lend_words
 	.set	noreorder
 
 /* unknown src alignment and < 8 bytes to go  */
-small_csumcpy:
+.Lsmall_csumcpy:
 	move	a1, t2
 
 	andi	t0, a1, 4
@@ -413,48 +413,48 @@
 	 */
 	sltu	t2, len, NBYTES
 	and	t1, dst, ADDRMASK
-	bnez	t2, copy_bytes_checklen
+	bnez	t2, .Lcopy_bytes_checklen
 	 and	t0, src, ADDRMASK
 	andi	odd, dst, 0x1			/* odd buffer? */
-	bnez	t1, dst_unaligned
+	bnez	t1, .Ldst_unaligned
 	 nop
-	bnez	t0, src_unaligned_dst_aligned
+	bnez	t0, .Lsrc_unaligned_dst_aligned
 	/*
 	 * use delay slot for fall-through
 	 * src and dst are aligned; need to compute rem
 	 */
-both_aligned:
+.Lboth_aligned:
 	 SRL	t0, len, LOG_NBYTES+3    # +3 for 8 units/iter
-	beqz	t0, cleanup_both_aligned # len < 8*NBYTES
+	beqz	t0, .Lcleanup_both_aligned # len < 8*NBYTES
 	 nop
 	SUB	len, 8*NBYTES		# subtract here for bgez loop
 	.align	4
 1:
-EXC(	LOAD	t0, UNIT(0)(src),	l_exc)
-EXC(	LOAD	t1, UNIT(1)(src),	l_exc_copy)
-EXC(	LOAD	t2, UNIT(2)(src),	l_exc_copy)
-EXC(	LOAD	t3, UNIT(3)(src),	l_exc_copy)
-EXC(	LOAD	t4, UNIT(4)(src),	l_exc_copy)
-EXC(	LOAD	t5, UNIT(5)(src),	l_exc_copy)
-EXC(	LOAD	t6, UNIT(6)(src),	l_exc_copy)
-EXC(	LOAD	t7, UNIT(7)(src),	l_exc_copy)
+EXC(	LOAD	t0, UNIT(0)(src),	.Ll_exc)
+EXC(	LOAD	t1, UNIT(1)(src),	.Ll_exc_copy)
+EXC(	LOAD	t2, UNIT(2)(src),	.Ll_exc_copy)
+EXC(	LOAD	t3, UNIT(3)(src),	.Ll_exc_copy)
+EXC(	LOAD	t4, UNIT(4)(src),	.Ll_exc_copy)
+EXC(	LOAD	t5, UNIT(5)(src),	.Ll_exc_copy)
+EXC(	LOAD	t6, UNIT(6)(src),	.Ll_exc_copy)
+EXC(	LOAD	t7, UNIT(7)(src),	.Ll_exc_copy)
 	SUB	len, len, 8*NBYTES
 	ADD	src, src, 8*NBYTES
-EXC(	STORE	t0, UNIT(0)(dst),	s_exc)
+EXC(	STORE	t0, UNIT(0)(dst),	.Ls_exc)
 	ADDC(sum, t0)
-EXC(	STORE	t1, UNIT(1)(dst),	s_exc)
+EXC(	STORE	t1, UNIT(1)(dst),	.Ls_exc)
 	ADDC(sum, t1)
-EXC(	STORE	t2, UNIT(2)(dst),	s_exc)
+EXC(	STORE	t2, UNIT(2)(dst),	.Ls_exc)
 	ADDC(sum, t2)
-EXC(	STORE	t3, UNIT(3)(dst),	s_exc)
+EXC(	STORE	t3, UNIT(3)(dst),	.Ls_exc)
 	ADDC(sum, t3)
-EXC(	STORE	t4, UNIT(4)(dst),	s_exc)
+EXC(	STORE	t4, UNIT(4)(dst),	.Ls_exc)
 	ADDC(sum, t4)
-EXC(	STORE	t5, UNIT(5)(dst),	s_exc)
+EXC(	STORE	t5, UNIT(5)(dst),	.Ls_exc)
 	ADDC(sum, t5)
-EXC(	STORE	t6, UNIT(6)(dst),	s_exc)
+EXC(	STORE	t6, UNIT(6)(dst),	.Ls_exc)
 	ADDC(sum, t6)
-EXC(	STORE	t7, UNIT(7)(dst),	s_exc)
+EXC(	STORE	t7, UNIT(7)(dst),	.Ls_exc)
 	ADDC(sum, t7)
 	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, 8*NBYTES
@@ -465,44 +465,44 @@
 	/*
 	 * len == the number of bytes left to copy < 8*NBYTES
 	 */
-cleanup_both_aligned:
+.Lcleanup_both_aligned:
 #define rem t7
-	beqz	len, done
+	beqz	len, .Ldone
 	 sltu	t0, len, 4*NBYTES
-	bnez	t0, less_than_4units
+	bnez	t0, .Lless_than_4units
 	 and	rem, len, (NBYTES-1)	# rem = len % NBYTES
 	/*
 	 * len >= 4*NBYTES
 	 */
-EXC(	LOAD	t0, UNIT(0)(src),	l_exc)
-EXC(	LOAD	t1, UNIT(1)(src),	l_exc_copy)
-EXC(	LOAD	t2, UNIT(2)(src),	l_exc_copy)
-EXC(	LOAD	t3, UNIT(3)(src),	l_exc_copy)
+EXC(	LOAD	t0, UNIT(0)(src),	.Ll_exc)
+EXC(	LOAD	t1, UNIT(1)(src),	.Ll_exc_copy)
+EXC(	LOAD	t2, UNIT(2)(src),	.Ll_exc_copy)
+EXC(	LOAD	t3, UNIT(3)(src),	.Ll_exc_copy)
 	SUB	len, len, 4*NBYTES
 	ADD	src, src, 4*NBYTES
-EXC(	STORE	t0, UNIT(0)(dst),	s_exc)
+EXC(	STORE	t0, UNIT(0)(dst),	.Ls_exc)
 	ADDC(sum, t0)
-EXC(	STORE	t1, UNIT(1)(dst),	s_exc)
+EXC(	STORE	t1, UNIT(1)(dst),	.Ls_exc)
 	ADDC(sum, t1)
-EXC(	STORE	t2, UNIT(2)(dst),	s_exc)
+EXC(	STORE	t2, UNIT(2)(dst),	.Ls_exc)
 	ADDC(sum, t2)
-EXC(	STORE	t3, UNIT(3)(dst),	s_exc)
+EXC(	STORE	t3, UNIT(3)(dst),	.Ls_exc)
 	ADDC(sum, t3)
 	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, 4*NBYTES
-	beqz	len, done
+	beqz	len, .Ldone
 	.set	noreorder
-less_than_4units:
+.Lless_than_4units:
 	/*
 	 * rem = len % NBYTES
 	 */
-	beq	rem, len, copy_bytes
+	beq	rem, len, .Lcopy_bytes
 	 nop
 1:
-EXC(	LOAD	t0, 0(src),		l_exc)
+EXC(	LOAD	t0, 0(src),		.Ll_exc)
 	ADD	src, src, NBYTES
 	SUB	len, len, NBYTES
-EXC(	STORE	t0, 0(dst),		s_exc)
+EXC(	STORE	t0, 0(dst),		.Ls_exc)
 	ADDC(sum, t0)
 	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, NBYTES
@@ -521,20 +521,20 @@
 	 * more instruction-level parallelism.
 	 */
 #define bits t2
-	beqz	len, done
+	beqz	len, .Ldone
 	 ADD	t1, dst, len	# t1 is just past last byte of dst
 	li	bits, 8*NBYTES
 	SLL	rem, len, 3	# rem = number of bits to keep
-EXC(	LOAD	t0, 0(src),		l_exc)
+EXC(	LOAD	t0, 0(src),		.Ll_exc)
 	SUB	bits, bits, rem	# bits = number of bits to discard
 	SHIFT_DISCARD t0, t0, bits
-EXC(	STREST	t0, -1(t1),		s_exc)
+EXC(	STREST	t0, -1(t1),		.Ls_exc)
 	SHIFT_DISCARD_REVERT t0, t0, bits
 	.set reorder
 	ADDC(sum, t0)
-	b	done
+	b	.Ldone
 	.set noreorder
-dst_unaligned:
+.Ldst_unaligned:
 	/*
 	 * dst is unaligned
 	 * t0 = src & ADDRMASK
@@ -545,25 +545,25 @@
 	 * Set match = (src and dst have same alignment)
 	 */
 #define match rem
-EXC(	LDFIRST	t3, FIRST(0)(src),	l_exc)
+EXC(	LDFIRST	t3, FIRST(0)(src),	.Ll_exc)
 	ADD	t2, zero, NBYTES
-EXC(	LDREST	t3, REST(0)(src),	l_exc_copy)
+EXC(	LDREST	t3, REST(0)(src),	.Ll_exc_copy)
 	SUB	t2, t2, t1	# t2 = number of bytes copied
 	xor	match, t0, t1
-EXC(	STFIRST t3, FIRST(0)(dst),	s_exc)
+EXC(	STFIRST t3, FIRST(0)(dst),	.Ls_exc)
 	SLL	t4, t1, 3		# t4 = number of bits to discard
 	SHIFT_DISCARD t3, t3, t4
 	/* no SHIFT_DISCARD_REVERT to handle odd buffer properly */
 	ADDC(sum, t3)
-	beq	len, t2, done
+	beq	len, t2, .Ldone
 	 SUB	len, len, t2
 	ADD	dst, dst, t2
-	beqz	match, both_aligned
+	beqz	match, .Lboth_aligned
 	 ADD	src, src, t2
 
-src_unaligned_dst_aligned:
+.Lsrc_unaligned_dst_aligned:
 	SRL	t0, len, LOG_NBYTES+2    # +2 for 4 units/iter
-	beqz	t0, cleanup_src_unaligned
+	beqz	t0, .Lcleanup_src_unaligned
 	 and	rem, len, (4*NBYTES-1)   # rem = len % 4*NBYTES
 1:
 /*
@@ -572,53 +572,53 @@
  * It's OK to load FIRST(N+1) before REST(N) because the two addresses
  * are to the same unit (unless src is aligned, but it's not).
  */
-EXC(	LDFIRST	t0, FIRST(0)(src),	l_exc)
-EXC(	LDFIRST	t1, FIRST(1)(src),	l_exc_copy)
+EXC(	LDFIRST	t0, FIRST(0)(src),	.Ll_exc)
+EXC(	LDFIRST	t1, FIRST(1)(src),	.Ll_exc_copy)
 	SUB     len, len, 4*NBYTES
-EXC(	LDREST	t0, REST(0)(src),	l_exc_copy)
-EXC(	LDREST	t1, REST(1)(src),	l_exc_copy)
-EXC(	LDFIRST	t2, FIRST(2)(src),	l_exc_copy)
-EXC(	LDFIRST	t3, FIRST(3)(src),	l_exc_copy)
-EXC(	LDREST	t2, REST(2)(src),	l_exc_copy)
-EXC(	LDREST	t3, REST(3)(src),	l_exc_copy)
+EXC(	LDREST	t0, REST(0)(src),	.Ll_exc_copy)
+EXC(	LDREST	t1, REST(1)(src),	.Ll_exc_copy)
+EXC(	LDFIRST	t2, FIRST(2)(src),	.Ll_exc_copy)
+EXC(	LDFIRST	t3, FIRST(3)(src),	.Ll_exc_copy)
+EXC(	LDREST	t2, REST(2)(src),	.Ll_exc_copy)
+EXC(	LDREST	t3, REST(3)(src),	.Ll_exc_copy)
 	ADD	src, src, 4*NBYTES
 #ifdef CONFIG_CPU_SB1
 	nop				# improves slotting
 #endif
-EXC(	STORE	t0, UNIT(0)(dst),	s_exc)
+EXC(	STORE	t0, UNIT(0)(dst),	.Ls_exc)
 	ADDC(sum, t0)
-EXC(	STORE	t1, UNIT(1)(dst),	s_exc)
+EXC(	STORE	t1, UNIT(1)(dst),	.Ls_exc)
 	ADDC(sum, t1)
-EXC(	STORE	t2, UNIT(2)(dst),	s_exc)
+EXC(	STORE	t2, UNIT(2)(dst),	.Ls_exc)
 	ADDC(sum, t2)
-EXC(	STORE	t3, UNIT(3)(dst),	s_exc)
+EXC(	STORE	t3, UNIT(3)(dst),	.Ls_exc)
 	ADDC(sum, t3)
 	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, 4*NBYTES
 	bne	len, rem, 1b
 	.set	noreorder
 
-cleanup_src_unaligned:
-	beqz	len, done
+.Lcleanup_src_unaligned:
+	beqz	len, .Ldone
 	 and	rem, len, NBYTES-1  # rem = len % NBYTES
-	beq	rem, len, copy_bytes
+	beq	rem, len, .Lcopy_bytes
 	 nop
 1:
-EXC(	LDFIRST t0, FIRST(0)(src),	l_exc)
-EXC(	LDREST	t0, REST(0)(src),	l_exc_copy)
+EXC(	LDFIRST t0, FIRST(0)(src),	.Ll_exc)
+EXC(	LDREST	t0, REST(0)(src),	.Ll_exc_copy)
 	ADD	src, src, NBYTES
 	SUB	len, len, NBYTES
-EXC(	STORE	t0, 0(dst),		s_exc)
+EXC(	STORE	t0, 0(dst),		.Ls_exc)
 	ADDC(sum, t0)
 	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, NBYTES
 	bne	len, rem, 1b
 	.set	noreorder
 
-copy_bytes_checklen:
-	beqz	len, done
+.Lcopy_bytes_checklen:
+	beqz	len, .Ldone
 	 nop
-copy_bytes:
+.Lcopy_bytes:
 	/* 0 < len < NBYTES  */
 #ifdef CONFIG_CPU_LITTLE_ENDIAN
 #define SHIFT_START 0
@@ -629,14 +629,14 @@
 #endif
 	move	t2, zero	# partial word
 	li	t3, SHIFT_START	# shift
-/* use l_exc_copy here to return correct sum on fault */
+/* use .Ll_exc_copy here to return correct sum on fault */
 #define COPY_BYTE(N)			\
-EXC(	lbu	t0, N(src), l_exc_copy);	\
+EXC(	lbu	t0, N(src), .Ll_exc_copy);	\
 	SUB	len, len, 1;		\
-EXC(	sb	t0, N(dst), s_exc);	\
+EXC(	sb	t0, N(dst), .Ls_exc);	\
 	SLLV	t0, t0, t3;		\
 	addu	t3, SHIFT_INC;		\
-	beqz	len, copy_bytes_done;	\
+	beqz	len, .Lcopy_bytes_done;	\
 	 or	t2, t0
 
 	COPY_BYTE(0)
@@ -647,14 +647,14 @@
 	COPY_BYTE(4)
 	COPY_BYTE(5)
 #endif
-EXC(	lbu	t0, NBYTES-2(src), l_exc_copy)
+EXC(	lbu	t0, NBYTES-2(src), .Ll_exc_copy)
 	SUB	len, len, 1
-EXC(	sb	t0, NBYTES-2(dst), s_exc)
+EXC(	sb	t0, NBYTES-2(dst), .Ls_exc)
 	SLLV	t0, t0, t3
 	or	t2, t0
-copy_bytes_done:
+.Lcopy_bytes_done:
 	ADDC(sum, t2)
-done:
+.Ldone:
 	/* fold checksum */
 	.set	push
 	.set	noat
@@ -685,7 +685,7 @@
 	jr	ra
 	.set noreorder
 
-l_exc_copy:
+.Ll_exc_copy:
 	/*
 	 * Copy bytes from src until faulting load address (or until a
 	 * lb faults)
@@ -700,7 +700,7 @@
 	 li	t2, SHIFT_START
 	LOAD	t0, THREAD_BUADDR(t0)
 1:
-EXC(	lbu	t1, 0(src),	l_exc)
+EXC(	lbu	t1, 0(src),	.Ll_exc)
 	ADD	src, src, 1
 	sb	t1, 0(dst)	# can't fault -- we're copy_from_user
 	SLLV	t1, t1, t2
@@ -710,7 +710,7 @@
 	ADD	dst, dst, 1
 	bne	src, t0, 1b
 	.set	noreorder
-l_exc:
+.Ll_exc:
 	LOAD	t0, TI_TASK($28)
 	 nop
 	LOAD	t0, THREAD_BUADDR(t0)	# t0 is just past last good address
@@ -729,7 +729,7 @@
 	 */
 	.set	reorder				/* DADDI_WAR */
 	SUB	src, len, 1
-	beqz	len, done
+	beqz	len, .Ldone
 	.set	noreorder
 1:	sb	zero, 0(dst)
 	ADD	dst, dst, 1
@@ -744,10 +744,10 @@
 	 SUB	src, src, v1
 #endif
 	li	v1, -EFAULT
-	b	done
+	b	.Ldone
 	 sw	v1, (errptr)
 
-s_exc:
+.Ls_exc:
 	li	v0, -1 /* invalid checksum */
 	li	v1, -EFAULT
 	jr	ra
diff --git a/arch/mips/lib/memcpy-inatomic.S b/arch/mips/lib/memcpy-inatomic.S
index d1b08f5..736d0fb 100644
--- a/arch/mips/lib/memcpy-inatomic.S
+++ b/arch/mips/lib/memcpy-inatomic.S
@@ -209,36 +209,36 @@
 	and	t1, dst, ADDRMASK
 	PREF(	0, 1*32(src) )
 	PREF(	1, 1*32(dst) )
-	bnez	t2, copy_bytes_checklen
+	bnez	t2, .Lcopy_bytes_checklen
 	 and	t0, src, ADDRMASK
 	PREF(	0, 2*32(src) )
 	PREF(	1, 2*32(dst) )
-	bnez	t1, dst_unaligned
+	bnez	t1, .Ldst_unaligned
 	 nop
-	bnez	t0, src_unaligned_dst_aligned
+	bnez	t0, .Lsrc_unaligned_dst_aligned
 	/*
 	 * use delay slot for fall-through
 	 * src and dst are aligned; need to compute rem
 	 */
-both_aligned:
-	 SRL	t0, len, LOG_NBYTES+3    # +3 for 8 units/iter
-	beqz	t0, cleanup_both_aligned # len < 8*NBYTES
-	 and	rem, len, (8*NBYTES-1)	 # rem = len % (8*NBYTES)
+.Lboth_aligned:
+	 SRL	t0, len, LOG_NBYTES+3    	# +3 for 8 units/iter
+	beqz	t0, .Lcleanup_both_aligned	# len < 8*NBYTES
+	 and	rem, len, (8*NBYTES-1)	 	# rem = len % (8*NBYTES)
 	PREF(	0, 3*32(src) )
 	PREF(	1, 3*32(dst) )
 	.align	4
 1:
-EXC(	LOAD	t0, UNIT(0)(src),	l_exc)
-EXC(	LOAD	t1, UNIT(1)(src),	l_exc_copy)
-EXC(	LOAD	t2, UNIT(2)(src),	l_exc_copy)
-EXC(	LOAD	t3, UNIT(3)(src),	l_exc_copy)
+EXC(	LOAD	t0, UNIT(0)(src),	.Ll_exc)
+EXC(	LOAD	t1, UNIT(1)(src),	.Ll_exc_copy)
+EXC(	LOAD	t2, UNIT(2)(src),	.Ll_exc_copy)
+EXC(	LOAD	t3, UNIT(3)(src),	.Ll_exc_copy)
 	SUB	len, len, 8*NBYTES
-EXC(	LOAD	t4, UNIT(4)(src),	l_exc_copy)
-EXC(	LOAD	t7, UNIT(5)(src),	l_exc_copy)
+EXC(	LOAD	t4, UNIT(4)(src),	.Ll_exc_copy)
+EXC(	LOAD	t7, UNIT(5)(src),	.Ll_exc_copy)
 	STORE	t0, UNIT(0)(dst)
 	STORE	t1, UNIT(1)(dst)
-EXC(	LOAD	t0, UNIT(6)(src),	l_exc_copy)
-EXC(	LOAD	t1, UNIT(7)(src),	l_exc_copy)
+EXC(	LOAD	t0, UNIT(6)(src),	.Ll_exc_copy)
+EXC(	LOAD	t1, UNIT(7)(src),	.Ll_exc_copy)
 	ADD	src, src, 8*NBYTES
 	ADD	dst, dst, 8*NBYTES
 	STORE	t2, UNIT(-6)(dst)
@@ -255,18 +255,18 @@
 	/*
 	 * len == rem == the number of bytes left to copy < 8*NBYTES
 	 */
-cleanup_both_aligned:
-	beqz	len, done
+.Lcleanup_both_aligned:
+	beqz	len, .Ldone
 	 sltu	t0, len, 4*NBYTES
-	bnez	t0, less_than_4units
+	bnez	t0, .Lless_than_4units
 	 and	rem, len, (NBYTES-1)	# rem = len % NBYTES
 	/*
 	 * len >= 4*NBYTES
 	 */
-EXC(	LOAD	t0, UNIT(0)(src),	l_exc)
-EXC(	LOAD	t1, UNIT(1)(src),	l_exc_copy)
-EXC(	LOAD	t2, UNIT(2)(src),	l_exc_copy)
-EXC(	LOAD	t3, UNIT(3)(src),	l_exc_copy)
+EXC(	LOAD	t0, UNIT(0)(src),	.Ll_exc)
+EXC(	LOAD	t1, UNIT(1)(src),	.Ll_exc_copy)
+EXC(	LOAD	t2, UNIT(2)(src),	.Ll_exc_copy)
+EXC(	LOAD	t3, UNIT(3)(src),	.Ll_exc_copy)
 	SUB	len, len, 4*NBYTES
 	ADD	src, src, 4*NBYTES
 	STORE	t0, UNIT(0)(dst)
@@ -275,16 +275,16 @@
 	STORE	t3, UNIT(3)(dst)
 	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, 4*NBYTES
-	beqz	len, done
+	beqz	len, .Ldone
 	.set	noreorder
-less_than_4units:
+.Lless_than_4units:
 	/*
 	 * rem = len % NBYTES
 	 */
-	beq	rem, len, copy_bytes
+	beq	rem, len, .Lcopy_bytes
 	 nop
 1:
-EXC(	LOAD	t0, 0(src),		l_exc)
+EXC(	LOAD	t0, 0(src),		.Ll_exc)
 	ADD	src, src, NBYTES
 	SUB	len, len, NBYTES
 	STORE	t0, 0(dst)
@@ -305,17 +305,17 @@
 	 * more instruction-level parallelism.
 	 */
 #define bits t2
-	beqz	len, done
+	beqz	len, .Ldone
 	 ADD	t1, dst, len	# t1 is just past last byte of dst
 	li	bits, 8*NBYTES
 	SLL	rem, len, 3	# rem = number of bits to keep
-EXC(	LOAD	t0, 0(src),		l_exc)
+EXC(	LOAD	t0, 0(src),		.Ll_exc)
 	SUB	bits, bits, rem	# bits = number of bits to discard
 	SHIFT_DISCARD t0, t0, bits
 	STREST	t0, -1(t1)
 	jr	ra
 	 move	len, zero
-dst_unaligned:
+.Ldst_unaligned:
 	/*
 	 * dst is unaligned
 	 * t0 = src & ADDRMASK
@@ -326,22 +326,22 @@
 	 * Set match = (src and dst have same alignment)
 	 */
 #define match rem
-EXC(	LDFIRST	t3, FIRST(0)(src),	l_exc)
+EXC(	LDFIRST	t3, FIRST(0)(src),	.Ll_exc)
 	ADD	t2, zero, NBYTES
-EXC(	LDREST	t3, REST(0)(src),	l_exc_copy)
+EXC(	LDREST	t3, REST(0)(src),	.Ll_exc_copy)
 	SUB	t2, t2, t1	# t2 = number of bytes copied
 	xor	match, t0, t1
 	STFIRST t3, FIRST(0)(dst)
-	beq	len, t2, done
+	beq	len, t2, .Ldone
 	 SUB	len, len, t2
 	ADD	dst, dst, t2
-	beqz	match, both_aligned
+	beqz	match, .Lboth_aligned
 	 ADD	src, src, t2
 
-src_unaligned_dst_aligned:
+.Lsrc_unaligned_dst_aligned:
 	SRL	t0, len, LOG_NBYTES+2    # +2 for 4 units/iter
 	PREF(	0, 3*32(src) )
-	beqz	t0, cleanup_src_unaligned
+	beqz	t0, .Lcleanup_src_unaligned
 	 and	rem, len, (4*NBYTES-1)   # rem = len % 4*NBYTES
 	PREF(	1, 3*32(dst) )
 1:
@@ -351,15 +351,15 @@
  * It's OK to load FIRST(N+1) before REST(N) because the two addresses
  * are to the same unit (unless src is aligned, but it's not).
  */
-EXC(	LDFIRST	t0, FIRST(0)(src),	l_exc)
-EXC(	LDFIRST	t1, FIRST(1)(src),	l_exc_copy)
+EXC(	LDFIRST	t0, FIRST(0)(src),	.Ll_exc)
+EXC(	LDFIRST	t1, FIRST(1)(src),	.Ll_exc_copy)
 	SUB     len, len, 4*NBYTES
-EXC(	LDREST	t0, REST(0)(src),	l_exc_copy)
-EXC(	LDREST	t1, REST(1)(src),	l_exc_copy)
-EXC(	LDFIRST	t2, FIRST(2)(src),	l_exc_copy)
-EXC(	LDFIRST	t3, FIRST(3)(src),	l_exc_copy)
-EXC(	LDREST	t2, REST(2)(src),	l_exc_copy)
-EXC(	LDREST	t3, REST(3)(src),	l_exc_copy)
+EXC(	LDREST	t0, REST(0)(src),	.Ll_exc_copy)
+EXC(	LDREST	t1, REST(1)(src),	.Ll_exc_copy)
+EXC(	LDFIRST	t2, FIRST(2)(src),	.Ll_exc_copy)
+EXC(	LDFIRST	t3, FIRST(3)(src),	.Ll_exc_copy)
+EXC(	LDREST	t2, REST(2)(src),	.Ll_exc_copy)
+EXC(	LDREST	t3, REST(3)(src),	.Ll_exc_copy)
 	PREF(	0, 9*32(src) )		# 0 is PREF_LOAD  (not streamed)
 	ADD	src, src, 4*NBYTES
 #ifdef CONFIG_CPU_SB1
@@ -375,14 +375,14 @@
 	bne	len, rem, 1b
 	.set	noreorder
 
-cleanup_src_unaligned:
-	beqz	len, done
+.Lcleanup_src_unaligned:
+	beqz	len, .Ldone
 	 and	rem, len, NBYTES-1  # rem = len % NBYTES
-	beq	rem, len, copy_bytes
+	beq	rem, len, .Lcopy_bytes
 	 nop
 1:
-EXC(	LDFIRST t0, FIRST(0)(src),	l_exc)
-EXC(	LDREST	t0, REST(0)(src),	l_exc_copy)
+EXC(	LDFIRST t0, FIRST(0)(src),	.Ll_exc)
+EXC(	LDREST	t0, REST(0)(src),	.Ll_exc_copy)
 	ADD	src, src, NBYTES
 	SUB	len, len, NBYTES
 	STORE	t0, 0(dst)
@@ -391,15 +391,15 @@
 	bne	len, rem, 1b
 	.set	noreorder
 
-copy_bytes_checklen:
-	beqz	len, done
+.Lcopy_bytes_checklen:
+	beqz	len, .Ldone
 	 nop
-copy_bytes:
+.Lcopy_bytes:
 	/* 0 < len < NBYTES  */
 #define COPY_BYTE(N)			\
-EXC(	lb	t0, N(src), l_exc);	\
+EXC(	lb	t0, N(src), .Ll_exc);	\
 	SUB	len, len, 1;		\
-	beqz	len, done;		\
+	beqz	len, .Ldone;		\
 	 sb	t0, N(dst)
 
 	COPY_BYTE(0)
@@ -410,16 +410,16 @@
 	COPY_BYTE(4)
 	COPY_BYTE(5)
 #endif
-EXC(	lb	t0, NBYTES-2(src), l_exc)
+EXC(	lb	t0, NBYTES-2(src), .Ll_exc)
 	SUB	len, len, 1
 	jr	ra
 	 sb	t0, NBYTES-2(dst)
-done:
+.Ldone:
 	jr	ra
 	 nop
 	END(__copy_user_inatomic)
 
-l_exc_copy:
+.Ll_exc_copy:
 	/*
 	 * Copy bytes from src until faulting load address (or until a
 	 * lb faults)
@@ -434,14 +434,14 @@
 	 nop
 	LOAD	t0, THREAD_BUADDR(t0)
 1:
-EXC(	lb	t1, 0(src),	l_exc)
+EXC(	lb	t1, 0(src),	.Ll_exc)
 	ADD	src, src, 1
 	sb	t1, 0(dst)	# can't fault -- we're copy_from_user
 	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, 1
 	bne	src, t0, 1b
 	.set	noreorder
-l_exc:
+.Ll_exc:
 	LOAD	t0, TI_TASK($28)
 	 nop
 	LOAD	t0, THREAD_BUADDR(t0)	# t0 is just past last good address
diff --git a/arch/mips/lib/memcpy.S b/arch/mips/lib/memcpy.S
index 01e450b..c06cccf 100644
--- a/arch/mips/lib/memcpy.S
+++ b/arch/mips/lib/memcpy.S
@@ -191,7 +191,7 @@
 	.align	5
 LEAF(memcpy)					/* a0=dst a1=src a2=len */
 	move	v0, dst				/* return value */
-__memcpy:
+.L__memcpy:
 FEXPORT(__copy_user)
 	/*
 	 * Note: dst & src may be unaligned, len may be 0
@@ -213,45 +213,45 @@
 	and	t1, dst, ADDRMASK
 	PREF(	0, 1*32(src) )
 	PREF(	1, 1*32(dst) )
-	bnez	t2, copy_bytes_checklen
+	bnez	t2, .Lcopy_bytes_checklen
 	 and	t0, src, ADDRMASK
 	PREF(	0, 2*32(src) )
 	PREF(	1, 2*32(dst) )
-	bnez	t1, dst_unaligned
+	bnez	t1, .Ldst_unaligned
 	 nop
-	bnez	t0, src_unaligned_dst_aligned
+	bnez	t0, .Lsrc_unaligned_dst_aligned
 	/*
 	 * use delay slot for fall-through
 	 * src and dst are aligned; need to compute rem
 	 */
-both_aligned:
+.Lboth_aligned:
 	 SRL	t0, len, LOG_NBYTES+3    # +3 for 8 units/iter
-	beqz	t0, cleanup_both_aligned # len < 8*NBYTES
+	beqz	t0, .Lcleanup_both_aligned # len < 8*NBYTES
 	 and	rem, len, (8*NBYTES-1)	 # rem = len % (8*NBYTES)
 	PREF(	0, 3*32(src) )
 	PREF(	1, 3*32(dst) )
 	.align	4
 1:
 	R10KCBARRIER(0(ra))
-EXC(	LOAD	t0, UNIT(0)(src),	l_exc)
-EXC(	LOAD	t1, UNIT(1)(src),	l_exc_copy)
-EXC(	LOAD	t2, UNIT(2)(src),	l_exc_copy)
-EXC(	LOAD	t3, UNIT(3)(src),	l_exc_copy)
+EXC(	LOAD	t0, UNIT(0)(src),	.Ll_exc)
+EXC(	LOAD	t1, UNIT(1)(src),	.Ll_exc_copy)
+EXC(	LOAD	t2, UNIT(2)(src),	.Ll_exc_copy)
+EXC(	LOAD	t3, UNIT(3)(src),	.Ll_exc_copy)
 	SUB	len, len, 8*NBYTES
-EXC(	LOAD	t4, UNIT(4)(src),	l_exc_copy)
-EXC(	LOAD	t7, UNIT(5)(src),	l_exc_copy)
-EXC(	STORE	t0, UNIT(0)(dst),	s_exc_p8u)
-EXC(	STORE	t1, UNIT(1)(dst),	s_exc_p7u)
-EXC(	LOAD	t0, UNIT(6)(src),	l_exc_copy)
-EXC(	LOAD	t1, UNIT(7)(src),	l_exc_copy)
+EXC(	LOAD	t4, UNIT(4)(src),	.Ll_exc_copy)
+EXC(	LOAD	t7, UNIT(5)(src),	.Ll_exc_copy)
+EXC(	STORE	t0, UNIT(0)(dst),	.Ls_exc_p8u)
+EXC(	STORE	t1, UNIT(1)(dst),	.Ls_exc_p7u)
+EXC(	LOAD	t0, UNIT(6)(src),	.Ll_exc_copy)
+EXC(	LOAD	t1, UNIT(7)(src),	.Ll_exc_copy)
 	ADD	src, src, 8*NBYTES
 	ADD	dst, dst, 8*NBYTES
-EXC(	STORE	t2, UNIT(-6)(dst),	s_exc_p6u)
-EXC(	STORE	t3, UNIT(-5)(dst),	s_exc_p5u)
-EXC(	STORE	t4, UNIT(-4)(dst),	s_exc_p4u)
-EXC(	STORE	t7, UNIT(-3)(dst),	s_exc_p3u)
-EXC(	STORE	t0, UNIT(-2)(dst),	s_exc_p2u)
-EXC(	STORE	t1, UNIT(-1)(dst),	s_exc_p1u)
+EXC(	STORE	t2, UNIT(-6)(dst),	.Ls_exc_p6u)
+EXC(	STORE	t3, UNIT(-5)(dst),	.Ls_exc_p5u)
+EXC(	STORE	t4, UNIT(-4)(dst),	.Ls_exc_p4u)
+EXC(	STORE	t7, UNIT(-3)(dst),	.Ls_exc_p3u)
+EXC(	STORE	t0, UNIT(-2)(dst),	.Ls_exc_p2u)
+EXC(	STORE	t1, UNIT(-1)(dst),	.Ls_exc_p1u)
 	PREF(	0, 8*32(src) )
 	PREF(	1, 8*32(dst) )
 	bne	len, rem, 1b
@@ -260,41 +260,41 @@
 	/*
 	 * len == rem == the number of bytes left to copy < 8*NBYTES
 	 */
-cleanup_both_aligned:
-	beqz	len, done
+.Lcleanup_both_aligned:
+	beqz	len, .Ldone
 	 sltu	t0, len, 4*NBYTES
-	bnez	t0, less_than_4units
+	bnez	t0, .Lless_than_4units
 	 and	rem, len, (NBYTES-1)	# rem = len % NBYTES
 	/*
 	 * len >= 4*NBYTES
 	 */
-EXC(	LOAD	t0, UNIT(0)(src),	l_exc)
-EXC(	LOAD	t1, UNIT(1)(src),	l_exc_copy)
-EXC(	LOAD	t2, UNIT(2)(src),	l_exc_copy)
-EXC(	LOAD	t3, UNIT(3)(src),	l_exc_copy)
+EXC(	LOAD	t0, UNIT(0)(src),	.Ll_exc)
+EXC(	LOAD	t1, UNIT(1)(src),	.Ll_exc_copy)
+EXC(	LOAD	t2, UNIT(2)(src),	.Ll_exc_copy)
+EXC(	LOAD	t3, UNIT(3)(src),	.Ll_exc_copy)
 	SUB	len, len, 4*NBYTES
 	ADD	src, src, 4*NBYTES
 	R10KCBARRIER(0(ra))
-EXC(	STORE	t0, UNIT(0)(dst),	s_exc_p4u)
-EXC(	STORE	t1, UNIT(1)(dst),	s_exc_p3u)
-EXC(	STORE	t2, UNIT(2)(dst),	s_exc_p2u)
-EXC(	STORE	t3, UNIT(3)(dst),	s_exc_p1u)
+EXC(	STORE	t0, UNIT(0)(dst),	.Ls_exc_p4u)
+EXC(	STORE	t1, UNIT(1)(dst),	.Ls_exc_p3u)
+EXC(	STORE	t2, UNIT(2)(dst),	.Ls_exc_p2u)
+EXC(	STORE	t3, UNIT(3)(dst),	.Ls_exc_p1u)
 	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, 4*NBYTES
-	beqz	len, done
+	beqz	len, .Ldone
 	.set	noreorder
-less_than_4units:
+.Lless_than_4units:
 	/*
 	 * rem = len % NBYTES
 	 */
-	beq	rem, len, copy_bytes
+	beq	rem, len, .Lcopy_bytes
 	 nop
 1:
 	R10KCBARRIER(0(ra))
-EXC(	LOAD	t0, 0(src),		l_exc)
+EXC(	LOAD	t0, 0(src),		.Ll_exc)
 	ADD	src, src, NBYTES
 	SUB	len, len, NBYTES
-EXC(	STORE	t0, 0(dst),		s_exc_p1u)
+EXC(	STORE	t0, 0(dst),		.Ls_exc_p1u)
 	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, NBYTES
 	bne	rem, len, 1b
@@ -312,17 +312,17 @@
 	 * more instruction-level parallelism.
 	 */
 #define bits t2
-	beqz	len, done
+	beqz	len, .Ldone
 	 ADD	t1, dst, len	# t1 is just past last byte of dst
 	li	bits, 8*NBYTES
 	SLL	rem, len, 3	# rem = number of bits to keep
-EXC(	LOAD	t0, 0(src),		l_exc)
+EXC(	LOAD	t0, 0(src),		.Ll_exc)
 	SUB	bits, bits, rem	# bits = number of bits to discard
 	SHIFT_DISCARD t0, t0, bits
-EXC(	STREST	t0, -1(t1),		s_exc)
+EXC(	STREST	t0, -1(t1),		.Ls_exc)
 	jr	ra
 	 move	len, zero
-dst_unaligned:
+.Ldst_unaligned:
 	/*
 	 * dst is unaligned
 	 * t0 = src & ADDRMASK
@@ -333,23 +333,23 @@
 	 * Set match = (src and dst have same alignment)
 	 */
 #define match rem
-EXC(	LDFIRST	t3, FIRST(0)(src),	l_exc)
+EXC(	LDFIRST	t3, FIRST(0)(src),	.Ll_exc)
 	ADD	t2, zero, NBYTES
-EXC(	LDREST	t3, REST(0)(src),	l_exc_copy)
+EXC(	LDREST	t3, REST(0)(src),	.Ll_exc_copy)
 	SUB	t2, t2, t1	# t2 = number of bytes copied
 	xor	match, t0, t1
 	R10KCBARRIER(0(ra))
-EXC(	STFIRST t3, FIRST(0)(dst),	s_exc)
-	beq	len, t2, done
+EXC(	STFIRST t3, FIRST(0)(dst),	.Ls_exc)
+	beq	len, t2, .Ldone
 	 SUB	len, len, t2
 	ADD	dst, dst, t2
-	beqz	match, both_aligned
+	beqz	match, .Lboth_aligned
 	 ADD	src, src, t2
 
-src_unaligned_dst_aligned:
+.Lsrc_unaligned_dst_aligned:
 	SRL	t0, len, LOG_NBYTES+2    # +2 for 4 units/iter
 	PREF(	0, 3*32(src) )
-	beqz	t0, cleanup_src_unaligned
+	beqz	t0, .Lcleanup_src_unaligned
 	 and	rem, len, (4*NBYTES-1)   # rem = len % 4*NBYTES
 	PREF(	1, 3*32(dst) )
 1:
@@ -360,58 +360,58 @@
  * are to the same unit (unless src is aligned, but it's not).
  */
 	R10KCBARRIER(0(ra))
-EXC(	LDFIRST	t0, FIRST(0)(src),	l_exc)
-EXC(	LDFIRST	t1, FIRST(1)(src),	l_exc_copy)
+EXC(	LDFIRST	t0, FIRST(0)(src),	.Ll_exc)
+EXC(	LDFIRST	t1, FIRST(1)(src),	.Ll_exc_copy)
 	SUB     len, len, 4*NBYTES
-EXC(	LDREST	t0, REST(0)(src),	l_exc_copy)
-EXC(	LDREST	t1, REST(1)(src),	l_exc_copy)
-EXC(	LDFIRST	t2, FIRST(2)(src),	l_exc_copy)
-EXC(	LDFIRST	t3, FIRST(3)(src),	l_exc_copy)
-EXC(	LDREST	t2, REST(2)(src),	l_exc_copy)
-EXC(	LDREST	t3, REST(3)(src),	l_exc_copy)
+EXC(	LDREST	t0, REST(0)(src),	.Ll_exc_copy)
+EXC(	LDREST	t1, REST(1)(src),	.Ll_exc_copy)
+EXC(	LDFIRST	t2, FIRST(2)(src),	.Ll_exc_copy)
+EXC(	LDFIRST	t3, FIRST(3)(src),	.Ll_exc_copy)
+EXC(	LDREST	t2, REST(2)(src),	.Ll_exc_copy)
+EXC(	LDREST	t3, REST(3)(src),	.Ll_exc_copy)
 	PREF(	0, 9*32(src) )		# 0 is PREF_LOAD  (not streamed)
 	ADD	src, src, 4*NBYTES
 #ifdef CONFIG_CPU_SB1
 	nop				# improves slotting
 #endif
-EXC(	STORE	t0, UNIT(0)(dst),	s_exc_p4u)
-EXC(	STORE	t1, UNIT(1)(dst),	s_exc_p3u)
-EXC(	STORE	t2, UNIT(2)(dst),	s_exc_p2u)
-EXC(	STORE	t3, UNIT(3)(dst),	s_exc_p1u)
+EXC(	STORE	t0, UNIT(0)(dst),	.Ls_exc_p4u)
+EXC(	STORE	t1, UNIT(1)(dst),	.Ls_exc_p3u)
+EXC(	STORE	t2, UNIT(2)(dst),	.Ls_exc_p2u)
+EXC(	STORE	t3, UNIT(3)(dst),	.Ls_exc_p1u)
 	PREF(	1, 9*32(dst) )     	# 1 is PREF_STORE (not streamed)
 	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, 4*NBYTES
 	bne	len, rem, 1b
 	.set	noreorder
 
-cleanup_src_unaligned:
-	beqz	len, done
+.Lcleanup_src_unaligned:
+	beqz	len, .Ldone
 	 and	rem, len, NBYTES-1  # rem = len % NBYTES
-	beq	rem, len, copy_bytes
+	beq	rem, len, .Lcopy_bytes
 	 nop
 1:
 	R10KCBARRIER(0(ra))
-EXC(	LDFIRST t0, FIRST(0)(src),	l_exc)
-EXC(	LDREST	t0, REST(0)(src),	l_exc_copy)
+EXC(	LDFIRST t0, FIRST(0)(src),	.Ll_exc)
+EXC(	LDREST	t0, REST(0)(src),	.Ll_exc_copy)
 	ADD	src, src, NBYTES
 	SUB	len, len, NBYTES
-EXC(	STORE	t0, 0(dst),		s_exc_p1u)
+EXC(	STORE	t0, 0(dst),		.Ls_exc_p1u)
 	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, NBYTES
 	bne	len, rem, 1b
 	.set	noreorder
 
-copy_bytes_checklen:
-	beqz	len, done
+.Lcopy_bytes_checklen:
+	beqz	len, .Ldone
 	 nop
-copy_bytes:
+.Lcopy_bytes:
 	/* 0 < len < NBYTES  */
 	R10KCBARRIER(0(ra))
 #define COPY_BYTE(N)			\
-EXC(	lb	t0, N(src), l_exc);	\
+EXC(	lb	t0, N(src), .Ll_exc);	\
 	SUB	len, len, 1;		\
-	beqz	len, done;		\
-EXC(	 sb	t0, N(dst), s_exc_p1)
+	beqz	len, .Ldone;		\
+EXC(	 sb	t0, N(dst), .Ls_exc_p1)
 
 	COPY_BYTE(0)
 	COPY_BYTE(1)
@@ -421,16 +421,16 @@
 	COPY_BYTE(4)
 	COPY_BYTE(5)
 #endif
-EXC(	lb	t0, NBYTES-2(src), l_exc)
+EXC(	lb	t0, NBYTES-2(src), .Ll_exc)
 	SUB	len, len, 1
 	jr	ra
-EXC(	 sb	t0, NBYTES-2(dst), s_exc_p1)
-done:
+EXC(	 sb	t0, NBYTES-2(dst), .Ls_exc_p1)
+.Ldone:
 	jr	ra
 	 nop
 	END(memcpy)
 
-l_exc_copy:
+.Ll_exc_copy:
 	/*
 	 * Copy bytes from src until faulting load address (or until a
 	 * lb faults)
@@ -445,14 +445,14 @@
 	 nop
 	LOAD	t0, THREAD_BUADDR(t0)
 1:
-EXC(	lb	t1, 0(src),	l_exc)
+EXC(	lb	t1, 0(src),	.Ll_exc)
 	ADD	src, src, 1
 	sb	t1, 0(dst)	# can't fault -- we're copy_from_user
 	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, 1
 	bne	src, t0, 1b
 	.set	noreorder
-l_exc:
+.Ll_exc:
 	LOAD	t0, TI_TASK($28)
 	 nop
 	LOAD	t0, THREAD_BUADDR(t0)	# t0 is just past last good address
@@ -471,7 +471,7 @@
 	 */
 	.set	reorder				/* DADDI_WAR */
 	SUB	src, len, 1
-	beqz	len, done
+	beqz	len, .Ldone
 	.set	noreorder
 1:	sb	zero, 0(dst)
 	ADD	dst, dst, 1
@@ -492,7 +492,7 @@
 
 #define SEXC(n)							\
 	.set	reorder;			/* DADDI_WAR */	\
-s_exc_p ## n ## u:						\
+.Ls_exc_p ## n ## u:						\
 	ADD	len, len, n*NBYTES;				\
 	jr	ra;						\
 	.set	noreorder
@@ -506,12 +506,12 @@
 SEXC(2)
 SEXC(1)
 
-s_exc_p1:
+.Ls_exc_p1:
 	.set	reorder				/* DADDI_WAR */
 	ADD	len, len, 1
 	jr	ra
 	.set	noreorder
-s_exc:
+.Ls_exc:
 	jr	ra
 	 nop
 
@@ -522,20 +522,20 @@
 	sltu	t0, a1, t0			# dst + len <= src -> memcpy
 	sltu	t1, a0, t1			# dst >= src + len -> memcpy
 	and	t0, t1
-	beqz	t0, __memcpy
+	beqz	t0, .L__memcpy
 	 move	v0, a0				/* return value */
-	beqz	a2, r_out
+	beqz	a2, .Lr_out
 	END(memmove)
 
 	/* fall through to __rmemcpy */
 LEAF(__rmemcpy)					/* a0=dst a1=src a2=len */
 	 sltu	t0, a1, a0
-	beqz	t0, r_end_bytes_up		# src >= dst
+	beqz	t0, .Lr_end_bytes_up		# src >= dst
 	 nop
 	ADD	a0, a2				# dst = dst + len
 	ADD	a1, a2				# src = src + len
 
-r_end_bytes:
+.Lr_end_bytes:
 	R10KCBARRIER(0(ra))
 	lb	t0, -1(a1)
 	SUB	a2, a2, 0x1
@@ -543,14 +543,14 @@
 	SUB	a1, a1, 0x1
 	.set	reorder				/* DADDI_WAR */
 	SUB	a0, a0, 0x1
-	bnez	a2, r_end_bytes
+	bnez	a2, .Lr_end_bytes
 	.set	noreorder
 
-r_out:
+.Lr_out:
 	jr	ra
 	 move	a2, zero
 
-r_end_bytes_up:
+.Lr_end_bytes_up:
 	R10KCBARRIER(0(ra))
 	lb	t0, (a1)
 	SUB	a2, a2, 0x1
@@ -558,7 +558,7 @@
 	ADD	a1, a1, 0x1
 	.set	reorder				/* DADDI_WAR */
 	ADD	a0, a0, 0x1
-	bnez	a2, r_end_bytes_up
+	bnez	a2, .Lr_end_bytes_up
 	.set	noreorder
 
 	jr	ra
diff --git a/arch/mips/lib/memset.S b/arch/mips/lib/memset.S
index c018a47..77dc3b2 100644
--- a/arch/mips/lib/memset.S
+++ b/arch/mips/lib/memset.S
@@ -72,7 +72,7 @@
 
 FEXPORT(__bzero)
 	sltiu		t0, a2, LONGSIZE	/* very small region? */
-	bnez		t0, small_memset
+	bnez		t0, .Lsmall_memset
 	 andi		t0, a0, LONGMASK	/* aligned? */
 
 #ifndef CONFIG_CPU_DADDI_WORKAROUNDS
@@ -88,28 +88,28 @@
 
 	R10KCBARRIER(0(ra))
 #ifdef __MIPSEB__
-	EX(LONG_S_L, a1, (a0), first_fixup)	/* make word/dword aligned */
+	EX(LONG_S_L, a1, (a0), .Lfirst_fixup)	/* make word/dword aligned */
 #endif
 #ifdef __MIPSEL__
-	EX(LONG_S_R, a1, (a0), first_fixup)	/* make word/dword aligned */
+	EX(LONG_S_R, a1, (a0), .Lfirst_fixup)	/* make word/dword aligned */
 #endif
 	PTR_SUBU	a0, t0			/* long align ptr */
 	PTR_ADDU	a2, t0			/* correct size */
 
 1:	ori		t1, a2, 0x3f		/* # of full blocks */
 	xori		t1, 0x3f
-	beqz		t1, memset_partial	/* no block to fill */
+	beqz		t1, .Lmemset_partial	/* no block to fill */
 	 andi		t0, a2, 0x40-LONGSIZE
 
 	PTR_ADDU	t1, a0			/* end address */
 	.set		reorder
 1:	PTR_ADDIU	a0, 64
 	R10KCBARRIER(0(ra))
-	f_fill64 a0, -64, a1, fwd_fixup
+	f_fill64 a0, -64, a1, .Lfwd_fixup
 	bne		t1, a0, 1b
 	.set		noreorder
 
-memset_partial:
+.Lmemset_partial:
 	R10KCBARRIER(0(ra))
 	PTR_LA		t1, 2f			/* where to start */
 #if LONGSIZE == 4
@@ -126,7 +126,7 @@
 	.set		push
 	.set		noreorder
 	.set		nomacro
-	f_fill64 a0, -64, a1, partial_fixup	/* ... but first do longs ... */
+	f_fill64 a0, -64, a1, .Lpartial_fixup	/* ... but first do longs ... */
 2:	.set		pop
 	andi		a2, LONGMASK		/* At most one long to go */
 
@@ -134,15 +134,15 @@
 	 PTR_ADDU	a0, a2			/* What's left */
 	R10KCBARRIER(0(ra))
 #ifdef __MIPSEB__
-	EX(LONG_S_R, a1, -1(a0), last_fixup)
+	EX(LONG_S_R, a1, -1(a0), .Llast_fixup)
 #endif
 #ifdef __MIPSEL__
-	EX(LONG_S_L, a1, -1(a0), last_fixup)
+	EX(LONG_S_L, a1, -1(a0), .Llast_fixup)
 #endif
 1:	jr		ra
 	 move		a2, zero
 
-small_memset:
+.Lsmall_memset:
 	beqz		a2, 2f
 	 PTR_ADDU	t1, a0, a2
 
@@ -155,11 +155,11 @@
 	 move		a2, zero
 	END(memset)
 
-first_fixup:
+.Lfirst_fixup:
 	jr	ra
 	 nop
 
-fwd_fixup:
+.Lfwd_fixup:
 	PTR_L		t0, TI_TASK($28)
 	LONG_L		t0, THREAD_BUADDR(t0)
 	andi		a2, 0x3f
@@ -167,7 +167,7 @@
 	jr		ra
 	 LONG_SUBU	a2, t0
 
-partial_fixup:
+.Lpartial_fixup:
 	PTR_L		t0, TI_TASK($28)
 	LONG_L		t0, THREAD_BUADDR(t0)
 	andi		a2, LONGMASK
@@ -175,6 +175,6 @@
 	jr		ra
 	 LONG_SUBU	a2, t0
 
-last_fixup:
+.Llast_fixup:
 	jr		ra
 	 andi		v1, a2, LONGMASK
diff --git a/arch/mips/lib/strlen_user.S b/arch/mips/lib/strlen_user.S
index eca558d..fdbb970 100644
--- a/arch/mips/lib/strlen_user.S
+++ b/arch/mips/lib/strlen_user.S
@@ -24,16 +24,16 @@
 LEAF(__strlen_user_asm)
 	LONG_L		v0, TI_ADDR_LIMIT($28)	# pointer ok?
 	and		v0, a0
-	bnez		v0, fault
+	bnez		v0, .Lfault
 
 FEXPORT(__strlen_user_nocheck_asm)
 	move		v0, a0
-1:	EX(lb, t0, (v0), fault)
+1:	EX(lb, t0, (v0), .Lfault)
 	PTR_ADDIU	v0, 1
 	bnez		t0, 1b
 	PTR_SUBU	v0, a0
 	jr		ra
 	END(__strlen_user_asm)
 
-fault:	move		v0, zero
+.Lfault:	move		v0, zero
 	jr		ra
diff --git a/arch/mips/lib/strncpy_user.S b/arch/mips/lib/strncpy_user.S
index 8a63f72..7201b2f 100644
--- a/arch/mips/lib/strncpy_user.S
+++ b/arch/mips/lib/strncpy_user.S
@@ -30,13 +30,13 @@
 LEAF(__strncpy_from_user_asm)
 	LONG_L		v0, TI_ADDR_LIMIT($28)	# pointer ok?
 	and		v0, a1
-	bnez		v0, fault
+	bnez		v0, .Lfault
 
 FEXPORT(__strncpy_from_user_nocheck_asm)
 	move		v0, zero
 	move		v1, a1
 	.set		noreorder
-1:	EX(lbu, t0, (v1), fault)
+1:	EX(lbu, t0, (v1), .Lfault)
 	PTR_ADDIU	v1, 1
 	R10KCBARRIER(0(ra))
 	beqz		t0, 2f
@@ -47,13 +47,13 @@
 	bne		v0, a2, 1b
 2:	PTR_ADDU	t0, a1, v0
 	xor		t0, a1
-	bltz		t0, fault
+	bltz		t0, .Lfault
 	jr		ra			# return n
 	END(__strncpy_from_user_asm)
 
-fault:	li		v0, -EFAULT
+.Lfault:	li		v0, -EFAULT
 	jr		ra
 
 	.section	__ex_table,"a"
-	PTR		1b, fault
+	PTR		1b, .Lfault
 	.previous
diff --git a/arch/mips/lib/strnlen_user.S b/arch/mips/lib/strnlen_user.S
index c0ea151..c768e300 100644
--- a/arch/mips/lib/strnlen_user.S
+++ b/arch/mips/lib/strnlen_user.S
@@ -28,18 +28,19 @@
 LEAF(__strnlen_user_asm)
 	LONG_L		v0, TI_ADDR_LIMIT($28)	# pointer ok?
 	and		v0, a0
-	bnez		v0, fault
+	bnez		v0, .Lfault
 
 FEXPORT(__strnlen_user_nocheck_asm)
 	move		v0, a0
 	PTR_ADDU	a1, a0			# stop pointer
 1:	beq		v0, a1, 1f		# limit reached?
-	EX(lb, t0, (v0), fault)
+	EX(lb, t0, (v0), .Lfault)
 	PTR_ADDU	v0, 1
 	bnez		t0, 1b
 1:	PTR_SUBU	v0, a0
 	jr		ra
 	END(__strnlen_user_asm)
 
-fault:	move		v0, zero
+.Lfault:
+	move		v0, zero
 	jr		ra