Fix all debug directives.

The backtrace produced when a fortify check failed was not correct. This change
adds all of the unwind directives necessary to get a correct backtrace.
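
For reference, here is the pattern those directives follow, as a minimal
sketch of a hypothetical leaf routine (not code from this patch): every sp
adjustment is paired with a .cfi_def_cfa_offset, and every register reload
with a .cfi_restore, so the unwinder can describe the frame at any
instruction.

    example:                         /* hypothetical function */
        .cfi_startproc
        subs    sp, sp, #16          /* reserve spill space for r4-r7 */
        .cfi_def_cfa_offset 16       /* CFA is now at sp + 16 */
        strd    r4, r5, [sp, #8]
        strd    r6, r7, [sp]
        /* ... body ... */
        ldrd    r6, r7, [sp]
        ldrd    r4, r5, [sp, #8]
        adds    sp, sp, #16
        .cfi_def_cfa_offset 0        /* CFA is back at sp */
        .cfi_restore r4
        .cfi_restore r5
        .cfi_restore r6
        .cfi_restore r7
        bx      lr
        .cfi_endproc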

Fix the strcmp directives, and change all labels to local (.L_) labels so
they do not end up in the symbol table.
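
A label without a .L prefix is emitted as a symbol, so a pc in the middle of
strcmp can be symbolized as the nearest preceding label instead of strcmp
itself; .L labels emit no symbol. A before/after sketch using one label from
this patch (code abbreviated):

    do_align:                        /* emitted as a symbol: a fault past
                                        here can be reported as do_align */
        ands    ip, r0, #3

    .L_do_align:                     /* local label: no symbol, so the pc
                                        is still attributed to strcmp */
        ands    ip, r0, #3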

Testing:
- Verify that the runtime can decode the stack for __memcpy_chk, __memset_chk,
  __strcpy_chk, __strcat_chk fortify failures.
- Verify that gdb can decode the stack properly when hitting a fortify check.
- Verify that the runtime can decode the stack for a seg fault for all of the
  _chk functions and for memcpy/memset.
- Verify that gdb can decode the stack for a seg fault for all of the _chk
  functions and for memcpy/memset.
- Verify that the runtime can decode the stack for a seg fault for strcmp.
- Verify that gdb can decode the stack for a seg fault in strcmp.

Bug: 10342460
Bug: 10345269

Merge from internal master.

(cherry-picked from 05332f2ce7e542d32ff4d5cd9f60248ad71fbf0d)

Change-Id: Ibc919b117cfe72b9ae97e35bd48185477177c5ca
diff --git a/libc/arch-arm/cortex-a9/bionic/strcmp.S b/libc/arch-arm/cortex-a9/bionic/strcmp.S
index a84c047..232df75 100644
--- a/libc/arch-arm/cortex-a9/bionic/strcmp.S
+++ b/libc/arch-arm/cortex-a9/bionic/strcmp.S
@@ -122,7 +122,6 @@
 
         .macro  init
         /* Macro to save temporary registers and prepare magic values.  */
-        .save   {r4-r7}
         subs    sp, sp, #16
         .cfi_def_cfa_offset 16
         strd    r4, r5, [sp, #8]
@@ -178,12 +177,12 @@
         /* Are both strings double-word aligned?  */
         orr     ip, r0, r1
         tst     ip, #7
-        bne     do_align
+        bne     .L_do_align
 
         /* Fast path.  */
         init
 
-doubleword_aligned:
+.L_doubleword_aligned:
 
         /* Get here when the strings to compare are double-word aligned.  */
         /* Compare two words in every iteration.  */
@@ -196,14 +195,14 @@
         ldrd    r2, r3, [r0], #8
         ldrd    r4, r5, [r1], #8
 
-        magic_compare_and_branch w1=r2, w2=r4, label=return_24
-        magic_compare_and_branch w1=r3, w2=r5, label=return_35
+        magic_compare_and_branch w1=r2, w2=r4, label=.L_return_24
+        magic_compare_and_branch w1=r3, w2=r5, label=.L_return_35
         b       2b
 
-do_align:
+.L_do_align:
         /* Is the first string word-aligned?  */
         ands    ip, r0, #3
-        beq     word_aligned_r0
+        beq     .L_word_aligned_r0
 
         /* Fast compare byte by byte until the first string is word-aligned.  */
         /* The offset of r0 from a word boundary is in ip. Thus, the number of bytes
@@ -211,58 +210,58 @@
         bic     r0, r0, #3
         ldr     r2, [r0], #4
         lsls    ip, ip, #31
-        beq     byte2
-        bcs     byte3
+        beq     .L_byte2
+        bcs     .L_byte3
 
-byte1:
+.L_byte1:
         ldrb    ip, [r1], #1
         uxtb    r3, r2, ror #BYTE1_OFFSET
         subs    ip, r3, ip
-        bne     fast_return
-        m_cbz   reg=r3, label=fast_return
+        bne     .L_fast_return
+        m_cbz   reg=r3, label=.L_fast_return
 
-byte2:
+.L_byte2:
         ldrb    ip, [r1], #1
         uxtb    r3, r2, ror #BYTE2_OFFSET
         subs    ip, r3, ip
-        bne     fast_return
-        m_cbz   reg=r3, label=fast_return
+        bne     .L_fast_return
+        m_cbz   reg=r3, label=.L_fast_return
 
-byte3:
+.L_byte3:
         ldrb    ip, [r1], #1
         uxtb    r3, r2, ror #BYTE3_OFFSET
         subs    ip, r3, ip
-        bne     fast_return
-        m_cbnz  reg=r3, label=word_aligned_r0
+        bne     .L_fast_return
+        m_cbnz  reg=r3, label=.L_word_aligned_r0
 
-fast_return:
+.L_fast_return:
         mov     r0, ip
         bx      lr
 
-word_aligned_r0:
+.L_word_aligned_r0:
         init
         /* The first string is word-aligned.  */
         /* Is the second string word-aligned?  */
         ands    ip, r1, #3
-        bne     strcmp_unaligned
+        bne     .L_strcmp_unaligned
 
-word_aligned:
+.L_word_aligned:
         /* The strings are word-aligned. */
         /* Is the first string double-word aligned?  */
         tst     r0, #4
-        beq     doubleword_aligned_r0
+        beq     .L_doubleword_aligned_r0
 
         /* If r0 is not double-word aligned yet, align it by loading
         and comparing the next word from each string.  */
         ldr     r2, [r0], #4
         ldr     r4, [r1], #4
-        magic_compare_and_branch w1=r2 w2=r4 label=return_24
+        magic_compare_and_branch w1=r2 w2=r4 label=.L_return_24
 
-doubleword_aligned_r0:
+.L_doubleword_aligned_r0:
         /* Get here when r0 is double-word aligned.  */
         /* Is r1 doubleword_aligned?  */
         tst     r1, #4
-        beq     doubleword_aligned
+        beq     .L_doubleword_aligned
 
         /* Get here when the strings to compare are word-aligned,
         r0 is double-word aligned, but r1 is not double-word aligned.  */
@@ -278,9 +277,9 @@
 
         /* Load the next double-word from each string and compare.  */
         ldrd    r2, r3, [r0], #8
-        magic_compare_and_branch w1=r2 w2=r5 label=return_25
+        magic_compare_and_branch w1=r2 w2=r5 label=.L_return_25
         ldrd    r4, r5, [r1], #8
-        magic_compare_and_branch w1=r3 w2=r4 label=return_34
+        magic_compare_and_branch w1=r3 w2=r4 label=.L_return_34
         b       3b
 
         .macro miscmp_word offsetlo offsethi
@@ -304,33 +303,33 @@
         and     r2, r3, r6, S2LOMEM #\offsetlo
         it      eq
         cmpeq   r2, r5
-        bne     return_25
+        bne     .L_return_25
         ldr     r5, [r1], #4
         cmp     ip, #0
         eor r3, r2, r3
         S2HIMEM r2, r5, #\offsethi
         it      eq
         cmpeq   r3, r2
-        bne     return_32
+        bne     .L_return_32
         b       7b
         .endm /* miscmp_word */
 
-return_32:
+.L_return_32:
         setup_return w1=r3, w2=r2
-        b       do_return
-return_34:
+        b       .L_do_return
+.L_return_34:
         setup_return w1=r3, w2=r4
-        b       do_return
-return_25:
+        b       .L_do_return
+.L_return_25:
         setup_return w1=r2, w2=r5
-        b       do_return
-return_35:
+        b       .L_do_return
+.L_return_35:
         setup_return w1=r3, w2=r5
-        b       do_return
-return_24:
+        b       .L_do_return
+.L_return_24:
         setup_return w1=r2, w2=r4
 
-do_return:
+.L_do_return:
 
 #ifdef __ARMEB__
         mov     r0, ip
@@ -341,7 +340,6 @@
         /* Restore temporaries early, before computing the return value.  */
         ldrd    r6, r7, [sp]
         ldrd    r4, r5, [sp, #8]
-        .pad    #-16
         adds    sp, sp, #16
         .cfi_def_cfa_offset 0
         .cfi_restore r4
@@ -352,7 +350,7 @@
         /* There is a zero or a different byte between r1 and r2.  */
         /* r0 contains a mask of all-zero bytes in r1.  */
         /* Using r0 and not ip here because cbz requires low register.  */
-        m_cbz   reg=r0, label=compute_return_value
+        m_cbz   reg=r0, label=.L_compute_return_value
         clz     r0, r0
         /* r0 contains the number of bits on the left of the first all-zero byte in r1.  */
         rsb     r0, r0, #24
@@ -360,7 +358,7 @@
         lsr     r1, r1, r0
         lsr     r2, r2, r0
 
-compute_return_value:
+.L_compute_return_value:
         movs    r0, #1
         cmp     r1, r2
         /* The return value is computed as follows.
@@ -380,7 +378,7 @@
      * bionic/libc/arch-arm/cortex-a15/bionic/strcmp.S for the unedited
      * version of the code.
      */
-strcmp_unaligned:
+.L_strcmp_unaligned:
 	wp1 .req r0
 	wp2 .req r1
 	b1  .req r2
@@ -532,7 +530,6 @@
     /* Restore registers and stack. */
     ldrd    r6, r7, [sp]
     ldrd    r4, r5, [sp, #8]
-    .pad    #-16
     adds    sp, sp, #16
     .cfi_def_cfa_offset 0
     .cfi_restore r4
@@ -557,13 +554,12 @@
     /* Restore registers and stack. */
     ldrd    r6, r7, [sp]
     ldrd    r4, r5, [sp, #8]
-    .pad    #-16
     adds    sp, sp, #16
     .cfi_def_cfa_offset 0
     .cfi_restore r4
     .cfi_restore r5
     .cfi_restore r6
     .cfi_restore r7
 
 	bx	lr
     .cfi_endproc