Fix all debug directives.

The backtrace generated when a fortify check failed was not correct. This
change adds all of the directives (.save and .cfi_*) needed to get a
correct backtrace.
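
On ARM, a push only unwinds correctly if it is mirrored by two sets of
annotations: a .save directive for the EABI unwinder used at runtime, and
.cfi_* directives for DWARF-based tools such as gdb. A minimal sketch of
the pattern, assuming bionic's ENTRY/END macros (which emit .fnstart and
.fnend for the EABI unwinder); my_func and some_helper are placeholders:

ENTRY(my_func)
        .cfi_startproc
        stmfd       sp!, {r4, lr}       @ push two registers, sp -= 8
        .save       {r4, lr}            @ EABI unwind: these regs were saved
        .cfi_def_cfa_offset 8           @ DWARF: CFA is now sp + 8
        .cfi_rel_offset r4, 0           @ saved r4 lives at sp + 0
        .cfi_rel_offset lr, 4           @ saved lr lives at sp + 4

        bl          some_helper         @ work that may fault or abort

        ldmfd       sp!, {r4, pc}       @ restore and return
        .cfi_endproc
END(my_func)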

Fix the strcmp directives and change all labels to local labels.
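
A label that is not local becomes a symbol in the object file, and both the
unwinder and gdb can mistake it for the start of a new function, splitting
one routine's backtrace in two. GAS treats numeric labels (and .L-prefixed
names) as local, so they never reach the symbol table. An illustrative
before/after; the label names are made up, not taken from strcmp.S:

        @ before: check_loop is emitted as a symbol that can split the function
check_loop:
        subs        r2, r2, #1
        bne         check_loop

        @ after: a numeric local label, referenced as 1b (back) or 1f (forward)
1:
        subs        r2, r2, #1
        bne         1b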

Testing:
- Verify that the runtime can decode the stack for __memcpy_chk, __memset_chk,
  __strcpy_chk, __strcat_chk fortify failures.
- Verify that gdb can decode the stack properly when hitting a fortify check.
- Verify that the runtime can decode the stack for a seg fault for all of the
  _chk functions and for memcpy/memset.
- Verify that gdb can decode the stack for a seg fault for all of the _chk
  functions and for memcpy/memset.
- Verify that the runtime can decode the stack for a seg fault for strcmp.
- Verify that gdb can decode the stack for a seg fault in strcmp.

Bug: 10342460
Bug: 10345269

Merge from internal master.

(cherry picked from commit 05332f2ce7e542d32ff4d5cd9f60248ad71fbf0d)

Change-Id: Ibc919b117cfe72b9ae97e35bd48185477177c5ca
diff --git a/libc/arch-arm/cortex-a9/bionic/memcpy_base.S b/libc/arch-arm/cortex-a9/bionic/memcpy_base.S
index 088d29e..e8ff4f5 100644
--- a/libc/arch-arm/cortex-a9/bionic/memcpy_base.S
+++ b/libc/arch-arm/cortex-a9/bionic/memcpy_base.S
@@ -32,15 +32,21 @@
  * cache line.
  */
 
+ENTRY(MEMCPY_BASE)
+        .cfi_startproc
+        .save       {r0, lr}
+        .cfi_def_cfa_offset 8
+        .cfi_rel_offset r0, 0
+        .cfi_rel_offset lr, 4
+
         // Check so divider is at least 16 bytes, needed for alignment code.
         cmp         r2, #16
         blo         5f
 
-
         /* check if buffers are aligned. If so, run arm-only version */
         eor         r3, r0, r1
         ands        r3, r3, #0x3
-        beq         11f
+        beq         __memcpy_base_aligned
 
         /* Check the upper size limit for Neon unaligned memory access in memcpy */
         cmp         r2, #224
@@ -131,11 +137,22 @@
 
         ldmfd       sp!, {r0, lr}
         bx          lr
-11:
+
+        .cfi_endproc
+END(MEMCPY_BASE)
+
+ENTRY(MEMCPY_BASE_ALIGNED)
+        .cfi_startproc
+
+        .save       {r0, lr}
+        .cfi_def_cfa_offset 8
+        .cfi_rel_offset r0, 0
+        .cfi_rel_offset lr, 4
+
         /* Simple arm-only copy loop to handle aligned copy operations */
-        .save       {r4-r8}
         stmfd       sp!, {r4-r8}
-        .cfi_def_cfa_offset 20
+        .save       {r4-r8}
+        .cfi_adjust_cfa_offset 20
         .cfi_rel_offset r4, 0
         .cfi_rel_offset r5, 4
         .cfi_rel_offset r6, 8
@@ -209,5 +226,8 @@
         ldrbne      r3, [r1]                /*  last byte  */
         strbne      r3, [r0]
 6:
-        ldmfd       sp!, {r4, r5, r6, r7, r8}
+        ldmfd       sp!, {r4-r8}
         ldmfd       sp!, {r0, pc}
+
+        .cfi_endproc
+END(MEMCPY_BASE_ALIGNED)
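
One detail in the MEMCPY_BASE_ALIGNED hunk is worth spelling out: the old
code used .cfi_def_cfa_offset 20 after the second push, which overwrote the
CFA offset of 8 already declared for the earlier {r0, lr} push and left the
unwinder computing the CFA 8 bytes too low. The fix switches to
.cfi_adjust_cfa_offset, which adds to the running offset instead.
Schematically (comments are mine, not from the source):

        .cfi_def_cfa_offset 8           @ after push {r0, lr}: CFA = sp + 8
        stmfd       sp!, {r4-r8}        @ five registers, sp -= 20
        .cfi_adjust_cfa_offset 20       @ CFA = sp + 28, i.e. 8 + 20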