Fix all debug directives.

The backtrace produced when a fortify check failed was incorrect. This change
adds all of the directives needed to produce a correct backtrace.

Fix the strcmp directives and change all labels to local labels.

Testing:
- Verify that the runtime can decode the stack for __memcpy_chk, __memset_chk,
  __strcpy_chk, and __strcat_chk fortify failures.
- Verify that gdb can decode the stack properly when hitting a fortify check.
- Verify that the runtime can decode the stack for a seg fault for all of the
  _chk functions and for memcpy/memset.
- Verify that gdb can decode the stack for a seg fault for all of the _chk
  functions and for memcpy/memset.
- Verify that the runtime can decode the stack for a seg fault for strcmp.
- Verify that gdb can decode the stack for a seg fault in strcmp.

Bug: 10342460
Bug: 10345269

Merge from internal master.

(cherry-picked from 05332f2ce7e542d32ff4d5cd9f60248ad71fbf0d)

Change-Id: Ibc919b117cfe72b9ae97e35bd48185477177c5ca
diff --git a/libc/arch-arm/cortex-a9/bionic/memset.S b/libc/arch-arm/cortex-a9/bionic/memset.S
index a7876fb..87d2c08 100644
--- a/libc/arch-arm/cortex-a9/bionic/memset.S
+++ b/libc/arch-arm/cortex-a9/bionic/memset.S
@@ -43,8 +43,8 @@
         bls         .L_done
 
         // Preserve lr for backtrace.
-        .save       {lr}
         push        {lr}
+        .save       {lr}
         .cfi_def_cfa_offset 4
         .cfi_rel_offset lr, 0
 
@@ -74,12 +74,13 @@
 /* memset() returns its first argument.  */
 ENTRY(memset)
         .cfi_startproc
+
         # The neon memset only wins for less than 132.
         cmp         r2, #132
-        bhi         11f
+        bhi         __memset_large_copy
 
-        .save       {r0}
         stmfd       sp!, {r0}
+        .save       {r0}
         .cfi_def_cfa_offset 4
         .cfi_rel_offset r0, 0
 
@@ -114,13 +115,18 @@
         strcsb      r1, [r0], #1
         ldmfd       sp!, {r0}
         bx          lr
-11:
+
+        .cfi_endproc
+END(memset)
+
+ENTRY(__memset_large_copy)
+        .cfi_startproc
+
         /* compute the offset to align the destination
          * offset = (4-(src&3))&3 = -src & 3
          */
-
-        .save       {r0, r4-r7, lr}
         stmfd       sp!, {r0, r4-r7, lr}
+        .save       {r0, r4-r7, lr}
         .cfi_def_cfa_offset 24
         .cfi_rel_offset r0, 0
         .cfi_rel_offset r4, 4
@@ -191,7 +197,7 @@
         ldmfd       sp!, {r0, r4-r7, lr}
         bx          lr
         .cfi_endproc
-END(memset)
+END(__memset_large_copy)
 
         .data
 error_string: