Optimize __memset_chk, __memcpy_chk. DO NOT MERGE.

This change creates assembler versions of __memcpy_chk/__memset_chk
that are implemented in the memcpy/memset assembler code. This change
avoids an extra call to memcpy/memset, instead allowing a simple fall
through to occur from the chk code into the body of the real
implementation.

Testing:

- Ran the libc_test on __memcpy_chk/__memset_chk on all nexus devices.
- Wrote a small test executable that has three calls to __memcpy_chk and
  three calls to __memset_chk. In the first call, dest_len is length + 1;
  in the second, dest_len is length; in the third, dest_len is length - 1.
  Verified that the first two calls pass, and the third fails. Examined
  the logcat output on all nexus devices to verify that the fortify
  error message was sent properly.
- I benchmarked the new __memcpy_chk and __memset_chk on all systems. For
  __memcpy_chk and large copies, the savings are relatively small (about 1%).
  For small copies, the savings are large on cortex-a15/krait devices
  (between 5% and 30%).
  For cortex-a9 and small copies, the speed-up is present, but relatively
  small (about 3% to 5%).
  For __memset_chk and large copies, the savings are also small (about 1%).
  However, all processors show larger speed-ups on small copies (about 30% to
  100%).

Bug: 9293744

Merge from internal master.

(cherry-picked from 7c860db0747f6276a6e43984d43f8fa5181ea936)

Change-Id: I916ad305e4001269460ca6ebd38aaa0be8ac7f52
diff --git a/libc/Android.mk b/libc/Android.mk
index 5f399b4..cfc124c 100644
--- a/libc/Android.mk
+++ b/libc/Android.mk
@@ -62,7 +62,6 @@
 	string/strcspn.c \
 	string/strdup.c \
 	string/strpbrk.c \
-	string/__strrchr_chk.c \
 	string/strsep.c \
 	string/strspn.c \
 	string/strstr.c \
@@ -181,6 +180,25 @@
 	netbsd/nameser/ns_print.c \
 	netbsd/nameser/ns_samedomain.c \
 
+# Fortify implementations of libc functions.
+libc_common_src_files += \
+    bionic/__fgets_chk.cpp \
+    bionic/__memcpy_chk.cpp \
+    bionic/__memmove_chk.cpp \
+    bionic/__memset_chk.cpp \
+    bionic/__strcat_chk.cpp \
+    bionic/__strchr_chk.cpp \
+    bionic/__strcpy_chk.cpp \
+    bionic/__strlcat_chk.cpp \
+    bionic/__strlcpy_chk.cpp \
+    bionic/__strlen_chk.cpp \
+    bionic/__strncat_chk.cpp \
+    bionic/__strncpy_chk.cpp \
+    bionic/__strrchr_chk.cpp \
+    bionic/__umask_chk.cpp \
+    bionic/__vsnprintf_chk.cpp \
+    bionic/__vsprintf_chk.cpp \
+
 libc_bionic_src_files := \
     bionic/abort.cpp \
     bionic/assert.cpp \
@@ -189,16 +207,12 @@
     bionic/__errno.c \
     bionic/eventfd_read.cpp \
     bionic/eventfd_write.cpp \
-    bionic/__fgets_chk.cpp \
     bionic/futimens.cpp \
     bionic/getauxval.cpp \
     bionic/getcwd.cpp \
     bionic/libc_init_common.cpp \
     bionic/libc_logging.cpp \
     bionic/libgen.cpp \
-    bionic/__memcpy_chk.cpp \
-    bionic/__memmove_chk.cpp \
-    bionic/__memset_chk.cpp \
     bionic/mmap.cpp \
     bionic/pthread_attr.cpp \
     bionic/pthread_detach.cpp \
@@ -221,24 +235,13 @@
     bionic/signalfd.cpp \
     bionic/sigwait.cpp \
     bionic/statvfs.cpp \
-    bionic/__strcat_chk.cpp \
-    bionic/__strchr_chk.cpp \
-    bionic/__strcpy_chk.cpp \
     bionic/strerror.cpp \
     bionic/strerror_r.cpp \
-    bionic/__strlcat_chk.cpp \
-    bionic/__strlcpy_chk.cpp \
-    bionic/__strlen_chk.cpp \
-    bionic/__strncat_chk.cpp \
-    bionic/__strncpy_chk.cpp \
     bionic/strsignal.cpp \
     bionic/stubs.cpp \
     bionic/sysconf.cpp \
     bionic/tdestroy.cpp \
     bionic/tmpfile.cpp \
-    bionic/__umask_chk.cpp \
-    bionic/__vsnprintf_chk.cpp \
-    bionic/__vsprintf_chk.cpp \
     bionic/wait.cpp \
     bionic/wchar.cpp \