Merge "Switch from using base64 BIOs to encoding funcs."
diff --git a/adb/Android.mk b/adb/Android.mk
index 80c1f9c..a82f026 100644
--- a/adb/Android.mk
+++ b/adb/Android.mk
@@ -85,6 +85,7 @@
 	LOCAL_STATIC_LIBRARIES += libcutils
 endif
 
+LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
 include $(BUILD_HOST_EXECUTABLE)
 
 $(call dist-for-goals,dist_files sdk,$(LOCAL_BUILT_MODULE))
@@ -116,8 +117,13 @@
 	remount_service.c \
 	usb_linux_client.c
 
-LOCAL_CFLAGS := -O2 -g -DADB_HOST=0 -Wall -Wno-unused-parameter -Werror
-LOCAL_CFLAGS += -D_XOPEN_SOURCE -D_GNU_SOURCE
+LOCAL_CFLAGS := \
+	-O2 \
+	-g \
+	-DADB_HOST=0 \
+	-D_XOPEN_SOURCE \
+	-D_GNU_SOURCE \
+	-Wall -Wno-unused-parameter -Werror -Wno-deprecated-declarations \
 
 ifneq (,$(filter userdebug eng,$(TARGET_BUILD_VARIANT)))
 LOCAL_CFLAGS += -DALLOW_ADBD_ROOT=1
@@ -130,6 +136,7 @@
 LOCAL_UNSTRIPPED_PATH := $(TARGET_ROOT_OUT_SBIN_UNSTRIPPED)
 
 LOCAL_STATIC_LIBRARIES := liblog libcutils libc libmincrypt libselinux
+LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
 include $(BUILD_EXECUTABLE)
 
 
@@ -172,5 +179,6 @@
 
 LOCAL_SHARED_LIBRARIES := libcrypto
 
+LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
 include $(BUILD_EXECUTABLE)
 endif
diff --git a/adb/adb.c b/adb/adb.c
index 10a1e0d..cf8e3c4 100644
--- a/adb/adb.c
+++ b/adb/adb.c
@@ -329,6 +329,7 @@
 }
 #endif
 
+#if ADB_HOST
 static void send_msg_with_okay(int fd, const char* msg, size_t msglen) {
     char header[9];
     if (msglen > 0xffff)
@@ -337,6 +338,7 @@
     writex(fd, header, 8);
     writex(fd, msg, msglen);
 }
+#endif // ADB_HOST
 
 static void send_connect(atransport *t)
 {
@@ -414,6 +416,7 @@
     send_connect(t);
 }
 
+#if ADB_HOST
 static char *connection_state_name(atransport *t)
 {
     if (t == NULL) {
@@ -437,6 +440,7 @@
         return "unknown";
     }
 }
+#endif // ADB_HOST
 
 /* qual_overwrite is used to overwrite a qualifier string.  dst is a
  * pointer to a char pointer.  It is assumed that if *dst is non-NULL, it
@@ -1554,8 +1558,6 @@
 
 int handle_host_request(char *service, transport_type ttype, char* serial, int reply_fd, asocket *s)
 {
-    atransport *transport = NULL;
-
     if(!strcmp(service, "kill")) {
         fprintf(stderr,"adb server killed by remote request\n");
         fflush(stdout);
@@ -1565,6 +1567,7 @@
     }
 
 #if ADB_HOST
+    atransport *transport = NULL;
     // "transport:" is used for switching transport with a specified serial number
     // "transport-usb:" is used for switching transport to the only USB transport
     // "transport-local:" is used for switching transport to the only local transport
diff --git a/adb/adb.h b/adb/adb.h
index 4704abb..707a6e0 100644
--- a/adb/adb.h
+++ b/adb/adb.h
@@ -36,7 +36,7 @@
 #define ADB_VERSION_MAJOR 1         // Used for help/version information
 #define ADB_VERSION_MINOR 0         // Used for help/version information
 
-#define ADB_SERVER_VERSION    31    // Increment this when we want to force users to start a new adb server
+#define ADB_SERVER_VERSION    32    // Increment this when we want to force users to start a new adb server
 
 typedef struct amessage amessage;
 typedef struct apacket apacket;
diff --git a/adb/adb_client.c b/adb/adb_client.c
index 1e47486..eb1720d 100644
--- a/adb/adb_client.c
+++ b/adb/adb_client.c
@@ -279,7 +279,7 @@
 
     fd = _adb_connect(service);
     if(fd == -1) {
-        fprintf(stderr,"error: %s\n", __adb_error);
+        D("_adb_connect error: %s\n", __adb_error);
     } else if(fd == -2) {
         fprintf(stderr,"** daemon still not running\n");
     }
@@ -296,6 +296,7 @@
 {
     int fd = adb_connect(service);
     if(fd < 0) {
+        fprintf(stderr, "error: %s\n", adb_error());
         return -1;
     }
 
diff --git a/adb/commandline.c b/adb/commandline.c
index 356c0db..f4c2272 100644
--- a/adb/commandline.c
+++ b/adb/commandline.c
@@ -500,6 +500,115 @@
     return status;
 }
 
+#define SIDELOAD_HOST_BLOCK_SIZE (CHUNK_SIZE)
+
+/*
+ * The sideload-host protocol serves the data in a file (given on the
+ * command line) to the client, using a simple protocol:
+ *
+ * - The connect message includes the total number of bytes in the
+ *   file and a block size chosen by us.
+ *
+ * - The other side sends the desired block number as eight decimal
+ *   digits (eg "00000023" for block 23).  Blocks are numbered from
+ *   zero.
+ *
+ * - We send back the data of the requested block.  The last block is
+ *   likely to be partial; when the last block is requested we only
+ *   send the part of the block that exists, it's not padded up to the
+ *   block size.
+ *
+ * - When the other side sends "DONEDONE" instead of a block number,
+ *   we hang up.
+ */
+int adb_sideload_host(const char* fn) {
+    uint8_t* data;
+    unsigned sz;
+    size_t xfer = 0;
+    int status;
+
+    printf("loading: '%s'", fn);
+    fflush(stdout);
+    data = load_file(fn, &sz);
+    if (data == NULL) {
+        printf("\n");
+        fprintf(stderr, "* cannot read '%s' *\n", fn);
+        return -1;
+    }
+
+    // Announce total size and block size; sz is unsigned, hence %u.
+    char buf[100];
+    snprintf(buf, sizeof(buf), "sideload-host:%u:%d", sz, SIDELOAD_HOST_BLOCK_SIZE);
+    int fd = adb_connect(buf);
+    if (fd < 0) {
+        // Try falling back to the older sideload method.  Maybe this
+        // is an older device that doesn't support sideload-host.
+        printf("\n");
+        status = adb_download_buffer("sideload", fn, data, sz, 1);
+        goto done;
+    }
+
+    int opt = SIDELOAD_HOST_BLOCK_SIZE;
+    opt = setsockopt(fd, SOL_SOCKET, SO_SNDBUF, (const void *) &opt, sizeof(opt));
+
+    int last_percent = -1;
+    for (;;) {
+        if (readx(fd, buf, 8)) {
+            fprintf(stderr, "* failed to read command: %s\n", adb_error());
+            status = -1;
+            goto done;
+        }
+
+        if (strncmp("DONEDONE", buf, 8) == 0) {
+            status = 0;
+            break;
+        }
+
+        // Range-check before multiplying: no overflow, no read outside data.
+        buf[8] = '\0';
+        long block = strtol(buf, NULL, 10);
+        if (block < 0 || (unsigned long)block > sz / SIDELOAD_HOST_BLOCK_SIZE ||
+            (size_t)block * SIDELOAD_HOST_BLOCK_SIZE >= sz) {
+            fprintf(stderr, "* attempt to read past end: block %ld\n", block);
+            status = -1;
+            goto done;
+        }
+        size_t offset = (size_t)block * SIDELOAD_HOST_BLOCK_SIZE;
+
+        // The last block may be partial; send only the bytes that exist.
+        size_t to_write = sz - offset;
+        if (to_write > SIDELOAD_HOST_BLOCK_SIZE) {
+            to_write = SIDELOAD_HOST_BLOCK_SIZE;
+        }
+
+        if (writex(fd, data + offset, to_write)) {
+            adb_status(fd);
+            fprintf(stderr,"* failed to write data '%s' *\n", adb_error());
+            status = -1;
+            goto done;
+        }
+        xfer += to_write;
+
+        // For normal OTA packages, we expect to transfer every byte twice,
+        // plus a bit of overhead (verification pass, installation pass, and
+        // extra access to things like the zip central directory), so this
+        // estimate reaches 100% at ~2.13 (=100/47) times the package size.
+        int percent = (int)(xfer * 47LL / (sz ? sz : 1));
+        if (percent != last_percent) {
+            printf("\rserving: '%s'  (~%d%%)    ", fn, percent);
+            fflush(stdout);
+            last_percent = percent;
+        }
+    }
+
+    printf("\rTotal xfer: %.2fx%*s\n", (double)xfer / (sz ? sz : 1), (int)strlen(fn)+10, "");
+
+  done:
+    if (fd >= 0) adb_close(fd);
+    free(data);
+    return status;
+}
+
 static void status_window(transport_type ttype, const char* serial)
 {
     char command[4096];
@@ -1290,7 +1399,7 @@
 
     if(!strcmp(argv[0], "sideload")) {
         if(argc != 2) return usage();
-        if(adb_download("sideload", argv[1], 1)) {
+        if (adb_sideload_host(argv[1])) {
             return 1;
         } else {
             return 0;
diff --git a/adb/usb_osx.c b/adb/usb_osx.c
index 5efb23b..ee893f5 100644
--- a/adb/usb_osx.c
+++ b/adb/usb_osx.c
@@ -513,14 +513,18 @@
         return -1;
     }
 
-    result =
-      (*handle->interface)->ReadPipe(handle->interface,
-                                    handle->bulkIn, buf, &numBytes);
+    result = (*handle->interface)->ReadPipe(handle->interface, handle->bulkIn, buf, &numBytes);
 
-    if (0 == result)
+    if (kIOUSBPipeStalled == result) {
+        DBG(" Pipe stalled, clearing stall.\n");
+        (*handle->interface)->ClearPipeStall(handle->interface, handle->bulkIn);
+        result = (*handle->interface)->ReadPipe(handle->interface, handle->bulkIn, buf, &numBytes);
+    }
+
+    if (kIOReturnSuccess == result)
         return 0;
     else {
-        DBG("ERR: usb_read failed with status %d\n", result);
+        DBG("ERR: usb_read failed with status %x\n", result);
     }
 
     return -1;
diff --git a/adb/usb_vendors.c b/adb/usb_vendors.c
index 6288155..1b8310f 100755
--- a/adb/usb_vendors.c
+++ b/adb/usb_vendors.c
@@ -38,6 +38,8 @@
 /* Keep the list below sorted alphabetically by #define name */
 // Acer's USB Vendor ID
 #define VENDOR_ID_ACER          0x0502
+// Alco's USB Vendor ID
+#define VENDOR_ID_ALCO          0x1914
 // Allwinner's USB Vendor ID
 #define VENDOR_ID_ALLWINNER     0x1F3A
 // Amlogic's USB Vendor ID
@@ -118,6 +120,8 @@
 #define VENDOR_ID_LGE           0x1004
 // Lumigon's USB Vendor ID
 #define VENDOR_ID_LUMIGON       0x25E3
+// Micromax's USB Vendor ID
+#define VENDOR_ID_MICROMAX      0x2A96
 // Motorola's USB Vendor ID
 #define VENDOR_ID_MOTOROLA      0x22b8
 // MSI's USB Vendor ID
@@ -200,6 +204,7 @@
 /* Keep the list below sorted alphabetically */
 int builtInVendorIds[] = {
     VENDOR_ID_ACER,
+    VENDOR_ID_ALCO,
     VENDOR_ID_ALLWINNER,
     VENDOR_ID_AMLOGIC,
     VENDOR_ID_ANYDATA,
@@ -240,6 +245,7 @@
     VENDOR_ID_LENOVOMOBILE,
     VENDOR_ID_LGE,
     VENDOR_ID_LUMIGON,
+    VENDOR_ID_MICROMAX,
     VENDOR_ID_MOTOROLA,
     VENDOR_ID_MSI,
     VENDOR_ID_MTK,
diff --git a/fastbootd/Android.mk b/fastbootd/Android.mk
index 6aa7400..bccac68 100644
--- a/fastbootd/Android.mk
+++ b/fastbootd/Android.mk
@@ -42,7 +42,7 @@
 
 LOCAL_MODULE := fastbootd
 LOCAL_MODULE_TAGS := optional
-LOCAL_CFLAGS := -Wall -Werror -Wno-unused-parameter -DFLASH_CERT
+LOCAL_CFLAGS := -Wall -Werror -Wno-unused-parameter -Wno-deprecated-declarations -DFLASH_CERT
 LOCAL_LDFLAGS := -ldl
 
 LOCAL_STATIC_LIBRARIES := \
diff --git a/fastbootd/commands/partitions.c b/fastbootd/commands/partitions.c
index 74232e6..f2c9da7 100644
--- a/fastbootd/commands/partitions.c
+++ b/fastbootd/commands/partitions.c
@@ -547,7 +547,8 @@
 int GPT_parse_entry(char *string, struct GPT_entry_raw *entry)
 {
     char *ptr = string;
-    char *key, *value;
+    char *key = NULL;
+    char *value = NULL;
 
     while ((ptr = get_key_value(ptr, &key, &value)) != NULL) {
         if (add_key_value(key, value, entry)) {
diff --git a/fastbootd/vendor_trigger_default.c b/fastbootd/vendor_trigger_default.c
index 3627024..0bcc99b 100644
--- a/fastbootd/vendor_trigger_default.c
+++ b/fastbootd/vendor_trigger_default.c
@@ -52,7 +52,7 @@
     return 0;
 }
 
-int trigger_oem_cmd(const char *arg, const char **response) {
+int trigger_oem_cmd(const char *arg, const char **response __unused) {
     KLOG_DEBUG("fastbootd", "%s: %s", __func__, arg);
     return 0;
 }
diff --git a/fs_mgr/fs_mgr_verity.c b/fs_mgr/fs_mgr_verity.c
index c9a2a9b..1d2e43f 100644
--- a/fs_mgr/fs_mgr_verity.c
+++ b/fs_mgr/fs_mgr_verity.c
@@ -120,7 +120,9 @@
 {
     int data_device;
     struct ext4_super_block sb;
-    struct fs_info info = {0};
+    struct fs_info info;
+
+    info.len = 0;  /* Only len is set to 0 to ask the device for real size. */
 
     data_device = open(blk_device, O_RDONLY);
     if (data_device < 0) {
diff --git a/include/cutils/debugger.h b/include/cutils/debugger.h
index ae6bfc4..4bcc8e6 100644
--- a/include/cutils/debugger.h
+++ b/include/cutils/debugger.h
@@ -23,10 +23,13 @@
 extern "C" {
 #endif
 
-#if __LP64__
-#define DEBUGGER_SOCKET_NAME "android:debuggerd64"
+#define DEBUGGER32_SOCKET_NAME "android:debuggerd"
+#define DEBUGGER64_SOCKET_NAME "android:debuggerd64"
+
+#if defined(__LP64__)
+#define DEBUGGER_SOCKET_NAME DEBUGGER64_SOCKET_NAME
 #else
-#define DEBUGGER_SOCKET_NAME "android:debuggerd"
+#define DEBUGGER_SOCKET_NAME DEBUGGER32_SOCKET_NAME
 #endif
 
 typedef enum {
@@ -45,6 +48,16 @@
     int32_t original_si_code;
 } debugger_msg_t;
 
+#if defined(__LP64__)
+// Message layout for a 64 bit process to contact the 32 bit debuggerd; only declared when __LP64__ is defined.
+typedef struct {
+    debugger_action_t action;
+    pid_t tid;
+    uint32_t abort_msg_address;  // held in 32 bits here; NOTE(review): presumably wider in debugger_msg_t - confirm
+    int32_t original_si_code;
+} debugger32_msg_t;
+#endif  // defined(__LP64__)
+
 /* Dumps a process backtrace, registers, and stack to a tombstone file (requires root).
  * Stores the tombstone path in the provided buffer.
  * Returns 0 on success, -1 on error.
diff --git a/include/log/logprint.h b/include/log/logprint.h
index 481c96e..1e42b47 100644
--- a/include/log/logprint.h
+++ b/include/log/logprint.h
@@ -36,6 +36,7 @@
     FORMAT_TIME,
     FORMAT_THREADTIME,
     FORMAT_LONG,
+    FORMAT_COLOR,
 } AndroidLogPrintFormat;
 
 typedef struct AndroidLogFormat_t AndroidLogFormat;
diff --git a/include/nativebridge/native_bridge.h b/include/nativebridge/native_bridge.h
index c588bbc..ac254e9 100644
--- a/include/nativebridge/native_bridge.h
+++ b/include/nativebridge/native_bridge.h
@@ -19,19 +19,40 @@
 
 #include "jni.h"
 #include <stdint.h>
+#include <sys/types.h>
 
 namespace android {
 
 struct NativeBridgeRuntimeCallbacks;
+struct NativeBridgeRuntimeValues;
 
-// Initialize the native bridge, if any. Should be called by Runtime::Init().
-// A null library filename signals that we do not want to load a native bridge.
-void SetupNativeBridge(const char* native_bridge_library_filename,
-                       const NativeBridgeRuntimeCallbacks* runtime_callbacks);
+// Open the native bridge, if any. Should be called by Runtime::Init(). A null library filename
+// signals that we do not want to load a native bridge.
+bool LoadNativeBridge(const char* native_bridge_library_filename,
+                      const NativeBridgeRuntimeCallbacks* runtime_callbacks);
+
+// Quick check whether a native bridge will be needed. This is based off of the instruction set
+// of the process.
+bool NeedsNativeBridge(const char* instruction_set);
+
+// Do the early initialization part of the native bridge, if necessary. This should be done under
+// high privileges.
+void PreInitializeNativeBridge(const char* app_data_dir, const char* instruction_set);
+
+// Initialize the native bridge, if any. Should be called by Runtime::DidForkFromZygote. The JNIEnv*
+// will be used to modify the app environment for the bridge.
+bool InitializeNativeBridge(JNIEnv* env, const char* instruction_set);
+
+// Unload the native bridge, if any. Should be called by Runtime::DidForkFromZygote.
+void UnloadNativeBridge();
+
+// Check whether a native bridge is available (opened or initialized). Requires a prior call to
+// LoadNativeBridge.
+bool NativeBridgeAvailable();
 
 // Check whether a native bridge is available (initialized). Requires a prior call to
-// SetupNativeBridge to make sense.
-bool NativeBridgeAvailable();
+// LoadNativeBridge & InitializeNativeBridge.
+bool NativeBridgeInitialized();
 
 // Load a shared library that is supported by the native bridge.
 void* NativeBridgeLoadLibrary(const char* libpath, int flag);
@@ -55,6 +76,9 @@
 
 // Native bridge interfaces to runtime.
 struct NativeBridgeCallbacks {
+  // Version number of the interface.
+  uint32_t version;
+
   // Initialize native bridge. Native bridge's internal implementation must ensure MT safety and
   // that the native bridge is initialized only once. Thus it is OK to call this interface for an
   // already initialized native bridge.
@@ -63,7 +87,8 @@
   //   runtime_cbs [IN] the pointer to NativeBridgeRuntimeCallbacks.
   // Returns:
   //   true iff initialization was successful.
-  bool (*initialize)(const NativeBridgeRuntimeCallbacks* runtime_cbs);
+  bool (*initialize)(const NativeBridgeRuntimeCallbacks* runtime_cbs, const char* private_dir,
+                     const char* instruction_set);
 
   // Load a shared library that is supported by the native bridge.
   //
@@ -92,6 +117,16 @@
   // Returns:
   //   TRUE if library is supported by native bridge, FALSE otherwise
   bool (*isSupported)(const char* libpath);
+
+  // Provide environment values required by the app running with native bridge according to the
+  // instruction set.
+  //
+  // Parameters:
+  //    instruction_set [IN] the instruction set of the app
+  // Returns:
+  //    NULL if not supported by native bridge.
+  //    Otherwise, return all environment values to be set after fork.
+  const struct NativeBridgeRuntimeValues* (*getAppEnv)(const char* instruction_set);
 };
 
 // Runtime interfaces to native bridge.
diff --git a/include/utils/Compat.h b/include/utils/Compat.h
index fb7748e..20a6920 100644
--- a/include/utils/Compat.h
+++ b/include/utils/Compat.h
@@ -19,11 +19,9 @@
 
 #include <unistd.h>
 
-/* Compatibility definitions for non-Linux (i.e., BSD-based) hosts. */
-#ifndef HAVE_OFF64_T
-#if _FILE_OFFSET_BITS < 64
-#error "_FILE_OFFSET_BITS < 64; large files are not supported on this platform"
-#endif /* _FILE_OFFSET_BITS < 64 */
+#if defined(__APPLE__)
+
+/* Mac OS has always had a 64-bit off_t, so it doesn't have off64_t. */
 
 typedef off_t off64_t;
 
@@ -31,13 +29,11 @@
     return lseek(fd, offset, whence);
 }
 
-#ifdef HAVE_PREAD
 static inline ssize_t pread64(int fd, void* buf, size_t nbytes, off64_t offset) {
     return pread(fd, buf, nbytes, offset);
 }
-#endif
 
-#endif /* !HAVE_OFF64_T */
+#endif /* __APPLE__ */
 
 #if HAVE_PRINTF_ZD
 #  define ZD "%zd"
diff --git a/init/util.c b/init/util.c
index 0f69e1c..e1a3ee3 100644
--- a/init/util.c
+++ b/init/util.c
@@ -329,9 +329,9 @@
     if (!s)
         return;
 
-    for (; *s; s++) {
+    while (*s) {
         s += strspn(s, accept);
-        if (*s) *s = '_';
+        if (*s) *s++ = '_';
     }
 }
 
diff --git a/libcutils/Android.mk b/libcutils/Android.mk
index 12a3bf9..469cdcf 100644
--- a/libcutils/Android.mk
+++ b/libcutils/Android.mk
@@ -140,8 +140,8 @@
         arch-x86/android_memset32.S \
 
 LOCAL_SRC_FILES_x86_64 += \
-        arch-x86_64/android_memset16_SSE2-atom.S \
-        arch-x86_64/android_memset32_SSE2-atom.S \
+        arch-x86_64/android_memset16.S \
+        arch-x86_64/android_memset32.S \
 
 LOCAL_CFLAGS_arm += -DHAVE_MEMSET16 -DHAVE_MEMSET32
 LOCAL_CFLAGS_arm64 += -DHAVE_MEMSET16 -DHAVE_MEMSET32
@@ -151,7 +151,7 @@
 
 LOCAL_C_INCLUDES := $(libcutils_c_includes)
 LOCAL_STATIC_LIBRARIES := liblog
-LOCAL_CFLAGS += $(targetSmpFlag) -Werror
+LOCAL_CFLAGS += $(targetSmpFlag) -Werror -std=gnu90
 LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
 include $(BUILD_STATIC_LIBRARY)
 
diff --git a/libcutils/arch-x86/android_memset16.S b/libcutils/arch-x86/android_memset16.S
old mode 100644
new mode 100755
index f8b79bd..cb2ff14
--- a/libcutils/arch-x86/android_memset16.S
+++ b/libcutils/arch-x86/android_memset16.S
@@ -13,13 +13,707 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-/*
- * Contributed by: Intel Corporation
- */
 
-# include "cache_wrapper.S"
-# undef __i686
-# define USE_AS_ANDROID
-# define sse2_memset16_atom android_memset16
-# include "sse2-memset16-atom.S"
+#include "cache.h"
 
+#ifndef MEMSET
+# define MEMSET		android_memset16
+#endif
+
+#ifndef L
+# define L(label)	.L##label
+#endif
+
+#ifndef ALIGN
+# define ALIGN(n)	.p2align n
+#endif
+
+#ifndef cfi_startproc
+# define cfi_startproc			.cfi_startproc
+#endif
+
+#ifndef cfi_endproc
+# define cfi_endproc			.cfi_endproc
+#endif
+
+#ifndef cfi_rel_offset
+# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
+#endif
+
+#ifndef cfi_restore
+# define cfi_restore(reg)		.cfi_restore reg
+#endif
+
+#ifndef cfi_adjust_cfa_offset
+# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
+#endif
+
+#ifndef ENTRY
+# define ENTRY(name)			\
+	.type name,  @function; 	\
+	.globl name;			\
+	.p2align 4;			\
+name:					\
+	cfi_startproc
+#endif
+
+#ifndef END
+# define END(name)			\
+	cfi_endproc;			\
+	.size name, .-name
+#endif
+
+#define CFI_PUSH(REG)						\
+  cfi_adjust_cfa_offset (4);					\
+  cfi_rel_offset (REG, 0)
+
+#define CFI_POP(REG)						\
+  cfi_adjust_cfa_offset (-4);					\
+  cfi_restore (REG)
+
+#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
+#define POP(REG)	popl REG; CFI_POP (REG)
+
+#ifdef USE_AS_BZERO16
+# define DEST		PARMS
+# define LEN		DEST+4
+# define SETRTNVAL
+#else
+# define DEST		PARMS
+# define CHR		DEST+4
+# define LEN		CHR+4
+# define SETRTNVAL	movl DEST(%esp), %eax
+#endif
+
+#if (defined SHARED || defined __PIC__)
+# define ENTRANCE	PUSH (%ebx);
+# define RETURN_END	POP (%ebx); ret
+# define RETURN		RETURN_END; CFI_PUSH (%ebx)
+# define PARMS		8		/* Preserve EBX.  */
+# define JMPTBL(I, B)	I - B
+
+/* Load an entry in a jump table into EBX and branch to it.  TABLE is a
+   jump table with relative offsets.   */
+# define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
+    /* We first load PC into EBX.  */				\
+    call	__x86.get_pc_thunk.bx;				\
+    /* Get the address of the jump table.  */			\
+    add		$(TABLE - .), %ebx;				\
+    /* Get the entry and convert the relative offset to the	\
+       absolute address.  */					\
+    add		(%ebx,%ecx,4), %ebx;				\
+    /* We loaded the jump table and adjusted EDX. Go.  */	\
+    jmp		*%ebx
+
+	.section	.gnu.linkonce.t.__x86.get_pc_thunk.bx,"ax",@progbits
+	.globl	__x86.get_pc_thunk.bx
+	.hidden	__x86.get_pc_thunk.bx
+	ALIGN (4)
+	.type	__x86.get_pc_thunk.bx,@function
+__x86.get_pc_thunk.bx:
+	movl	(%esp), %ebx
+	ret
+#else
+# define ENTRANCE
+# define RETURN_END	ret
+# define RETURN		RETURN_END
+# define PARMS		4
+# define JMPTBL(I, B)	I
+
+/* Branch to an entry in a jump table.  TABLE is a jump table with
+   absolute offsets.  */
+# define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
+    jmp		*TABLE(,%ecx,4)
+#endif
+
+	.section .text.sse2,"ax",@progbits
+	ALIGN (4)
+ENTRY (MEMSET)
+	ENTRANCE
+
+	movl	LEN(%esp), %ecx
+	shr	$1, %ecx
+#ifdef USE_AS_BZERO16
+	xor	%eax, %eax
+#else
+	movzwl	CHR(%esp), %eax
+	mov	%eax, %edx
+	shl	$16, %eax
+	or	%edx, %eax
+#endif
+	movl	DEST(%esp), %edx
+	cmp	$32, %ecx
+	jae	L(32wordsormore)
+
+L(write_less32words):
+	lea	(%edx, %ecx, 2), %edx
+	BRANCH_TO_JMPTBL_ENTRY (L(table_less32words))
+
+
+	.pushsection .rodata.sse2,"a",@progbits
+	ALIGN (2)
+L(table_less32words):
+	.int	JMPTBL (L(write_0words), L(table_less32words))
+	.int	JMPTBL (L(write_1words), L(table_less32words))
+	.int	JMPTBL (L(write_2words), L(table_less32words))
+	.int	JMPTBL (L(write_3words), L(table_less32words))
+	.int	JMPTBL (L(write_4words), L(table_less32words))
+	.int	JMPTBL (L(write_5words), L(table_less32words))
+	.int	JMPTBL (L(write_6words), L(table_less32words))
+	.int	JMPTBL (L(write_7words), L(table_less32words))
+	.int	JMPTBL (L(write_8words), L(table_less32words))
+	.int	JMPTBL (L(write_9words), L(table_less32words))
+	.int	JMPTBL (L(write_10words), L(table_less32words))
+	.int	JMPTBL (L(write_11words), L(table_less32words))
+	.int	JMPTBL (L(write_12words), L(table_less32words))
+	.int	JMPTBL (L(write_13words), L(table_less32words))
+	.int	JMPTBL (L(write_14words), L(table_less32words))
+	.int	JMPTBL (L(write_15words), L(table_less32words))
+	.int	JMPTBL (L(write_16words), L(table_less32words))
+	.int	JMPTBL (L(write_17words), L(table_less32words))
+	.int	JMPTBL (L(write_18words), L(table_less32words))
+	.int	JMPTBL (L(write_19words), L(table_less32words))
+	.int	JMPTBL (L(write_20words), L(table_less32words))
+	.int	JMPTBL (L(write_21words), L(table_less32words))
+	.int	JMPTBL (L(write_22words), L(table_less32words))
+	.int	JMPTBL (L(write_23words), L(table_less32words))
+	.int	JMPTBL (L(write_24words), L(table_less32words))
+	.int	JMPTBL (L(write_25words), L(table_less32words))
+	.int	JMPTBL (L(write_26words), L(table_less32words))
+	.int	JMPTBL (L(write_27words), L(table_less32words))
+	.int	JMPTBL (L(write_28words), L(table_less32words))
+	.int	JMPTBL (L(write_29words), L(table_less32words))
+	.int	JMPTBL (L(write_30words), L(table_less32words))
+	.int	JMPTBL (L(write_31words), L(table_less32words))
+	.popsection
+
+	ALIGN (4)
+L(write_28words):
+	movl	%eax, -56(%edx)
+	movl	%eax, -52(%edx)
+L(write_24words):
+	movl	%eax, -48(%edx)
+	movl	%eax, -44(%edx)
+L(write_20words):
+	movl	%eax, -40(%edx)
+	movl	%eax, -36(%edx)
+L(write_16words):
+	movl	%eax, -32(%edx)
+	movl	%eax, -28(%edx)
+L(write_12words):
+	movl	%eax, -24(%edx)
+	movl	%eax, -20(%edx)
+L(write_8words):
+	movl	%eax, -16(%edx)
+	movl	%eax, -12(%edx)
+L(write_4words):
+	movl	%eax, -8(%edx)
+	movl	%eax, -4(%edx)
+L(write_0words):
+	SETRTNVAL
+	RETURN
+
+	ALIGN (4)
+L(write_29words):
+	movl	%eax, -58(%edx)
+	movl	%eax, -54(%edx)
+L(write_25words):
+	movl	%eax, -50(%edx)
+	movl	%eax, -46(%edx)
+L(write_21words):
+	movl	%eax, -42(%edx)
+	movl	%eax, -38(%edx)
+L(write_17words):
+	movl	%eax, -34(%edx)
+	movl	%eax, -30(%edx)
+L(write_13words):
+	movl	%eax, -26(%edx)
+	movl	%eax, -22(%edx)
+L(write_9words):
+	movl	%eax, -18(%edx)
+	movl	%eax, -14(%edx)
+L(write_5words):
+	movl	%eax, -10(%edx)
+	movl	%eax, -6(%edx)
+L(write_1words):
+	mov	%ax, -2(%edx)
+	SETRTNVAL
+	RETURN
+
+	ALIGN (4)
+L(write_30words):
+	movl	%eax, -60(%edx)
+	movl	%eax, -56(%edx)
+L(write_26words):
+	movl	%eax, -52(%edx)
+	movl	%eax, -48(%edx)
+L(write_22words):
+	movl	%eax, -44(%edx)
+	movl	%eax, -40(%edx)
+L(write_18words):
+	movl	%eax, -36(%edx)
+	movl	%eax, -32(%edx)
+L(write_14words):
+	movl	%eax, -28(%edx)
+	movl	%eax, -24(%edx)
+L(write_10words):
+	movl	%eax, -20(%edx)
+	movl	%eax, -16(%edx)
+L(write_6words):
+	movl	%eax, -12(%edx)
+	movl	%eax, -8(%edx)
+L(write_2words):
+	movl	%eax, -4(%edx)
+	SETRTNVAL
+	RETURN
+
+	ALIGN (4)
+L(write_31words):
+	movl	%eax, -62(%edx)
+	movl	%eax, -58(%edx)
+L(write_27words):
+	movl	%eax, -54(%edx)
+	movl	%eax, -50(%edx)
+L(write_23words):
+	movl	%eax, -46(%edx)
+	movl	%eax, -42(%edx)
+L(write_19words):
+	movl	%eax, -38(%edx)
+	movl	%eax, -34(%edx)
+L(write_15words):
+	movl	%eax, -30(%edx)
+	movl	%eax, -26(%edx)
+L(write_11words):
+	movl	%eax, -22(%edx)
+	movl	%eax, -18(%edx)
+L(write_7words):
+	movl	%eax, -14(%edx)
+	movl	%eax, -10(%edx)
+L(write_3words):
+	movl	%eax, -6(%edx)
+	movw	%ax, -2(%edx)
+	SETRTNVAL
+	RETURN
+
+	ALIGN (4)
+
+L(32wordsormore):
+	shl	$1, %ecx
+	test	$0x01, %edx
+	jz	L(aligned2bytes)
+	mov	%eax, (%edx)
+	mov	%eax, -4(%edx, %ecx)
+	sub	$2, %ecx
+	add	$1, %edx
+	rol	$8, %eax
+L(aligned2bytes):
+#ifdef USE_AS_BZERO16
+	pxor	%xmm0, %xmm0
+#else
+	movd	%eax, %xmm0
+	pshufd	$0, %xmm0, %xmm0
+#endif
+	testl	$0xf, %edx
+	jz	L(aligned_16)
+/* ECX > 32 and EDX is not 16 byte aligned.  */
+L(not_aligned_16):
+	movdqu	%xmm0, (%edx)
+	movl	%edx, %eax
+	and	$-16, %edx
+	add	$16, %edx
+	sub	%edx, %eax
+	add	%eax, %ecx
+	movd	%xmm0, %eax
+
+	ALIGN (4)
+L(aligned_16):
+	cmp	$128, %ecx
+	jae	L(128bytesormore)
+
+L(aligned_16_less128bytes):
+	add	%ecx, %edx
+	shr	$1, %ecx
+	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
+
+	ALIGN (4)
+L(128bytesormore):
+#ifdef SHARED_CACHE_SIZE
+	PUSH (%ebx)
+	mov	$SHARED_CACHE_SIZE, %ebx
+#else
+# if (defined SHARED || defined __PIC__)
+	call	__x86.get_pc_thunk.bx
+	add	$_GLOBAL_OFFSET_TABLE_, %ebx
+	mov	__x86_shared_cache_size@GOTOFF(%ebx), %ebx
+# else
+	PUSH (%ebx)
+	mov	__x86_shared_cache_size, %ebx
+# endif
+#endif
+	cmp	%ebx, %ecx
+	jae	L(128bytesormore_nt_start)
+
+
+#ifdef DATA_CACHE_SIZE
+	POP (%ebx)
+# define RESTORE_EBX_STATE CFI_PUSH (%ebx)
+	cmp	$DATA_CACHE_SIZE, %ecx
+#else
+# if (defined SHARED || defined __PIC__)
+#  define RESTORE_EBX_STATE
+	call	__x86.get_pc_thunk.bx
+	add	$_GLOBAL_OFFSET_TABLE_, %ebx
+	cmp	__x86_data_cache_size@GOTOFF(%ebx), %ecx
+# else
+	POP (%ebx)
+#  define RESTORE_EBX_STATE CFI_PUSH (%ebx)
+	cmp	__x86_data_cache_size, %ecx
+# endif
+#endif
+
+	jae	L(128bytes_L2_normal)
+	subl	$128, %ecx
+L(128bytesormore_normal):
+	sub	$128, %ecx
+	movdqa	%xmm0, (%edx)
+	movdqa	%xmm0, 0x10(%edx)
+	movdqa	%xmm0, 0x20(%edx)
+	movdqa	%xmm0, 0x30(%edx)
+	movdqa	%xmm0, 0x40(%edx)
+	movdqa	%xmm0, 0x50(%edx)
+	movdqa	%xmm0, 0x60(%edx)
+	movdqa	%xmm0, 0x70(%edx)
+	lea	128(%edx), %edx
+	jb	L(128bytesless_normal)
+
+
+	sub	$128, %ecx
+	movdqa	%xmm0, (%edx)
+	movdqa	%xmm0, 0x10(%edx)
+	movdqa	%xmm0, 0x20(%edx)
+	movdqa	%xmm0, 0x30(%edx)
+	movdqa	%xmm0, 0x40(%edx)
+	movdqa	%xmm0, 0x50(%edx)
+	movdqa	%xmm0, 0x60(%edx)
+	movdqa	%xmm0, 0x70(%edx)
+	lea	128(%edx), %edx
+	jae	L(128bytesormore_normal)
+
+L(128bytesless_normal):
+	lea	128(%ecx), %ecx
+	add	%ecx, %edx
+	shr	$1, %ecx
+	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
+
+	ALIGN (4)
+L(128bytes_L2_normal):
+	prefetcht0	0x380(%edx)
+	prefetcht0	0x3c0(%edx)
+	sub	$128, %ecx
+	movdqa	%xmm0, (%edx)
+	movaps	%xmm0, 0x10(%edx)
+	movaps	%xmm0, 0x20(%edx)
+	movaps	%xmm0, 0x30(%edx)
+	movaps	%xmm0, 0x40(%edx)
+	movaps	%xmm0, 0x50(%edx)
+	movaps	%xmm0, 0x60(%edx)
+	movaps	%xmm0, 0x70(%edx)
+	add	$128, %edx
+	cmp	$128, %ecx
+	jae	L(128bytes_L2_normal)
+
+L(128bytesless_L2_normal):
+	add	%ecx, %edx
+	shr	$1, %ecx
+	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
+
+	RESTORE_EBX_STATE
+L(128bytesormore_nt_start):
+	sub	%ebx, %ecx
+	mov	%ebx, %eax
+	and	$0x7f, %eax
+	add	%eax, %ecx
+	movd	%xmm0, %eax
+	ALIGN (4)
+L(128bytesormore_shared_cache_loop):
+	prefetcht0	0x3c0(%edx)
+	prefetcht0	0x380(%edx)
+	sub	$0x80, %ebx
+	movdqa	%xmm0, (%edx)
+	movdqa	%xmm0, 0x10(%edx)
+	movdqa	%xmm0, 0x20(%edx)
+	movdqa	%xmm0, 0x30(%edx)
+	movdqa	%xmm0, 0x40(%edx)
+	movdqa	%xmm0, 0x50(%edx)
+	movdqa	%xmm0, 0x60(%edx)
+	movdqa	%xmm0, 0x70(%edx)
+	add	$0x80, %edx
+	cmp	$0x80, %ebx
+	jae	L(128bytesormore_shared_cache_loop)
+	cmp	$0x80, %ecx
+	jb	L(shared_cache_loop_end)
+	ALIGN (4)
+L(128bytesormore_nt):
+	sub	$0x80, %ecx
+	movntdq	%xmm0, (%edx)
+	movntdq	%xmm0, 0x10(%edx)
+	movntdq	%xmm0, 0x20(%edx)
+	movntdq	%xmm0, 0x30(%edx)
+	movntdq	%xmm0, 0x40(%edx)
+	movntdq	%xmm0, 0x50(%edx)
+	movntdq	%xmm0, 0x60(%edx)
+	movntdq	%xmm0, 0x70(%edx)
+	add	$0x80, %edx
+	cmp	$0x80, %ecx
+	jae	L(128bytesormore_nt)
+	sfence
+L(shared_cache_loop_end):
+#if defined DATA_CACHE_SIZE || !(defined SHARED || defined __PIC__)
+	POP (%ebx)
+#endif
+	add	%ecx, %edx
+	shr	$1, %ecx
+	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
+
+
+	.pushsection .rodata.sse2,"a",@progbits
+	ALIGN (2)
+L(table_16_128bytes):
+	.int	JMPTBL (L(aligned_16_0bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_2bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_4bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_6bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_8bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_10bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_12bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_14bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_16bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_18bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_20bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_22bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_24bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_26bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_28bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_30bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_32bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_34bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_36bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_38bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_40bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_42bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_44bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_46bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_48bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_50bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_52bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_54bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_56bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_58bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_60bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_62bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_64bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_66bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_68bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_70bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_72bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_74bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_76bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_78bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_80bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_82bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_84bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_86bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_88bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_90bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_92bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_94bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_96bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_98bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_100bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_102bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_104bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_106bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_108bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_110bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_112bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_114bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_116bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_118bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_120bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_122bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_124bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_126bytes), L(table_16_128bytes))
+	.popsection
+
+
+	ALIGN (4)
+L(aligned_16_112bytes):
+	movdqa	%xmm0, -112(%edx)
+L(aligned_16_96bytes):
+	movdqa	%xmm0, -96(%edx)
+L(aligned_16_80bytes):
+	movdqa	%xmm0, -80(%edx)
+L(aligned_16_64bytes):
+	movdqa	%xmm0, -64(%edx)
+L(aligned_16_48bytes):
+	movdqa	%xmm0, -48(%edx)
+L(aligned_16_32bytes):
+	movdqa	%xmm0, -32(%edx)
+L(aligned_16_16bytes):
+	movdqa	%xmm0, -16(%edx)
+L(aligned_16_0bytes):
+	SETRTNVAL
+	RETURN
+
+
+	ALIGN (4)
+L(aligned_16_114bytes):
+	movdqa	%xmm0, -114(%edx)
+L(aligned_16_98bytes):
+	movdqa	%xmm0, -98(%edx)
+L(aligned_16_82bytes):
+	movdqa	%xmm0, -82(%edx)
+L(aligned_16_66bytes):
+	movdqa	%xmm0, -66(%edx)
+L(aligned_16_50bytes):
+	movdqa	%xmm0, -50(%edx)
+L(aligned_16_34bytes):
+	movdqa	%xmm0, -34(%edx)
+L(aligned_16_18bytes):
+	movdqa	%xmm0, -18(%edx)
+L(aligned_16_2bytes):
+	movw	%ax, -2(%edx)
+	SETRTNVAL
+	RETURN
+
+	ALIGN (4)
+L(aligned_16_116bytes):
+	movdqa	%xmm0, -116(%edx)
+L(aligned_16_100bytes):
+	movdqa	%xmm0, -100(%edx)
+L(aligned_16_84bytes):
+	movdqa	%xmm0, -84(%edx)
+L(aligned_16_68bytes):
+	movdqa	%xmm0, -68(%edx)
+L(aligned_16_52bytes):
+	movdqa	%xmm0, -52(%edx)
+L(aligned_16_36bytes):
+	movdqa	%xmm0, -36(%edx)
+L(aligned_16_20bytes):
+	movdqa	%xmm0, -20(%edx)
+L(aligned_16_4bytes):
+	movl	%eax, -4(%edx)
+	SETRTNVAL
+	RETURN
+
+
+	ALIGN (4)
+L(aligned_16_118bytes):
+	movdqa	%xmm0, -118(%edx)
+L(aligned_16_102bytes):
+	movdqa	%xmm0, -102(%edx)
+L(aligned_16_86bytes):
+	movdqa	%xmm0, -86(%edx)
+L(aligned_16_70bytes):
+	movdqa	%xmm0, -70(%edx)
+L(aligned_16_54bytes):
+	movdqa	%xmm0, -54(%edx)
+L(aligned_16_38bytes):
+	movdqa	%xmm0, -38(%edx)
+L(aligned_16_22bytes):
+	movdqa	%xmm0, -22(%edx)
+L(aligned_16_6bytes):
+	movl	%eax, -6(%edx)
+	movw	%ax, -2(%edx)
+	SETRTNVAL
+	RETURN
+
+
+	ALIGN (4)
+L(aligned_16_120bytes):
+	movdqa	%xmm0, -120(%edx)
+L(aligned_16_104bytes):
+	movdqa	%xmm0, -104(%edx)
+L(aligned_16_88bytes):
+	movdqa	%xmm0, -88(%edx)
+L(aligned_16_72bytes):
+	movdqa	%xmm0, -72(%edx)
+L(aligned_16_56bytes):
+	movdqa	%xmm0, -56(%edx)
+L(aligned_16_40bytes):
+	movdqa	%xmm0, -40(%edx)
+L(aligned_16_24bytes):
+	movdqa	%xmm0, -24(%edx)
+L(aligned_16_8bytes):
+	movq	%xmm0, -8(%edx)
+	SETRTNVAL
+	RETURN
+
+
+	ALIGN (4)
+L(aligned_16_122bytes):
+	movdqa	%xmm0, -122(%edx)
+L(aligned_16_106bytes):
+	movdqa	%xmm0, -106(%edx)
+L(aligned_16_90bytes):
+	movdqa	%xmm0, -90(%edx)
+L(aligned_16_74bytes):
+	movdqa	%xmm0, -74(%edx)
+L(aligned_16_58bytes):
+	movdqa	%xmm0, -58(%edx)
+L(aligned_16_42bytes):
+	movdqa	%xmm0, -42(%edx)
+L(aligned_16_26bytes):
+	movdqa	%xmm0, -26(%edx)
+L(aligned_16_10bytes):
+	movq	%xmm0, -10(%edx)
+	movw	%ax, -2(%edx)
+	SETRTNVAL
+	RETURN
+
+
+	ALIGN (4)
+L(aligned_16_124bytes):
+	movdqa	%xmm0, -124(%edx)
+L(aligned_16_108bytes):
+	movdqa	%xmm0, -108(%edx)
+L(aligned_16_92bytes):
+	movdqa	%xmm0, -92(%edx)
+L(aligned_16_76bytes):
+	movdqa	%xmm0, -76(%edx)
+L(aligned_16_60bytes):
+	movdqa	%xmm0, -60(%edx)
+L(aligned_16_44bytes):
+	movdqa	%xmm0, -44(%edx)
+L(aligned_16_28bytes):
+	movdqa	%xmm0, -28(%edx)
+L(aligned_16_12bytes):
+	movq	%xmm0, -12(%edx)
+	movl	%eax, -4(%edx)
+	SETRTNVAL
+	RETURN
+
+
+	ALIGN (4)
+L(aligned_16_126bytes):
+	movdqa	%xmm0, -126(%edx)
+L(aligned_16_110bytes):
+	movdqa	%xmm0, -110(%edx)
+L(aligned_16_94bytes):
+	movdqa	%xmm0, -94(%edx)
+L(aligned_16_78bytes):
+	movdqa	%xmm0, -78(%edx)
+L(aligned_16_62bytes):
+	movdqa	%xmm0, -62(%edx)
+L(aligned_16_46bytes):
+	movdqa	%xmm0, -46(%edx)
+L(aligned_16_30bytes):
+	movdqa	%xmm0, -30(%edx)
+L(aligned_16_14bytes):
+	movq	%xmm0, -14(%edx)
+	movl	%eax, -6(%edx)
+	movw	%ax, -2(%edx)
+	SETRTNVAL
+	RETURN
+
+END (MEMSET)
diff --git a/libcutils/arch-x86/android_memset32.S b/libcutils/arch-x86/android_memset32.S
old mode 100644
new mode 100755
index 6249fce..f4326dc
--- a/libcutils/arch-x86/android_memset32.S
+++ b/libcutils/arch-x86/android_memset32.S
@@ -13,13 +13,498 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-/*
- * Contributed by: Intel Corporation
- */
 
-# include "cache_wrapper.S"
-# undef __i686
-# define USE_AS_ANDROID
-# define sse2_memset32_atom android_memset32
-# include "sse2-memset32-atom.S"
+#include "cache.h"
 
+#ifndef MEMSET	/* name of the exported routine; only defined here if not already set */
+# define MEMSET 	android_memset32
+#endif
+
+#ifndef L	/* L(x) builds a .L-prefixed label name */
+# define L(label)	.L##label
+#endif
+
+#ifndef ALIGN	/* ALIGN(n) is .p2align n, i.e. alignment to 2^n bytes */
+# define ALIGN(n)	.p2align n
+#endif
+
+#ifndef cfi_startproc	/* the cfi_* wrappers below map 1:1 onto the assembler .cfi_* directives */
+# define cfi_startproc			.cfi_startproc
+#endif
+
+#ifndef cfi_endproc
+# define cfi_endproc			.cfi_endproc
+#endif
+
+#ifndef cfi_rel_offset
+# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
+#endif
+
+#ifndef cfi_restore
+# define cfi_restore(reg)		.cfi_restore reg
+#endif
+
+#ifndef cfi_adjust_cfa_offset
+# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
+#endif
+
+#ifndef ENTRY	/* ENTRY(name): global function symbol, aligned with .p2align 4, opens a CFI region */
+# define ENTRY(name)			\
+	.type name,  @function; 	\
+	.globl name;			\
+	.p2align 4;			\
+name:					\
+	cfi_startproc
+#endif
+
+#ifndef END	/* END(name): closes the CFI region and records the symbol size */
+# define END(name)			\
+	cfi_endproc;			\
+	.size name, .-name
+#endif
+
+#define CFI_PUSH(REG)						\
+  cfi_adjust_cfa_offset (4);					\
+  cfi_rel_offset (REG, 0)
+
+#define CFI_POP(REG)						\
+  cfi_adjust_cfa_offset (-4);					\
+  cfi_restore (REG)
+
+#define PUSH(REG)	pushl REG; CFI_PUSH (REG)	/* push REG and record it in the unwind info */
+#define POP(REG)	popl REG; CFI_POP (REG)	/* pop REG and undo the unwind record */
+
+#ifdef USE_AS_BZERO32	/* zero-fill variant: no value argument, SETRTNVAL expands to nothing */
+# define DEST		PARMS
+# define LEN		DEST+4
+# define SETRTNVAL
+#else
+# define DEST		PARMS	/* stack offset of the 1st argument (destination pointer) */
+# define DWDS		DEST+4	/* stack offset of the 2nd argument (32-bit fill value) */
+# define LEN		DWDS+4	/* stack offset of the 3rd argument (length; divided by 4 at entry) */
+# define SETRTNVAL	movl DEST(%esp), %eax	/* reload the destination pointer into EAX before returning */
+#endif
+
+#if (defined SHARED || defined __PIC__)	/* position-independent build: jump tables hold relative offsets */
+# define ENTRANCE	PUSH (%ebx);	/* EBX is clobbered by the table branch below, so save it on entry */
+# define RETURN_END	POP (%ebx); ret
+# define RETURN		RETURN_END; CFI_PUSH (%ebx)	/* re-assert the pushed-EBX unwind state for code placed after the ret */
+# define PARMS		8		/* Preserve EBX: return address + saved EBX precede the arguments.  */
+# define JMPTBL(I, B)	I - B	/* table entry = target minus table base */
+
+/* Load an entry in a jump table into EBX and branch to it.  TABLE is a
+   jump table with relative offsets, indexed by ECX (4-byte entries).  */
+# define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
+    /* We first load PC into EBX.  */				\
+    call	__x86.get_pc_thunk.bx;				\
+    /* Get the address of the jump table.  */			\
+    add		$(TABLE - .), %ebx;				\
+    /* Get the entry and convert the relative offset to the	\
+       absolute address.  */					\
+    add		(%ebx,%ecx,4), %ebx;				\
+    /* We loaded the jump table and adjusted EDX. Go.  */	\
+    jmp		*%ebx
+
+	.section	.gnu.linkonce.t.__x86.get_pc_thunk.bx,"ax",@progbits
+	.globl	__x86.get_pc_thunk.bx
+	.hidden	__x86.get_pc_thunk.bx
+	ALIGN (4)
+	.type	__x86.get_pc_thunk.bx,@function
+__x86.get_pc_thunk.bx:
+	movl	(%esp), %ebx	/* EBX = return address of the call, i.e. the address following the call site */
+	ret
+#else
+# define ENTRANCE
+# define RETURN_END	ret
+# define RETURN		RETURN_END
+# define PARMS		4	/* only the return address precedes the arguments */
+# define JMPTBL(I, B)	I	/* table entry = absolute target address */
+
+/* Branch to an entry in a jump table.  TABLE is a jump table with
+   absolute offsets, indexed by ECX (4-byte entries).  */
+# define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
+    jmp		*TABLE(,%ecx,4)
+#endif
+
+	.section .text.sse2,"ax",@progbits
+	ALIGN (4)
+ENTRY (MEMSET)
+	ENTRANCE
+
+	movl	LEN(%esp), %ecx
+	shr     $2, %ecx	/* ECX = LEN / 4, used below as a count of 4-byte stores */
+#ifdef USE_AS_BZERO32
+	xor	%eax, %eax	/* fill value is zero in the bzero variant */
+#else
+	mov	DWDS(%esp), %eax	/* EAX = 32-bit fill value */
+	mov	%eax, %edx	/* NOTE(review): EDX is overwritten by the DEST load below before it is read, so this copy looks redundant */
+#endif
+	movl	DEST(%esp), %edx	/* EDX = destination pointer */
+	cmp	$16, %ecx
+	jae	L(16dbwordsormore)	/* 16 or more dwords: take the SSE2 path */
+
+L(write_less16dbwords):
+	lea	(%edx, %ecx, 4), %edx	/* EDX = DEST + 4 * ECX, one past the last dword to store */
+	BRANCH_TO_JMPTBL_ENTRY (L(table_less16dbwords))
+
+	.pushsection .rodata.sse2,"a",@progbits	/* jump table for 0..15 dwords, indexed by the dword count in ECX */
+	ALIGN (2)
+L(table_less16dbwords):
+	.int	JMPTBL (L(write_0dbwords), L(table_less16dbwords))
+	.int	JMPTBL (L(write_1dbwords), L(table_less16dbwords))
+	.int	JMPTBL (L(write_2dbwords), L(table_less16dbwords))
+	.int	JMPTBL (L(write_3dbwords), L(table_less16dbwords))
+	.int	JMPTBL (L(write_4dbwords), L(table_less16dbwords))
+	.int	JMPTBL (L(write_5dbwords), L(table_less16dbwords))
+	.int	JMPTBL (L(write_6dbwords), L(table_less16dbwords))
+	.int	JMPTBL (L(write_7dbwords), L(table_less16dbwords))
+	.int	JMPTBL (L(write_8dbwords), L(table_less16dbwords))
+	.int	JMPTBL (L(write_9dbwords), L(table_less16dbwords))
+	.int	JMPTBL (L(write_10dbwords), L(table_less16dbwords))
+	.int	JMPTBL (L(write_11dbwords), L(table_less16dbwords))
+	.int	JMPTBL (L(write_12dbwords), L(table_less16dbwords))
+	.int	JMPTBL (L(write_13dbwords), L(table_less16dbwords))
+	.int	JMPTBL (L(write_14dbwords), L(table_less16dbwords))
+	.int	JMPTBL (L(write_15dbwords), L(table_less16dbwords))
+	.popsection
+
+	ALIGN (4)
+L(write_15dbwords):	/* entry N falls through N stores, filling the N dwords that end at EDX */
+	movl	%eax, -60(%edx)
+L(write_14dbwords):
+	movl	%eax, -56(%edx)
+L(write_13dbwords):
+	movl	%eax, -52(%edx)
+L(write_12dbwords):
+	movl	%eax, -48(%edx)
+L(write_11dbwords):
+	movl	%eax, -44(%edx)
+L(write_10dbwords):
+	movl	%eax, -40(%edx)
+L(write_9dbwords):
+	movl	%eax, -36(%edx)
+L(write_8dbwords):
+	movl	%eax, -32(%edx)
+L(write_7dbwords):
+	movl	%eax, -28(%edx)
+L(write_6dbwords):
+	movl	%eax, -24(%edx)
+L(write_5dbwords):
+	movl	%eax, -20(%edx)
+L(write_4dbwords):
+	movl	%eax, -16(%edx)
+L(write_3dbwords):
+	movl	%eax, -12(%edx)
+L(write_2dbwords):
+	movl	%eax, -8(%edx)
+L(write_1dbwords):
+	movl	%eax, -4(%edx)
+L(write_0dbwords):	/* nothing left to store */
+	SETRTNVAL
+	RETURN
+
+	ALIGN (4)
+L(16dbwordsormore):
+	test	$3, %edx	/* is the destination already 4-byte aligned? */
+	jz	L(aligned4bytes)
+	mov	%eax, (%edx)	/* unaligned store of the first dword ... */
+	mov	%eax, -4(%edx, %ecx, 4)	/* ... and of the last dword of the region */
+	sub	$1, %ecx	/* one dword fewer is left for the aligned stores */
+	rol	$24, %eax	/* rotate the pattern by one byte (rol 24 == ror 8) to match EDX advancing one byte */
+	add	$1, %edx
+	test	$3, %edx
+	jz	L(aligned4bytes)
+	ror	$8, %eax	/* second byte step: rotate the pattern again */
+	add	$1, %edx
+	test	$3, %edx
+	jz	L(aligned4bytes)
+	ror	$8, %eax	/* third and last possible byte step */
+	add	$1, %edx
+L(aligned4bytes):
+	shl	$2, %ecx	/* ECX back from a dword count to a byte count */
+
+#ifdef USE_AS_BZERO32
+	pxor	%xmm0, %xmm0
+#else
+	movd	%eax, %xmm0
+	pshufd	$0, %xmm0, %xmm0	/* replicate the low dword into all four lanes of XMM0 */
+#endif
+	testl	$0xf, %edx	/* is EDX 16-byte aligned? */
+	jz	L(aligned_16)
+/* ECX > 32 and EDX is not 16 byte aligned.  */
+L(not_aligned_16):
+	movdqu	%xmm0, (%edx)	/* unaligned 16-byte store covering the bytes up to the next boundary */
+	movl	%edx, %eax	/* EAX is scratch here: remember the old EDX */
+	and	$-16, %edx
+	add	$16, %edx	/* EDX = next 16-byte boundary above the old EDX */
+	sub	%edx, %eax	/* EAX = old EDX - new EDX (negative number of bytes skipped) */
+	add	%eax, %ecx	/* shrink the remaining byte count accordingly */
+	movd	%xmm0, %eax	/* restore the fill pattern into EAX from the low dword of XMM0 */
+	ALIGN (4)
+L(aligned_16):
+	cmp	$128, %ecx	/* ECX = remaining bytes */
+	jae	L(128bytesormore)
+
+L(aligned_16_less128bytes):
+	add	%ecx, %edx	/* EDX = one past the end of the region */
+	shr	$2, %ecx	/* remaining bytes / 4 = index into the 32-entry tail table */
+	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
+
+	ALIGN (4)
+L(128bytesormore):
+#ifdef SHARED_CACHE_SIZE	/* compile-time constant shared-cache size */
+	PUSH (%ebx)
+	mov	$SHARED_CACHE_SIZE, %ebx
+#else
+# if (defined SHARED || defined __PIC__)	/* no PUSH here: ENTRANCE already saved EBX in PIC builds */
+	call	__x86.get_pc_thunk.bx
+	add	$_GLOBAL_OFFSET_TABLE_, %ebx
+	mov	__x86_shared_cache_size@GOTOFF(%ebx), %ebx	/* EBX = run-time shared-cache size, addressed relative to the GOT */
+# else
+	PUSH (%ebx)
+	mov	__x86_shared_cache_size, %ebx
+# endif
+#endif
+	cmp	%ebx, %ecx
+	jae	L(128bytesormore_nt_start)	/* length >= shared-cache size: go to the path that ends with non-temporal stores */
+
+#ifdef DATA_CACHE_SIZE
+	POP (%ebx)
+# define RESTORE_EBX_STATE CFI_PUSH (%ebx)	/* expanded before L(128bytesormore_nt_start), which is reached with EBX still pushed */
+	cmp	$DATA_CACHE_SIZE, %ecx
+#else
+# if (defined SHARED || defined __PIC__)
+#  define RESTORE_EBX_STATE
+	call	__x86.get_pc_thunk.bx
+	add	$_GLOBAL_OFFSET_TABLE_, %ebx
+	cmp	__x86_data_cache_size@GOTOFF(%ebx), %ecx
+# else
+	POP (%ebx)
+#  define RESTORE_EBX_STATE CFI_PUSH (%ebx)
+	cmp	__x86_data_cache_size, %ecx
+# endif
+#endif
+
+	jae	L(128bytes_L2_normal)	/* flags come from the data-cache-size compare just above */
+	subl	$128, %ecx	/* bias ECX by -128 so the borrow of the next sub signals the last full block */
+L(128bytesormore_normal):	/* loop body is unrolled twice, 128 bytes per half */
+	sub	$128, %ecx
+	movdqa	%xmm0, (%edx)
+	movdqa	%xmm0, 0x10(%edx)
+	movdqa	%xmm0, 0x20(%edx)
+	movdqa	%xmm0, 0x30(%edx)
+	movdqa	%xmm0, 0x40(%edx)
+	movdqa	%xmm0, 0x50(%edx)
+	movdqa	%xmm0, 0x60(%edx)
+	movdqa	%xmm0, 0x70(%edx)
+	lea	128(%edx), %edx	/* lea (not add) leaves the flags of the sub above intact for the jb */
+	jb	L(128bytesless_normal)
+
+
+	sub	$128, %ecx
+	movdqa	%xmm0, (%edx)
+	movdqa	%xmm0, 0x10(%edx)
+	movdqa	%xmm0, 0x20(%edx)
+	movdqa	%xmm0, 0x30(%edx)
+	movdqa	%xmm0, 0x40(%edx)
+	movdqa	%xmm0, 0x50(%edx)
+	movdqa	%xmm0, 0x60(%edx)
+	movdqa	%xmm0, 0x70(%edx)
+	lea	128(%edx), %edx	/* again lea, so the jae below tests the sub */
+	jae	L(128bytesormore_normal)
+
+L(128bytesless_normal):
+	lea	128(%ecx), %ecx	/* undo the -128 bias: ECX = bytes still to write (< 128) */
+	add	%ecx, %edx	/* EDX = one past the end of the region */
+	shr	$2, %ecx	/* byte count -> tail-table index */
+	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
+
+	ALIGN (4)
+L(128bytes_L2_normal):
+	prefetcht0	0x380(%edx)	/* prefetch 0x380 and 0x3c0 bytes ahead of the current store position */
+	prefetcht0	0x3c0(%edx)
+	sub	$128, %ecx
+	movdqa	%xmm0, (%edx)
+	movaps	%xmm0, 0x10(%edx)
+	movaps	%xmm0, 0x20(%edx)
+	movaps	%xmm0, 0x30(%edx)
+	movaps	%xmm0, 0x40(%edx)
+	movaps	%xmm0, 0x50(%edx)
+	movaps	%xmm0, 0x60(%edx)
+	movaps	%xmm0, 0x70(%edx)
+	add	$128, %edx
+	cmp	$128, %ecx	/* loop while at least one more full 128-byte block remains */
+	jae	L(128bytes_L2_normal)
+
+L(128bytesless_L2_normal):
+	add	%ecx, %edx	/* EDX = one past the end of the region */
+	shr	$2, %ecx	/* byte count -> tail-table index */
+	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
+
+	RESTORE_EBX_STATE	/* CFI bookkeeping only (may expand to nothing); emits no instructions */
+L(128bytesormore_nt_start):	/* reached with EBX = shared-cache size and ECX >= EBX */
+	sub	%ebx, %ecx	/* ECX = bytes beyond the first EBX bytes */
+	mov	%ebx, %eax	/* EAX is scratch until the movd below */
+	and	$0x7f, %eax
+	add	%eax, %ecx	/* add back EBX mod 128, which the 128-byte loop below does not write */
+	movd	%xmm0, %eax	/* restore the fill pattern into EAX */
+	ALIGN (4)
+L(128bytesormore_shared_cache_loop):	/* regular stores, 128 bytes per pass, while EBX >= 128 */
+	prefetcht0	0x3c0(%edx)
+	prefetcht0	0x380(%edx)
+	sub	$0x80, %ebx
+	movdqa	%xmm0, (%edx)
+	movdqa	%xmm0, 0x10(%edx)
+	movdqa	%xmm0, 0x20(%edx)
+	movdqa	%xmm0, 0x30(%edx)
+	movdqa	%xmm0, 0x40(%edx)
+	movdqa	%xmm0, 0x50(%edx)
+	movdqa	%xmm0, 0x60(%edx)
+	movdqa	%xmm0, 0x70(%edx)
+	add	$0x80, %edx
+	cmp	$0x80, %ebx
+	jae	L(128bytesormore_shared_cache_loop)
+	cmp	$0x80, %ecx
+	jb	L(shared_cache_loop_end)	/* fewer than 128 bytes left: skip the non-temporal loop */
+
+	ALIGN (4)
+L(128bytesormore_nt):	/* non-temporal stores, 128 bytes per pass, while ECX >= 128 */
+	sub	$0x80, %ecx
+	movntdq	%xmm0, (%edx)
+	movntdq	%xmm0, 0x10(%edx)
+	movntdq	%xmm0, 0x20(%edx)
+	movntdq	%xmm0, 0x30(%edx)
+	movntdq	%xmm0, 0x40(%edx)
+	movntdq	%xmm0, 0x50(%edx)
+	movntdq	%xmm0, 0x60(%edx)
+	movntdq	%xmm0, 0x70(%edx)
+	add	$0x80, %edx
+	cmp	$0x80, %ecx
+	jae	L(128bytesormore_nt)
+	sfence	/* store fence after the movntdq sequence */
+L(shared_cache_loop_end):
+#if defined DATA_CACHE_SIZE || !(defined SHARED || defined __PIC__)	/* the configurations in which the cached path above also POPs EBX */
+	POP (%ebx)
+#endif
+	add	%ecx, %edx	/* EDX = one past the end of the region */
+	shr	$2, %ecx	/* byte count -> tail-table index */
+	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
+
+	.pushsection .rodata.sse2,"a",@progbits	/* tail jump table: entry i handles 4 * i remaining bytes (0..124) */
+	ALIGN (2)
+L(table_16_128bytes):
+	.int	JMPTBL (L(aligned_16_0bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_4bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_8bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_12bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_16bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_20bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_24bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_28bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_32bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_36bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_40bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_44bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_48bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_52bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_56bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_60bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_64bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_68bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_72bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_76bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_80bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_84bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_88bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_92bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_96bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_100bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_104bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_108bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_112bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_116bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_120bytes), L(table_16_128bytes))
+	.int	JMPTBL (L(aligned_16_124bytes), L(table_16_128bytes))
+	.popsection
+
+	ALIGN (4)
+L(aligned_16_112bytes):	/* chain for remainders that are multiples of 16; EDX is one past the end, so offsets are negative */
+	movdqa	%xmm0, -112(%edx)
+L(aligned_16_96bytes):
+	movdqa	%xmm0, -96(%edx)
+L(aligned_16_80bytes):
+	movdqa	%xmm0, -80(%edx)
+L(aligned_16_64bytes):
+	movdqa	%xmm0, -64(%edx)
+L(aligned_16_48bytes):
+	movdqa	%xmm0, -48(%edx)
+L(aligned_16_32bytes):
+	movdqa	%xmm0, -32(%edx)
+L(aligned_16_16bytes):
+	movdqa	%xmm0, -16(%edx)
+L(aligned_16_0bytes):
+	SETRTNVAL
+	RETURN
+
+	ALIGN (4)
+L(aligned_16_116bytes):	/* chain for remainders of 16 * k + 4 bytes */
+	movdqa	%xmm0, -116(%edx)
+L(aligned_16_100bytes):
+	movdqa	%xmm0, -100(%edx)
+L(aligned_16_84bytes):
+	movdqa	%xmm0, -84(%edx)
+L(aligned_16_68bytes):
+	movdqa	%xmm0, -68(%edx)
+L(aligned_16_52bytes):
+	movdqa	%xmm0, -52(%edx)
+L(aligned_16_36bytes):
+	movdqa	%xmm0, -36(%edx)
+L(aligned_16_20bytes):
+	movdqa	%xmm0, -20(%edx)
+L(aligned_16_4bytes):
+	movl	%eax, -4(%edx)	/* final 4 bytes from EAX */
+	SETRTNVAL
+	RETURN
+
+	ALIGN (4)
+L(aligned_16_120bytes):	/* chain for remainders of 16 * k + 8 bytes */
+	movdqa	%xmm0, -120(%edx)
+L(aligned_16_104bytes):
+	movdqa	%xmm0, -104(%edx)
+L(aligned_16_88bytes):
+	movdqa	%xmm0, -88(%edx)
+L(aligned_16_72bytes):
+	movdqa	%xmm0, -72(%edx)
+L(aligned_16_56bytes):
+	movdqa	%xmm0, -56(%edx)
+L(aligned_16_40bytes):
+	movdqa	%xmm0, -40(%edx)
+L(aligned_16_24bytes):
+	movdqa	%xmm0, -24(%edx)
+L(aligned_16_8bytes):
+	movq	%xmm0, -8(%edx)	/* final 8 bytes from the low half of XMM0 */
+	SETRTNVAL
+	RETURN
+
+	ALIGN (4)
+L(aligned_16_124bytes):	/* chain for remainders of 16 * k + 12 bytes */
+	movdqa	%xmm0, -124(%edx)
+L(aligned_16_108bytes):
+	movdqa	%xmm0, -108(%edx)
+L(aligned_16_92bytes):
+	movdqa	%xmm0, -92(%edx)
+L(aligned_16_76bytes):
+	movdqa	%xmm0, -76(%edx)
+L(aligned_16_60bytes):
+	movdqa	%xmm0, -60(%edx)
+L(aligned_16_44bytes):
+	movdqa	%xmm0, -44(%edx)
+L(aligned_16_28bytes):
+	movdqa	%xmm0, -28(%edx)
+L(aligned_16_12bytes):
+	movq	%xmm0, -12(%edx)	/* final 12 bytes: 8 from XMM0 ... */
+	movl	%eax, -4(%edx)	/* ... then 4 from EAX */
+	SETRTNVAL
+	RETURN
+
+END (MEMSET)
diff --git a/libcutils/arch-x86/cache_wrapper.S b/libcutils/arch-x86/cache.h
similarity index 95%
rename from libcutils/arch-x86/cache_wrapper.S
rename to libcutils/arch-x86/cache.h
index 9eee25c..1c22fea 100644
--- a/libcutils/arch-x86/cache_wrapper.S
+++ b/libcutils/arch-x86/cache.h
@@ -13,9 +13,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-/*
- * Contributed by: Intel Corporation
- */
 
 #if defined(__slm__)
 /* Values are optimized for Silvermont */
diff --git a/libcutils/arch-x86/sse2-memset16-atom.S b/libcutils/arch-x86/sse2-memset16-atom.S
deleted file mode 100755
index c2a762b..0000000
--- a/libcutils/arch-x86/sse2-memset16-atom.S
+++ /dev/null
@@ -1,722 +0,0 @@
-/*
- * Copyright (C) 2010 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/*
- * Contributed by: Intel Corporation
- */
-
-#ifndef L
-# define L(label)	.L##label
-#endif
-
-#ifndef ALIGN
-# define ALIGN(n)	.p2align n
-#endif
-
-#ifndef cfi_startproc
-# define cfi_startproc			.cfi_startproc
-#endif
-
-#ifndef cfi_endproc
-# define cfi_endproc			.cfi_endproc
-#endif
-
-#ifndef cfi_rel_offset
-# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
-#endif
-
-#ifndef cfi_restore
-# define cfi_restore(reg)		.cfi_restore reg
-#endif
-
-#ifndef cfi_adjust_cfa_offset
-# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
-#endif
-
-#ifndef ENTRY
-# define ENTRY(name)			\
-	.type name,  @function; 	\
-	.globl name;			\
-	.p2align 4;			\
-name:					\
-	cfi_startproc
-#endif
-
-#ifndef END
-# define END(name)			\
-	cfi_endproc;			\
-	.size name, .-name
-#endif
-
-#define CFI_PUSH(REG)						\
-  cfi_adjust_cfa_offset (4);					\
-  cfi_rel_offset (REG, 0)
-
-#define CFI_POP(REG)						\
-  cfi_adjust_cfa_offset (-4);					\
-  cfi_restore (REG)
-
-#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
-#define POP(REG)	popl REG; CFI_POP (REG)
-
-#ifdef USE_AS_BZERO16
-# define DEST		PARMS
-# define LEN		DEST+4
-#else
-# define DEST		PARMS
-# define CHR		DEST+4
-# define LEN		CHR+4
-#endif
-
-#if 1
-# define SETRTNVAL
-#else
-# define SETRTNVAL	movl DEST(%esp), %eax
-#endif
-
-#if (defined SHARED || defined __PIC__)
-# define ENTRANCE	PUSH (%ebx);
-# define RETURN_END	POP (%ebx); ret
-# define RETURN		RETURN_END; CFI_PUSH (%ebx)
-# define PARMS		8		/* Preserve EBX.  */
-# define JMPTBL(I, B)	I - B
-
-/* Load an entry in a jump table into EBX and branch to it.  TABLE is a
-   jump table with relative offsets.   */
-# define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
-    /* We first load PC into EBX.  */				\
-    call	__i686.get_pc_thunk.bx;				\
-    /* Get the address of the jump table.  */			\
-    add		$(TABLE - .), %ebx;				\
-    /* Get the entry and convert the relative offset to the	\
-       absolute address.  */					\
-    add		(%ebx,%ecx,4), %ebx;				\
-    /* We loaded the jump table and adjuested EDX. Go.  */	\
-    jmp		*%ebx
-
-	.section	.gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits
-	.globl	__i686.get_pc_thunk.bx
-	.hidden	__i686.get_pc_thunk.bx
-	ALIGN (4)
-	.type	__i686.get_pc_thunk.bx,@function
-__i686.get_pc_thunk.bx:
-	movl	(%esp), %ebx
-	ret
-#else
-# define ENTRANCE
-# define RETURN_END	ret
-# define RETURN		RETURN_END
-# define PARMS		4
-# define JMPTBL(I, B)	I
-
-/* Branch to an entry in a jump table.  TABLE is a jump table with
-   absolute offsets.  */
-# define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
-    jmp		*TABLE(,%ecx,4)
-#endif
-
-	.section .text.sse2,"ax",@progbits
-	ALIGN (4)
-ENTRY (sse2_memset16_atom)
-	ENTRANCE
-
-	movl	LEN(%esp), %ecx
-#ifdef USE_AS_ANDROID
-	shr	$1, %ecx
-#endif
-#ifdef USE_AS_BZERO16
-	xor	%eax, %eax
-#else
-	movzwl	CHR(%esp), %eax
-	mov	%eax, %edx
-	shl	$16, %eax
-	or	%edx, %eax
-#endif
-	movl	DEST(%esp), %edx
-	cmp	$32, %ecx
-	jae	L(32wordsormore)
-
-L(write_less32words):
-	lea	(%edx, %ecx, 2), %edx
-	BRANCH_TO_JMPTBL_ENTRY (L(table_less32words))
-
-
-	.pushsection .rodata.sse2,"a",@progbits
-	ALIGN (2)
-L(table_less32words):
-	.int	JMPTBL (L(write_0words), L(table_less32words))
-	.int	JMPTBL (L(write_1words), L(table_less32words))
-	.int	JMPTBL (L(write_2words), L(table_less32words))
-	.int	JMPTBL (L(write_3words), L(table_less32words))
-	.int	JMPTBL (L(write_4words), L(table_less32words))
-	.int	JMPTBL (L(write_5words), L(table_less32words))
-	.int	JMPTBL (L(write_6words), L(table_less32words))
-	.int	JMPTBL (L(write_7words), L(table_less32words))
-	.int	JMPTBL (L(write_8words), L(table_less32words))
-	.int	JMPTBL (L(write_9words), L(table_less32words))
-	.int	JMPTBL (L(write_10words), L(table_less32words))
-	.int	JMPTBL (L(write_11words), L(table_less32words))
-	.int	JMPTBL (L(write_12words), L(table_less32words))
-	.int	JMPTBL (L(write_13words), L(table_less32words))
-	.int	JMPTBL (L(write_14words), L(table_less32words))
-	.int	JMPTBL (L(write_15words), L(table_less32words))
-	.int	JMPTBL (L(write_16words), L(table_less32words))
-	.int	JMPTBL (L(write_17words), L(table_less32words))
-	.int	JMPTBL (L(write_18words), L(table_less32words))
-	.int	JMPTBL (L(write_19words), L(table_less32words))
-	.int	JMPTBL (L(write_20words), L(table_less32words))
-	.int	JMPTBL (L(write_21words), L(table_less32words))
-	.int	JMPTBL (L(write_22words), L(table_less32words))
-	.int	JMPTBL (L(write_23words), L(table_less32words))
-	.int	JMPTBL (L(write_24words), L(table_less32words))
-	.int	JMPTBL (L(write_25words), L(table_less32words))
-	.int	JMPTBL (L(write_26words), L(table_less32words))
-	.int	JMPTBL (L(write_27words), L(table_less32words))
-	.int	JMPTBL (L(write_28words), L(table_less32words))
-	.int	JMPTBL (L(write_29words), L(table_less32words))
-	.int	JMPTBL (L(write_30words), L(table_less32words))
-	.int	JMPTBL (L(write_31words), L(table_less32words))
-	.popsection
-
-	ALIGN (4)
-L(write_28words):
-	movl	%eax, -56(%edx)
-	movl	%eax, -52(%edx)
-L(write_24words):
-	movl	%eax, -48(%edx)
-	movl	%eax, -44(%edx)
-L(write_20words):
-	movl	%eax, -40(%edx)
-	movl	%eax, -36(%edx)
-L(write_16words):
-	movl	%eax, -32(%edx)
-	movl	%eax, -28(%edx)
-L(write_12words):
-	movl	%eax, -24(%edx)
-	movl	%eax, -20(%edx)
-L(write_8words):
-	movl	%eax, -16(%edx)
-	movl	%eax, -12(%edx)
-L(write_4words):
-	movl	%eax, -8(%edx)
-	movl	%eax, -4(%edx)
-L(write_0words):
-	SETRTNVAL
-	RETURN
-
-	ALIGN (4)
-L(write_29words):
-	movl	%eax, -58(%edx)
-	movl	%eax, -54(%edx)
-L(write_25words):
-	movl	%eax, -50(%edx)
-	movl	%eax, -46(%edx)
-L(write_21words):
-	movl	%eax, -42(%edx)
-	movl	%eax, -38(%edx)
-L(write_17words):
-	movl	%eax, -34(%edx)
-	movl	%eax, -30(%edx)
-L(write_13words):
-	movl	%eax, -26(%edx)
-	movl	%eax, -22(%edx)
-L(write_9words):
-	movl	%eax, -18(%edx)
-	movl	%eax, -14(%edx)
-L(write_5words):
-	movl	%eax, -10(%edx)
-	movl	%eax, -6(%edx)
-L(write_1words):
-	mov	%ax, -2(%edx)
-	SETRTNVAL
-	RETURN
-
-	ALIGN (4)
-L(write_30words):
-	movl	%eax, -60(%edx)
-	movl	%eax, -56(%edx)
-L(write_26words):
-	movl	%eax, -52(%edx)
-	movl	%eax, -48(%edx)
-L(write_22words):
-	movl	%eax, -44(%edx)
-	movl	%eax, -40(%edx)
-L(write_18words):
-	movl	%eax, -36(%edx)
-	movl	%eax, -32(%edx)
-L(write_14words):
-	movl	%eax, -28(%edx)
-	movl	%eax, -24(%edx)
-L(write_10words):
-	movl	%eax, -20(%edx)
-	movl	%eax, -16(%edx)
-L(write_6words):
-	movl	%eax, -12(%edx)
-	movl	%eax, -8(%edx)
-L(write_2words):
-	movl	%eax, -4(%edx)
-	SETRTNVAL
-	RETURN
-
-	ALIGN (4)
-L(write_31words):
-	movl	%eax, -62(%edx)
-	movl	%eax, -58(%edx)
-L(write_27words):
-	movl	%eax, -54(%edx)
-	movl	%eax, -50(%edx)
-L(write_23words):
-	movl	%eax, -46(%edx)
-	movl	%eax, -42(%edx)
-L(write_19words):
-	movl	%eax, -38(%edx)
-	movl	%eax, -34(%edx)
-L(write_15words):
-	movl	%eax, -30(%edx)
-	movl	%eax, -26(%edx)
-L(write_11words):
-	movl	%eax, -22(%edx)
-	movl	%eax, -18(%edx)
-L(write_7words):
-	movl	%eax, -14(%edx)
-	movl	%eax, -10(%edx)
-L(write_3words):
-	movl	%eax, -6(%edx)
-	movw	%ax, -2(%edx)
-	SETRTNVAL
-	RETURN
-
-	ALIGN (4)
-
-L(32wordsormore):
-	shl	$1, %ecx
-	test	$0x01, %edx
-	jz	L(aligned2bytes)
-	mov	%eax, (%edx)
-	mov	%eax, -4(%edx, %ecx)
-	sub	$2, %ecx
-	add	$1, %edx
-	rol	$8, %eax
-L(aligned2bytes):
-#ifdef USE_AS_BZERO16
-	pxor	%xmm0, %xmm0
-#else
-	movd	%eax, %xmm0
-	pshufd	$0, %xmm0, %xmm0
-#endif
-	testl	$0xf, %edx
-	jz	L(aligned_16)
-/* ECX > 32 and EDX is not 16 byte aligned.  */
-L(not_aligned_16):
-	movdqu	%xmm0, (%edx)
-	movl	%edx, %eax
-	and	$-16, %edx
-	add	$16, %edx
-	sub	%edx, %eax
-	add	%eax, %ecx
-	movd	%xmm0, %eax
-
-	ALIGN (4)
-L(aligned_16):
-	cmp	$128, %ecx
-	jae	L(128bytesormore)
-
-L(aligned_16_less128bytes):
-	add	%ecx, %edx
-	shr	$1, %ecx
-	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
-
-	ALIGN (4)
-L(128bytesormore):
-#ifdef SHARED_CACHE_SIZE
-	PUSH (%ebx)
-	mov	$SHARED_CACHE_SIZE, %ebx
-#else
-# if (defined SHARED || defined __PIC__)
-	call	__i686.get_pc_thunk.bx
-	add	$_GLOBAL_OFFSET_TABLE_, %ebx
-	mov	__x86_shared_cache_size@GOTOFF(%ebx), %ebx
-# else
-	PUSH (%ebx)
-	mov	__x86_shared_cache_size, %ebx
-# endif
-#endif
-	cmp	%ebx, %ecx
-	jae	L(128bytesormore_nt_start)
-
-	
-#ifdef DATA_CACHE_SIZE
-	POP (%ebx)
-# define RESTORE_EBX_STATE CFI_PUSH (%ebx)
-	cmp	$DATA_CACHE_SIZE, %ecx
-#else
-# if (defined SHARED || defined __PIC__)
-#  define RESTORE_EBX_STATE
-	call	__i686.get_pc_thunk.bx
-	add	$_GLOBAL_OFFSET_TABLE_, %ebx
-	cmp	__x86_data_cache_size@GOTOFF(%ebx), %ecx
-# else
-	POP (%ebx)
-#  define RESTORE_EBX_STATE CFI_PUSH (%ebx)
-	cmp	__x86_data_cache_size, %ecx
-# endif
-#endif
-
-	jae	L(128bytes_L2_normal)
-	subl	$128, %ecx
-L(128bytesormore_normal):
-	sub	$128, %ecx
-	movdqa	%xmm0, (%edx)
-	movdqa	%xmm0, 0x10(%edx)
-	movdqa	%xmm0, 0x20(%edx)
-	movdqa	%xmm0, 0x30(%edx)
-	movdqa	%xmm0, 0x40(%edx)
-	movdqa	%xmm0, 0x50(%edx)
-	movdqa	%xmm0, 0x60(%edx)
-	movdqa	%xmm0, 0x70(%edx)
-	lea	128(%edx), %edx
-	jb	L(128bytesless_normal)
-
-
-	sub	$128, %ecx
-	movdqa	%xmm0, (%edx)
-	movdqa	%xmm0, 0x10(%edx)
-	movdqa	%xmm0, 0x20(%edx)
-	movdqa	%xmm0, 0x30(%edx)
-	movdqa	%xmm0, 0x40(%edx)
-	movdqa	%xmm0, 0x50(%edx)
-	movdqa	%xmm0, 0x60(%edx)
-	movdqa	%xmm0, 0x70(%edx)
-	lea	128(%edx), %edx
-	jae	L(128bytesormore_normal)
-
-L(128bytesless_normal):
-	lea	128(%ecx), %ecx
-	add	%ecx, %edx
-	shr	$1, %ecx
-	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
-
-	ALIGN (4)
-L(128bytes_L2_normal):
-	prefetcht0	0x380(%edx)
-	prefetcht0	0x3c0(%edx)
-	sub	$128, %ecx
-	movdqa	%xmm0, (%edx)
-	movaps	%xmm0, 0x10(%edx)
-	movaps	%xmm0, 0x20(%edx)
-	movaps	%xmm0, 0x30(%edx)
-	movaps	%xmm0, 0x40(%edx)
-	movaps	%xmm0, 0x50(%edx)
-	movaps	%xmm0, 0x60(%edx)
-	movaps	%xmm0, 0x70(%edx)
-	add	$128, %edx
-	cmp	$128, %ecx 	
-	jae	L(128bytes_L2_normal)
-
-L(128bytesless_L2_normal):
-	add	%ecx, %edx
-	shr	$1, %ecx
-	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
-
-	RESTORE_EBX_STATE
-L(128bytesormore_nt_start):
-	sub	%ebx, %ecx
-	mov	%ebx, %eax
-	and	$0x7f, %eax
-	add	%eax, %ecx
-	movd	%xmm0, %eax
-	ALIGN (4)
-L(128bytesormore_shared_cache_loop):
-	prefetcht0	0x3c0(%edx)
-	prefetcht0	0x380(%edx)
-	sub	$0x80, %ebx
-	movdqa	%xmm0, (%edx)
-	movdqa	%xmm0, 0x10(%edx)
-	movdqa	%xmm0, 0x20(%edx)
-	movdqa	%xmm0, 0x30(%edx)
-	movdqa	%xmm0, 0x40(%edx)
-	movdqa	%xmm0, 0x50(%edx)
-	movdqa	%xmm0, 0x60(%edx)
-	movdqa	%xmm0, 0x70(%edx)
-	add	$0x80, %edx
-	cmp	$0x80, %ebx
-	jae	L(128bytesormore_shared_cache_loop)
-	cmp	$0x80, %ecx
-	jb	L(shared_cache_loop_end)
-	ALIGN (4)
-L(128bytesormore_nt):
-	sub	$0x80, %ecx
-	movntdq	%xmm0, (%edx)
-	movntdq	%xmm0, 0x10(%edx)
-	movntdq	%xmm0, 0x20(%edx)
-	movntdq	%xmm0, 0x30(%edx)
-	movntdq	%xmm0, 0x40(%edx)
-	movntdq	%xmm0, 0x50(%edx)
-	movntdq	%xmm0, 0x60(%edx)
-	movntdq	%xmm0, 0x70(%edx)
-	add	$0x80, %edx
-	cmp	$0x80, %ecx
-	jae	L(128bytesormore_nt)
-	sfence
-L(shared_cache_loop_end):
-#if defined DATA_CACHE_SIZE || !(defined SHARED || defined __PIC__)
-	POP (%ebx)
-#endif
-	add	%ecx, %edx
-	shr	$1, %ecx
-	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
-
-
-	.pushsection .rodata.sse2,"a",@progbits
-	ALIGN (2)
-L(table_16_128bytes):
-	.int	JMPTBL (L(aligned_16_0bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_2bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_4bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_6bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_8bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_10bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_12bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_14bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_16bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_18bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_20bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_22bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_24bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_26bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_28bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_30bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_32bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_34bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_36bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_38bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_40bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_42bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_44bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_46bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_48bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_50bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_52bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_54bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_56bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_58bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_60bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_62bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_64bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_66bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_68bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_70bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_72bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_74bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_76bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_78bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_80bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_82bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_84bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_86bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_88bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_90bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_92bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_94bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_96bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_98bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_100bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_102bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_104bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_106bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_108bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_110bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_112bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_114bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_116bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_118bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_120bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_122bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_124bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_126bytes), L(table_16_128bytes))
-	.popsection
-
-
-	ALIGN (4)
-L(aligned_16_112bytes):
-	movdqa	%xmm0, -112(%edx)
-L(aligned_16_96bytes):
-	movdqa	%xmm0, -96(%edx)
-L(aligned_16_80bytes):
-	movdqa	%xmm0, -80(%edx)
-L(aligned_16_64bytes):
-	movdqa	%xmm0, -64(%edx)
-L(aligned_16_48bytes):
-	movdqa	%xmm0, -48(%edx)
-L(aligned_16_32bytes):
-	movdqa	%xmm0, -32(%edx)
-L(aligned_16_16bytes):
-	movdqa	%xmm0, -16(%edx)
-L(aligned_16_0bytes):
-	SETRTNVAL
-	RETURN
-
-
-	ALIGN (4)
-L(aligned_16_114bytes):
-	movdqa	%xmm0, -114(%edx)
-L(aligned_16_98bytes):
-	movdqa	%xmm0, -98(%edx)
-L(aligned_16_82bytes):
-	movdqa	%xmm0, -82(%edx)
-L(aligned_16_66bytes):
-	movdqa	%xmm0, -66(%edx)
-L(aligned_16_50bytes):
-	movdqa	%xmm0, -50(%edx)
-L(aligned_16_34bytes):
-	movdqa	%xmm0, -34(%edx)
-L(aligned_16_18bytes):
-	movdqa	%xmm0, -18(%edx)
-L(aligned_16_2bytes):
-	movw	%ax, -2(%edx)
-	SETRTNVAL
-	RETURN
-
-	ALIGN (4)
-L(aligned_16_116bytes):
-	movdqa	%xmm0, -116(%edx)
-L(aligned_16_100bytes):
-	movdqa	%xmm0, -100(%edx)
-L(aligned_16_84bytes):
-	movdqa	%xmm0, -84(%edx)
-L(aligned_16_68bytes):
-	movdqa	%xmm0, -68(%edx)
-L(aligned_16_52bytes):
-	movdqa	%xmm0, -52(%edx)
-L(aligned_16_36bytes):
-	movdqa	%xmm0, -36(%edx)
-L(aligned_16_20bytes):
-	movdqa	%xmm0, -20(%edx)
-L(aligned_16_4bytes):
-	movl	%eax, -4(%edx)
-	SETRTNVAL
-	RETURN
-
-
-	ALIGN (4)
-L(aligned_16_118bytes):
-	movdqa	%xmm0, -118(%edx)
-L(aligned_16_102bytes):
-	movdqa	%xmm0, -102(%edx)
-L(aligned_16_86bytes):
-	movdqa	%xmm0, -86(%edx)
-L(aligned_16_70bytes):
-	movdqa	%xmm0, -70(%edx)
-L(aligned_16_54bytes):
-	movdqa	%xmm0, -54(%edx)
-L(aligned_16_38bytes):
-	movdqa	%xmm0, -38(%edx)
-L(aligned_16_22bytes):
-	movdqa	%xmm0, -22(%edx)
-L(aligned_16_6bytes):
-	movl	%eax, -6(%edx)
-	movw	%ax, -2(%edx)
-	SETRTNVAL
-	RETURN
-
-
-	ALIGN (4)
-L(aligned_16_120bytes):
-	movdqa	%xmm0, -120(%edx)
-L(aligned_16_104bytes):
-	movdqa	%xmm0, -104(%edx)
-L(aligned_16_88bytes):
-	movdqa	%xmm0, -88(%edx)
-L(aligned_16_72bytes):
-	movdqa	%xmm0, -72(%edx)
-L(aligned_16_56bytes):
-	movdqa	%xmm0, -56(%edx)
-L(aligned_16_40bytes):
-	movdqa	%xmm0, -40(%edx)
-L(aligned_16_24bytes):
-	movdqa	%xmm0, -24(%edx)
-L(aligned_16_8bytes):
-	movq	%xmm0, -8(%edx)
-	SETRTNVAL
-	RETURN
-
-
-	ALIGN (4)
-L(aligned_16_122bytes):
-	movdqa	%xmm0, -122(%edx)
-L(aligned_16_106bytes):
-	movdqa	%xmm0, -106(%edx)
-L(aligned_16_90bytes):
-	movdqa	%xmm0, -90(%edx)
-L(aligned_16_74bytes):
-	movdqa	%xmm0, -74(%edx)
-L(aligned_16_58bytes):
-	movdqa	%xmm0, -58(%edx)
-L(aligned_16_42bytes):
-	movdqa	%xmm0, -42(%edx)
-L(aligned_16_26bytes):
-	movdqa	%xmm0, -26(%edx)
-L(aligned_16_10bytes):
-	movq	%xmm0, -10(%edx)
-	movw	%ax, -2(%edx)
-	SETRTNVAL
-	RETURN
-
-
-	ALIGN (4)
-L(aligned_16_124bytes):
-	movdqa	%xmm0, -124(%edx)
-L(aligned_16_108bytes):
-	movdqa	%xmm0, -108(%edx)
-L(aligned_16_92bytes):
-	movdqa	%xmm0, -92(%edx)
-L(aligned_16_76bytes):
-	movdqa	%xmm0, -76(%edx)
-L(aligned_16_60bytes):
-	movdqa	%xmm0, -60(%edx)
-L(aligned_16_44bytes):
-	movdqa	%xmm0, -44(%edx)
-L(aligned_16_28bytes):
-	movdqa	%xmm0, -28(%edx)
-L(aligned_16_12bytes):
-	movq	%xmm0, -12(%edx)
-	movl	%eax, -4(%edx)
-	SETRTNVAL
-	RETURN
-
-
-	ALIGN (4)
-L(aligned_16_126bytes):
-	movdqa	%xmm0, -126(%edx)
-L(aligned_16_110bytes):
-	movdqa	%xmm0, -110(%edx)
-L(aligned_16_94bytes):
-	movdqa	%xmm0, -94(%edx)
-L(aligned_16_78bytes):
-	movdqa	%xmm0, -78(%edx)
-L(aligned_16_62bytes):
-	movdqa	%xmm0, -62(%edx)
-L(aligned_16_46bytes):
-	movdqa	%xmm0, -46(%edx)
-L(aligned_16_30bytes):
-	movdqa	%xmm0, -30(%edx)
-L(aligned_16_14bytes):
-	movq	%xmm0, -14(%edx)
-	movl	%eax, -6(%edx)
-	movw	%ax, -2(%edx)
-	SETRTNVAL
-	RETURN
-
-END (sse2_memset16_atom)
diff --git a/libcutils/arch-x86/sse2-memset32-atom.S b/libcutils/arch-x86/sse2-memset32-atom.S
deleted file mode 100755
index 05eb64f..0000000
--- a/libcutils/arch-x86/sse2-memset32-atom.S
+++ /dev/null
@@ -1,513 +0,0 @@
-/*
- * Copyright (C) 2010 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/*
- * Contributed by: Intel Corporation
- */
-
-#ifndef L
-# define L(label)	.L##label
-#endif
-
-#ifndef ALIGN
-# define ALIGN(n)	.p2align n
-#endif
-
-#ifndef cfi_startproc
-# define cfi_startproc			.cfi_startproc
-#endif
-
-#ifndef cfi_endproc
-# define cfi_endproc			.cfi_endproc
-#endif
-
-#ifndef cfi_rel_offset
-# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
-#endif
-
-#ifndef cfi_restore
-# define cfi_restore(reg)		.cfi_restore reg
-#endif
-
-#ifndef cfi_adjust_cfa_offset
-# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
-#endif
-
-#ifndef ENTRY
-# define ENTRY(name)			\
-	.type name,  @function; 	\
-	.globl name;			\
-	.p2align 4;			\
-name:					\
-	cfi_startproc
-#endif
-
-#ifndef END
-# define END(name)			\
-	cfi_endproc;			\
-	.size name, .-name
-#endif
-
-#define CFI_PUSH(REG)						\
-  cfi_adjust_cfa_offset (4);					\
-  cfi_rel_offset (REG, 0)
-
-#define CFI_POP(REG)						\
-  cfi_adjust_cfa_offset (-4);					\
-  cfi_restore (REG)
-
-#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
-#define POP(REG)	popl REG; CFI_POP (REG)
-
-#ifdef USE_AS_BZERO32
-# define DEST		PARMS
-# define LEN		DEST+4
-#else
-# define DEST		PARMS
-# define DWDS		DEST+4
-# define LEN		DWDS+4
-#endif
-
-#ifdef USE_AS_WMEMSET32
-# define SETRTNVAL	movl DEST(%esp), %eax
-#else
-# define SETRTNVAL
-#endif
-
-#if (defined SHARED || defined __PIC__)
-# define ENTRANCE	PUSH (%ebx);
-# define RETURN_END	POP (%ebx); ret
-# define RETURN		RETURN_END; CFI_PUSH (%ebx)
-# define PARMS		8		/* Preserve EBX.  */
-# define JMPTBL(I, B)	I - B
-
-/* Load an entry in a jump table into EBX and branch to it.  TABLE is a
-   jump table with relative offsets.   */
-# define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
-    /* We first load PC into EBX.  */				\
-    call	__i686.get_pc_thunk.bx;				\
-    /* Get the address of the jump table.  */			\
-    add		$(TABLE - .), %ebx;				\
-    /* Get the entry and convert the relative offset to the	\
-       absolute address.  */					\
-    add		(%ebx,%ecx,4), %ebx;				\
-    /* We loaded the jump table and adjuested EDX. Go.  */	\
-    jmp		*%ebx
-
-	.section	.gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits
-	.globl	__i686.get_pc_thunk.bx
-	.hidden	__i686.get_pc_thunk.bx
-	ALIGN (4)
-	.type	__i686.get_pc_thunk.bx,@function
-__i686.get_pc_thunk.bx:
-	movl	(%esp), %ebx
-	ret
-#else
-# define ENTRANCE
-# define RETURN_END	ret
-# define RETURN		RETURN_END
-# define PARMS		4
-# define JMPTBL(I, B)	I
-
-/* Branch to an entry in a jump table.  TABLE is a jump table with
-   absolute offsets.  */
-# define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
-    jmp		*TABLE(,%ecx,4)
-#endif
-
-	.section .text.sse2,"ax",@progbits
-	ALIGN (4)
-ENTRY (sse2_memset32_atom)
-	ENTRANCE
-
-	movl	LEN(%esp), %ecx
-#ifdef USE_AS_ANDROID
-	shr     $2, %ecx
-#endif
-#ifdef USE_AS_BZERO32
-	xor	%eax, %eax
-#else
-	mov	DWDS(%esp), %eax
-	mov	%eax, %edx
-#endif
-	movl	DEST(%esp), %edx
-	cmp	$16, %ecx
-	jae	L(16dbwordsormore)
-
-L(write_less16dbwords):
-	lea	(%edx, %ecx, 4), %edx
-	BRANCH_TO_JMPTBL_ENTRY (L(table_less16dbwords))
-
-	.pushsection .rodata.sse2,"a",@progbits
-	ALIGN (2)
-L(table_less16dbwords):
-	.int	JMPTBL (L(write_0dbwords), L(table_less16dbwords))
-	.int	JMPTBL (L(write_1dbwords), L(table_less16dbwords))
-	.int	JMPTBL (L(write_2dbwords), L(table_less16dbwords))
-	.int	JMPTBL (L(write_3dbwords), L(table_less16dbwords))
-	.int	JMPTBL (L(write_4dbwords), L(table_less16dbwords))
-	.int	JMPTBL (L(write_5dbwords), L(table_less16dbwords))
-	.int	JMPTBL (L(write_6dbwords), L(table_less16dbwords))
-	.int	JMPTBL (L(write_7dbwords), L(table_less16dbwords))
-	.int	JMPTBL (L(write_8dbwords), L(table_less16dbwords))
-	.int	JMPTBL (L(write_9dbwords), L(table_less16dbwords))
-	.int	JMPTBL (L(write_10dbwords), L(table_less16dbwords))
-	.int	JMPTBL (L(write_11dbwords), L(table_less16dbwords))
-	.int	JMPTBL (L(write_12dbwords), L(table_less16dbwords))
-	.int	JMPTBL (L(write_13dbwords), L(table_less16dbwords))
-	.int	JMPTBL (L(write_14dbwords), L(table_less16dbwords))
-	.int	JMPTBL (L(write_15dbwords), L(table_less16dbwords))
-	.popsection
-
-	ALIGN (4)
-L(write_15dbwords):
-	movl	%eax, -60(%edx)
-L(write_14dbwords):
-	movl	%eax, -56(%edx)
-L(write_13dbwords):
-	movl	%eax, -52(%edx)
-L(write_12dbwords):
-	movl	%eax, -48(%edx)
-L(write_11dbwords):
-	movl	%eax, -44(%edx)
-L(write_10dbwords):
-	movl	%eax, -40(%edx)
-L(write_9dbwords):
-	movl	%eax, -36(%edx)
-L(write_8dbwords):
-	movl	%eax, -32(%edx)
-L(write_7dbwords):
-	movl	%eax, -28(%edx)
-L(write_6dbwords):
-	movl	%eax, -24(%edx)
-L(write_5dbwords):
-	movl	%eax, -20(%edx)
-L(write_4dbwords):
-	movl	%eax, -16(%edx)
-L(write_3dbwords):
-	movl	%eax, -12(%edx)
-L(write_2dbwords):
-	movl	%eax, -8(%edx)
-L(write_1dbwords):
-	movl	%eax, -4(%edx)
-L(write_0dbwords):
-	SETRTNVAL
-	RETURN
-
-	ALIGN (4)
-L(16dbwordsormore):
-	test	$3, %edx
-	jz	L(aligned4bytes)
-	mov	%eax, (%edx)
-	mov	%eax, -4(%edx, %ecx, 4)
-	sub	$1, %ecx
-	rol	$24, %eax
-	add	$1, %edx
-	test	$3, %edx
-	jz	L(aligned4bytes)
-	ror	$8, %eax
-	add	$1, %edx
-	test	$3, %edx
-	jz	L(aligned4bytes)
-	ror	$8, %eax
-	add	$1, %edx
-L(aligned4bytes):
-	shl	$2, %ecx
-
-#ifdef USE_AS_BZERO32
-	pxor	%xmm0, %xmm0
-#else
-	movd	%eax, %xmm0
-	pshufd	$0, %xmm0, %xmm0
-#endif
-	testl	$0xf, %edx
-	jz	L(aligned_16)
-/* ECX > 32 and EDX is not 16 byte aligned.  */
-L(not_aligned_16):
-	movdqu	%xmm0, (%edx)
-	movl	%edx, %eax
-	and	$-16, %edx
-	add	$16, %edx
-	sub	%edx, %eax
-	add	%eax, %ecx
-	movd	%xmm0, %eax
-	ALIGN (4)
-L(aligned_16):
-	cmp	$128, %ecx
-	jae	L(128bytesormore)
-
-L(aligned_16_less128bytes):
-	add	%ecx, %edx
-	shr	$2, %ecx
-	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
-
-	ALIGN (4)
-L(128bytesormore):
-#ifdef SHARED_CACHE_SIZE
-	PUSH (%ebx)
-	mov	$SHARED_CACHE_SIZE, %ebx
-#else
-# if (defined SHARED || defined __PIC__)
-	call	__i686.get_pc_thunk.bx
-	add	$_GLOBAL_OFFSET_TABLE_, %ebx
-	mov	__x86_shared_cache_size@GOTOFF(%ebx), %ebx
-# else
-	PUSH (%ebx)
-	mov	__x86_shared_cache_size, %ebx
-# endif
-#endif
-	cmp	%ebx, %ecx
-	jae	L(128bytesormore_nt_start)
-	
-#ifdef DATA_CACHE_SIZE
-	POP (%ebx)
-# define RESTORE_EBX_STATE CFI_PUSH (%ebx)
-	cmp	$DATA_CACHE_SIZE, %ecx
-#else
-# if (defined SHARED || defined __PIC__)
-#  define RESTORE_EBX_STATE
-	call	__i686.get_pc_thunk.bx
-	add	$_GLOBAL_OFFSET_TABLE_, %ebx
-	cmp	__x86_data_cache_size@GOTOFF(%ebx), %ecx
-# else
-	POP (%ebx)
-#  define RESTORE_EBX_STATE CFI_PUSH (%ebx)
-	cmp	__x86_data_cache_size, %ecx
-# endif
-#endif
-
-	jae	L(128bytes_L2_normal)
-	subl	$128, %ecx
-L(128bytesormore_normal):
-	sub	$128, %ecx
-	movdqa	%xmm0, (%edx)
-	movdqa	%xmm0, 0x10(%edx)
-	movdqa	%xmm0, 0x20(%edx)
-	movdqa	%xmm0, 0x30(%edx)
-	movdqa	%xmm0, 0x40(%edx)
-	movdqa	%xmm0, 0x50(%edx)
-	movdqa	%xmm0, 0x60(%edx)
-	movdqa	%xmm0, 0x70(%edx)
-	lea	128(%edx), %edx
-	jb	L(128bytesless_normal)
-
-
-	sub	$128, %ecx
-	movdqa	%xmm0, (%edx)
-	movdqa	%xmm0, 0x10(%edx)
-	movdqa	%xmm0, 0x20(%edx)
-	movdqa	%xmm0, 0x30(%edx)
-	movdqa	%xmm0, 0x40(%edx)
-	movdqa	%xmm0, 0x50(%edx)
-	movdqa	%xmm0, 0x60(%edx)
-	movdqa	%xmm0, 0x70(%edx)
-	lea	128(%edx), %edx
-	jae	L(128bytesormore_normal)
-
-L(128bytesless_normal):
-	lea	128(%ecx), %ecx
-	add	%ecx, %edx
-	shr	$2, %ecx
-	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
-
-	ALIGN (4)
-L(128bytes_L2_normal):
-	prefetcht0	0x380(%edx)
-	prefetcht0	0x3c0(%edx)
-	sub	$128, %ecx
-	movdqa	%xmm0, (%edx)
-	movaps	%xmm0, 0x10(%edx)
-	movaps	%xmm0, 0x20(%edx)
-	movaps	%xmm0, 0x30(%edx)
-	movaps	%xmm0, 0x40(%edx)
-	movaps	%xmm0, 0x50(%edx)
-	movaps	%xmm0, 0x60(%edx)
-	movaps	%xmm0, 0x70(%edx)
-	add	$128, %edx
-	cmp	$128, %ecx 	
-	jae	L(128bytes_L2_normal)
-
-L(128bytesless_L2_normal):
-	add	%ecx, %edx
-	shr	$2, %ecx
-	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
-
-	RESTORE_EBX_STATE
-L(128bytesormore_nt_start):
-	sub	%ebx, %ecx
-	mov	%ebx, %eax
-	and	$0x7f, %eax
-	add	%eax, %ecx
-	movd	%xmm0, %eax
-	ALIGN (4)
-L(128bytesormore_shared_cache_loop):
-	prefetcht0	0x3c0(%edx)
-	prefetcht0	0x380(%edx)
-	sub	$0x80, %ebx
-	movdqa	%xmm0, (%edx)
-	movdqa	%xmm0, 0x10(%edx)
-	movdqa	%xmm0, 0x20(%edx)
-	movdqa	%xmm0, 0x30(%edx)
-	movdqa	%xmm0, 0x40(%edx)
-	movdqa	%xmm0, 0x50(%edx)
-	movdqa	%xmm0, 0x60(%edx)
-	movdqa	%xmm0, 0x70(%edx)
-	add	$0x80, %edx
-	cmp	$0x80, %ebx
-	jae	L(128bytesormore_shared_cache_loop)
-	cmp	$0x80, %ecx
-	jb	L(shared_cache_loop_end)
-
-	ALIGN (4)
-L(128bytesormore_nt):
-	sub	$0x80, %ecx
-	movntdq	%xmm0, (%edx)
-	movntdq	%xmm0, 0x10(%edx)
-	movntdq	%xmm0, 0x20(%edx)
-	movntdq	%xmm0, 0x30(%edx)
-	movntdq	%xmm0, 0x40(%edx)
-	movntdq	%xmm0, 0x50(%edx)
-	movntdq	%xmm0, 0x60(%edx)
-	movntdq	%xmm0, 0x70(%edx)
-	add	$0x80, %edx
-	cmp	$0x80, %ecx
-	jae	L(128bytesormore_nt)
-	sfence
-L(shared_cache_loop_end):
-#if defined DATA_CACHE_SIZE || !(defined SHARED || defined __PIC__)
-	POP (%ebx)
-#endif
-	add	%ecx, %edx
-	shr	$2, %ecx
-	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
-
-	.pushsection .rodata.sse2,"a",@progbits
-	ALIGN (2)
-L(table_16_128bytes):
-	.int	JMPTBL (L(aligned_16_0bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_4bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_8bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_12bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_16bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_20bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_24bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_28bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_32bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_36bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_40bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_44bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_48bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_52bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_56bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_60bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_64bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_68bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_72bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_76bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_80bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_84bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_88bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_92bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_96bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_100bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_104bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_108bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_112bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_116bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_120bytes), L(table_16_128bytes))
-	.int	JMPTBL (L(aligned_16_124bytes), L(table_16_128bytes))
-	.popsection
-
-	ALIGN (4)
-L(aligned_16_112bytes):
-	movdqa	%xmm0, -112(%edx)
-L(aligned_16_96bytes):
-	movdqa	%xmm0, -96(%edx)
-L(aligned_16_80bytes):
-	movdqa	%xmm0, -80(%edx)
-L(aligned_16_64bytes):
-	movdqa	%xmm0, -64(%edx)
-L(aligned_16_48bytes):
-	movdqa	%xmm0, -48(%edx)
-L(aligned_16_32bytes):
-	movdqa	%xmm0, -32(%edx)
-L(aligned_16_16bytes):
-	movdqa	%xmm0, -16(%edx)
-L(aligned_16_0bytes):
-	SETRTNVAL
-	RETURN
-
-	ALIGN (4)
-L(aligned_16_116bytes):
-	movdqa	%xmm0, -116(%edx)
-L(aligned_16_100bytes):
-	movdqa	%xmm0, -100(%edx)
-L(aligned_16_84bytes):
-	movdqa	%xmm0, -84(%edx)
-L(aligned_16_68bytes):
-	movdqa	%xmm0, -68(%edx)
-L(aligned_16_52bytes):
-	movdqa	%xmm0, -52(%edx)
-L(aligned_16_36bytes):
-	movdqa	%xmm0, -36(%edx)
-L(aligned_16_20bytes):
-	movdqa	%xmm0, -20(%edx)
-L(aligned_16_4bytes):
-	movl	%eax, -4(%edx)
-	SETRTNVAL
-	RETURN
-
-	ALIGN (4)
-L(aligned_16_120bytes):
-	movdqa	%xmm0, -120(%edx)
-L(aligned_16_104bytes):
-	movdqa	%xmm0, -104(%edx)
-L(aligned_16_88bytes):
-	movdqa	%xmm0, -88(%edx)
-L(aligned_16_72bytes):
-	movdqa	%xmm0, -72(%edx)
-L(aligned_16_56bytes):
-	movdqa	%xmm0, -56(%edx)
-L(aligned_16_40bytes):
-	movdqa	%xmm0, -40(%edx)
-L(aligned_16_24bytes):
-	movdqa	%xmm0, -24(%edx)
-L(aligned_16_8bytes):
-	movq	%xmm0, -8(%edx)
-	SETRTNVAL
-	RETURN
-
-	ALIGN (4)
-L(aligned_16_124bytes):
-	movdqa	%xmm0, -124(%edx)
-L(aligned_16_108bytes):
-	movdqa	%xmm0, -108(%edx)
-L(aligned_16_92bytes):
-	movdqa	%xmm0, -92(%edx)
-L(aligned_16_76bytes):
-	movdqa	%xmm0, -76(%edx)
-L(aligned_16_60bytes):
-	movdqa	%xmm0, -60(%edx)
-L(aligned_16_44bytes):
-	movdqa	%xmm0, -44(%edx)
-L(aligned_16_28bytes):
-	movdqa	%xmm0, -28(%edx)
-L(aligned_16_12bytes):
-	movq	%xmm0, -12(%edx)
-	movl	%eax, -4(%edx)
-	SETRTNVAL
-	RETURN
-
-END (sse2_memset32_atom)
diff --git a/libcutils/arch-x86_64/android_memset16_SSE2-atom.S b/libcutils/arch-x86_64/android_memset16.S
similarity index 98%
rename from libcutils/arch-x86_64/android_memset16_SSE2-atom.S
rename to libcutils/arch-x86_64/android_memset16.S
index 48a10ed..cb6d4a3 100644
--- a/libcutils/arch-x86_64/android_memset16_SSE2-atom.S
+++ b/libcutils/arch-x86_64/android_memset16.S
@@ -13,12 +13,13 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-/*
- * Contributed by: Intel Corporation
- */
 
 #include "cache.h"
 
+#ifndef MEMSET
+# define MEMSET		android_memset16
+#endif
+
 #ifndef L
 # define L(label)	.L##label
 #endif
@@ -63,7 +64,7 @@
 
 	.section .text.sse2,"ax",@progbits
 	ALIGN (4)
-ENTRY (android_memset16)	// Address in rdi
+ENTRY (MEMSET)	// Address in rdi
 	shr    $1, %rdx			// Count in rdx
 	movzwl %si, %ecx
 	/* Fill the whole ECX with pattern.  */
@@ -561,4 +562,4 @@
 	movw   %cx, -2(%rdi)
 	ret
 
-END (android_memset16)
+END (MEMSET)
diff --git a/libcutils/arch-x86_64/android_memset32_SSE2-atom.S b/libcutils/arch-x86_64/android_memset32.S
similarity index 98%
rename from libcutils/arch-x86_64/android_memset32_SSE2-atom.S
rename to libcutils/arch-x86_64/android_memset32.S
index 4bdea8e..1514aa2 100644
--- a/libcutils/arch-x86_64/android_memset32_SSE2-atom.S
+++ b/libcutils/arch-x86_64/android_memset32.S
@@ -13,12 +13,13 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-/*
- * Contributed by: Intel Corporation
- */
 
 #include "cache.h"
 
+#ifndef MEMSET
+# define MEMSET		android_memset32
+#endif
+
 #ifndef L
 # define L(label)	.L##label
 #endif
@@ -63,7 +64,7 @@
 
 	.section .text.sse2,"ax",@progbits
 	ALIGN (4)
-ENTRY (android_memset32)	// Address in rdi
+ENTRY (MEMSET)	// Address in rdi
 	shr    $2, %rdx			// Count in rdx
 	movl   %esi, %ecx		// Pattern in ecx
 
@@ -369,4 +370,4 @@
 	movl	%ecx, -4(%rdi)
 	ret
 
-END (android_memset32)
+END (MEMSET)
diff --git a/libcutils/arch-x86_64/cache.h b/libcutils/arch-x86_64/cache.h
index ab5dd2f..f144309 100644
--- a/libcutils/arch-x86_64/cache.h
+++ b/libcutils/arch-x86_64/cache.h
@@ -13,19 +13,10 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-/*
- * Contributed by: Intel Corporation
- */
 
-#if defined(__slm__)
 /* Values are optimized for Silvermont */
 #define SHARED_CACHE_SIZE	(1024*1024)			/* Silvermont L2 Cache */
 #define DATA_CACHE_SIZE		(24*1024)			/* Silvermont L1 Data Cache */
-#else
-/* Values are optimized for Atom */
-#define SHARED_CACHE_SIZE	(512*1024)			/* Atom L2 Cache */
-#define DATA_CACHE_SIZE		(24*1024)			/* Atom L1 Data Cache */
-#endif
 
 #define SHARED_CACHE_SIZE_HALF	(SHARED_CACHE_SIZE / 2)
 #define DATA_CACHE_SIZE_HALF	(DATA_CACHE_SIZE / 2)
diff --git a/libcutils/debugger.c b/libcutils/debugger.c
index 056de5d..4035ee1 100644
--- a/libcutils/debugger.c
+++ b/libcutils/debugger.c
@@ -14,6 +14,9 @@
  * limitations under the License.
  */
 
+#include <stdbool.h>
+#include <fcntl.h>
+#include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
@@ -21,70 +24,129 @@
 #include <cutils/debugger.h>
 #include <cutils/sockets.h>
 
-int dump_tombstone(pid_t tid, char* pathbuf, size_t pathlen) {
-    int s = socket_local_client(DEBUGGER_SOCKET_NAME,
-            ANDROID_SOCKET_NAMESPACE_ABSTRACT, SOCK_STREAM);
-    if (s < 0) {
-        return -1;
-    }
+#if defined(__LP64__)
+#include <elf.h>
 
-    debugger_msg_t msg;
-    memset(&msg, 0, sizeof(msg));
+static bool is32bit(pid_t tid) {
+  char* exeline;
+  if (asprintf(&exeline, "/proc/%d/exe", tid) == -1) {
+    return false;
+  }
+  int fd = open(exeline, O_RDONLY | O_CLOEXEC);
+  free(exeline);
+  if (fd == -1) {
+    return false;
+  }
+
+  char ehdr[EI_NIDENT];
+  ssize_t bytes = read(fd, &ehdr, sizeof(ehdr));
+  close(fd);
+  if (bytes != (ssize_t) sizeof(ehdr) || memcmp(ELFMAG, ehdr, SELFMAG) != 0) {
+    return false;
+  }
+  if (ehdr[EI_CLASS] == ELFCLASS32) {
+    return true;
+  }
+  return false;
+}
+#endif
+
+static int send_request(int sock_fd, void* msg_ptr, size_t msg_len) {
+  int result = 0;
+  if (TEMP_FAILURE_RETRY(write(sock_fd, msg_ptr, msg_len)) != (ssize_t) msg_len) {
+    result = -1;
+  } else {
+    char ack;
+    if (TEMP_FAILURE_RETRY(read(sock_fd, &ack, 1)) != 1) {
+      result = -1;
+    }
+  }
+  return result;
+}
+
+static int make_dump_request(debugger_action_t action, pid_t tid) {
+  const char* socket_name;
+  debugger_msg_t msg;
+  size_t msg_len;
+  void* msg_ptr;
+
+#if defined(__LP64__)
+  debugger32_msg_t msg32;
+  if (is32bit(tid)) {
+    msg_len = sizeof(debugger32_msg_t);
+    memset(&msg32, 0, msg_len);
+    msg32.tid = tid;
+    msg32.action = action;
+    msg_ptr = &msg32;
+
+    socket_name = DEBUGGER32_SOCKET_NAME;
+  } else
+#endif
+  {
+    msg_len = sizeof(debugger_msg_t);
+    memset(&msg, 0, msg_len);
     msg.tid = tid;
-    msg.action = DEBUGGER_ACTION_DUMP_TOMBSTONE;
+    msg.action = action;
+    msg_ptr = &msg;
 
-    int result = 0;
-    if (TEMP_FAILURE_RETRY(write(s, &msg, sizeof(msg))) != sizeof(msg)) {
-        result = -1;
-    } else {
-        char ack;
-        if (TEMP_FAILURE_RETRY(read(s, &ack, 1)) != 1) {
-            result = -1;
-        } else {
-            if (pathbuf && pathlen) {
-                ssize_t n = TEMP_FAILURE_RETRY(read(s, pathbuf, pathlen - 1));
-                if (n <= 0) {
-                    result = -1;
-                } else {
-                    pathbuf[n] = '\0';
-                }
-            }
-        }
-    }
-    TEMP_FAILURE_RETRY(close(s));
-    return result;
+    socket_name = DEBUGGER_SOCKET_NAME;
+  }
+
+  int sock_fd = socket_local_client(socket_name, ANDROID_SOCKET_NAMESPACE_ABSTRACT,
+      SOCK_STREAM | SOCK_CLOEXEC);
+  if (sock_fd < 0) {
+    return -1;
+  }
+
+  if (send_request(sock_fd, msg_ptr, msg_len) < 0) {
+    TEMP_FAILURE_RETRY(close(sock_fd));
+    return -1;
+  }
+
+  return sock_fd;
 }
 
 int dump_backtrace_to_file(pid_t tid, int fd) {
-    int s = socket_local_client(DEBUGGER_SOCKET_NAME,
-            ANDROID_SOCKET_NAMESPACE_ABSTRACT, SOCK_STREAM);
-    if (s < 0) {
-        return -1;
-    }
+  int sock_fd = make_dump_request(DEBUGGER_ACTION_DUMP_BACKTRACE, tid);
+  if (sock_fd < 0) {
+    return -1;
+  }
 
-    debugger_msg_t msg;
-    memset(&msg, 0, sizeof(msg));
-    msg.tid = tid;
-    msg.action = DEBUGGER_ACTION_DUMP_BACKTRACE;
-
-    int result = 0;
-    if (TEMP_FAILURE_RETRY(write(s, &msg, sizeof(msg))) != sizeof(msg)) {
-        result = -1;
-    } else {
-        char ack;
-        if (TEMP_FAILURE_RETRY(read(s, &ack, 1)) != 1) {
-            result = -1;
-        } else {
-            char buffer[4096];
-            ssize_t n;
-            while ((n = TEMP_FAILURE_RETRY(read(s, buffer, sizeof(buffer)))) > 0) {
-                if (TEMP_FAILURE_RETRY(write(fd, buffer, n)) != n) {
-                    result = -1;
-                    break;
-                }
-            }
-        }
+  /* Write the data read from the socket to the fd. */
+  int result = 0;
+  char buffer[1024];
+  ssize_t n;
+  while ((n = TEMP_FAILURE_RETRY(read(sock_fd, buffer, sizeof(buffer)))) > 0) {
+    if (TEMP_FAILURE_RETRY(write(fd, buffer, n)) != n) {
+      result = -1;
+      break;
     }
-    TEMP_FAILURE_RETRY(close(s));
-    return result;
+  }
+  TEMP_FAILURE_RETRY(close(sock_fd));
+  return result;
+}
+
+int dump_tombstone(pid_t tid, char* pathbuf, size_t pathlen) {
+  int sock_fd = make_dump_request(DEBUGGER_ACTION_DUMP_TOMBSTONE, tid);
+  if (sock_fd < 0) {
+    return -1;
+  }
+
+  /* Read the tombstone file name. */
+  char buffer[100]; /* This is larger than the largest tombstone path. */
+  int result = 0;
+  ssize_t n = TEMP_FAILURE_RETRY(read(sock_fd, buffer, sizeof(buffer) - 1));
+  if (n <= 0) {
+    result = -1;
+  } else {
+    if (pathbuf && pathlen) {
+      if (n >= (ssize_t) pathlen) {
+        n = pathlen - 1;
+      }
+      buffer[n] = '\0';
+      memcpy(pathbuf, buffer, n + 1);
+    }
+  }
+  TEMP_FAILURE_RETRY(close(sock_fd));
+  return result;
 }
diff --git a/libcutils/tests/Android.mk b/libcutils/tests/Android.mk
index 8e65310..76db5b1 100644
--- a/libcutils/tests/Android.mk
+++ b/libcutils/tests/Android.mk
@@ -19,6 +19,7 @@
     PropertiesTest.cpp \
 
 include $(CLEAR_VARS)
+LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
 LOCAL_MODULE := libcutils_test
 LOCAL_SRC_FILES := $(test_src_files)
 LOCAL_SHARED_LIBRARIES := \
@@ -32,6 +33,7 @@
 include $(BUILD_NATIVE_TEST)
 
 include $(CLEAR_VARS)
+LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
 LOCAL_MODULE := libcutils_test_static
 LOCAL_FORCE_STATIC_EXECUTABLE := true
 LOCAL_SRC_FILES := $(test_src_files)
@@ -39,9 +41,9 @@
     libc \
     libcutils \
     liblog \
-    libstlport_static \
     libutils \
 
+LOCAL_CXX_STL := stlport_static
 LOCAL_MULTILIB := both
 LOCAL_MODULE_STEM_32 := $(LOCAL_MODULE)32
 LOCAL_MODULE_STEM_64 := $(LOCAL_MODULE)64
diff --git a/liblog/logprint.c b/liblog/logprint.c
index 08e830a..244f723 100644
--- a/liblog/logprint.c
+++ b/liblog/logprint.c
@@ -21,10 +21,12 @@
 #include <assert.h>
 #include <ctype.h>
 #include <errno.h>
+#include <stdbool.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <sys/param.h>
 
 #include <log/logd.h>
 #include <log/logprint.h>
@@ -39,8 +41,23 @@
     android_LogPriority global_pri;
     FilterInfo *filters;
     AndroidLogPrintFormat format;
+    bool colored_output;
 };
 
+/*
+ *  gnome-terminal color tags
+ *    See http://misc.flogisoft.com/bash/tip_colors_and_formatting
+ *    for ideas on how to set the foreground color of the text for xterm.
+ *    The color manipulation character stream is defined as:
+ *      ESC [ 3 8 ; 5 ; <color#> m
+ */
+#define ANDROID_COLOR_BLUE     75
+#define ANDROID_COLOR_DEFAULT 231
+#define ANDROID_COLOR_GREEN    40
+#define ANDROID_COLOR_ORANGE  166
+#define ANDROID_COLOR_RED     196
+#define ANDROID_COLOR_YELLOW  226
+
 static FilterInfo * filterinfo_new(const char * tag, android_LogPriority pri)
 {
     FilterInfo *p_ret;
@@ -110,6 +127,23 @@
     }
 }
 
+static int colorFromPri (android_LogPriority pri)
+{
+    switch (pri) {
+        case ANDROID_LOG_VERBOSE:       return ANDROID_COLOR_DEFAULT;
+        case ANDROID_LOG_DEBUG:         return ANDROID_COLOR_BLUE;
+        case ANDROID_LOG_INFO:          return ANDROID_COLOR_GREEN;
+        case ANDROID_LOG_WARN:          return ANDROID_COLOR_ORANGE;
+        case ANDROID_LOG_ERROR:         return ANDROID_COLOR_RED;
+        case ANDROID_LOG_FATAL:         return ANDROID_COLOR_RED;
+        case ANDROID_LOG_SILENT:        return ANDROID_COLOR_DEFAULT;
+
+        case ANDROID_LOG_DEFAULT:
+        case ANDROID_LOG_UNKNOWN:
+        default:                        return ANDROID_COLOR_DEFAULT;
+    }
+}
+
 static android_LogPriority filterPriForTag(
         AndroidLogFormat *p_format, const char *tag)
 {
@@ -149,6 +183,7 @@
 
     p_ret->global_pri = ANDROID_LOG_VERBOSE;
     p_ret->format = FORMAT_BRIEF;
+    p_ret->colored_output = false;
 
     return p_ret;
 }
@@ -174,7 +209,10 @@
 void android_log_setPrintFormat(AndroidLogFormat *p_format,
         AndroidLogPrintFormat format)
 {
-    p_format->format=format;
+    if (format == FORMAT_COLOR)
+        p_format->colored_output = true;
+    else
+        p_format->format = format;
 }
 
 /**
@@ -192,6 +230,7 @@
     else if (strcmp(formatString, "time") == 0) format = FORMAT_TIME;
     else if (strcmp(formatString, "threadtime") == 0) format = FORMAT_THREADTIME;
     else if (strcmp(formatString, "long") == 0) format = FORMAT_LONG;
+    else if (strcmp(formatString, "color") == 0) format = FORMAT_COLOR;
     else format = FORMAT_OFF;
 
     return format;
@@ -698,6 +737,8 @@
     char * ret = NULL;
 
     priChar = filterPriToChar(entry->priority);
+    size_t prefixLen = 0, suffixLen = 0;
+    size_t len;
 
     /*
      * Get the current date/time in pretty form
@@ -719,73 +760,80 @@
     /*
      * Construct a buffer containing the log header and log message.
      */
-    size_t prefixLen, suffixLen;
+    if (p_format->colored_output) {
+        prefixLen = snprintf(prefixBuf, sizeof(prefixBuf), "\x1B[38;5;%dm",
+                             colorFromPri(entry->priority));
+        prefixLen = MIN(prefixLen, sizeof(prefixBuf));
+        suffixLen = snprintf(suffixBuf, sizeof(suffixBuf), "\x1B[0m");
+        suffixLen = MIN(suffixLen, sizeof(suffixBuf));
+    }
 
     switch (p_format->format) {
         case FORMAT_TAG:
-            prefixLen = snprintf(prefixBuf, sizeof(prefixBuf),
+            len = snprintf(prefixBuf + prefixLen, sizeof(prefixBuf) - prefixLen,
                 "%c/%-8s: ", priChar, entry->tag);
-            strcpy(suffixBuf, "\n"); suffixLen = 1;
+            strcpy(suffixBuf + suffixLen, "\n");
+            ++suffixLen;
             break;
         case FORMAT_PROCESS:
-            prefixLen = snprintf(prefixBuf, sizeof(prefixBuf),
-                "%c(%5d) ", priChar, entry->pid);
-            suffixLen = snprintf(suffixBuf, sizeof(suffixBuf),
+            len = snprintf(suffixBuf + suffixLen, sizeof(suffixBuf) - suffixLen,
                 "  (%s)\n", entry->tag);
+            suffixLen += MIN(len, sizeof(suffixBuf) - suffixLen);
+            len = snprintf(prefixBuf + prefixLen, sizeof(prefixBuf) - prefixLen,
+                "%c(%5d) ", priChar, entry->pid);
             break;
         case FORMAT_THREAD:
-            prefixLen = snprintf(prefixBuf, sizeof(prefixBuf),
+            len = snprintf(prefixBuf + prefixLen, sizeof(prefixBuf) - prefixLen,
                 "%c(%5d:%5d) ", priChar, entry->pid, entry->tid);
-            strcpy(suffixBuf, "\n");
-            suffixLen = 1;
+            strcpy(suffixBuf + suffixLen, "\n");
+            ++suffixLen;
             break;
         case FORMAT_RAW:
-            prefixBuf[0] = 0;
-            prefixLen = 0;
-            strcpy(suffixBuf, "\n");
-            suffixLen = 1;
+            prefixBuf[prefixLen] = 0;
+            len = 0;
+            strcpy(suffixBuf + suffixLen, "\n");
+            ++suffixLen;
             break;
         case FORMAT_TIME:
-            prefixLen = snprintf(prefixBuf, sizeof(prefixBuf),
+            len = snprintf(prefixBuf + prefixLen, sizeof(prefixBuf) - prefixLen,
                 "%s.%03ld %c/%-8s(%5d): ", timeBuf, entry->tv_nsec / 1000000,
                 priChar, entry->tag, entry->pid);
-            strcpy(suffixBuf, "\n");
-            suffixLen = 1;
+            strcpy(suffixBuf + suffixLen, "\n");
+            ++suffixLen;
             break;
         case FORMAT_THREADTIME:
-            prefixLen = snprintf(prefixBuf, sizeof(prefixBuf),
+            len = snprintf(prefixBuf + prefixLen, sizeof(prefixBuf) - prefixLen,
                 "%s.%03ld %5d %5d %c %-8s: ", timeBuf, entry->tv_nsec / 1000000,
                 entry->pid, entry->tid, priChar, entry->tag);
-            strcpy(suffixBuf, "\n");
-            suffixLen = 1;
+            strcpy(suffixBuf + suffixLen, "\n");
+            ++suffixLen;
             break;
         case FORMAT_LONG:
-            prefixLen = snprintf(prefixBuf, sizeof(prefixBuf),
+            len = snprintf(prefixBuf + prefixLen, sizeof(prefixBuf) - prefixLen,
                 "[ %s.%03ld %5d:%5d %c/%-8s ]\n",
                 timeBuf, entry->tv_nsec / 1000000, entry->pid,
                 entry->tid, priChar, entry->tag);
-            strcpy(suffixBuf, "\n\n");
-            suffixLen = 2;
+            strcpy(suffixBuf + suffixLen, "\n\n");
+            suffixLen += 2;
             prefixSuffixIsHeaderFooter = 1;
             break;
         case FORMAT_BRIEF:
         default:
-            prefixLen = snprintf(prefixBuf, sizeof(prefixBuf),
+            len = snprintf(prefixBuf + prefixLen, sizeof(prefixBuf) - prefixLen,
                 "%c/%-8s(%5d): ", priChar, entry->tag, entry->pid);
-            strcpy(suffixBuf, "\n");
-            suffixLen = 1;
+            strcpy(suffixBuf + suffixLen, "\n");
+            ++suffixLen;
             break;
     }
+
     /* snprintf has a weird return value.   It returns what would have been
      * written given a large enough buffer.  In the case that the prefix is
      * longer then our buffer(128), it messes up the calculations below
      * possibly causing heap corruption.  To avoid this we double check and
      * set the length at the maximum (size minus null byte)
      */
-    if(prefixLen >= sizeof(prefixBuf))
-        prefixLen = sizeof(prefixBuf) - 1;
-    if(suffixLen >= sizeof(suffixBuf))
-        suffixLen = sizeof(suffixBuf) - 1;
+    prefixLen += MIN(len, sizeof(prefixBuf) - prefixLen);
+    suffixLen = MIN(suffixLen, sizeof(suffixBuf));  /* clamp to the buffer, not sizeof(size_t) */
 
     /* the following code is tragically unreadable */
 
diff --git a/liblog/tests/Android.mk b/liblog/tests/Android.mk
index 6c72138..b1426d3 100644
--- a/liblog/tests/Android.mk
+++ b/liblog/tests/Android.mk
@@ -43,11 +43,8 @@
 LOCAL_CFLAGS += $(benchmark_c_flags)
 LOCAL_SHARED_LIBRARIES += liblog libm
 LOCAL_SRC_FILES := $(benchmark_src_files)
-ifndef LOCAL_SDK_VERSION
-LOCAL_C_INCLUDES += bionic bionic/libstdc++/include external/stlport/stlport
-LOCAL_SHARED_LIBRARIES += libstlport
-endif
 LOCAL_MODULE_PATH := $(TARGET_OUT_DATA_NATIVE_TESTS)/$(LOCAL_MODULE)
+include external/stlport/libstlport.mk
 include $(BUILD_EXECUTABLE)
 
 # -----------------------------------------------------------------------------
@@ -59,7 +56,8 @@
     -g \
     -Wall -Wextra \
     -Werror \
-    -fno-builtin
+    -fno-builtin \
+    -std=gnu++11
 
 test_src_files := \
     liblog_test.cpp
diff --git a/libnativebridge/Android.mk b/libnativebridge/Android.mk
index 9403fd2..6c2e43e 100644
--- a/libnativebridge/Android.mk
+++ b/libnativebridge/Android.mk
@@ -13,7 +13,7 @@
 LOCAL_SHARED_LIBRARIES := liblog
 LOCAL_CLANG := true
 LOCAL_CPP_EXTENSION := .cc
-LOCAL_CFLAGS := -Werror
+LOCAL_CFLAGS := -Werror -Wall
 LOCAL_CPPFLAGS := -std=gnu++11 -fvisibility=protected
 LOCAL_LDFLAGS := -ldl
 LOCAL_MULTILIB := both
@@ -30,9 +30,11 @@
 LOCAL_SHARED_LIBRARIES := liblog
 LOCAL_CLANG := true
 LOCAL_CPP_EXTENSION := .cc
-LOCAL_CFLAGS := -Werror
+LOCAL_CFLAGS := -Werror -Wall
 LOCAL_CPPFLAGS := -std=gnu++11 -fvisibility=protected
 LOCAL_LDFLAGS := -ldl
 LOCAL_MULTILIB := both
 
 include $(BUILD_HOST_SHARED_LIBRARY)
+
+include $(LOCAL_PATH)/tests/Android.mk
\ No newline at end of file
diff --git a/libnativebridge/native_bridge.cc b/libnativebridge/native_bridge.cc
index 2205f45..d460f6f 100644
--- a/libnativebridge/native_bridge.cc
+++ b/libnativebridge/native_bridge.cc
@@ -16,32 +16,77 @@
 
 #include "nativebridge/native_bridge.h"
 
+#include <cstring>
 #include <cutils/log.h>
 #include <dlfcn.h>
+#include <errno.h>
+#include <fcntl.h>
 #include <stdio.h>
-#include "utils/Mutex.h"
+#include <sys/mount.h>
+#include <sys/stat.h>
 
 
 namespace android {
 
-static Mutex native_bridge_lock("native bridge lock");
+// Environment values required by the apps running with native bridge.
+struct NativeBridgeRuntimeValues {
+    const char* os_arch;
+    const char* cpu_abi;
+    const char* cpu_abi2;
+    const char* *supported_abis;
+    int32_t abi_count;
+};
 
 // The symbol name exposed by native-bridge with the type of NativeBridgeCallbacks.
 static constexpr const char* kNativeBridgeInterfaceSymbol = "NativeBridgeItf";
 
-// The filename of the library we are supposed to load.
-static const char* native_bridge_library_filename = nullptr;
+enum class NativeBridgeState {
+  kNotSetup,                        // Initial state.
+  kOpened,                          // After successful dlopen.
+  kInitialized,                     // After successful initialization.
+  kClosed                           // Closed or errors.
+};
 
-// Whether a native bridge is available (loaded and ready).
-static bool available = false;
-// Whether we have already initialized (or tried to).
-static bool initialized = false;
+static const char* kNotSetupString = "kNotSetup";
+static const char* kOpenedString = "kOpened";
+static const char* kInitializedString = "kInitialized";
+static const char* kClosedString = "kClosed";
+
+static const char* GetNativeBridgeStateString(NativeBridgeState state) {
+  // Translate a bridge state into its printable name (used in log messages).
+  switch (state) {
+    case NativeBridgeState::kNotSetup:
+      return kNotSetupString;
+    case NativeBridgeState::kOpened:
+      return kOpenedString;
+    case NativeBridgeState::kInitialized:
+      return kInitializedString;
+    case NativeBridgeState::kClosed:
+      return kClosedString;
+  }
+  // Not reached for valid enumerators; ensures every path returns a valid string.
+  return "kUnknown";
+}
+
+// Current state of the native bridge.
+static NativeBridgeState state = NativeBridgeState::kNotSetup;
+
 // Whether we had an error at some point.
 static bool had_error = false;
 
+// Handle of the loaded library.
+static void* native_bridge_handle = nullptr;
+// Pointer to the callbacks. Available as soon as LoadNativeBridge succeeds, but only initialized
+// later.
 static NativeBridgeCallbacks* callbacks = nullptr;
+// Callbacks provided by the environment to the bridge. Passed to LoadNativeBridge.
 static const NativeBridgeRuntimeCallbacks* runtime_callbacks = nullptr;
 
+// The app's data directory.
+static char* app_data_dir = nullptr;
+
+static constexpr uint32_t kNativeBridgeCallbackVersion = 1;
+
 // Characters allowed in a native bridge filename. The first character must
 // be in [a-zA-Z] (expected 'l' for "libx"). The rest must be in [a-zA-Z0-9._-].
 static bool CharacterAllowed(char c, bool first) {
@@ -83,81 +128,315 @@
   }
 }
 
-void SetupNativeBridge(const char* nb_library_filename,
-                       const NativeBridgeRuntimeCallbacks* runtime_cbs) {
-  Mutex::Autolock auto_lock(native_bridge_lock);
-
-  if (initialized || native_bridge_library_filename != nullptr) {
-    // Setup has been called before. Ignore this call.
-    ALOGW("Called SetupNativeBridge for an already set up native bridge.");
-    // Note: counts as an error, even though the bridge may be functional.
-    had_error = true;
-    return;
-  }
-
-  runtime_callbacks = runtime_cbs;
-
-  if (nb_library_filename == nullptr) {
-    available = false;
-    initialized = true;
-  } else {
-    // Check whether it's an empty string.
-    if (*nb_library_filename == 0) {
-      available = false;
-      initialized = true;
-    } else if (!NativeBridgeNameAcceptable(nb_library_filename)) {
-      available = false;
-      initialized = true;
-      had_error = true;
-    }
-
-    if (!initialized) {
-      // Didn't find a name error or empty string, assign it.
-      native_bridge_library_filename = nb_library_filename;
-    }
-  }
+static bool VersionCheck(NativeBridgeCallbacks* cb) {
+  return cb != nullptr && cb->version == kNativeBridgeCallbackVersion;
 }
 
-static bool NativeBridgeInitialize() {
-  Mutex::Autolock auto_lock(native_bridge_lock);
+bool LoadNativeBridge(const char* nb_library_filename,
+                      const NativeBridgeRuntimeCallbacks* runtime_cbs) {
+  // We expect only one place that calls LoadNativeBridge: Runtime::Init. At that point we are not
+  // multi-threaded, so we do not need locking here.
 
-  if (initialized) {
-    // Somebody did it before.
-    return available;
-  }
-
-  available = false;
-
-  if (native_bridge_library_filename == nullptr) {
-    // Called initialize without setup. dlopen has special semantics for nullptr input.
-    // So just call it a day here. This counts as an error.
-    initialized = true;
+  if (state != NativeBridgeState::kNotSetup) {
+    // Setup has been called before. Ignore this call.
+    if (nb_library_filename != nullptr) {  // Avoids some log-spam for dalvikvm.
+      ALOGW("Called LoadNativeBridge for an already set up native bridge. State is %s.",
+            GetNativeBridgeStateString(state));
+    }
+    // Note: counts as an error, even though the bridge may be functional.
     had_error = true;
     return false;
   }
 
-  void* handle = dlopen(native_bridge_library_filename, RTLD_LAZY);
-  if (handle != nullptr) {
-    callbacks = reinterpret_cast<NativeBridgeCallbacks*>(dlsym(handle,
-                                                               kNativeBridgeInterfaceSymbol));
-
-    if (callbacks != nullptr) {
-      available = callbacks->initialize(runtime_callbacks);
-    }
-
-    if (!available) {
-      // If we fail initialization, this counts as an error.
-      had_error = true;
-      dlclose(handle);
-    }
+  if (nb_library_filename == nullptr || *nb_library_filename == 0) {
+    state = NativeBridgeState::kClosed;
+    return true;
   } else {
-    // Being unable to open the library counts as an error.
-    had_error = true;
+    if (!NativeBridgeNameAcceptable(nb_library_filename)) {
+      state = NativeBridgeState::kClosed;
+      had_error = true;
+    } else {
+      // Try to open the library.
+      void* handle = dlopen(nb_library_filename, RTLD_LAZY);
+      if (handle != nullptr) {
+        callbacks = reinterpret_cast<NativeBridgeCallbacks*>(dlsym(handle,
+                                                                   kNativeBridgeInterfaceSymbol));
+        if (callbacks != nullptr) {
+          if (VersionCheck(callbacks)) {
+            // Store the handle for later.
+            native_bridge_handle = handle;
+          } else {
+            callbacks = nullptr;
+            dlclose(handle);
+            ALOGW("Unsupported native bridge interface.");
+          }
+        } else {
+          dlclose(handle);
+        }
+      }
+
+      // Two failure conditions: could not find library (dlopen failed), or could not find native
+      // bridge interface (dlsym failed). Both are an error and close the native bridge.
+      if (callbacks == nullptr) {
+        had_error = true;
+        state = NativeBridgeState::kClosed;
+      } else {
+        runtime_callbacks = runtime_cbs;
+        state = NativeBridgeState::kOpened;
+      }
+    }
+    return state == NativeBridgeState::kOpened;
+  }
+}
+
+#if defined(__arm__)
+static const char* kRuntimeISA = "arm";
+#elif defined(__aarch64__)
+static const char* kRuntimeISA = "arm64";
+#elif defined(__mips__)
+static const char* kRuntimeISA = "mips";
+#elif defined(__i386__)
+static const char* kRuntimeISA = "x86";
+#elif defined(__x86_64__)
+static const char* kRuntimeISA = "x86_64";
+#else
+static const char* kRuntimeISA = "unknown";
+#endif
+
+
+bool NeedsNativeBridge(const char* instruction_set) {  // True iff the ISA is not kRuntimeISA.
+  if (instruction_set != nullptr) {
+    return strcmp(instruction_set, kRuntimeISA) != 0;
+  }
+  ALOGE("Null instruction set in NeedsNativeBridge.");
+  return false;
+}
+
+#ifdef __APPLE__
+template<typename T> void UNUSED(const T&) {}
+#endif
+
+void PreInitializeNativeBridge(const char* app_data_dir_in, const char* instruction_set) {
+  if (app_data_dir_in == nullptr) {
+    return;
   }
 
-  initialized = true;
+  const size_t len = strlen(app_data_dir_in) + 1;  // +1: room for the terminating NUL.
+  // Make a NUL-terminated copy for us; it is passed to the bridge's initialize() later.
+  app_data_dir = new char[len];
+  strncpy(app_data_dir, app_data_dir_in, len);
 
-  return available;
+#ifndef __APPLE__
+  if (instruction_set == nullptr) {
+    return;
+  }
+  size_t isa_len = strlen(instruction_set);
+  if (isa_len > 10) {
+    // 10 is a loose upper bound on the currently known instruction sets (a tight bound is 7 for
+    // x86_64 [including the trailing \0]). This is so we don't have to change here if there will
+    // be another instruction set in the future.
+    ALOGW("Instruction set %s is malformed, must be less than or equal to 10 characters.",
+          instruction_set);
+    return;
+  }
+
+  // Bind-mount /system/lib{,64}/<isa>/cpuinfo to /proc/cpuinfo. If the file does not exist, the
+  // mount command will fail, so we save the extra file existence check...
+  char cpuinfo_path[1024];
+
+#ifdef HAVE_ANDROID_OS
+  snprintf(cpuinfo_path, sizeof(cpuinfo_path), "/system/lib"
+#ifdef __LP64__
+      "64"
+#endif  // __LP64__
+      "/%s/cpuinfo", instruction_set);
+#else   // !HAVE_ANDROID_OS
+  // To be able to test on the host, we hardwire a relative path.
+  snprintf(cpuinfo_path, sizeof(cpuinfo_path), "./cpuinfo");
+#endif
+
+  // Bind-mount.
+  if (TEMP_FAILURE_RETRY(mount(cpuinfo_path,        // Source.
+                               "/proc/cpuinfo",     // Target.
+                               nullptr,             // FS type.
+                               MS_BIND,             // Mount flags: bind mount.
+                               nullptr)) == -1) {   // "Data."
+    ALOGW("Failed to bind-mount %s as /proc/cpuinfo: %s", cpuinfo_path, strerror(errno));
+  }
+#else
+  UNUSED(instruction_set);
+  ALOGW("Mac OS does not support bind-mounting. Host simulation of native bridge impossible.");
+#endif
+}
+
+static void SetCpuAbi(JNIEnv* env, jclass build_class, const char* field, const char* value) {
+  if (value != nullptr) {
+    jfieldID field_id = env->GetStaticFieldID(build_class, field, "Ljava/lang/String;");
+    if (field_id == nullptr) {
+      env->ExceptionClear();
+      ALOGW("Could not find %s field.", field);
+      return;
+    }
+
+    jstring str = env->NewStringUTF(value);
+    if (str == nullptr) {
+      env->ExceptionClear();
+      ALOGW("Could not create string %s.", value);
+      return;
+    }
+
+    env->SetStaticObjectField(build_class, field_id, str);
+  }
+}
+
+static void SetSupportedAbis(JNIEnv* env, jclass build_class, const char* field,
+                             const char* *values, int32_t value_count) {
+  if (value_count < 0) {
+    return;
+  }
+  if (values == nullptr && value_count > 0) {
+    ALOGW("More than zero values expected: %d.", value_count);
+    return;
+  }
+
+  jfieldID field_id = env->GetStaticFieldID(build_class, field, "[Ljava/lang/String;");
+  if (field_id != nullptr) {
+    // Create the array.
+    jobjectArray array = env->NewObjectArray(value_count, env->FindClass("java/lang/String"),
+                                             nullptr);
+    if (array == nullptr) {
+      env->ExceptionClear();
+      ALOGW("Could not create array.");
+      return;
+    }
+
+    // Fill the array.
+    for (int32_t i = 0; i < value_count; i++) {
+      jstring str = env->NewStringUTF(values[i]);
+      if (str == nullptr) {
+        env->ExceptionClear();
+        ALOGW("Could not create string %s.", values[i]);
+        return;
+      }
+
+      env->SetObjectArrayElement(array, i, str);
+    }
+
+    env->SetStaticObjectField(build_class, field_id, array);
+  } else {
+    env->ExceptionClear();
+    ALOGW("Could not find %s field.", field);
+  }
+}
+
+// Set up the environment for the bridged app.
+static void SetupEnvironment(NativeBridgeCallbacks* callbacks, JNIEnv* env, const char* isa) {
+  // Need a JNIEnv* to do anything.
+  if (env == nullptr) {
+    ALOGW("No JNIEnv* to set up app environment.");
+    return;
+  }
+
+  // Query the bridge for environment values.
+  const struct NativeBridgeRuntimeValues* env_values = callbacks->getAppEnv(isa);
+  if (env_values == nullptr) {
+    return;
+  }
+
+  // Keep the JNIEnv clean.
+  jint success = env->PushLocalFrame(16);  // That should be small and large enough.
+  if (success < 0) {
+    // Out of memory, really borked.
+    ALOGW("Out of memory while setting up app environment.");
+    env->ExceptionClear();
+    return;
+  }
+
+  // Reset CPU_ABI & CPU_ABI2 to values required by the apps running with native bridge.
+  if (env_values->cpu_abi != nullptr || env_values->cpu_abi2 != nullptr ||
+      env_values->abi_count >= 0) {
+    jclass bclass_id = env->FindClass("android/os/Build");
+    if (bclass_id != nullptr) {
+      SetCpuAbi(env, bclass_id, "CPU_ABI", env_values->cpu_abi);
+      SetCpuAbi(env, bclass_id, "CPU_ABI2", env_values->cpu_abi2);
+
+      SetSupportedAbis(env, bclass_id, "SUPPORTED_ABIS", env_values->supported_abis,
+                       env_values->abi_count);
+    } else {
+      // For example in a host test environment.
+      env->ExceptionClear();
+      ALOGW("Could not find Build class.");
+    }
+  }
+
+  if (env_values->os_arch != nullptr) {
+    jclass sclass_id = env->FindClass("java/lang/System");
+    if (sclass_id != nullptr) {
+      jmethodID set_prop_id = env->GetStaticMethodID(sclass_id, "setProperty",
+          "(Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;");
+      if (set_prop_id != nullptr) {
+        // Reset os.arch to the value required by the apps running with native bridge.
+        env->CallStaticObjectMethod(sclass_id, set_prop_id, env->NewStringUTF("os.arch"),
+            env->NewStringUTF(env_values->os_arch));
+      } else {
+        env->ExceptionClear();
+        ALOGW("Could not find setProperty method.");
+      }
+    } else {
+      env->ExceptionClear();
+      ALOGW("Could not find System class.");
+    }
+  }
+
+  // Make it pristine again.
+  env->PopLocalFrame(nullptr);
+}
+
+bool InitializeNativeBridge(JNIEnv* env, const char* instruction_set) {
+  // We expect only one place that calls InitializeNativeBridge: Runtime::DidForkFromZygote. At that
+  // point we are not multi-threaded, so we do not need locking here.
+
+  if (state == NativeBridgeState::kOpened) {
+    // Try to initialize.
+    if (callbacks->initialize(runtime_callbacks, app_data_dir, instruction_set)) {
+      SetupEnvironment(callbacks, env, instruction_set);
+      state = NativeBridgeState::kInitialized;
+    } else {
+      // Unload the library.
+      dlclose(native_bridge_handle);
+      had_error = true;
+      state = NativeBridgeState::kClosed;
+    }
+  } else {
+    had_error = true;
+    state = NativeBridgeState::kClosed;
+  }
+
+  return state == NativeBridgeState::kInitialized;
+}
+
+void UnloadNativeBridge() {
+  // We expect only one place that calls UnloadNativeBridge: Runtime::DidForkFromZygote. At that
+  // point we are not multi-threaded, so we do not need locking here.
+
+  switch(state) {
+    case NativeBridgeState::kOpened:
+    case NativeBridgeState::kInitialized:
+      // Unload.
+      dlclose(native_bridge_handle);
+      break;
+
+    case NativeBridgeState::kNotSetup:
+      // Not even set up. Error.
+      had_error = true;
+      break;
+
+    case NativeBridgeState::kClosed:
+      // Ignore.
+      break;
+  }
+
+  state = NativeBridgeState::kClosed;
 }
 
 bool NativeBridgeError() {
@@ -165,11 +444,17 @@
 }
 
 bool NativeBridgeAvailable() {
-  return NativeBridgeInitialize();
+  return state == NativeBridgeState::kOpened || state == NativeBridgeState::kInitialized;
+}
+
+bool NativeBridgeInitialized() {
+  // Calls of this are supposed to happen in a state where the native bridge is stable, i.e., after
+  // Runtime::DidForkFromZygote. In that case we do not need a lock.
+  return state == NativeBridgeState::kInitialized;
 }
 
 void* NativeBridgeLoadLibrary(const char* libpath, int flag) {
-  if (NativeBridgeInitialize()) {
+  if (NativeBridgeInitialized()) {
     return callbacks->loadLibrary(libpath, flag);
   }
   return nullptr;
@@ -177,14 +462,14 @@
 
 void* NativeBridgeGetTrampoline(void* handle, const char* name, const char* shorty,
                                 uint32_t len) {
-  if (NativeBridgeInitialize()) {
+  if (NativeBridgeInitialized()) {
     return callbacks->getTrampoline(handle, name, shorty, len);
   }
   return nullptr;
 }
 
 bool NativeBridgeIsSupported(const char* libpath) {
-  if (NativeBridgeInitialize()) {
+  if (NativeBridgeInitialized()) {
     return callbacks->isSupported(libpath);
   }
   return false;
diff --git a/libnativebridge/tests/Android.mk b/libnativebridge/tests/Android.mk
index f58b8f7..a94d2f7 100644
--- a/libnativebridge/tests/Android.mk
+++ b/libnativebridge/tests/Android.mk
@@ -5,6 +5,8 @@
 # Build the unit tests.
 test_src_files := \
     InvalidCharsNativeBridge_test.cpp \
+    NeedsNativeBridge_test.cpp \
+    PreInitializeNativeBridge_test.cpp \
     ReSetupNativeBridge_test.cpp \
     UnavailableNativeBridge_test.cpp \
     ValidNameNativeBridge_test.cpp
diff --git a/libnativebridge/tests/InvalidCharsNativeBridge_test.cpp b/libnativebridge/tests/InvalidCharsNativeBridge_test.cpp
index f37e9c1..8f7973d 100644
--- a/libnativebridge/tests/InvalidCharsNativeBridge_test.cpp
+++ b/libnativebridge/tests/InvalidCharsNativeBridge_test.cpp
@@ -23,7 +23,7 @@
 TEST_F(NativeBridgeTest, InvalidChars) {
     // Do one test actually calling setup.
     EXPECT_EQ(false, NativeBridgeError());
-    SetupNativeBridge(kTestName, nullptr);
+    LoadNativeBridge(kTestName, nullptr);
     // This should lead to an error for invalid characters.
     EXPECT_EQ(true, NativeBridgeError());
 
diff --git a/libnativebridge/tests/NeedsNativeBridge_test.cpp b/libnativebridge/tests/NeedsNativeBridge_test.cpp
new file mode 100644
index 0000000..e1c0876
--- /dev/null
+++ b/libnativebridge/tests/NeedsNativeBridge_test.cpp
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NativeBridgeTest.h"
+
+namespace android {
+
+static const char* kISAs[] = { "arm", "arm64", "mips", "x86", "x86_64", "random", "64arm", "64_x86",
+                               "64_x86_64", "", "reallylongstringabcd", nullptr };
+
+#if defined(__arm__)
+static const char* kRuntimeISA = "arm";
+#elif defined(__aarch64__)
+static const char* kRuntimeISA = "arm64";
+#elif defined(__mips__)
+static const char* kRuntimeISA = "mips";
+#elif defined(__i386__)
+static const char* kRuntimeISA = "x86";
+#elif defined(__x86_64__)
+static const char* kRuntimeISA = "x86_64";
+#else
+static const char* kRuntimeISA = "unknown";
+#endif
+
+TEST_F(NativeBridgeTest, NeedsNativeBridge) {
+    EXPECT_EQ(false, NeedsNativeBridge(kRuntimeISA));
+
+    const size_t kISACount = sizeof(kISAs)/sizeof(kISAs[0]);
+    for (size_t i = 0; i < kISACount; i++) {
+        EXPECT_EQ(kISAs[i] == nullptr ? false : strcmp(kISAs[i], kRuntimeISA) != 0,
+                  NeedsNativeBridge(kISAs[i]));
+    }
+}
+
+}  // namespace android
diff --git a/libnativebridge/tests/PreInitializeNativeBridge_test.cpp b/libnativebridge/tests/PreInitializeNativeBridge_test.cpp
new file mode 100644
index 0000000..9b487d7
--- /dev/null
+++ b/libnativebridge/tests/PreInitializeNativeBridge_test.cpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NativeBridgeTest.h"
+
+#include <cstdio>
+#include <cstring>
+#include <cutils/log.h>
+#include <dlfcn.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+
+namespace android {
+
+static constexpr const char* kTestData = "PreInitializeNativeBridge test.";
+
+TEST_F(NativeBridgeTest, PreInitializeNativeBridge) {
+#ifndef __APPLE__        // Mac OS does not support bind-mount.
+#ifndef HAVE_ANDROID_OS  // Cannot write into the hard-wired location.
+    // Try to create our mount namespace.
+    if (unshare(CLONE_NEWNS) != -1) {
+        // Create a dummy file.
+        FILE* cpuinfo = fopen("./cpuinfo", "w");
+        ASSERT_NE(nullptr, cpuinfo) << strerror(errno);
+        fprintf(cpuinfo, "%s", kTestData);
+        fclose(cpuinfo);
+
+        // Call the setup.
+        PreInitializeNativeBridge("does not matter 1", "short 2");
+
+        // Read /proc/cpuinfo
+        FILE* proc_cpuinfo = fopen("/proc/cpuinfo", "r");
+        ASSERT_NE(nullptr, proc_cpuinfo) << strerror(errno);
+        char buf[1024] = "";  // Stays a valid string for strcmp even if fgets fails.
+        EXPECT_NE(nullptr, fgets(buf, sizeof(buf), proc_cpuinfo)) << "Error reading.";
+        fclose(proc_cpuinfo);
+
+        EXPECT_EQ(0, strcmp(buf, kTestData));
+
+        // Delete the file.
+        ASSERT_EQ(0, unlink("./cpuinfo")) << "Error unlinking temporary file.";
+        // Ending the test will tear down the mount namespace.
+    } else {
+        GTEST_LOG_(WARNING) << "Could not create mount namespace. Are you running this as root?";
+    }
+#endif  // !HAVE_ANDROID_OS
+#endif  // !__APPLE__
+}
+
+}  // namespace android
diff --git a/libnativebridge/tests/ReSetupNativeBridge_test.cpp b/libnativebridge/tests/ReSetupNativeBridge_test.cpp
index ef5bfce..944e5d7 100644
--- a/libnativebridge/tests/ReSetupNativeBridge_test.cpp
+++ b/libnativebridge/tests/ReSetupNativeBridge_test.cpp
@@ -18,13 +18,11 @@
 
 namespace android {
 
-static const char* kTestName = "librandom-bridge_not.existing.so";
-
 TEST_F(NativeBridgeTest, ReSetup) {
     EXPECT_EQ(false, NativeBridgeError());
-    SetupNativeBridge(kTestName, nullptr);
+    LoadNativeBridge("", nullptr);
     EXPECT_EQ(false, NativeBridgeError());
-    SetupNativeBridge(kTestName, nullptr);
+    LoadNativeBridge("", nullptr);
     // This should lead to an error for trying to re-setup a native bridge.
     EXPECT_EQ(true, NativeBridgeError());
 }
diff --git a/libnativebridge/tests/UnavailableNativeBridge_test.cpp b/libnativebridge/tests/UnavailableNativeBridge_test.cpp
index 27d1233..ad374a5 100644
--- a/libnativebridge/tests/UnavailableNativeBridge_test.cpp
+++ b/libnativebridge/tests/UnavailableNativeBridge_test.cpp
@@ -20,9 +20,10 @@
 
 TEST_F(NativeBridgeTest, NoNativeBridge) {
     EXPECT_EQ(false, NativeBridgeAvailable());
-    // This should lead to an error for trying to initialize a not-setup
-    // native bridge.
+    // Try to initialize. This should fail as we are not set up.
+    EXPECT_EQ(false, InitializeNativeBridge(nullptr, nullptr));
     EXPECT_EQ(true, NativeBridgeError());
+    EXPECT_EQ(false, NativeBridgeAvailable());
 }
 
 }  // namespace android
diff --git a/libnativebridge/tests/ValidNameNativeBridge_test.cpp b/libnativebridge/tests/ValidNameNativeBridge_test.cpp
index 3e01923..690be4a 100644
--- a/libnativebridge/tests/ValidNameNativeBridge_test.cpp
+++ b/libnativebridge/tests/ValidNameNativeBridge_test.cpp
@@ -21,13 +21,15 @@
 static const char* kTestName = "librandom-bridge_not.existing.so";
 
 TEST_F(NativeBridgeTest, ValidName) {
+    // Check that the name is acceptable.
+    EXPECT_EQ(true, NativeBridgeNameAcceptable(kTestName));
+
+    // Now check what happens on LoadNativeBridge.
     EXPECT_EQ(false, NativeBridgeError());
-    SetupNativeBridge(kTestName, nullptr);
-    EXPECT_EQ(false, NativeBridgeError());
-    EXPECT_EQ(false, NativeBridgeAvailable());
-    // This should lead to an error for trying to initialize a not-existing
-    // native bridge.
+    LoadNativeBridge(kTestName, nullptr);
+    // This will lead to an error as the library doesn't exist.
     EXPECT_EQ(true, NativeBridgeError());
+    EXPECT_EQ(false, NativeBridgeAvailable());
 }
 
 }  // namespace android
diff --git a/libpixelflinger/codeflinger/CodeCache.cpp b/libpixelflinger/codeflinger/CodeCache.cpp
index cfd2b37..d770302 100644
--- a/libpixelflinger/codeflinger/CodeCache.cpp
+++ b/libpixelflinger/codeflinger/CodeCache.cpp
@@ -201,8 +201,8 @@
         mCacheInUse += assemblySize;
         mWhen++;
         // synchronize caches...
-        void* base = assembly->base();
-        void* curr = (uint8_t*)base + assembly->size();
+        char* base = reinterpret_cast<char*>(assembly->base());
+        char* curr = reinterpret_cast<char*>(base + assembly->size());
         __builtin___clear_cache(base, curr);
     }
 
diff --git a/libutils/Android.mk b/libutils/Android.mk
index 0c8625c..b55e635 100644
--- a/libutils/Android.mk
+++ b/libutils/Android.mk
@@ -93,8 +93,7 @@
 LOCAL_CFLAGS += -Werror
 
 LOCAL_C_INCLUDES += \
-		bionic/libc \
-		external/zlib
+	external/zlib
 
 LOCAL_STATIC_LIBRARIES := \
 	libcutils
diff --git a/libutils/Threads.cpp b/libutils/Threads.cpp
index 03fde97..9bcd063 100644
--- a/libutils/Threads.cpp
+++ b/libutils/Threads.cpp
@@ -28,9 +28,6 @@
 # include <pthread.h>
 # include <sched.h>
 # include <sys/resource.h>
-#ifdef HAVE_ANDROID_OS
-# include <private/bionic_pthread.h>
-#endif
 #elif defined(HAVE_WIN32_THREADS)
 # include <windows.h>
 # include <stdint.h>
@@ -855,7 +852,7 @@
     pid_t tid;
     if (mRunning) {
         pthread_t pthread = android_thread_id_t_to_pthread(mThread);
-        tid = __pthread_gettid(pthread);
+        tid = pthread_gettid_np(pthread);
     } else {
         ALOGW("Thread (this=%p): getTid() is undefined before run()", this);
         tid = -1;
diff --git a/libutils/tests/Android.mk b/libutils/tests/Android.mk
index caedaff..ab03d87 100644
--- a/libutils/tests/Android.mk
+++ b/libutils/tests/Android.mk
@@ -1,6 +1,5 @@
 # Build the unit tests.
 LOCAL_PATH := $(call my-dir)
-include $(CLEAR_VARS)
 
 # Build the unit tests.
 test_src_files := \
@@ -18,11 +17,6 @@
     liblog \
     libcutils \
     libutils \
-    libstlport
-
-static_libraries := \
-    libgtest \
-    libgtest_main
 
 $(foreach file,$(test_src_files), \
     $(eval include $(CLEAR_VARS)) \
diff --git a/logcat/logcat.cpp b/logcat/logcat.cpp
index 858e56c..b557011 100644
--- a/logcat/logcat.cpp
+++ b/logcat/logcat.cpp
@@ -219,8 +219,8 @@
                     "  -f <filename>   Log to file. Default to stdout\n"
                     "  -r [<kbytes>]   Rotate log every kbytes. (16 if unspecified). Requires -f\n"
                     "  -n <count>      Sets max number of rotated logs to <count>, default 4\n"
-                    "  -v <format>     Sets the log print format, where <format> is one of:\n\n"
-                    "                  brief process tag thread raw time threadtime long\n\n"
+                    "  -v <format>     Sets the log print format, where <format> is:\n\n"
+                    "                  brief color long process raw tag thread threadtime time\n\n"
                     "  -c              clear (flush) the entire log and exit\n"
                     "  -d              dump the log and then exit (don't block)\n"
                     "  -t <count>      print only the most recent <count> lines (implies -d)\n"
@@ -259,7 +259,7 @@
                    "\nIf not specified on the commandline, filterspec is set from ANDROID_LOG_TAGS.\n"
                    "If no filterspec is found, filter defaults to '*:I'\n"
                    "\nIf not specified with -v, format is set from ANDROID_PRINTF_LOG\n"
-                   "or defaults to \"brief\"\n\n");
+                   "or defaults to \"threadtime\"\n\n");
 
 
 
@@ -543,7 +543,9 @@
                     exit(-1);
                 }
 
-                hasSetLogFormat = 1;
+                if (strcmp("color", optarg)) { // exception for modifiers
+                    hasSetLogFormat = 1;
+                }
             break;
 
             case 'Q':
@@ -653,11 +655,12 @@
 
         if (logFormat != NULL) {
             err = setLogFormat(logFormat);
-
             if (err < 0) {
                 fprintf(stderr, "invalid format in ANDROID_PRINTF_LOG '%s'\n",
                                     logFormat);
             }
+        } else {
+            setLogFormat("threadtime");
         }
     }
 
diff --git a/logcat/tests/logcat_test.cpp b/logcat/tests/logcat_test.cpp
index 85756d5..321eaeb 100644
--- a/logcat/tests/logcat_test.cpp
+++ b/logcat/tests/logcat_test.cpp
@@ -243,7 +243,7 @@
 
     FILE *fp;
     ASSERT_TRUE(NULL != (fp = popen(
-      "logcat -b events -t 100 2>/dev/null",
+      "logcat -v brief -b events -t 100 2>/dev/null",
       "r")));
 
     char buffer[5120];
@@ -275,7 +275,7 @@
 
     // NB: crash log only available in user space
     ASSERT_TRUE(NULL != (fp = popen(
-      "logcat -b radio -b events -b system -b main -g 2>/dev/null",
+      "logcat -v brief -b radio -b events -b system -b main -g 2>/dev/null",
       "r")));
 
     char buffer[5120];
@@ -364,7 +364,7 @@
 
     ASSERT_TRUE(NULL != (fp = popen(
       "( trap exit HUP QUIT INT PIPE KILL ; sleep 6; echo DONE )&"
-      " logcat -b events 2>&1",
+      " logcat -v brief -b events 2>&1",
       "r")));
 
     char buffer[5120];
@@ -433,7 +433,7 @@
 
     ASSERT_TRUE(NULL != (fp = popen(
       "( trap exit HUP QUIT INT PIPE KILL ; sleep 6; echo DONE )&"
-      " logcat -b events -T 5 2>&1",
+      " logcat -v brief -b events -T 5 2>&1",
       "r")));
 
     char buffer[5120];
@@ -503,10 +503,14 @@
             int count = 0;
 
             while (fgets(buffer, sizeof(buffer), fp)) {
-                static const char match[] = "4 log.txt";
+                static const char match_1[] = "4 log.txt";
+                static const char match_2[] = "8 log.txt";
+                static const char match_3[] = "16 log.txt";
                 static const char total[] = "total ";
 
-                if (!strncmp(buffer, match, sizeof(match) - 1)) {
+                if (!strncmp(buffer, match_1, sizeof(match_1) - 1)
+                 || !strncmp(buffer, match_2, sizeof(match_2) - 1)
+                 || !strncmp(buffer, match_3, sizeof(match_3) - 1)) {
                     ++count;
                 } else if (strncmp(buffer, total, sizeof(total) - 1)) {
                     fprintf(stderr, "WARNING: Parse error: %s", buffer);
@@ -542,7 +546,7 @@
     ASSERT_TRUE(NULL != (fp = popen(
       "( trap exit HUP QUIT INT PIPE KILL ; sleep 6; echo DONE )&"
       " logcat -b events -c 2>&1 ;"
-      " logcat -b events 2>&1",
+      " logcat -v brief -b events 2>&1",
       "r")));
 
     char buffer[5120];
diff --git a/logd/LogAudit.cpp b/logd/LogAudit.cpp
index f8d6162..9988178 100644
--- a/logd/LogAudit.cpp
+++ b/logd/LogAudit.cpp
@@ -31,6 +31,8 @@
         , logbuf(buf)
         , reader(reader)
         , fdDmesg(-1) {
+    static const char auditd_message[] = "<6>logd.auditd: start\n";
+    write(fdDmsg, auditd_message, sizeof(auditd_message));
     logDmesg();
     fdDmesg = fdDmsg;
 }
@@ -75,13 +77,17 @@
         memmove(cp, cp + 1, strlen(cp + 1) + 1);
     }
 
+    bool info = strstr(str, " permissive=1") || strstr(str, " policy loaded ");
     if (fdDmesg >= 0) {
-        struct iovec iov[2];
+        struct iovec iov[3];
 
-        iov[0].iov_base = str;
-        iov[0].iov_len = strlen(str);
-        iov[1].iov_base = const_cast<char *>("\n");
-        iov[1].iov_len = 1;
+        iov[0].iov_base = info ? const_cast<char *>("<6>")
+                               : const_cast<char *>("<4>");
+        iov[0].iov_len = 3;
+        iov[1].iov_base = str;
+        iov[1].iov_len = strlen(str);
+        iov[2].iov_base = const_cast<char *>("\n");
+        iov[2].iov_len = 1;
 
         writev(fdDmesg, iov, sizeof(iov) / sizeof(iov[0]));
     }
@@ -175,10 +181,7 @@
     if (!newstr) {
         rc = -ENOMEM;
     } else {
-        *newstr = (strstr(str, " permissive=1")
-                || strstr(str, " policy loaded "))
-                    ? ANDROID_LOG_INFO
-                    : ANDROID_LOG_WARN;
+        *newstr = info ? ANDROID_LOG_INFO : ANDROID_LOG_WARN;
         strlcpy(newstr + 1, comm, l);
         strncpy(newstr + 1 + l, str, estr - str);
         strcpy(newstr + 1 + l + (estr - str), ecomm);
diff --git a/logd/LogStatistics.cpp b/logd/LogStatistics.cpp
index 81c9bab..82f9165 100644
--- a/logd/LogStatistics.cpp
+++ b/logd/LogStatistics.cpp
@@ -524,7 +524,7 @@
     short spaces = 2;
 
     log_id_for_each(i) {
-        if (!logMask & (1 << i)) {
+        if (!(logMask & (1 << i))) {
             continue;
         }
         oldLength = string.length();
@@ -671,8 +671,11 @@
             size_t sizesTotal = p->sizesTotal();
 
             android::String8 sz("");
-            sz.appendFormat((sizes != sizesTotal) ? "%zu/%zu" : "%zu",
-                            sizes, sizesTotal);
+            if (sizes == sizesTotal) {
+                sz.appendFormat("%zu", sizes);
+            } else {
+                sz.appendFormat("%zu/%zu", sizes, sizesTotal);
+            }
 
             android::String8 pd("");
             pd.appendFormat("%u%c", pid, p->pidGone() ? '?' : ' ');
@@ -783,12 +786,15 @@
             PidStatistics *pp = *pt;
             pid_t p = pp->getPid();
 
-            intermediate = string.format(oneline
-                                             ? ((p == PidStatistics::gone)
-                                                 ? "%d/?"
-                                                 : "%d/%d%c")
-                                             : "%d",
-                                         u, p, pp->pidGone() ? '?' : '\0');
+            if (!oneline) {
+                intermediate = string.format("%d", u);
+            } else if (p == PidStatistics::gone) {
+                intermediate = string.format("%d/?", u);
+            } else if (pp->pidGone()) {
+                intermediate = string.format("%d/%d?", u, p);
+            } else {
+                intermediate = string.format("%d/%d", u, p);
+            }
             string.appendFormat(first ? "\n%-12s" : "%-12s",
                                 intermediate.string());
             intermediate.clear();
diff --git a/logd/LogWhiteBlackList.cpp b/logd/LogWhiteBlackList.cpp
index e87b604..9728db1 100644
--- a/logd/LogWhiteBlackList.cpp
+++ b/logd/LogWhiteBlackList.cpp
@@ -39,10 +39,15 @@
 
 void Prune::format(char **strp) {
     if (mUid != uid_all) {
-        asprintf(strp, (mPid != pid_all) ? "%u/%u" : "%u", mUid, mPid);
-    } else {
-        // NB: mPid == pid_all can not happen if mUid == uid_all
-        asprintf(strp, (mPid != pid_all) ? "/%u" : "/", mPid);
+        if (mPid != pid_all) {
+            asprintf(strp, "%u/%u", mUid, mPid);
+        } else {
+            asprintf(strp, "%u", mUid);
+        }
+    } else if (mPid != pid_all) {
+        asprintf(strp, "/%u", mPid);
+    } else { // NB: mPid == pid_all can not happen if mUid == uid_all
+        asprintf(strp, "/");
     }
 }
 
diff --git a/logd/tests/logd_test.cpp b/logd/tests/logd_test.cpp
index 4bea4be..96877a9 100644
--- a/logd/tests/logd_test.cpp
+++ b/logd/tests/logd_test.cpp
@@ -417,7 +417,11 @@
         if (((p - cp) > 3) && !*p && ((unsigned int)(p - cp) < len)) {
             fprintf(stderr, "\"");
             while (*cp) {
-                fprintf(stderr, (*cp != '\n') ? "%c" : "\\n", *cp);
+                if (*cp != '\n') {
+                    fprintf(stderr, "%c", *cp);
+                } else {
+                    fprintf(stderr, "\\n");
+                }
                 ++cp;
                 --len;
             }
diff --git a/sdcard/sdcard.c b/sdcard/sdcard.c
index 9b55b33..f55a98a 100644
--- a/sdcard/sdcard.c
+++ b/sdcard/sdcard.c
@@ -29,6 +29,7 @@
 #include <string.h>
 #include <sys/inotify.h>
 #include <sys/mount.h>
+#include <sys/param.h>
 #include <sys/resource.h>
 #include <sys/stat.h>
 #include <sys/statfs.h>
@@ -1420,17 +1421,42 @@
         const struct fuse_in_header* hdr, const struct fuse_init_in* req)
 {
     struct fuse_init_out out;
+    size_t fuse_struct_size;
 
     TRACE("[%d] INIT ver=%d.%d maxread=%d flags=%x\n",
             handler->token, req->major, req->minor, req->max_readahead, req->flags);
+
+    /* Kernel 2.6.16 is the first stable kernel with struct fuse_init_out
+     * defined (fuse version 7.6). The structure is the same from 7.6 through
+     * 7.22. Beginning with 7.23, the structure increased in size and added
+     * new parameters.
+     */
+    if (req->major != FUSE_KERNEL_VERSION || req->minor < 6) {
+        ERROR("Fuse kernel version mismatch: Kernel version %d.%d, Expected at least %d.6",
+              req->major, req->minor, FUSE_KERNEL_VERSION);
+        return -1;
+    }
+
+    out.minor = MIN(req->minor, FUSE_KERNEL_MINOR_VERSION);
+    fuse_struct_size = sizeof(out);
+#if defined(FUSE_COMPAT_22_INIT_OUT_SIZE)
+    /* FUSE_KERNEL_MINOR_VERSION >= 23. */
+
+    /* If the kernel only works on minor revs older than or equal to 22,
+     * then use the older structure size since this code only uses the 7.22
+     * version of the structure. */
+    if (req->minor <= 22) {
+        fuse_struct_size = FUSE_COMPAT_22_INIT_OUT_SIZE;
+    }
+#endif
+
     out.major = FUSE_KERNEL_VERSION;
-    out.minor = FUSE_KERNEL_MINOR_VERSION;
     out.max_readahead = req->max_readahead;
     out.flags = FUSE_ATOMIC_O_TRUNC | FUSE_BIG_WRITES;
     out.max_background = 32;
     out.congestion_threshold = 32;
     out.max_write = MAX_WRITE;
-    fuse_reply(fuse, hdr->unique, &out, sizeof(out));
+    fuse_reply(fuse, hdr->unique, &out, fuse_struct_size);
     return NO_STATUS;
 }
 
diff --git a/toolbox/bsd-compatibility.h b/toolbox/bsd-compatibility.h
index 9c6c34a..36ddca9 100644
--- a/toolbox/bsd-compatibility.h
+++ b/toolbox/bsd-compatibility.h
@@ -50,9 +50,6 @@
 
 #define S_ISWHT(x) false
 
-// TODO: should this be in bionic? (glibc does this, even though it's not quite right.)
-#define O_RSYNC O_SYNC
-
 __BEGIN_DECLS
 
 /* From NetBSD <grp.h> and <pwd.h>. */