merge in master-release history after reset to master
diff --git a/Android.mk b/Android.mk
index f30cf50..99958d6 100644
--- a/Android.mk
+++ b/Android.mk
@@ -169,6 +169,9 @@
   lib/arm/udivsi3.S \
   lib/arm/umodsi3.S
 
+# ARM64-specific runtimes (appended to the common sources for arm64 builds)
+libcompiler_rt_arm64_SRC_FILES := # nothing to add
+
 # MIPS-specific runtimes
 libcompiler_rt_mips_SRC_FILES := # nothing to add
 
@@ -211,7 +214,8 @@
           $(if $(findstring $(1),x86),$(call get-libcompiler-rt-x86-source-files),
              $(if $(findstring $(1),x86_64),$(call get-libcompiler-rt-x86_64-source-files),
                  $(if $(findstring $(1),x32),$(call get-libcompiler-rt-x86-source-files),
-  $(error Unsupported ARCH $(1)))))))
+                    $(if $(findstring $(1),arm64),$(call get-libcompiler-rt-arm64-source-files),
+  $(error Unsupported ARCH $(1))))))))
 endef
 
 # $(1): source list
@@ -252,6 +256,12 @@
       $(call get-libcompiler-rt-arm-common-source-files))
 endef
 
+define get-libcompiler-rt-arm64-source-files
+  $(call filter-libcompiler-rt-common-source-files,
+      $(libcompiler_rt_common_SRC_FILES) \
+      $(libcompiler_rt_arm64_SRC_FILES),arm64)
+endef
+
 define get-libcompiler-rt-mips-source-files
   $(call filter-libcompiler-rt-common-source-files,
       $(libcompiler_rt_common_SRC_FILES) \
@@ -285,16 +295,15 @@
 
 include $(BUILD_STATIC_LIBRARY)
 
+# Don't build the device compiler-rt libraries or ASan without clang
+ifneq ($(WITHOUT_TARGET_CLANG), true)
+
 #=====================================================================
 # Device Static Library: libcompiler_rt
 #=====================================================================
 
 include $(CLEAR_VARS)
 
-ifeq ($(TARGET_ARCH),arm64)
-$(warning TODOArm64: Enable compiler-rt build)
-endif
-
 ifeq ($(TARGET_ARCH),mips64)
 $(warning TODOMips64: Enable compiler-rt build)
 endif
@@ -304,10 +313,11 @@
 LOCAL_ASFLAGS := -integrated-as
 LOCAL_CLANG := true
 LOCAL_SRC_FILES_arm := $(call get-libcompiler-rt-source-files,arm)
+LOCAL_SRC_FILES_arm64 := $(call get-libcompiler-rt-source-files,arm64)
 LOCAL_SRC_FILES_mips := $(call get-libcompiler-rt-source-files,mips)
 LOCAL_SRC_FILES_x86 := $(call get-libcompiler-rt-source-files,x86)
 LOCAL_SRC_FILES_x86_64 := $(call get-libcompiler-rt-source-files,x86_64)
-LOCAL_MODULE_TARGET_ARCH := arm mips x86 x86_64
+LOCAL_MODULE_TARGET_ARCH := arm arm64 mips x86 x86_64
 LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
 
 include $(BUILD_STATIC_LIBRARY)
@@ -321,9 +331,11 @@
 LOCAL_MODULE := libcompiler_rt
 LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
 LOCAL_WHOLE_STATIC_LIBRARIES := libcompiler_rt
-LOCAL_MODULE_TARGET_ARCH := arm mips x86 x86_64
+LOCAL_MODULE_TARGET_ARCH := arm arm64 mips x86 x86_64
 
 include $(BUILD_SHARED_LIBRARY)
 
 # Build ASan
 include $(LOCAL_PATH)/lib/asan/Android.mk
+
+endif
diff --git a/lib/clear_cache.c b/lib/clear_cache.c
index b934fd4..1d49b7c 100644
--- a/lib/clear_cache.c
+++ b/lib/clear_cache.c
@@ -14,10 +14,18 @@
   #include <libkern/OSCacheControl.h>
 #endif
 
+#if defined(ANDROID) && defined(__mips__)
+  #include <sys/cachectl.h>
+#endif
+
+#if defined(ANDROID) && defined(__arm__)
+  #include <asm/unistd.h>
+#endif
+
 /*
- * The compiler generates calls to __clear_cache() when creating 
+ * The compiler generates calls to __clear_cache() when creating
  * trampoline functions on the stack for use with nested functions.
- * It is expected to invalidate the instruction cache for the 
+ * It is expected to invalidate the instruction cache for the
  * specified range.
  */
 
@@ -28,6 +36,52 @@
  * Intel processors have a unified instruction and data cache
  * so there is nothing to do
  */
+#elif defined(__arm__) && !defined(__APPLE__)
+    #if defined(__NetBSD__)
+        struct arm_sync_icache_args arg;
+
+        arg.addr = (uintptr_t)start;
+        arg.len = (uintptr_t)end - (uintptr_t)start;
+
+        sysarch(ARM_SYNC_ICACHE, &arg);
+    #elif defined(ANDROID)
+        /* r0 carries start in and the syscall result out, so it is not const. */
+        register int start_reg __asm("r0") = (int) (intptr_t) start;
+        const register int end_reg __asm("r1") = (int) (intptr_t) end;
+        const register int flags __asm("r2") = 0;
+        const register int syscall_nr __asm("r7") = __ARM_NR_cacheflush;
+        __asm __volatile("svc 0x0" : "=r"(start_reg)
+            : "r"(syscall_nr), "r"(start_reg), "r"(end_reg), "r"(flags));
+        if (start_reg != 0)
+            compilerrt_abort();
+    #else
+        compilerrt_abort();
+    #endif
+#elif defined(ANDROID) && defined(__mips__)
+  const uintptr_t start_int = (uintptr_t) start;
+  const uintptr_t end_int = (uintptr_t) end;
+  _flush_cache(start, (end_int - start_int), BCACHE);
+#elif defined(__aarch64__) && !defined(__APPLE__)
+  uint64_t xstart = (uint64_t)(uintptr_t) start;
+  uint64_t xend = (uint64_t)(uintptr_t) end;
+
+  // Get Cache Type Info (the line sizes below are decoded from CTR_EL0)
+  uint64_t ctr_el0;
+  __asm __volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));
+
+  // dc & ic instructions must use 64bit registers so we don't use uintptr_t
+  // in case this runs in an ILP32 environment. Each walk starts on a line
+  // boundary so an unaligned range still reaches the line holding end - 1.
+  const uint64_t dline = 4 << ((ctr_el0 >> 16) & 15);
+  for (uint64_t addr = xstart & ~(dline - 1); addr < xend; addr += dline)
+    __asm __volatile("dc cvau, %0" :: "r"(addr));
+  __asm __volatile("dsb ish");
+
+  const uint64_t iline = 4 << ((ctr_el0 >> 0) & 15);
+  for (uint64_t addr = xstart & ~(iline - 1); addr < xend; addr += iline)
+    __asm __volatile("ic ivau, %0" :: "r"(addr));
+  __asm __volatile("dsb ish");  // wait for the invalidation to complete
+  __asm __volatile("isb sy");
 #else
     #if __APPLE__
         /* On Darwin, sys_icache_invalidate() provides this functionality */