Add support for ARMv8 AArch64 (the 64 bit ARM instruction set).


git-svn-id: svn://svn.valgrind.org/valgrind/trunk@13770 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/ARM64_TIDYUPS.txt b/ARM64_TIDYUPS.txt
new file mode 100644
index 0000000..3586c11
--- /dev/null
+++ b/ARM64_TIDYUPS.txt
@@ -0,0 +1,178 @@
+
+## HOW TO Cross-CONFIGURE
+
+export CC=aarch64-linux-gnu-gcc
+export LD=aarch64-linux-gnu-ld
+export AR=aarch64-linux-gnu-ar
+
+./autogen.sh
+./configure --prefix=`pwd`/Inst --host=aarch64-unknown-linux --enable-only64bit
+
+##############################################################
+
+UnwindStartRegs -- what should that contain?
+
+
+
+vki-arm64-linux.h: vki_sigaction_base
+
+I really don't think that __vki_sigrestore_t sa_restorer
+should be present.  Adding it surely puts sa_mask at a wrong
+offset compared to (kernel) reality.  But not having it causes
+compilation of m_signals.c to fail in hard to understand ways,
+so adding it temporarily.
+
+
+m_trampoline.S: what's the unexecutable-insn value? 0xFFFFFFFF 
+is there at the moment, but 0x00000000 is probably what it should be.
+Also, fix indentation/tab-vs-space stuff
+
+
+./include/vki/vki-arm64-linux.h: uses __uint128_t.  Should change
+it to __vki_uint128_t, but what's the defn of that?
+
+
+
+m_debuginfo/priv_storage.h: need proper defn of DiCfSI
+
+
+readdwarf.c: is this correct?
+#elif defined(VGP_arm64_linux)
+#  define FP_REG         29    //???
+#  define SP_REG         31    //???
+#  define RA_REG_DEFAULT 30    //???
+
+
+vki-arm64-linux.h:
+re linux-3.10.5/include/uapi/asm-generic/sembuf.h
+I'd say the amd64 version has padding it shouldn't have.  Check?
+
+
+
+syswrap-linux.c run_a_thread_NORETURN assembly sections
+seems like tst->os_state.exitcode has word type
+in which case the ppc64_linux use of lwz to read it, is wrong
+
+
+
+syswrap-linux.c ML_(do_fork_clone)
+assuming that VGP_arm64_linux is the same as VGP_arm_linux here
+
+
+
+dispatch-arm64-linux.S: FIXME: set up FP control state before
+entering generated code.  Also fix screwy indentation.
+
+dispatcher-ery general: what's a good (predictor-friendly) way to
+branch to a register?
+
+
+
+in vki-arm64-scnums.h
+//#if __BITS_PER_LONG == 64 && !defined(__SYSCALL_COMPAT)
+Probably want to reenable that and clean up accordingly
+
+
+
+putIRegXXorZR: figure out a way that the computed value is actually
+used, so as to keep any memory reads that might generate it, alive.
+(else the simulation can lose exceptions).  At least, for writes to
+the zero register generated by loads .. or .. can any other integer
+instructions that write to a register cause exceptions?
+
+
+
+loads/stores: generate stack alignment checks as necessary
+
+
+
+fix barrier insns: ISB, DMB
+
+
+
+fix atomic loads/stores
+
+
+
+FMADD/FMSUB/FNMADD/FNMSUB: generate and use the relevant fused
+IROps so as to avoid double rounding
+
+
+
+ARM64Instr_Call getRegUsage: re-check relative to what
+getAllocableRegs_ARM64 makes available
+
+
+
+Make dispatch-arm64-linux.S save any callee-saved Q regs
+I think what is required is to save D8-D15 and nothing more than that.
+
+
+
+wrapper for __NR3264_fstat -- correct?
+
+
+
+PRE(sys_clone): get rid of references to vki_modify_ldt_t
+and the definition of it in vki-arm64-linux.h.  Ditto for 
+32 bit arm.
+
+
+
+sigframe-arm64-linux.c: build_sigframe: references to nonexistent
+siguc->uc_mcontext.trap_no, siguc->uc_mcontext.error_code have been
+replaced by zero.  Also in synth_ucontext.
+
+
+
+m_debugger.c:
+uregs.pstate   = LibVEX_GuestARM64_get_nzcv(vex); /* is this correct? */
+Is that remotely correct?
+
+
+
+host_arm64_defs.c: emit_ARM64Instr:
+ARM64in_VDfromX and ARM64in_VQfromXX: use simple top-half zeroing
+MOVs to vector registers instead of INS Vd.D[0], Xreg, to avoid false
+dependencies on the top half of the register.  (Or at least check
+the semantics of INS Vd.D[0] to see if it zeroes out the top.)
+
+
+
+preferredVectorSubTypeFromSize: review perf effects and decide
+on a types-for-subparts policy
+
+
+
+fold_IRExpr_Unop: add a reduction rule for this
+1Sto64(CmpNEZ64( Or64(GET:I64(1192),GET:I64(1184)) ))
+vis 1Sto64(CmpNEZ64(x)) --> CmpwNEZ64(x)
+
+
+
+check insn selection for memcheck-only primops:
+Left64 CmpwNEZ64 V128to64 V128HIto64 1Sto64 CmpNEZ64 CmpNEZ32
+widen_z_8_to_64 1Sto32 Left32 32HLto64 CmpwNEZ32 CmpNEZ8
+
+
+
+isel: get rid of various cases where zero is put into a register
+and just use xzr instead.  Especially for CmpNEZ64/32.  And for
+writing zeroes into the CC thunk fields.
+
+
+
+/* Keep this list in sync with that in iselNext below */
+/* Keep this list in sync with that for Ist_Exit above */
+uh .. they are not in sync
+
+
+
+very stupid:
+imm64  x23, 0xFFFFFFFFFFFFFFA0
+17 F4 9F D2 F7 FF BF F2 F7 FF DF F2 F7 FF FF F2 
+
+
+
+valgrind.h: fix VALGRIND_ALIGN_STACK/VALGRIND_RESTORE_STACK,
+also add CFI annotations
diff --git a/Makefile.all.am b/Makefile.all.am
index 1f69802..3793b03 100644
--- a/Makefile.all.am
+++ b/Makefile.all.am
@@ -156,6 +156,10 @@
 AM_CCASFLAGS_ARM_LINUX    = @FLAG_M32@ \
 				-marm -mcpu=cortex-a8 -g
 
+AM_FLAG_M3264_ARM64_LINUX = @FLAG_M64@
+AM_CFLAGS_ARM64_LINUX     = @FLAG_M64@ $(AM_CFLAGS_BASE)
+AM_CCASFLAGS_ARM64_LINUX  = @FLAG_M64@ -g
+
 AM_FLAG_M3264_X86_DARWIN = -arch i386
 AM_CFLAGS_X86_DARWIN     = $(WERROR) -arch i386 $(AM_CFLAGS_BASE) \
 				-mmacosx-version-min=10.5 \
@@ -213,6 +217,7 @@
 PRELOAD_LDFLAGS_PPC32_LINUX  = $(PRELOAD_LDFLAGS_COMMON_LINUX) @FLAG_M32@
 PRELOAD_LDFLAGS_PPC64_LINUX  = $(PRELOAD_LDFLAGS_COMMON_LINUX) @FLAG_M64@
 PRELOAD_LDFLAGS_ARM_LINUX    = $(PRELOAD_LDFLAGS_COMMON_LINUX) @FLAG_M32@
+PRELOAD_LDFLAGS_ARM64_LINUX  = $(PRELOAD_LDFLAGS_COMMON_LINUX) @FLAG_M64@
 PRELOAD_LDFLAGS_X86_DARWIN   = $(PRELOAD_LDFLAGS_COMMON_DARWIN) -arch i386
 PRELOAD_LDFLAGS_AMD64_DARWIN = $(PRELOAD_LDFLAGS_COMMON_DARWIN) -arch x86_64
 PRELOAD_LDFLAGS_S390X_LINUX  = $(PRELOAD_LDFLAGS_COMMON_LINUX) @FLAG_M64@
diff --git a/Makefile.tool.am b/Makefile.tool.am
index 8e0a0ad..563d540 100644
--- a/Makefile.tool.am
+++ b/Makefile.tool.am
@@ -52,6 +52,9 @@
 TOOL_LDFLAGS_ARM_LINUX = \
 	$(TOOL_LDFLAGS_COMMON_LINUX) @FLAG_M32@
 
+TOOL_LDFLAGS_ARM64_LINUX = \
+	$(TOOL_LDFLAGS_COMMON_LINUX) @FLAG_M64@
+
 TOOL_LDFLAGS_S390X_LINUX = \
 	$(TOOL_LDFLAGS_COMMON_LINUX) @FLAG_M64@
 
@@ -105,6 +108,9 @@
 LIBREPLACEMALLOC_ARM_LINUX = \
 	$(top_builddir)/coregrind/libreplacemalloc_toolpreload-arm-linux.a
 
+LIBREPLACEMALLOC_ARM64_LINUX = \
+	$(top_builddir)/coregrind/libreplacemalloc_toolpreload-arm64-linux.a
+
 LIBREPLACEMALLOC_X86_DARWIN = \
 	$(top_builddir)/coregrind/libreplacemalloc_toolpreload-x86-darwin.a
 
@@ -145,6 +151,11 @@
 	$(LIBREPLACEMALLOC_ARM_LINUX) \
 	-Wl,--no-whole-archive
 
+LIBREPLACEMALLOC_LDFLAGS_ARM64_LINUX = \
+	-Wl,--whole-archive \
+	$(LIBREPLACEMALLOC_ARM64_LINUX) \
+	-Wl,--no-whole-archive
+
 LIBREPLACEMALLOC_LDFLAGS_X86_DARWIN = \
 	$(LIBREPLACEMALLOC_X86_DARWIN)
 
diff --git a/Makefile.vex.am b/Makefile.vex.am
index a7b9593..5adbae8 100644
--- a/Makefile.vex.am
+++ b/Makefile.vex.am
@@ -24,6 +24,7 @@
 	pub/libvex_guest_ppc32.h \
 	pub/libvex_guest_ppc64.h \
 	pub/libvex_guest_arm.h \
+	pub/libvex_guest_arm64.h \
 	pub/libvex_guest_s390x.h \
 	pub/libvex_guest_mips32.h \
 	pub/libvex_guest_mips64.h \
@@ -42,6 +43,7 @@
 	priv/guest_amd64_defs.h \
 	priv/guest_ppc_defs.h \
 	priv/guest_arm_defs.h \
+	priv/guest_arm64_defs.h \
 	priv/guest_s390_defs.h \
 	priv/guest_mips_defs.h \
 	priv/host_generic_regs.h \
@@ -53,6 +55,7 @@
 	priv/host_amd64_defs.h \
 	priv/host_ppc_defs.h \
 	priv/host_arm_defs.h \
+	priv/host_arm64_defs.h \
 	priv/host_s390_defs.h \
 	priv/s390_disasm.h \
 	priv/s390_defs.h \
@@ -71,6 +74,7 @@
 			    pub/libvex_guest_ppc32.h \
 			    pub/libvex_guest_ppc64.h \
 			    pub/libvex_guest_arm.h \
+			    pub/libvex_guest_arm64.h \
 			    pub/libvex_guest_s390x.h \
 			    pub/libvex_guest_mips32.h \
 			    pub/libvex_guest_mips64.h
@@ -114,6 +118,8 @@
 	priv/guest_ppc_toIR.c \
 	priv/guest_arm_helpers.c \
 	priv/guest_arm_toIR.c \
+	priv/guest_arm64_helpers.c \
+	priv/guest_arm64_toIR.c \
 	priv/guest_s390_helpers.c \
 	priv/guest_s390_toIR.c \
 	priv/guest_mips_helpers.c \
@@ -132,6 +138,8 @@
 	priv/host_ppc_isel.c \
 	priv/host_arm_defs.c \
 	priv/host_arm_isel.c \
+	priv/host_arm64_defs.c \
+	priv/host_arm64_isel.c \
 	priv/host_s390_defs.c \
 	priv/host_s390_isel.c \
 	priv/s390_disasm.c \
diff --git a/cachegrind/cg_arch.c b/cachegrind/cg_arch.c
index 3098ad8..0b39c52 100644
--- a/cachegrind/cg_arch.c
+++ b/cachegrind/cg_arch.c
@@ -367,6 +367,14 @@
    *D1c = (cache_t) {  16384, 4, 64 };
    *LLc = (cache_t) { 262144, 8, 64 };
 
+#elif defined(VGA_arm64)
+
+   // Copy the 32-bit ARM version until such time as we have
+   // some real hardware to run on
+   *I1c = (cache_t) {  16384, 4, 64 };
+   *D1c = (cache_t) {  16384, 4, 64 };
+   *LLc = (cache_t) { 262144, 8, 64 };
+
 #elif defined(VGA_s390x)
    //
    // Here is the cache data from older machine models:
diff --git a/cachegrind/cg_branchpred.c b/cachegrind/cg_branchpred.c
index 18e16b9..b385f66 100644
--- a/cachegrind/cg_branchpred.c
+++ b/cachegrind/cg_branchpred.c
@@ -44,12 +44,12 @@
 
 /* How many bits at the bottom of an instruction address are
    guaranteed to be zero? */
-#if defined(VGA_ppc32) || defined(VGA_ppc64) || defined(VGA_arm) \
-    || defined(VGA_mips32) || defined(VGA_mips64)
+#if defined(VGA_ppc32) || defined(VGA_ppc64) \
+    || defined(VGA_mips32) || defined(VGA_mips64) || defined(VGA_arm64)
 #  define N_IADDR_LO_ZERO_BITS 2
 #elif defined(VGA_x86) || defined(VGA_amd64)
 #  define N_IADDR_LO_ZERO_BITS 0
-#elif defined(VGA_s390x)
+#elif defined(VGA_s390x) || defined(VGA_arm)
 #  define N_IADDR_LO_ZERO_BITS 1
 #else
 #  error "Unsupported architecture"
diff --git a/configure.ac b/configure.ac
index 933b11b..9199626 100644
--- a/configure.ac
+++ b/configure.ac
@@ -195,6 +195,11 @@
 	ARCH_MAX="arm"
 	;;
 
+     aarch64*)
+       AC_MSG_RESULT([ok (${host_cpu})])
+       ARCH_MAX="arm64"
+       ;;
+
      mips)
         AC_MSG_RESULT([ok (${host_cpu})])
         ARCH_MAX="mips32"
@@ -559,6 +564,35 @@
         valt_load_address_sec_inner="0xUNSET"
 	AC_MSG_RESULT([ok (${host_cpu}-${host_os})])
 	;;
+     arm64-linux)
+        valt_load_address_sec_norml="0xUNSET"
+        valt_load_address_sec_inner="0xUNSET"
+        if test x$vg_cv_only64bit = xyes; then
+           VGCONF_ARCH_PRI="arm64"
+           VGCONF_ARCH_SEC=""
+           VGCONF_PLATFORM_PRI_CAPS="ARM64_LINUX"
+           VGCONF_PLATFORM_SEC_CAPS=""
+           valt_load_address_pri_norml="0x38000000"
+           valt_load_address_pri_inner="0x28000000"
+        elif test x$vg_cv_only32bit = xyes; then
+           VGCONF_ARCH_PRI="arm"
+           VGCONF_ARCH_SEC=""
+           VGCONF_PLATFORM_PRI_CAPS="ARM_LINUX"
+           VGCONF_PLATFORM_SEC_CAPS=""
+           valt_load_address_pri_norml="0x38000000"
+           valt_load_address_pri_inner="0x28000000"
+        else
+           VGCONF_ARCH_PRI="arm64"
+           VGCONF_ARCH_SEC="arm"
+           VGCONF_PLATFORM_PRI_CAPS="ARM64_LINUX"
+           VGCONF_PLATFORM_SEC_CAPS="ARM_LINUX"
+           valt_load_address_pri_norml="0x38000000"
+           valt_load_address_pri_inner="0x28000000"
+           valt_load_address_sec_norml="0x38000000"
+           valt_load_address_sec_inner="0x28000000"
+        fi
+        AC_MSG_RESULT([ok (${ARCH_MAX}-${VGCONF_OS})])
+        ;;
      s390x-linux)
         VGCONF_ARCH_PRI="s390x"
         VGCONF_ARCH_SEC=""
@@ -624,7 +658,10 @@
 AM_CONDITIONAL(VGCONF_ARCHS_INCLUDE_PPC64, 
                test x$VGCONF_PLATFORM_PRI_CAPS = xPPC64_LINUX )
 AM_CONDITIONAL(VGCONF_ARCHS_INCLUDE_ARM,   
-               test x$VGCONF_PLATFORM_PRI_CAPS = xARM_LINUX )
+               test x$VGCONF_PLATFORM_PRI_CAPS = xARM_LINUX \
+                 -o x$VGCONF_PLATFORM_SEC_CAPS = xARM_LINUX )
+AM_CONDITIONAL(VGCONF_ARCHS_INCLUDE_ARM64, 
+               test x$VGCONF_PLATFORM_PRI_CAPS = xARM64_LINUX )
 AM_CONDITIONAL(VGCONF_ARCHS_INCLUDE_S390X,
                test x$VGCONF_PLATFORM_PRI_CAPS = xS390X_LINUX )
 AM_CONDITIONAL(VGCONF_ARCHS_INCLUDE_MIPS32,
@@ -645,7 +682,10 @@
 AM_CONDITIONAL(VGCONF_PLATFORMS_INCLUDE_PPC64_LINUX, 
                test x$VGCONF_PLATFORM_PRI_CAPS = xPPC64_LINUX)
 AM_CONDITIONAL(VGCONF_PLATFORMS_INCLUDE_ARM_LINUX, 
-               test x$VGCONF_PLATFORM_PRI_CAPS = xARM_LINUX)
+               test x$VGCONF_PLATFORM_PRI_CAPS = xARM_LINUX \
+                 -o x$VGCONF_PLATFORM_SEC_CAPS = xARM_LINUX)
+AM_CONDITIONAL(VGCONF_PLATFORMS_INCLUDE_ARM64_LINUX, 
+               test x$VGCONF_PLATFORM_PRI_CAPS = xARM64_LINUX)
 AM_CONDITIONAL(VGCONF_PLATFORMS_INCLUDE_S390X_LINUX,
                test x$VGCONF_PLATFORM_PRI_CAPS = xS390X_LINUX \
                  -o x$VGCONF_PLATFORM_SEC_CAPS = xS390X_LINUX)
@@ -653,7 +693,6 @@
                test x$VGCONF_PLATFORM_PRI_CAPS = xMIPS32_LINUX)
 AM_CONDITIONAL(VGCONF_PLATFORMS_INCLUDE_MIPS64_LINUX,
                test x$VGCONF_PLATFORM_PRI_CAPS = xMIPS64_LINUX)
-
 AM_CONDITIONAL(VGCONF_PLATFORMS_INCLUDE_X86_DARWIN,   
                test x$VGCONF_PLATFORM_PRI_CAPS = xX86_DARWIN \
                  -o x$VGCONF_PLATFORM_SEC_CAPS = xX86_DARWIN)
@@ -670,6 +709,7 @@
                  -o x$VGCONF_PLATFORM_PRI_CAPS = xPPC32_LINUX \
                  -o x$VGCONF_PLATFORM_PRI_CAPS = xPPC64_LINUX \
                  -o x$VGCONF_PLATFORM_PRI_CAPS = xARM_LINUX \
+                 -o x$VGCONF_PLATFORM_PRI_CAPS = xARM64_LINUX \
                  -o x$VGCONF_PLATFORM_PRI_CAPS = xS390X_LINUX \
                  -o x$VGCONF_PLATFORM_PRI_CAPS = xMIPS32_LINUX \
                  -o x$VGCONF_PLATFORM_PRI_CAPS = xMIPS64_LINUX)
@@ -2406,6 +2446,7 @@
   mflag_primary=$FLAG_M32
 elif test x$VGCONF_PLATFORM_PRI_CAPS = xAMD64_LINUX \
        -o x$VGCONF_PLATFORM_PRI_CAPS = xPPC64_LINUX \
+       -o x$VGCONF_PLATFORM_PRI_CAPS = xARM64_LINUX \
        -o x$VGCONF_PLATFORM_PRI_CAPS = xS390X_LINUX ; then
   mflag_primary=$FLAG_M64
 elif test x$VGCONF_PLATFORM_PRI_CAPS = xX86_DARWIN ; then
diff --git a/coregrind/Makefile.am b/coregrind/Makefile.am
index 792d94a..51a4291 100644
--- a/coregrind/Makefile.am
+++ b/coregrind/Makefile.am
@@ -61,6 +61,12 @@
 
 
 vgdb_SOURCES = vgdb.c
+if VGCONF_PLATFORMS_INCLUDE_ARM64_LINUX
+# vgdb-invoker-ptrace.c isn't buildable on arm64-linux yet
+# so skip it.  Unfortunately this also causes it to be skipped
+# for 32-bit ARM builds which are part of a bi-arch ARM build.
+vgdb_SOURCES += vgdb-invoker-none.c
+else
 if VGCONF_OS_IS_LINUX
 if VGCONF_PLATVARIANT_IS_ANDROID
 vgdb_SOURCES += vgdb-invoker-none.c
@@ -68,6 +74,7 @@
 vgdb_SOURCES += vgdb-invoker-ptrace.c
 endif
 endif
+endif
 if VGCONF_OS_IS_DARWIN
 # Some darwin specific stuff is needed as ptrace is not
 # fully supported on MacOS. Till we find someone courageous
@@ -329,6 +336,7 @@
 	m_dispatch/dispatch-ppc32-linux.S \
 	m_dispatch/dispatch-ppc64-linux.S \
 	m_dispatch/dispatch-arm-linux.S \
+	m_dispatch/dispatch-arm64-linux.S \
 	m_dispatch/dispatch-s390x-linux.S \
 	m_dispatch/dispatch-mips32-linux.S \
 	m_dispatch/dispatch-mips64-linux.S \
@@ -345,6 +353,7 @@
 	m_gdbserver/valgrind-low-x86.c \
 	m_gdbserver/valgrind-low-amd64.c \
 	m_gdbserver/valgrind-low-arm.c \
+	m_gdbserver/valgrind-low-arm64.c \
 	m_gdbserver/valgrind-low-ppc32.c \
 	m_gdbserver/valgrind-low-ppc64.c \
 	m_gdbserver/valgrind-low-s390x.c \
@@ -368,6 +377,7 @@
 	m_sigframe/sigframe-ppc32-linux.c \
 	m_sigframe/sigframe-ppc64-linux.c \
 	m_sigframe/sigframe-arm-linux.c \
+	m_sigframe/sigframe-arm64-linux.c \
 	m_sigframe/sigframe-s390x-linux.c \
 	m_sigframe/sigframe-mips32-linux.c \
 	m_sigframe/sigframe-mips64-linux.c \
@@ -378,6 +388,7 @@
 	m_syswrap/syscall-ppc32-linux.S \
 	m_syswrap/syscall-ppc64-linux.S \
 	m_syswrap/syscall-arm-linux.S \
+	m_syswrap/syscall-arm64-linux.S \
 	m_syswrap/syscall-s390x-linux.S \
 	m_syswrap/syscall-mips32-linux.S \
 	m_syswrap/syscall-mips64-linux.S \
@@ -393,6 +404,7 @@
 	m_syswrap/syswrap-ppc32-linux.c \
 	m_syswrap/syswrap-ppc64-linux.c \
 	m_syswrap/syswrap-arm-linux.c \
+	m_syswrap/syswrap-arm64-linux.c \
 	m_syswrap/syswrap-s390x-linux.c \
 	m_syswrap/syswrap-mips32-linux.c \
 	m_syswrap/syswrap-mips64-linux.c \
diff --git a/coregrind/launcher-linux.c b/coregrind/launcher-linux.c
index eafdb2b..83035ea 100644
--- a/coregrind/launcher-linux.c
+++ b/coregrind/launcher-linux.c
@@ -57,6 +57,10 @@
 #define EM_X86_64 62    // elf.h doesn't define this on some older systems
 #endif
 
+#ifndef EM_AARCH64
+#define EM_AARCH64 183  // ditto
+#endif
+
 /* Report fatal errors */
 __attribute__((noreturn))
 static void barf ( const char *format, ... )
@@ -220,6 +224,10 @@
                 (ehdr->e_ident[EI_OSABI] == ELFOSABI_SYSV ||
                  ehdr->e_ident[EI_OSABI] == ELFOSABI_LINUX)) {
                platform = "mips64-linux";
+            } else if (ehdr->e_machine == EM_AARCH64 &&
+                (ehdr->e_ident[EI_OSABI] == ELFOSABI_SYSV ||
+                 ehdr->e_ident[EI_OSABI] == ELFOSABI_LINUX)) {
+               platform = "arm64-linux";
             }
          } else if (header[EI_DATA] == ELFDATA2MSB) {
 #           if !defined(VGPV_arm_linux_android) \
@@ -309,12 +317,13 @@
       target, because on most ppc64-linux setups, the basic /bin,
       /usr/bin, etc, stuff is built in 32-bit mode, not 64-bit
       mode. */
-   if ((0==strcmp(VG_PLATFORM,"x86-linux"))   ||
-       (0==strcmp(VG_PLATFORM,"amd64-linux")) ||
-       (0==strcmp(VG_PLATFORM,"ppc32-linux")) ||
-       (0==strcmp(VG_PLATFORM,"ppc64-linux")) ||
-       (0==strcmp(VG_PLATFORM,"arm-linux"))   ||
-       (0==strcmp(VG_PLATFORM,"s390x-linux")) ||
+   if ((0==strcmp(VG_PLATFORM,"x86-linux"))    ||
+       (0==strcmp(VG_PLATFORM,"amd64-linux"))  ||
+       (0==strcmp(VG_PLATFORM,"ppc32-linux"))  ||
+       (0==strcmp(VG_PLATFORM,"ppc64-linux"))  ||
+       (0==strcmp(VG_PLATFORM,"arm-linux"))    ||
+       (0==strcmp(VG_PLATFORM,"arm64-linux"))  ||
+       (0==strcmp(VG_PLATFORM,"s390x-linux"))  ||
        (0==strcmp(VG_PLATFORM,"mips32-linux")) ||
        (0==strcmp(VG_PLATFORM,"mips64-linux")))
       default_platform = VG_PLATFORM;
diff --git a/coregrind/m_aspacemgr/aspacemgr-common.c b/coregrind/m_aspacemgr/aspacemgr-common.c
index 7ffb78d..b8d694d 100644
--- a/coregrind/m_aspacemgr/aspacemgr-common.c
+++ b/coregrind/m_aspacemgr/aspacemgr-common.c
@@ -152,15 +152,19 @@
 {
    SysRes res;
    aspacem_assert(VG_IS_PAGE_ALIGNED(offset));
-#  if defined(VGP_x86_linux) || defined(VGP_ppc32_linux) \
-      || defined(VGP_arm_linux)
+
+#  if defined(VGP_arm64_linux)
+   res = VG_(do_syscall6)(__NR3264_mmap, (UWord)start, length, 
+                         prot, flags, fd, offset);
+#  elif defined(VGP_x86_linux) || defined(VGP_ppc32_linux) \
+        || defined(VGP_arm_linux)
    /* mmap2 uses 4096 chunks even if actual page size is bigger. */
    aspacem_assert((offset % 4096) == 0);
    res = VG_(do_syscall6)(__NR_mmap2, (UWord)start, length,
                           prot, flags, fd, offset / 4096);
 #  elif defined(VGP_amd64_linux) || defined(VGP_ppc64_linux) \
         || defined(VGP_s390x_linux) || defined(VGP_mips32_linux) \
-        || defined(VGP_mips64_linux)
+        || defined(VGP_mips64_linux) || defined(VGP_arm64_linux)
    res = VG_(do_syscall6)(__NR_mmap, (UWord)start, length, 
                          prot, flags, fd, offset);
 #  elif defined(VGP_x86_darwin)
@@ -242,8 +246,14 @@
 /* --- Pertaining to files --- */
 
 SysRes ML_(am_open) ( const HChar* pathname, Int flags, Int mode )
-{  
+{
+#  if defined(VGP_arm64_linux)
+   /* ARM64 wants to use __NR_openat rather than __NR_open. */
+   SysRes res = VG_(do_syscall4)(__NR_openat,
+                                 VKI_AT_FDCWD, (UWord)pathname, flags, mode);
+#  else
    SysRes res = VG_(do_syscall3)(__NR_open, (UWord)pathname, flags, mode);
+#  endif
    return res;
 }
 
@@ -261,7 +271,12 @@
 Int ML_(am_readlink)(HChar* path, HChar* buf, UInt bufsiz)
 {
    SysRes res;
+#  if defined(VGP_arm64_linux)
+   res = VG_(do_syscall4)(__NR_readlinkat, VKI_AT_FDCWD,
+                                           (UWord)path, (UWord)buf, bufsiz);
+#  else
    res = VG_(do_syscall3)(__NR_readlink, (UWord)path, (UWord)buf, bufsiz);
+#  endif
    return sr_isError(res) ? -1 : sr_Res(res);
 }
 
diff --git a/coregrind/m_cache.c b/coregrind/m_cache.c
index ddec8af..0321db4 100644
--- a/coregrind/m_cache.c
+++ b/coregrind/m_cache.c
@@ -539,7 +539,7 @@
 }
 
 #elif defined(VGA_arm) || defined(VGA_ppc32) || defined(VGA_ppc64) || \
-      defined(VGA_mips32) || defined(VGA_mips64)
+   defined(VGA_mips32) || defined(VGA_mips64) || defined(VGA_arm64)
 
 static Bool
 get_cache_info(VexArchInfo *vai)
diff --git a/coregrind/m_coredump/coredump-elf.c b/coregrind/m_coredump/coredump-elf.c
index d5d64fb..fcc16ec 100644
--- a/coregrind/m_coredump/coredump-elf.c
+++ b/coregrind/m_coredump/coredump-elf.c
@@ -233,7 +233,7 @@
 }
 
 static void fill_prstatus(const ThreadState *tst, 
-			  struct vki_elf_prstatus *prs, 
+			  /*OUT*/struct vki_elf_prstatus *prs, 
 			  const vki_siginfo_t *si)
 {
    struct vki_user_regs_struct *regs;
@@ -252,12 +252,11 @@
    prs->pr_pgrp = VG_(getpgrp)();
    prs->pr_sid = VG_(getpgrp)();
    
-#ifdef VGP_s390x_linux
+#if defined(VGP_s390x_linux)
    /* prs->pr_reg has struct type. Need to take address. */
    regs = (struct vki_user_regs_struct *)&(prs->pr_reg);
 #else
    regs = (struct vki_user_regs_struct *)prs->pr_reg;
-
    vg_assert(sizeof(*regs) == sizeof(prs->pr_reg));
 #endif
 
@@ -302,10 +301,6 @@
    regs->r14    = arch->vex.guest_R14;
    regs->r15    = arch->vex.guest_R15;
 
-//::    regs->cs     = arch->vex.guest_CS;
-//::    regs->fs     = arch->vex.guest_FS;
-//::    regs->gs     = arch->vex.guest_GS;
-
 #elif defined(VGP_ppc32_linux)
 #  define DO(n)  regs->gpr[n] = arch->vex.guest_GPR##n
    DO(0);  DO(1);  DO(2);  DO(3);  DO(4);  DO(5);  DO(6);  DO(7);
@@ -367,6 +362,10 @@
    regs->ARM_pc   = arch->vex.guest_R15T;
    regs->ARM_cpsr = LibVEX_GuestARM_get_cpsr( &arch->vex );
 
+#elif defined(VGP_arm64_linux)
+   (void)arch;
+   I_die_here;
+
 #elif defined(VGP_s390x_linux)
 #  define DO(n)  regs->gprs[n] = arch->vex.guest_r##n
    DO(0);  DO(1);  DO(2);  DO(3);  DO(4);  DO(5);  DO(6);  DO(7);
@@ -377,6 +376,7 @@
    DO(8);  DO(9);  DO(10); DO(11); DO(12); DO(13); DO(14); DO(15);
 #  undef DO
    regs->orig_gpr2 = arch->vex.guest_r2;
+
 #elif defined(VGP_mips32_linux)
 #  define DO(n)  regs->MIPS_r##n = arch->vex.guest_r##n
    DO(0);  DO(1);  DO(2);  DO(3);  DO(4);  DO(5);  DO(6);  DO(7);
@@ -386,6 +386,7 @@
 #  undef DO
    regs->MIPS_hi   = arch->vex.guest_HI;
    regs->MIPS_lo   = arch->vex.guest_LO;
+
 #elif defined(VGP_mips64_linux)
 #  define DO(n)  regs->MIPS_r##n = arch->vex.guest_r##n
    DO(0);  DO(1);  DO(2);  DO(3);  DO(4);  DO(5);  DO(6);  DO(7);
@@ -395,6 +396,7 @@
 #  undef DO
    regs->MIPS_hi   = arch->vex.guest_HI;
    regs->MIPS_lo   = arch->vex.guest_LO;
+
 #else
 #  error Unknown ELF platform
 #endif
@@ -470,6 +472,9 @@
 #elif defined(VGP_arm_linux)
    // umm ...
 
+#elif defined(VGP_arm64_linux)
+   I_die_here;
+
 #elif defined(VGP_s390x_linux)
 #  define DO(n)  fpu->fprs[n].ui = arch->vex.guest_f##n
    DO(0);  DO(1);  DO(2);  DO(3);  DO(4);  DO(5);  DO(6);  DO(7);
@@ -606,16 +611,13 @@
       if (VG_(threads)[i].status == VgTs_Empty)
 	 continue;
 
-#     if defined(VGP_x86_linux)
-#     if !defined(VGPV_arm_linux_android) && !defined(VGPV_x86_linux_android) \
-         && !defined(VGPV_mips32_linux_android)
+#     if defined(VGP_x86_linux) && !defined(VGPV_x86_linux_android)
       {
          vki_elf_fpxregset_t xfpu;
          fill_xfpu(&VG_(threads)[i], &xfpu);
          add_note(&notelist, "LINUX", NT_PRXFPREG, &xfpu, sizeof(xfpu));
       }
 #     endif
-#     endif
 
       fill_fpu(&VG_(threads)[i], &fpu);
 #     if !defined(VGPV_arm_linux_android) && !defined(VGPV_x86_linux_android) \
diff --git a/coregrind/m_debugger.c b/coregrind/m_debugger.c
index f4a6183..7cd5ec1 100644
--- a/coregrind/m_debugger.c
+++ b/coregrind/m_debugger.c
@@ -232,6 +232,47 @@
    uregs.ARM_cpsr = LibVEX_GuestARM_get_cpsr(vex);
    return VG_(ptrace)(VKI_PTRACE_SETREGS, pid, NULL, &uregs);
 
+#elif defined(VGP_arm64_linux)
+   I_die_here;
+   //ATC
+   struct vki_user_pt_regs uregs;
+   VG_(memset)(&uregs, 0, sizeof(uregs));
+   uregs.regs[0]  = vex->guest_X0;
+   uregs.regs[1]  = vex->guest_X1;
+   uregs.regs[2]  = vex->guest_X2;
+   uregs.regs[3]  = vex->guest_X3;
+   uregs.regs[4]  = vex->guest_X4;
+   uregs.regs[5]  = vex->guest_X5;
+   uregs.regs[6]  = vex->guest_X6;
+   uregs.regs[7]  = vex->guest_X7;
+   uregs.regs[8]  = vex->guest_X8;
+   uregs.regs[9]  = vex->guest_X9;
+   uregs.regs[10] = vex->guest_X10;
+   uregs.regs[11] = vex->guest_X11;
+   uregs.regs[12] = vex->guest_X12;
+   uregs.regs[13] = vex->guest_X13;
+   uregs.regs[14] = vex->guest_X14;
+   uregs.regs[15] = vex->guest_X15;
+   uregs.regs[16] = vex->guest_X16;
+   uregs.regs[17] = vex->guest_X17;
+   uregs.regs[18] = vex->guest_X18;
+   uregs.regs[19] = vex->guest_X19;
+   uregs.regs[20] = vex->guest_X20;
+   uregs.regs[21] = vex->guest_X21;
+   uregs.regs[22] = vex->guest_X22;
+   uregs.regs[23] = vex->guest_X23;
+   uregs.regs[24] = vex->guest_X24;
+   uregs.regs[25] = vex->guest_X25;
+   uregs.regs[26] = vex->guest_X26;
+   uregs.regs[27] = vex->guest_X27;
+   uregs.regs[28] = vex->guest_X28;
+   uregs.regs[29] = vex->guest_X29;
+   uregs.regs[30] = vex->guest_X30;
+   uregs.sp       = vex->guest_SP;
+   uregs.pc       = vex->guest_PC;
+   uregs.pstate   = LibVEX_GuestARM64_get_nzcv(vex); /* is this correct? */
+   return VG_(ptrace)(VKI_PTRACE_SETREGS, pid, NULL, &uregs);
+
 #elif defined(VGP_x86_darwin)
    I_die_here;
 
diff --git a/coregrind/m_debuginfo/d3basics.c b/coregrind/m_debuginfo/d3basics.c
index b553a64..3999533 100644
--- a/coregrind/m_debuginfo/d3basics.c
+++ b/coregrind/m_debuginfo/d3basics.c
@@ -420,6 +420,8 @@
 #  elif defined(VGP_mips64_linux)
    if (regno == 29) { *a = regs->sp; return True; }
    if (regno == 30) { *a = regs->fp; return True; }
+#  elif defined(VGP_arm64_linux)
+   I_die_here;
 #  else
 #    error "Unknown platform"
 #  endif
diff --git a/coregrind/m_debuginfo/debuginfo.c b/coregrind/m_debuginfo/debuginfo.c
index 56fbe82..8a48929 100644
--- a/coregrind/m_debuginfo/debuginfo.c
+++ b/coregrind/m_debuginfo/debuginfo.c
@@ -824,7 +824,8 @@
       || defined(VGA_mips64)
    is_rx_map = seg->hasR && seg->hasX;
    is_rw_map = seg->hasR && seg->hasW;
-#  elif defined(VGA_amd64) || defined(VGA_ppc64) || defined(VGA_arm)
+#  elif defined(VGA_amd64) || defined(VGA_ppc64) || defined(VGA_arm) \
+        || defined(VGA_arm64)
    is_rx_map = seg->hasR && seg->hasX && !seg->hasW;
    is_rw_map = seg->hasR && seg->hasW && !seg->hasX;
 #  elif defined(VGP_s390x_linux)
@@ -2113,6 +2114,8 @@
             case Creg_IA_BP: return eec->uregs->fp;
             case Creg_MIPS_RA: return eec->uregs->ra;
 #           elif defined(VGA_ppc32) || defined(VGA_ppc64)
+#           elif defined(VGP_arm64_linux)
+            I_die_here;
 #           else
 #             error "Unsupported arch"
 #           endif
@@ -2357,6 +2360,8 @@
          cfa = cfsi->cfa_off + uregs->fp;
          break;
 #     elif defined(VGA_ppc32) || defined(VGA_ppc64)
+#     elif defined(VGP_arm64_linux)
+      I_die_here;
 #     else
 #       error "Unsupported arch"
 #     endif
@@ -2453,6 +2458,8 @@
 #  elif defined(VGA_mips32) || defined(VGA_mips64)
    ipHere = uregsHere->pc;
 #  elif defined(VGA_ppc32) || defined(VGA_ppc64)
+#  elif defined(VGP_arm64_linux)
+   I_die_here;
 #  else
 #    error "Unknown arch"
 #  endif
@@ -2533,6 +2540,8 @@
    COMPUTE(uregsPrev.sp, uregsHere->sp, cfsi->sp_how, cfsi->sp_off);
    COMPUTE(uregsPrev.fp, uregsHere->fp, cfsi->fp_how, cfsi->fp_off);
 #  elif defined(VGA_ppc32) || defined(VGA_ppc64)
+#  elif defined(VGP_arm64_linux)
+   I_die_here;
 #  else
 #    error "Unknown arch"
 #  endif
diff --git a/coregrind/m_debuginfo/priv_storage.h b/coregrind/m_debuginfo/priv_storage.h
index 9b9b431..2012043 100644
--- a/coregrind/m_debuginfo/priv_storage.h
+++ b/coregrind/m_debuginfo/priv_storage.h
@@ -277,6 +277,18 @@
       Int   fp_off;
    }
    DiCfSI;
+#elif defined(VGA_arm64)
+/* Generic placeholder (CFA + RA rules only); see ARM64_TIDYUPS.txt. */
+typedef
+   struct {
+      Addr  base;    /* presumably start of the covered range -- confirm */
+      UInt  len;     /* presumably length of that range -- confirm */
+      UChar cfa_how; /* a CFIC_ value */
+      UChar ra_how;  /* a CFIR_ value */
+      Int   cfa_off; /* offset used together with cfa_how */
+      Int   ra_off;  /* offset used together with ra_how */
+   }
+   DiCfSI;
 #else
 #  error "Unknown arch"
 #endif
diff --git a/coregrind/m_debuginfo/readdwarf.c b/coregrind/m_debuginfo/readdwarf.c
index dce99c5..df4bf37 100644
--- a/coregrind/m_debuginfo/readdwarf.c
+++ b/coregrind/m_debuginfo/readdwarf.c
@@ -1841,6 +1841,10 @@
 #  define FP_REG         12
 #  define SP_REG         13
 #  define RA_REG_DEFAULT 14    //???
+#elif defined(VGP_arm64_linux)
+#  define FP_REG         29    //???
+#  define SP_REG         31    //???
+#  define RA_REG_DEFAULT 30    //???
 #elif defined(VGP_x86_darwin)
 #  define FP_REG         5
 #  define SP_REG         4
@@ -2179,6 +2183,8 @@
       si->cfa_how = CFIC_IA_SPREL;
 #     elif defined(VGA_arm)
       si->cfa_how = CFIC_ARM_R13REL;
+#     elif defined(VGA_arm64)
+      I_die_here;
 #     else
       si->cfa_how = 0; /* invalid */
 #     endif
@@ -2206,6 +2212,8 @@
       si->cfa_how = CFIC_ARM_R7REL;
       si->cfa_off = ctxs->cfa_off;
    }
+#  elif defined(VGA_arm64)
+   if (1) { I_die_here; } // do we need any arm64 specifics here?
 #  endif
    else {
       why = 1;
@@ -2249,6 +2257,7 @@
          why = 2; goto failed; /* otherwise give up */        \
    }
 
+
 #  if defined(VGA_x86) || defined(VGA_amd64)
 
    /* --- entire tail of this fn specialised for x86/amd64 --- */
@@ -2339,9 +2348,10 @@
 
    return True;
 
-
 #  elif defined(VGA_s390x)
 
+   /* --- entire tail of this fn specialised for s390 --- */
+
    SUMMARISE_HOW(si->ra_how, si->ra_off,
                              ctxs->reg[ctx->ra_reg] );
    SUMMARISE_HOW(si->fp_how, si->fp_off,
@@ -2387,7 +2397,6 @@
 
    return True;
 
-
 #  elif defined(VGA_mips32) || defined(VGA_mips64)
  
    /* --- entire tail of this fn specialised for mips --- */
@@ -2431,9 +2440,12 @@
 
    return True;
 
-
+#  elif defined(VGA_arm64)
+   I_die_here;
 
 #  elif defined(VGA_ppc32) || defined(VGA_ppc64)
+   /* These don't use CFI based unwinding (is that really true?) */
+
 #  else
 #    error "Unknown arch"
 #  endif
@@ -2521,6 +2533,8 @@
             return ML_(CfiExpr_CfiReg)( dstxa, Creg_IA_BP );
          if (dwreg == srcuc->ra_reg)
             return ML_(CfiExpr_CfiReg)( dstxa, Creg_IA_IP );
+#        elif defined(VGA_arm64)
+         I_die_here;
 #        elif defined(VGA_ppc32) || defined(VGA_ppc64)
 #        else
 #           error "Unknown arch"
diff --git a/coregrind/m_debuginfo/readelf.c b/coregrind/m_debuginfo/readelf.c
index b7d574a..69ccb4a 100644
--- a/coregrind/m_debuginfo/readelf.c
+++ b/coregrind/m_debuginfo/readelf.c
@@ -2088,7 +2088,8 @@
       /* PLT is different on different platforms, it seems. */
 #     if defined(VGP_x86_linux) || defined(VGP_amd64_linux) \
          || defined(VGP_arm_linux) || defined (VGP_s390x_linux) \
-         || defined(VGP_mips32_linux) || defined(VGP_mips64_linux)
+         || defined(VGP_mips32_linux) || defined(VGP_mips64_linux) \
+         || defined(VGP_arm64_linux)
       /* Accept .plt where mapped as rx (code) */
       if (0 == VG_(strcmp)(name, ".plt")) {
          if (inrx && !di->plt_present) {
diff --git a/coregrind/m_debuginfo/storage.c b/coregrind/m_debuginfo/storage.c
index b1f187b..5ffa738 100644
--- a/coregrind/m_debuginfo/storage.c
+++ b/coregrind/m_debuginfo/storage.c
@@ -195,6 +195,8 @@
    SHOW_HOW(si->sp_how, si->sp_off);
    VG_(printf)(" FP=");
    SHOW_HOW(si->fp_how, si->fp_off);
+#  elif defined(VGA_arm64)
+   I_die_here;
 #  else
 #    error "Unknown arch"
 #  endif
diff --git a/coregrind/m_debuglog.c b/coregrind/m_debuglog.c
index 0b4f967..2657c3a 100644
--- a/coregrind/m_debuglog.c
+++ b/coregrind/m_debuglog.c
@@ -103,6 +103,7 @@
 }
 
 #elif defined(VGP_amd64_linux)
+
 __attribute__((noinline))
 static UInt local_sys_write_stderr ( const HChar* buf, Int n )
 {
@@ -267,6 +268,42 @@
    return __res;
 }
 
+#elif defined(VGP_arm64_linux)
+
+/* Write n bytes of buf to stderr using a raw write() syscall.  Returns the
+   syscall result, or (UInt)-1 if the kernel handed back a negative value. */
+static UInt local_sys_write_stderr ( const HChar* buf, Int n )
+{
+   volatile ULong block[2];   /* in: [0]=buf, [1]=n;  out: [0]=result */
+   block[0] = (ULong)buf;
+   block[1] = (ULong)n;
+   __asm__ volatile (
+      "mov  x0, #2\n\t"        /* stderr */
+      "ldr  x1, [%0]\n\t"      /* buf */
+      "ldr  x2, [%0, #8]\n\t"  /* n */
+      "mov  x8, #"VG_STRINGIFY(__NR_write)"\n\t"
+      "svc  0x0\n"          /* write() */
+      "str  x0, [%0]\n\t"
+      :
+      : "r" (block)
+      : "x0","x1","x2","x8","memory" /* x8 = syscall no; asm writes block[] */
+   );
+   /* block[] is unsigned, so the sign test must go through a signed cast */
+   return (Long)block[0] < 0 ? (UInt)-1 : (UInt)block[0];
+}
+
+static UInt local_sys_getpid ( void )   /* raw getpid() syscall */
+{
+   UInt __res;
+   __asm__ volatile (
+      "mov  x8, #"VG_STRINGIFY(__NR_getpid)"\n"   /* x8 = syscall number */
+      "svc  0x0\n"      /* getpid() */
+      "mov  %0, x0\n"   /* copy x0 (the value left by the svc) into __res */
+      : "=r" (__res)
+      :
+      : "x0", "x8" );
+   return (UInt)__res;
+}
+
 #elif defined(VGP_x86_darwin)
 
 /* We would use VG_DARWIN_SYSNO_TO_KERNEL instead of VG_DARWIN_SYSNO_INDEX
@@ -350,6 +387,7 @@
 }
 
 #elif defined(VGP_s390x_linux)
+
 static UInt local_sys_write_stderr ( const HChar* buf, Int n )
 {
    register Int          r2     asm("2") = 2;      /* file descriptor STDERR */
@@ -391,6 +429,7 @@
 }
 
 #elif defined(VGP_mips32_linux)
+
 static UInt local_sys_write_stderr ( const HChar* buf, Int n )
 {
    volatile Int block[2];
@@ -428,6 +467,7 @@
 }
 
 #elif defined(VGP_mips64_linux)
+
 static UInt local_sys_write_stderr ( const HChar* buf, Int n )
 {
    volatile Long block[2];
diff --git a/coregrind/m_dispatch/dispatch-arm64-linux.S b/coregrind/m_dispatch/dispatch-arm64-linux.S
new file mode 100644
index 0000000..9531275
--- /dev/null
+++ b/coregrind/m_dispatch/dispatch-arm64-linux.S
@@ -0,0 +1,241 @@
+
+/*--------------------------------------------------------------------*/
+/*--- The core dispatch loop, for jumping to a code address.       ---*/
+/*---                                       dispatch-arm64-linux.S ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+  This file is part of Valgrind, a dynamic binary instrumentation
+  framework.
+
+  Copyright (C) 2013-2013 OpenWorks
+      info@open-works.net
+
+  This program is free software; you can redistribute it and/or
+  modify it under the terms of the GNU General Public License as
+  published by the Free Software Foundation; either version 2 of the
+  License, or (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+  02111-1307, USA.
+
+  The GNU General Public License is contained in the file COPYING.
+*/
+
+#if defined(VGP_arm64_linux)
+
+#include "pub_core_basics_asm.h"
+#include "pub_core_dispatch_asm.h"
+#include "pub_core_transtab_asm.h"
+#include "libvex_guest_offsets.h"	/* for OFFSET_arm64_* */
+
+
+/*------------------------------------------------------------*/
+/*---                                                      ---*/
+/*--- The dispatch loop.  VG_(disp_run_translations) is    ---*/
+/*--- used to run all translations,                        ---*/
+/*--- including no-redir ones.                             ---*/
+/*---                                                      ---*/
+/*------------------------------------------------------------*/
+
+/*----------------------------------------------------*/
+/*--- Entry and preamble (set everything up)       ---*/
+/*----------------------------------------------------*/
+
+/* signature:
+void VG_(disp_run_translations)( UWord* two_words,
+                                 void*  guest_state, 
+                                 Addr   host_addr );
+*/
+.text
+.global VG_(disp_run_translations)
+VG_(disp_run_translations):
+        /* x0  holds two_words
+           x1  holds guest_state
+           x2  holds host_addr
+        */
+        /* Push the callee-saved registers: x19..x28 are callee-saved
+           under AAPCS64, and x29/x30 (FP/LR) are saved too.  Note this
+           sequence maintains 16-alignment of sp.  Also save x0 since it
+           will be needed in the postamble. */
+        stp  x29, x30, [sp, #-16]!
+        stp  x27, x28, [sp, #-16]!
+        stp  x25, x26, [sp, #-16]!
+        stp  x23, x24, [sp, #-16]!
+        stp  x21, x22, [sp, #-16]!
+        stp  x19, x20, [sp, #-16]!
+        stp  x0,  xzr, [sp, #-16]!     // x0 = two_words; xzr pads the pair
+
+        /* set FPSCR to vex-required default value */
+        // FIXME: the insns below are 32-bit ARM placeholders, not yet ported
+        // mov  r4, #0
+        // fmxr fpscr, r4
+
+       	/* Set up the guest state pointer */
+        mov x21, x1                    // x21 = guest_state
+
+        /* and jump into the code cache.  Chained translations in
+           the code cache run, until for whatever reason, they can't
+           continue.  When that happens, the translation in question
+           will jump (or call) to one of the continuation points
+           VG_(cp_...) below. */
+        br x2                          // x2 = host_addr
+        /* NOTREACHED */
+        
+/*----------------------------------------------------*/
+/*--- Postamble and exit.                          ---*/
+/*----------------------------------------------------*/
+
+postamble:
+        /* At this point, x1 and x2 contain two
+           words to be returned to the caller.  x1
+           holds a TRC value, and x2 optionally may
+           hold another word (for CHAIN_ME exits, the
+           address of the place to patch.) */
+
+        /* We're leaving.  Check that nobody messed with
+           FPSCR in ways we don't expect. */
+        // FIXME: the insns below are 32-bit ARM placeholders, not yet ported
+        // fmrx r4, fpscr
+        // bic  r4, #0xF8000000 /* mask out NZCV and QC */
+        // bic  r4, #0x0000009F /* mask out IDC,IXC,UFC,OFC,DZC,IOC */
+        // cmp  r4, #0
+        // beq  remove_frame /* we're OK */
+        /* otherwise we have an invariant violation */
+        // movw r1, #VG_TRC_INVARIANT_FAILED
+        // movw r2, #0
+        /* fall through */
+
+remove_frame:
+        /* Restore int regs, including importantly x0 (two_words),
+           but not x1.  Pops mirror the pushes in the preamble. */
+        ldp  x0,  xzr, [sp], #16       // x0 = saved two_words; pad discarded
+        ldp  x19, x20, [sp], #16
+        ldp  x21, x22, [sp], #16
+        ldp  x23, x24, [sp], #16
+        ldp  x25, x26, [sp], #16
+        ldp  x27, x28, [sp], #16
+        ldp  x29, x30, [sp], #16
+
+        /* Stash return values */
+        str  x1, [x0, #0]              // two_words[0] = x1 (TRC)
+        str  x2, [x0, #8]              // two_words[1] = x2
+        ret
+
+/*----------------------------------------------------*/
+/*--- Continuation points                          ---*/
+/*----------------------------------------------------*/
+
+/* ------ Chain me to slow entry point ------ */
+.global VG_(disp_cp_chain_me_to_slowEP)
+VG_(disp_cp_chain_me_to_slowEP):
+        /* We got called.  The return address indicates
+           where the patching needs to happen.  Collect
+           the return address and exit back to C land,
+           handing the caller the pair (Chain_me_S, RA) */
+        mov  x1, #VG_TRC_CHAIN_ME_TO_SLOW_EP   // x1 = TRC for postamble
+        mov  x2, x30 // 30 == LR
+        /* 4 = movw x9, disp_cp_chain_me_to_slowEP[15:0]
+           4 = movk x9, disp_cp_chain_me_to_slowEP[31:16], lsl 16
+           4 = movk x9, disp_cp_chain_me_to_slowEP[47:32], lsl 32
+           4 = movk x9, disp_cp_chain_me_to_slowEP[63:48], lsl 48
+           4 = blr  x9
+        */
+        sub  x2, x2, #4+4+4+4+4   // back up over the 5 insns listed above
+        b    postamble
+
+/* ------ Chain me to fast entry point ------ */
+.global VG_(disp_cp_chain_me_to_fastEP)
+VG_(disp_cp_chain_me_to_fastEP):
+        /* We got called.  The return address indicates
+           where the patching needs to happen.  Collect
+           the return address and exit back to C land,
+           handing the caller the pair (Chain_me_F, RA) */
+        mov  x1, #VG_TRC_CHAIN_ME_TO_FAST_EP   // x1 = TRC for postamble
+        mov  x2, x30 // 30 == LR
+        /* 4 = movw x9, disp_cp_chain_me_to_fastEP[15:0]
+           4 = movk x9, disp_cp_chain_me_to_fastEP[31:16], lsl 16
+           4 = movk x9, disp_cp_chain_me_to_fastEP[47:32], lsl 32
+           4 = movk x9, disp_cp_chain_me_to_fastEP[63:48], lsl 48
+           4 = blr  x9
+        */
+        sub  x2, x2, #4+4+4+4+4   // back up over the 5 insns listed above
+        b    postamble
+
+/* ------ Indirect but boring jump ------ */
+.global VG_(disp_cp_xindir)
+VG_(disp_cp_xindir):
+	/* Where are we going? */
+        ldr  x0, [x21, #OFFSET_arm64_PC]   // x0 = guest PC from guest state
+
+        /* stats only */
+        adrp x1,           VG_(stats__n_xindirs_32)
+        add  x1, x1, :lo12:VG_(stats__n_xindirs_32)
+        ldr  w2, [x1, #0]               // 32-bit counter: load, +1, store
+        add  w2, w2, #1
+        str  w2, [x1, #0]
+        
+        /* try a fast lookup in the translation cache */
+        // x0 = next guest, x1,x2,x4,x5 scratch
+        mov  x1, #VG_TT_FAST_MASK       // x1 = VG_TT_FAST_MASK
+	and  x2, x1, x0, LSR #2         // x2 = entry # = (x1 & (x0 >> 2))
+
+        adrp x4,           VG_(tt_fast)
+        add  x4, x4, :lo12:VG_(tt_fast) // x4 = &VG_(tt_fast)
+
+	add  x1, x4, x2, LSL #4         // x1 = &tt_fast[entry#] (16-byte entries)
+
+        ldp  x4, x5, [x1, #0]           // x4 = .guest, x5 = .host
+
+	cmp  x4, x0                     // does the cached guest addr match?
+
+        // jump to host if lookup succeeded
+        bne  fast_lookup_failed
+	br   x5
+        /*NOTREACHED*/
+
+fast_lookup_failed:
+        /* RM ME -- stats only */
+        adrp x1,           VG_(stats__n_xindir_misses_32)
+        add  x1, x1, :lo12:VG_(stats__n_xindir_misses_32)
+        ldr  w2, [x1, #0]               // 32-bit counter: load, +1, store
+        add  w2, w2, #1
+        str  w2, [x1, #0]
+
+	mov  x1, #VG_TRC_INNER_FASTMISS // x1 = TRC for postamble
+        mov  x2, #0                     // second return word is zero
+	b    postamble
+
+/* ------ Assisted jump ------ */
+.global VG_(disp_cp_xassisted)
+VG_(disp_cp_xassisted):
+        /* x21 contains the TRC */
+        mov  x1, x21                    // x1 = TRC for postamble
+        mov  x2, #0                     // second return word is zero
+        b    postamble
+
+/* ------ Event check failed ------ */
+.global VG_(disp_cp_evcheck_fail)
+VG_(disp_cp_evcheck_fail):
+       	mov  x1, #VG_TRC_INNER_COUNTERZERO   // x1 = TRC for postamble
+        mov  x2, #0                          // second return word is zero
+	b    postamble
+
+
+.size VG_(disp_run_translations), .-VG_(disp_run_translations)
+
+/* Let the linker know we don't need an executable stack */
+.section .note.GNU-stack,"",%progbits
+
+#endif // defined(VGP_arm64_linux)
+
+/*--------------------------------------------------------------------*/
+/*--- end                                   dispatch-arm64-linux.S ---*/
+/*--------------------------------------------------------------------*/
diff --git a/coregrind/m_gdbserver/target.c b/coregrind/m_gdbserver/target.c
index eef80eb..a7795db 100644
--- a/coregrind/m_gdbserver/target.c
+++ b/coregrind/m_gdbserver/target.c
@@ -641,6 +641,8 @@
    amd64_init_architecture(&the_low_target);
 #elif defined(VGA_arm)
    arm_init_architecture(&the_low_target);
+#elif defined(VGA_arm64)
+   arm64_init_architecture(&the_low_target); /* see valgrind-low-arm64.c */
 #elif defined(VGA_ppc32)
    ppc32_init_architecture(&the_low_target);
 #elif defined(VGA_ppc64)
@@ -652,6 +654,6 @@
 #elif defined(VGA_mips64)
    mips64_init_architecture(&the_low_target);
 #else
-   architecture missing in target.c valgrind_initialize_target
+   #error "architecture missing in target.c valgrind_initialize_target"
 #endif
 }
diff --git a/coregrind/m_gdbserver/valgrind-low-arm64.c b/coregrind/m_gdbserver/valgrind-low-arm64.c
new file mode 100644
index 0000000..36e1d3c
--- /dev/null
+++ b/coregrind/m_gdbserver/valgrind-low-arm64.c
@@ -0,0 +1,307 @@
+/* Low level interface to valgrind, for the remote server for GDB integrated
+   in valgrind.
+   Copyright (C) 2011
+   Free Software Foundation, Inc.
+
+   This file is part of VALGRIND.
+   It has been inspired from a file from gdbserver in gdb 6.6.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor,
+   Boston, MA 02110-1301, USA.  */
+
+#include "server.h"
+#include "target.h"
+#include "regdef.h"
+#include "regcache.h"
+
+#include "pub_core_aspacemgr.h"
+#include "pub_tool_machine.h"
+#include "pub_core_threadstate.h"
+#include "pub_core_transtab.h"
+#include "pub_core_gdbserver.h" 
+#include "pub_core_debuginfo.h"
+
+#include "valgrind_low.h"
+
+#include "libvex_guest_arm64.h"
+
+//ZZ static struct reg regs[] = {
+//ZZ   { "r0", 0, 32 },
+//ZZ   { "r1", 32, 32 },
+//ZZ   { "r2", 64, 32 },
+//ZZ   { "r3", 96, 32 },
+//ZZ   { "r4", 128, 32 },
+//ZZ   { "r5", 160, 32 },
+//ZZ   { "r6", 192, 32 },
+//ZZ   { "r7", 224, 32 },
+//ZZ   { "r8", 256, 32 },
+//ZZ   { "r9", 288, 32 },
+//ZZ   { "r10", 320, 32 },
+//ZZ   { "r11", 352, 32 },
+//ZZ   { "r12", 384, 32 },
+//ZZ   { "sp", 416, 32 },
+//ZZ   { "lr", 448, 32 },
+//ZZ   { "pc", 480, 32 },
+//ZZ   { "", 512, 0 }, // It seems these entries are needed 
+//ZZ   { "", 512, 0 }, // as previous versions of arm <-> gdb placed 
+//ZZ   { "", 512, 0 }, // some floating point registers here. So, cpsr 
+//ZZ   { "", 512, 0 }, // must be register 25.
+//ZZ   { "", 512, 0 },
+//ZZ   { "", 512, 0 },
+//ZZ   { "", 512, 0 },
+//ZZ   { "", 512, 0 },
+//ZZ   { "", 512, 0 },
+//ZZ   { "cpsr", 512, 32 },
+//ZZ   { "d0", 544, 64 },
+//ZZ   { "d1", 608, 64 },
+//ZZ   { "d2", 672, 64 },
+//ZZ   { "d3", 736, 64 },
+//ZZ   { "d4", 800, 64 },
+//ZZ   { "d5", 864, 64 },
+//ZZ   { "d6", 928, 64 },
+//ZZ   { "d7", 992, 64 },
+//ZZ   { "d8", 1056, 64 },
+//ZZ   { "d9", 1120, 64 },
+//ZZ   { "d10", 1184, 64 },
+//ZZ   { "d11", 1248, 64 },
+//ZZ   { "d12", 1312, 64 },
+//ZZ   { "d13", 1376, 64 },
+//ZZ   { "d14", 1440, 64 },
+//ZZ   { "d15", 1504, 64 },
+//ZZ   { "d16", 1568, 64 },
+//ZZ   { "d17", 1632, 64 },
+//ZZ   { "d18", 1696, 64 },
+//ZZ   { "d19", 1760, 64 },
+//ZZ   { "d20", 1824, 64 },
+//ZZ   { "d21", 1888, 64 },
+//ZZ   { "d22", 1952, 64 },
+//ZZ   { "d23", 2016, 64 },
+//ZZ   { "d24", 2080, 64 },
+//ZZ   { "d25", 2144, 64 },
+//ZZ   { "d26", 2208, 64 },
+//ZZ   { "d27", 2272, 64 },
+//ZZ   { "d28", 2336, 64 },
+//ZZ   { "d29", 2400, 64 },
+//ZZ   { "d30", 2464, 64 },
+//ZZ   { "d31", 2528, 64 },
+//ZZ   { "fpscr", 2592, 32 }
+//ZZ };
+//ZZ static const char *expedite_regs[] = { "r11", "sp", "pc", 0 };
+//ZZ #define num_regs (sizeof (regs) / sizeof (regs[0]))
+//ZZ 
+//ZZ static
+//ZZ CORE_ADDR get_pc (void)
+//ZZ {
+//ZZ    unsigned long pc;
+//ZZ 
+//ZZ    collect_register_by_name ("pc", &pc);
+//ZZ    
+//ZZ    dlog(1, "stop pc is %p\n", (void *) pc);
+//ZZ    return pc;
+//ZZ }
+//ZZ 
+//ZZ static
+//ZZ void set_pc (CORE_ADDR newpc)
+//ZZ {
+//ZZ    Bool mod;
+//ZZ    supply_register_by_name ("pc", &newpc, &mod);
+//ZZ    if (mod)
+//ZZ       dlog(1, "set pc to %p\n", C2v (newpc));
+//ZZ    else
+//ZZ       dlog(1, "set pc not changed %p\n", C2v (newpc));
+//ZZ }
+//ZZ 
+//ZZ Addr thumb_pc (Addr pc)
+//ZZ {
+//ZZ    // If the thumb bit (bit 0) is already set, we trust it.
+//ZZ    if (pc & 1) {
+//ZZ       dlog (1, "%p = thumb (bit0 is set)\n", C2v (pc));
+//ZZ       return pc;
+//ZZ    }
+//ZZ 
+//ZZ    // Here, bit 0 is not set.
+//ZZ    // For a pc aligned on 4 bytes, we have to use the debug
+//ZZ    // info to determine the thumb-ness.
+//ZZ    // else (aligned on 2 bytes), we trust this is a thumb
+//ZZ    // address and we set the thumb bit.
+//ZZ 
+//ZZ    if (pc & 2) {
+//ZZ       dlog (1, "bit0 not set, bit1 set => %p = thumb\n", C2v (pc));
+//ZZ       return pc | 1;
+//ZZ    }
+//ZZ 
+//ZZ    // pc aligned on 4 bytes. We need to use debug info.
+//ZZ    {
+//ZZ       HChar fnname[200]; // ??? max size
+//ZZ       Addr entrypoint;
+//ZZ       Addr ptoc; // unused but needed.
+//ZZ       // If this is a thumb instruction, we need to ask
+//ZZ       // the debug info with the bit0 set
+//ZZ       // (why can't debug info do that for us ???)
+//ZZ       // (why if this is a 4 bytes thumb instruction ???)
+//ZZ       if (VG_(get_fnname_raw) (pc | 1, fnname, 200)) {
+//ZZ          if (VG_(lookup_symbol_SLOW)( "*", fnname, &entrypoint, &ptoc )) {
+//ZZ             dlog (1, "fnname %s lookupsym %p => %p %s.\n",
+//ZZ                   fnname, C2v(entrypoint), C2v(pc),
+//ZZ                   (entrypoint & 1 ? "thumb" : "arm"));
+//ZZ             if (entrypoint & 1)
+//ZZ                return pc | 1;
+//ZZ             else
+//ZZ                return pc;
+//ZZ             
+//ZZ          } else {
+//ZZ             dlog (1, "%p fnname %s lookupsym failed?. Assume arm\n",
+//ZZ                   C2v (pc), fnname);
+//ZZ             return pc;
+//ZZ          }
+//ZZ       } else {
+//ZZ          // Can't find function name. We assume this is arm
+//ZZ          dlog (1, "%p unknown fnname?. Assume arm\n", C2v (pc));
+//ZZ          return pc;
+//ZZ       }
+//ZZ    }
+//ZZ }
+//ZZ 
+//ZZ /* store registers in the guest state (gdbserver_to_valgrind)
+//ZZ    or fetch register from the guest state (valgrind_to_gdbserver). */
+//ZZ static
+//ZZ void transfer_register (ThreadId tid, int abs_regno, void * buf,
+//ZZ                         transfer_direction dir, int size, Bool *mod)
+//ZZ {
+//ZZ    ThreadState* tst = VG_(get_ThreadState)(tid);
+//ZZ    int set = abs_regno / num_regs;
+//ZZ    int regno = abs_regno % num_regs;
+//ZZ    *mod = False;
+//ZZ 
+//ZZ    VexGuestARMState* arm = (VexGuestARMState*) get_arch (set, tst);
+//ZZ 
+//ZZ    switch (regno) { 
+//ZZ    // numbers here have to match the order of regs above
+//ZZ    // Attention: gdb order does not match valgrind order.
+//ZZ    case 0:  VG_(transfer) (&arm->guest_R0,   buf, dir, size, mod); break;
+//ZZ    case 1:  VG_(transfer) (&arm->guest_R1,   buf, dir, size, mod); break;
+//ZZ    case 2:  VG_(transfer) (&arm->guest_R2,   buf, dir, size, mod); break;
+//ZZ    case 3:  VG_(transfer) (&arm->guest_R3,   buf, dir, size, mod); break;
+//ZZ    case 4:  VG_(transfer) (&arm->guest_R4,   buf, dir, size, mod); break;
+//ZZ    case 5:  VG_(transfer) (&arm->guest_R5,   buf, dir, size, mod); break;
+//ZZ    case 6:  VG_(transfer) (&arm->guest_R6,   buf, dir, size, mod); break;
+//ZZ    case 7:  VG_(transfer) (&arm->guest_R7,   buf, dir, size, mod); break;
+//ZZ    case 8:  VG_(transfer) (&arm->guest_R8,   buf, dir, size, mod); break;
+//ZZ    case 9:  VG_(transfer) (&arm->guest_R9,   buf, dir, size, mod); break;
+//ZZ    case 10: VG_(transfer) (&arm->guest_R10,  buf, dir, size, mod); break;
+//ZZ    case 11: VG_(transfer) (&arm->guest_R11,  buf, dir, size, mod); break;
+//ZZ    case 12: VG_(transfer) (&arm->guest_R12,  buf, dir, size, mod); break;
+//ZZ    case 13: VG_(transfer) (&arm->guest_R13,  buf, dir, size, mod); break;
+//ZZ    case 14: VG_(transfer) (&arm->guest_R14,  buf, dir, size, mod); break;
+//ZZ    case 15: { 
+//ZZ       VG_(transfer) (&arm->guest_R15T, buf, dir, size, mod);
+//ZZ       if (dir == gdbserver_to_valgrind && *mod) {
+//ZZ          // If gdb is changing the PC, we have to set the thumb bit
+//ZZ          // if needed.
+//ZZ          arm->guest_R15T = thumb_pc(arm->guest_R15T);
+//ZZ       }
+//ZZ       break;
+//ZZ    }
+//ZZ    case 16:
+//ZZ    case 17:
+//ZZ    case 18:
+//ZZ    case 19:
+//ZZ    case 20: /* 9 "empty registers". See struct reg regs above. */
+//ZZ    case 21:
+//ZZ    case 22:
+//ZZ    case 23:
+//ZZ    case 24: *mod = False; break;
+//ZZ    case 25: {
+//ZZ       UInt cpsr = LibVEX_GuestARM_get_cpsr (arm);
+//ZZ       if (dir == valgrind_to_gdbserver) {
+//ZZ          VG_(transfer) (&cpsr, buf, dir, size, mod); 
+//ZZ       } else {
+//ZZ #      if 0
+//ZZ          UInt newcpsr;
+//ZZ          VG_(transfer) (&newcpsr, buf, dir, size, mod);
+//ZZ          *mod = newcpsr != cpsr;
+//ZZ          // GDBTD ???? see FIXME in guest_arm_helpers.c
+//ZZ          LibVEX_GuestARM_put_flags (newcpsr, arm);
+//ZZ #      else
+//ZZ          *mod = False;
+//ZZ #      endif
+//ZZ       }
+//ZZ       break;
+//ZZ    }
+//ZZ    case 26: VG_(transfer) (&arm->guest_D0,  buf, dir, size, mod); break;
+//ZZ    case 27: VG_(transfer) (&arm->guest_D1,  buf, dir, size, mod); break;
+//ZZ    case 28: VG_(transfer) (&arm->guest_D2,  buf, dir, size, mod); break;
+//ZZ    case 29: VG_(transfer) (&arm->guest_D3,  buf, dir, size, mod); break;
+//ZZ    case 30: VG_(transfer) (&arm->guest_D4,  buf, dir, size, mod); break;
+//ZZ    case 31: VG_(transfer) (&arm->guest_D5,  buf, dir, size, mod); break;
+//ZZ    case 32: VG_(transfer) (&arm->guest_D6,  buf, dir, size, mod); break;
+//ZZ    case 33: VG_(transfer) (&arm->guest_D7,  buf, dir, size, mod); break;
+//ZZ    case 34: VG_(transfer) (&arm->guest_D8,  buf, dir, size, mod); break;
+//ZZ    case 35: VG_(transfer) (&arm->guest_D9,  buf, dir, size, mod); break;
+//ZZ    case 36: VG_(transfer) (&arm->guest_D10, buf, dir, size, mod); break;
+//ZZ    case 37: VG_(transfer) (&arm->guest_D11, buf, dir, size, mod); break;
+//ZZ    case 38: VG_(transfer) (&arm->guest_D12, buf, dir, size, mod); break;
+//ZZ    case 39: VG_(transfer) (&arm->guest_D13, buf, dir, size, mod); break;
+//ZZ    case 40: VG_(transfer) (&arm->guest_D14, buf, dir, size, mod); break;
+//ZZ    case 41: VG_(transfer) (&arm->guest_D15, buf, dir, size, mod); break;
+//ZZ    case 42: VG_(transfer) (&arm->guest_D16, buf, dir, size, mod); break;
+//ZZ    case 43: VG_(transfer) (&arm->guest_D17, buf, dir, size, mod); break;
+//ZZ    case 44: VG_(transfer) (&arm->guest_D18, buf, dir, size, mod); break;
+//ZZ    case 45: VG_(transfer) (&arm->guest_D19, buf, dir, size, mod); break;
+//ZZ    case 46: VG_(transfer) (&arm->guest_D20, buf, dir, size, mod); break;
+//ZZ    case 47: VG_(transfer) (&arm->guest_D21, buf, dir, size, mod); break;
+//ZZ    case 48: VG_(transfer) (&arm->guest_D22, buf, dir, size, mod); break;
+//ZZ    case 49: VG_(transfer) (&arm->guest_D23, buf, dir, size, mod); break;
+//ZZ    case 50: VG_(transfer) (&arm->guest_D24, buf, dir, size, mod); break;
+//ZZ    case 51: VG_(transfer) (&arm->guest_D25, buf, dir, size, mod); break;
+//ZZ    case 52: VG_(transfer) (&arm->guest_D26, buf, dir, size, mod); break;
+//ZZ    case 53: VG_(transfer) (&arm->guest_D27, buf, dir, size, mod); break;
+//ZZ    case 54: VG_(transfer) (&arm->guest_D28, buf, dir, size, mod); break;
+//ZZ    case 55: VG_(transfer) (&arm->guest_D29, buf, dir, size, mod); break;
+//ZZ    case 56: VG_(transfer) (&arm->guest_D30, buf, dir, size, mod); break;
+//ZZ    case 57: VG_(transfer) (&arm->guest_D31, buf, dir, size, mod); break;
+//ZZ    case 58: VG_(transfer) (&arm->guest_FPSCR, buf, dir, size, mod); break;
+//ZZ    default: vg_assert(0);
+//ZZ    }
+//ZZ }
+//ZZ 
+//ZZ static
+//ZZ const char* target_xml (Bool shadow_mode)
+//ZZ {
+//ZZ    if (shadow_mode) {
+//ZZ       return "arm-with-vfpv3-valgrind.xml";
+//ZZ    } else {
+//ZZ       return "arm-with-vfpv3.xml";
+//ZZ    }  
+//ZZ }
+//ZZ 
+//ZZ static struct valgrind_target_ops low_target = {
+//ZZ    num_regs,
+//ZZ    regs,
+//ZZ    13, //SP
+//ZZ    transfer_register,
+//ZZ    get_pc,
+//ZZ    set_pc,
+//ZZ    "arm",
+//ZZ    target_xml
+//ZZ };
+
+void arm64_init_architecture (struct valgrind_target_ops *target)
+{
+  vg_assert(0); // IMPLEMENT ME: stub; asserts unconditionally, target is not filled in
+  //ZZ    *target = low_target;
+  //ZZ    set_register_cache (regs, num_regs);
+  //ZZ    gdbserver_expedite_regs = expedite_regs;
+}
diff --git a/coregrind/m_gdbserver/valgrind_low.h b/coregrind/m_gdbserver/valgrind_low.h
index 707d438..e0def75 100644
--- a/coregrind/m_gdbserver/valgrind_low.h
+++ b/coregrind/m_gdbserver/valgrind_low.h
@@ -73,6 +73,7 @@
 extern void x86_init_architecture (struct valgrind_target_ops *target);
 extern void amd64_init_architecture (struct valgrind_target_ops *target);
 extern void arm_init_architecture (struct valgrind_target_ops *target);
+extern void arm64_init_architecture (struct valgrind_target_ops *target);
 extern void ppc32_init_architecture (struct valgrind_target_ops *target);
 extern void ppc64_init_architecture (struct valgrind_target_ops *target);
 extern void s390x_init_architecture (struct valgrind_target_ops *target);
diff --git a/coregrind/m_initimg/initimg-linux.c b/coregrind/m_initimg/initimg-linux.c
index bbef10a..9e6530d 100644
--- a/coregrind/m_initimg/initimg-linux.c
+++ b/coregrind/m_initimg/initimg-linux.c
@@ -1040,7 +1040,7 @@
    arch->vex.guest_GPR2 = iifii.initial_client_TOC;
    arch->vex.guest_CIA  = iifii.initial_client_IP;
 
-#   elif defined(VGP_arm_linux)
+#  elif defined(VGP_arm_linux)
    /* Zero out the initial state, and set up the simulated FPU in a
       sane way. */
    LibVEX_GuestARM_initialise(&arch->vex);
@@ -1056,6 +1056,17 @@
    // FIXME jrs: what's this for?
    arch->vex.guest_R1 =  iifii.initial_client_SP;
 
+#  elif defined(VGP_arm64_linux)
+   /* Zero out the initial state. */
+   LibVEX_GuestARM64_initialise(&arch->vex);
+
+   /* Zero out the shadow areas. */
+   VG_(memset)(&arch->vex_shadow1, 0, sizeof(VexGuestARM64State));
+   VG_(memset)(&arch->vex_shadow2, 0, sizeof(VexGuestARM64State));
+
+   arch->vex.guest_SP = iifii.initial_client_SP;
+   arch->vex.guest_PC = iifii.initial_client_IP;
+
 #  elif defined(VGP_s390x_linux)
    vg_assert(0 == sizeof(VexGuestS390XState) % 16);
 
diff --git a/coregrind/m_libcassert.c b/coregrind/m_libcassert.c
index d7706de..65a373e 100644
--- a/coregrind/m_libcassert.c
+++ b/coregrind/m_libcassert.c
@@ -136,6 +136,25 @@
         (srP)->misc.ARM.r11 = block[4];                   \
         (srP)->misc.ARM.r7  = block[5];                   \
       }
+#elif defined(VGP_arm64_linux)
+#  define GET_STARTREGS(srP)  /* grab pc/sp/x29/x30 */    \
+      { ULong block[4];  /* holds pc, sp, fp, lr */       \
+        __asm__ __volatile__(                             \
+           "adr x19, 0;"           /* x19: scratch */     \
+           "str x19, [%0, #+0];"   /* pc */               \
+           "mov x19, sp;"          /* sp, via x19 */      \
+           "str x19, [%0, #+8];"   /* sp */               \
+           "str x29, [%0, #+16];"  /* fp */               \
+           "str x30, [%0, #+24];"  /* lr */               \
+           : /* out */                                    \
+           : /* in */ "r"(&block[0])                      \
+           : /* trash */ "memory","x19"                   \
+        );                                                \
+        (srP)->r_pc = block[0];                           \
+        (srP)->r_sp = block[1];                           \
+        (srP)->misc.ARM64.x29 = block[2];                 \
+        (srP)->misc.ARM64.x30 = block[3];                 \
+      }
 #elif defined(VGP_s390x_linux)
 #  define GET_STARTREGS(srP)                              \
       { ULong ia, sp, fp, lr;                             \
diff --git a/coregrind/m_libcfile.c b/coregrind/m_libcfile.c
index 4f6d3d0..f5d51af 100644
--- a/coregrind/m_libcfile.c
+++ b/coregrind/m_libcfile.c
@@ -116,8 +116,12 @@
 }
 
 SysRes VG_(open) ( const HChar* pathname, Int flags, Int mode )
-{  
-#  if defined(VGO_linux)
+{
+#  if defined(VGP_arm64_linux)
+   /* ARM64 wants to use __NR_openat rather than __NR_open. */
+   SysRes res = VG_(do_syscall4)(__NR_openat,
+                                 VKI_AT_FDCWD, (UWord)pathname, flags, mode);
+#  elif defined(VGO_linux)
    SysRes res = VG_(do_syscall3)(__NR_open,
                                  (UWord)pathname, flags, mode);
 #  elif defined(VGO_darwin)
@@ -204,6 +208,9 @@
    } else {
       return -1;
    }
+#  elif defined(VGP_arm64_linux)
+   SysRes res = VG_(do_syscall2)(__NR_pipe2, (UWord)fd, 0);
+   return sr_isError(res) ? -1 : 0;
 #  elif defined(VGO_linux)
    SysRes res = VG_(do_syscall1)(__NR_pipe, (UWord)fd);
    return sr_isError(res) ? -1 : 0;
@@ -289,8 +296,14 @@
      }
    }
 #  endif /* defined(__NR_stat64) */
+   /* This is the fallback ("vanilla version"). */
    { struct vki_stat buf;
+#    if defined(VGP_arm64_linux)
+     res = VG_(do_syscall3)(__NR3264_fstatat, VKI_AT_FDCWD,
+                                              (UWord)file_name, (UWord)&buf);
+#    else
      res = VG_(do_syscall2)(__NR_stat, (UWord)file_name, (UWord)&buf);
+#    endif
      if (!sr_isError(res))
         TRANSLATE_TO_vg_stat(vgbuf, &buf);
      return res;
@@ -385,7 +398,12 @@
 
 Int VG_(unlink) ( const HChar* file_name )
 {
+#  if defined(VGP_arm64_linux)   /* arm64: unlinkat with VKI_AT_FDCWD as dirfd */
+   SysRes res = VG_(do_syscall2)(__NR_unlinkat, VKI_AT_FDCWD,
+                                                (UWord)file_name);
+#  else
    SysRes res = VG_(do_syscall1)(__NR_unlink, (UWord)file_name);
+#  endif
    return sr_isError(res) ? (-1) : 0;
 }
 
@@ -474,7 +492,12 @@
 {
    SysRes res;
    /* res = readlink( path, buf, bufsiz ); */
+#  if defined(VGP_arm64_linux)
+   res = VG_(do_syscall4)(__NR_readlinkat, VKI_AT_FDCWD,
+                                           (UWord)path, (UWord)buf, bufsiz);
+#  else
    res = VG_(do_syscall3)(__NR_readlink, (UWord)path, (UWord)buf, bufsiz);
+#  endif
    return sr_isError(res) ? -1 : sr_Res(res);
 }
 
@@ -509,7 +532,11 @@
    UWord w = (irusr ? VKI_R_OK : 0)
              | (iwusr ? VKI_W_OK : 0)
              | (ixusr ? VKI_X_OK : 0);
+#  if defined(VGP_arm64_linux)
+   SysRes res = VG_(do_syscall3)(__NR_faccessat, VKI_AT_FDCWD, (UWord)path, w);
+#  else
    SysRes res = VG_(do_syscall2)(__NR_access, (UWord)path, w);
+#  endif
    return sr_isError(res) ? 1 : 0;   
 
 #  if defined(VGO_linux)
@@ -624,7 +651,8 @@
    return res;
 #  elif defined(VGP_amd64_linux) \
       || defined(VGP_ppc64_linux) || defined(VGP_s390x_linux) \
-      || defined(VGP_mips64_linux) 
+      || defined(VGP_mips64_linux) \
+      || defined(VGP_arm64_linux)
    res = VG_(do_syscall4)(__NR_pread64, fd, (UWord)buf, count, offset);
    return res;
 #  elif defined(VGP_amd64_darwin)
@@ -877,7 +905,8 @@
    return sr_isError(res) ? -1 : sr_Res(res);
 
 #  elif defined(VGP_amd64_linux) || defined(VGP_arm_linux) \
-        || defined(VGP_mips32_linux) || defined(VGP_mips64_linux)
+        || defined(VGP_mips32_linux) || defined(VGP_mips64_linux) \
+        || defined(VGP_arm64_linux)
    SysRes res;
    res = VG_(do_syscall3)(__NR_socket, domain, type, protocol );
    return sr_isError(res) ? -1 : sr_Res(res);
@@ -916,7 +945,8 @@
    return sr_isError(res) ? -1 : sr_Res(res);
 
 #  elif defined(VGP_amd64_linux) || defined(VGP_arm_linux) \
-        || defined(VGP_mips32_linux) || defined(VGP_mips64_linux)
+        || defined(VGP_mips32_linux) || defined(VGP_mips64_linux) \
+        || defined(VGP_arm64_linux)
    SysRes res;
    res = VG_(do_syscall3)(__NR_connect, sockfd, (UWord)serv_addr, addrlen);
    return sr_isError(res) ? -1 : sr_Res(res);
@@ -955,7 +985,8 @@
    return sr_isError(res) ? -1 : sr_Res(res);
 
 #  elif defined(VGP_amd64_linux) || defined(VGP_arm_linux) \
-        || defined(VGP_mips32_linux) || defined(VGP_mips64_linux)
+        || defined(VGP_mips32_linux) || defined(VGP_mips64_linux) \
+        || defined(VGP_arm64_linux)
    SysRes res;
    res = VG_(do_syscall6)(__NR_sendto, sd, (UWord)msg, 
                                        count, VKI_MSG_NOSIGNAL, 0,0);
@@ -985,7 +1016,7 @@
    return sr_isError(res) ? -1 : sr_Res(res);
 
 #  elif defined(VGP_amd64_linux) || defined(VGP_arm_linux) \
-        || defined(VGP_mips64_linux)
+        || defined(VGP_mips64_linux) || defined(VGP_arm64_linux)
    SysRes res;
    res = VG_(do_syscall3)( __NR_getsockname,
                            (UWord)sd, (UWord)name, (UWord)namelen );
@@ -1016,7 +1047,7 @@
    return sr_isError(res) ? -1 : sr_Res(res);
 
 #  elif defined(VGP_amd64_linux) || defined(VGP_arm_linux) \
-        || defined(VGP_mips64_linux)
+        || defined(VGP_mips64_linux) || defined(VGP_arm64_linux)
    SysRes res;
    res = VG_(do_syscall3)( __NR_getpeername,
                            (UWord)sd, (UWord)name, (UWord)namelen );
@@ -1049,7 +1080,8 @@
    return sr_isError(res) ? -1 : sr_Res(res);
 
 #  elif defined(VGP_amd64_linux) || defined(VGP_arm_linux) \
-        || defined(VGP_mips32_linux) || defined(VGP_mips64_linux)
+        || defined(VGP_mips32_linux) || defined(VGP_mips64_linux) \
+        || defined(VGP_arm64_linux)
    SysRes res;
    res = VG_(do_syscall5)( __NR_getsockopt,
                            (UWord)sd, (UWord)level, (UWord)optname, 
@@ -1085,7 +1117,8 @@
    return sr_isError(res) ? -1 : sr_Res(res);
 
 #  elif defined(VGP_amd64_linux) || defined(VGP_arm_linux) \
-        || defined(VGP_mips32_linux) || defined(VGP_mips64_linux)
+        || defined(VGP_mips32_linux) || defined(VGP_mips64_linux) \
+        || defined(VGP_arm64_linux)
    SysRes res;
    res = VG_(do_syscall5)( __NR_setsockopt,
                            (UWord)sd, (UWord)level, (UWord)optname, 
diff --git a/coregrind/m_libcproc.c b/coregrind/m_libcproc.c
index 88dba87..d88f299 100644
--- a/coregrind/m_libcproc.c
+++ b/coregrind/m_libcproc.c
@@ -458,8 +458,14 @@
        * the /proc/self link is pointing...
        */
 
+#     if defined(VGP_arm64_linux)
+      res = VG_(do_syscall4)(__NR_readlinkat, VKI_AT_FDCWD,
+                             (UWord)"/proc/self",
+                             (UWord)pid, sizeof(pid));
+#     else
       res = VG_(do_syscall3)(__NR_readlink, (UWord)"/proc/self",
                              (UWord)pid, sizeof(pid));
+#     endif
       if (!sr_isError(res) && sr_Res(res) > 0) {
          HChar* s;
          pid[sr_Res(res)] = '\0';
@@ -552,7 +558,7 @@
 #  elif defined(VGP_amd64_linux) || defined(VGP_ppc64_linux)  \
         || defined(VGP_arm_linux)                             \
         || defined(VGO_darwin) || defined(VGP_s390x_linux)    \
-        || defined(VGP_mips32_linux)
+        || defined(VGP_mips32_linux) || defined(VGP_arm64_linux)
    SysRes sres;
    sres = VG_(do_syscall2)(__NR_getgroups, size, (Addr)list);
    if (sr_isError(sres))
@@ -763,6 +769,121 @@
    Addr endaddr   = startaddr + nbytes;
    VG_(do_syscall2)(__NR_ARM_cacheflush, startaddr, endaddr);
 
+#  elif defined(VGP_arm64_linux)
+   // This arm64_linux section of this function VG_(invalidate_icache)
+   // is copied from
+   // https://github.com/armvixl/vixl/blob/master/src/a64/cpu-a64.cc
+   // which has the following copyright notice:
+   /*
+   Copyright 2013, ARM Limited
+   All rights reserved.
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are met:
+   
+   * Redistributions of source code must retain the above copyright notice,
+     this list of conditions and the following disclaimer.
+   * Redistributions in binary form must reproduce the above copyright notice,
+     this list of conditions and the following disclaimer in the documentation
+     and/or other materials provided with the distribution.
+   * Neither the name of ARM Limited nor the names of its contributors may be
+     used to endorse or promote products derived from this software without
+     specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+   ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+   WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+   DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+   FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+   DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+   SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+   CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+   OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+   */
+
+   // Ask what the I and D line sizes are
+   UInt cache_type_register;
+   // Copy the content of the cache type register to a core register.
+   __asm__ __volatile__ ("mrs %[ctr], ctr_el0" // NOLINT
+                         : [ctr] "=r" (cache_type_register));
+
+   const Int kDCacheLineSizeShift = 16;
+   const Int kICacheLineSizeShift = 0;
+   const UInt kDCacheLineSizeMask = 0xf << kDCacheLineSizeShift;
+   const UInt kICacheLineSizeMask = 0xf << kICacheLineSizeShift;
+
+   // CTR_EL0.DminLine / IminLine hold log2 of the smallest D / I cache line
+   // length, counted in 4-byte words rather than in bytes.
+   const UInt dcache_line_size_power_of_two =
+       (cache_type_register & kDCacheLineSizeMask) >> kDCacheLineSizeShift;
+   const UInt icache_line_size_power_of_two =
+       (cache_type_register & kICacheLineSizeMask) >> kICacheLineSizeShift;
+   // Scale from words to bytes (4 << n): the loops below step in bytes.
+   const UInt dcache_line_size_ = 4 << dcache_line_size_power_of_two;
+   const UInt icache_line_size_ = 4 << icache_line_size_power_of_two;
+
+   Addr start = (Addr)ptr;
+   // Sizes will be used to generate a mask big enough to cover a pointer.
+   Addr dsize = (Addr)dcache_line_size_;
+   Addr isize = (Addr)icache_line_size_;
+
+   // Cache line sizes are always a power of 2.
+   Addr dstart = start & ~(dsize - 1);
+   Addr istart = start & ~(isize - 1);
+   Addr end    = start + nbytes;
+
+   __asm__ __volatile__ (
+     // Clean every line of the D cache containing the target data.
+     "0: \n\t"
+     // dc : Data Cache maintenance
+     // c : Clean
+     // va : by (Virtual) Address
+     // u : to the point of Unification
+     // The point of unification for a processor is the point by which the
+     // instruction and data caches are guaranteed to see the same copy of a
+     // memory location. See ARM DDI 0406B page B2-12 for more information.
+     "dc cvau, %[dline] \n\t"
+     "add %[dline], %[dline], %[dsize] \n\t"
+     "cmp %[dline], %[end] \n\t"
+     "b.lt 0b \n\t"
+     // Barrier to make sure the effect of the code above is visible to the rest
+     // of the world.
+     // dsb : Data Synchronisation Barrier
+     // ish : Inner SHareable domain
+     // The point of unification for an Inner Shareable shareability domain is
+     // the point by which the instruction and data caches of all the processors
+     // in that Inner Shareable shareability domain are guaranteed to see the
+     // same copy of a memory location. See ARM DDI 0406B page B2-12 for more
+     // information.
+     "dsb ish \n\t"
+     // Invalidate every line of the I cache containing the target data.
+     "1: \n\t"
+     // ic : instruction cache maintenance
+     // i : invalidate
+     // va : by address
+     // u : to the point of unification
+     "ic ivau, %[iline] \n\t"
+     "add %[iline], %[iline], %[isize] \n\t"
+     "cmp %[iline], %[end] \n\t"
+     "b.lt 1b \n\t"
+     // Barrier to make sure the effect of the code above is visible to the rest
+     // of the world.
+     "dsb ish \n\t"
+     // Barrier to ensure any prefetching which happened before this code is
+     // discarded.
+     // isb : Instruction Synchronisation Barrier
+     "isb \n\t"
+     : [dline] "+r" (dstart),
+       [iline] "+r" (istart)
+     : [dsize] "r" (dsize),
+       [isize] "r" (isize),
+       [end] "r" (end)
+     // This code does not write to memory but without the dependency gcc might
+     // move this code before the code is generated.
+     : "cc", "memory"
+   );
+
 #  elif defined(VGA_mips32) || defined(VGA_mips64)
    SysRes sres = VG_(do_syscall3)(__NR_cacheflush, (UWord) ptr,
                                  (UWord) nbytes, (UWord) 3);
diff --git a/coregrind/m_machine.c b/coregrind/m_machine.c
index 28a180e..ee5c78e 100644
--- a/coregrind/m_machine.c
+++ b/coregrind/m_machine.c
@@ -97,6 +97,11 @@
       = VG_(threads)[tid].arch.vex.guest_R11;
    regs->misc.ARM.r7
       = VG_(threads)[tid].arch.vex.guest_R7;
+#  elif defined(VGA_arm64)
+   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
+   regs->r_sp = VG_(threads)[tid].arch.vex.guest_SP;
+   regs->misc.ARM64.x29 = VG_(threads)[tid].arch.vex.guest_X29;
+   regs->misc.ARM64.x30 = VG_(threads)[tid].arch.vex.guest_X30;
 #  elif defined(VGA_s390x)
    regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_IA;
    regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_SP;
@@ -127,38 +132,6 @@
 #  endif
 }
 
-
-void VG_(set_syscall_return_shadows) ( ThreadId tid,
-                                       /* shadow vals for the result */
-                                       UWord s1res, UWord s2res,
-                                       /* shadow vals for the error val */
-                                       UWord s1err, UWord s2err )
-{
-#  if defined(VGP_x86_linux)
-   VG_(threads)[tid].arch.vex_shadow1.guest_EAX = s1res;
-   VG_(threads)[tid].arch.vex_shadow2.guest_EAX = s2res;
-#  elif defined(VGP_amd64_linux)
-   VG_(threads)[tid].arch.vex_shadow1.guest_RAX = s1res;
-   VG_(threads)[tid].arch.vex_shadow2.guest_RAX = s2res;
-#  elif defined(VGP_ppc32_linux) || defined(VGP_ppc64_linux)
-   VG_(threads)[tid].arch.vex_shadow1.guest_GPR3 = s1res;
-   VG_(threads)[tid].arch.vex_shadow2.guest_GPR3 = s2res;
-#  elif defined(VGP_arm_linux)
-   VG_(threads)[tid].arch.vex_shadow1.guest_R0 = s1res;
-   VG_(threads)[tid].arch.vex_shadow2.guest_R0 = s2res;
-#  elif defined(VGO_darwin)
-   // GrP fixme darwin syscalls may return more values (2 registers plus error)
-#  elif defined(VGP_s390x_linux)
-   VG_(threads)[tid].arch.vex_shadow1.guest_r2 = s1res;
-   VG_(threads)[tid].arch.vex_shadow2.guest_r2 = s2res;
-#  elif defined(VGP_mips32_linux) || defined(VGP_mips64_linux)
-   VG_(threads)[tid].arch.vex_shadow1.guest_r2 = s1res;
-   VG_(threads)[tid].arch.vex_shadow2.guest_r2 = s2res;
-#  else
-#    error "Unknown plat"
-#  endif
-}
-
 void
 VG_(get_shadow_regs_area) ( ThreadId tid, 
                             /*DST*/UChar* dst,
@@ -339,6 +312,9 @@
    (*f)(tid, "r29", vex->guest_r29);
    (*f)(tid, "r30", vex->guest_r30);
    (*f)(tid, "r31", vex->guest_r31);
+#elif defined(VGA_arm64)
+   (void)vex; /* temporarily avoid unused-var warning from gcc */
+   I_die_here;
 #else
 #  error Unknown arch
 #endif
@@ -1193,7 +1169,7 @@
 
 #elif defined(VGA_s390x)
 
-#include "libvex_s390x_common.h"
+#  include "libvex_s390x_common.h"
 
    {
      /* Instruction set detection code borrowed from ppc above. */
@@ -1440,6 +1416,18 @@
      return True;
    }
 
+#elif defined(VGA_arm64)
+   {
+     va = VexArchARM64;
+
+     /* So far there are no variants. */
+     vai.hwcaps = 0;
+
+     VG_(machine_get_cache_info)(&vai);
+
+     return True;
+   }
+
 #elif defined(VGA_mips32)
    {
      va = VexArchMIPS32;
@@ -1649,6 +1637,10 @@
       assume we always do. */
    return 16;
 
+#  elif defined(VGA_arm64)
+   /* ARM64 always has Neon, AFAICS. */
+   return 16;
+
 #  elif defined(VGA_mips32)
    /* The guest state implies 4, but that can't really be true, can
       it? */
@@ -1671,7 +1663,7 @@
       || defined(VGP_arm_linux)                           \
       || defined(VGP_ppc32_linux) || defined(VGO_darwin)  \
       || defined(VGP_s390x_linux) || defined(VGP_mips32_linux) \
-      || defined(VGP_mips64_linux)
+      || defined(VGP_mips64_linux) || defined(VGP_arm64_linux)
    return f;
 #  elif defined(VGP_ppc64_linux)
    /* ppc64-linux uses the AIX scheme, in which f is a pointer to a
diff --git a/coregrind/m_main.c b/coregrind/m_main.c
index 41934f1..82a5dde 100644
--- a/coregrind/m_main.c
+++ b/coregrind/m_main.c
@@ -2023,6 +2023,8 @@
       iters = 5;
 #     elif defined(VGP_arm_linux)
       iters = 5;
+#     elif defined(VGP_arm64_linux)
+      iters = 5;
 #     elif defined(VGP_s390x_linux)
       iters = 10;
 #     elif defined(VGP_mips32_linux) || defined(VGP_mips64_linux)
@@ -2910,6 +2912,31 @@
     "\t.word "VG_STRINGIFY(VG_STACK_GUARD_SZB)"\n"
     "\t.word "VG_STRINGIFY(VG_STACK_ACTIVE_SZB)"\n"
 );
+#elif defined(VGP_arm64_linux)
+asm("\n"
+    "\t.text\n"
+    "\t.align 2\n"
+    "\t.type _start,#function\n"
+    "\t.global _start\n"
+    "_start:\n"
+    "\tadrp x0, vgPlain_interim_stack\n"
+    "\tadd  x0, x0, :lo12:vgPlain_interim_stack\n"
+    // The next 2 assume that VG_STACK_GUARD_SZB fits in 32 bits
+    "\tmov  x1, (("VG_STRINGIFY(VG_STACK_GUARD_SZB)") >> 0) & 0xFFFF\n"
+    "\tmovk x1, (("VG_STRINGIFY(VG_STACK_GUARD_SZB)") >> 16) & 0xFFFF,"
+                " lsl 16\n"
+    "\tadd  x0, x0, x1\n"
+    // The next 2 assume that VG_STACK_ACTIVE_SZB fits in 32 bits
+    "\tmov  x1, (("VG_STRINGIFY(VG_STACK_ACTIVE_SZB)") >> 0) & 0xFFFF\n"
+    "\tmovk x1, (("VG_STRINGIFY(VG_STACK_ACTIVE_SZB)") >> 16) & 0xFFFF,"
+                " lsl 16\n"
+    "\tadd  x0, x0, x1\n"
+    "\tand  x0, x0, -16\n"
+    "\tmov  x1, sp\n"
+    "\tmov  sp, x0\n"
+    "\tmov  x0, x1\n"
+    "\tb _start_in_C_linux\n"
+);
 #elif defined(VGP_mips32_linux)
 asm("\n"
     "\t.type _gp_disp,@object\n"
@@ -3035,7 +3062,8 @@
 
    the_iicii.sp_at_startup = (Addr)pArgc;
 
-#  if defined(VGP_ppc32_linux) || defined(VGP_ppc64_linux)
+#  if defined(VGP_ppc32_linux) || defined(VGP_ppc64_linux) \
+      || defined(VGP_arm64_linux)
    {
       /* ppc/ppc64 can be configured with different page sizes.
          Determine this early.  This is an ugly hack and really should
diff --git a/coregrind/m_options.c b/coregrind/m_options.c
index fcc8e21..4aaa52c 100644
--- a/coregrind/m_options.c
+++ b/coregrind/m_options.c
@@ -48,7 +48,8 @@
 Int    VG_(clo_error_exitcode) = 0;
 
 #if defined(VGPV_arm_linux_android) || defined(VGPV_x86_linux_android) \
-    || defined(VGPV_mips32_linux_android)
+    || defined(VGPV_mips32_linux_android) \
+    || defined(VGP_arm64_linux) // temporarily disabled on arm64-linux
 VgVgdb VG_(clo_vgdb)           = Vg_VgdbNo; // currently disabled on Android
 #else
 VgVgdb VG_(clo_vgdb)           = Vg_VgdbYes;
diff --git a/coregrind/m_redir.c b/coregrind/m_redir.c
index 2832e5c..1453b0b 100644
--- a/coregrind/m_redir.c
+++ b/coregrind/m_redir.c
@@ -1318,7 +1318,27 @@
          complain_about_stripped_glibc_ldso
       );
    }
-   /* nothing so far */
+
+#  elif defined(VGP_arm64_linux)
+   /* If we're using memcheck, use these intercepts right from
+      the start, otherwise ld.so makes a lot of noise. */
+   if (0==VG_(strcmp)("Memcheck", VG_(details).name)) {
+   //   add_hardwired_spec(
+   //      "ld-linux.so.3", "strlen",
+   //      (Addr)&VG_(arm_linux_REDIR_FOR_strlen),
+   //      complain_about_stripped_glibc_ldso
+   //   );
+   //   //add_hardwired_spec(
+   //   //   "ld-linux.so.3", "index",
+   //   //   (Addr)&VG_(arm_linux_REDIR_FOR_index),
+   //   //   NULL 
+   //   //);
+   //   add_hardwired_spec(
+   //      "ld-linux.so.3", "memcpy",
+   //      (Addr)&VG_(arm_linux_REDIR_FOR_memcpy),
+   //      complain_about_stripped_glibc_ldso
+   //   );
+   }
 
 #  elif defined(VGP_x86_darwin)
    /* If we're using memcheck, use these intercepts right from
diff --git a/coregrind/m_scheduler/scheduler.c b/coregrind/m_scheduler/scheduler.c
index 87c7d0e..2a0bd39 100644
--- a/coregrind/m_scheduler/scheduler.c
+++ b/coregrind/m_scheduler/scheduler.c
@@ -790,12 +790,21 @@
    vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex_shadow2.guest_D1));
 #  endif
 
+#  if defined(VGA_arm64)
+   vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex.guest_X0));
+   vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex_shadow1.guest_X0));
+   vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex_shadow2.guest_X0));
+   vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex.guest_Q0));
+   vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow1.guest_Q0));
+   vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow2.guest_Q0));
+#  endif
+
 #  if defined(VGA_s390x)
    /* no special requirements */
 #  endif
 
 #  if defined(VGA_mips32) || defined(VGA_mips64)
-  /* no special requirements */
+   /* no special requirements */
 #  endif
 }
 
@@ -1598,6 +1607,9 @@
 #elif defined(VGA_arm)
 #  define VG_CLREQ_ARGS       guest_R4
 #  define VG_CLREQ_RET        guest_R3
+#elif defined(VGA_arm64)
+#  define VG_CLREQ_ARGS       guest_X4
+#  define VG_CLREQ_RET        guest_X3
 #elif defined (VGA_s390x)
 #  define VG_CLREQ_ARGS       guest_r2
 #  define VG_CLREQ_RET        guest_r3
diff --git a/coregrind/m_sigframe/sigframe-arm64-linux.c b/coregrind/m_sigframe/sigframe-arm64-linux.c
new file mode 100644
index 0000000..876fef4
--- /dev/null
+++ b/coregrind/m_sigframe/sigframe-arm64-linux.c
@@ -0,0 +1,337 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Create/destroy signal delivery frames.                       ---*/
+/*---                                       sigframe-arm64-linux.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, a dynamic binary instrumentation
+   framework.
+
+   Copyright (C) 2013-2013 OpenWorks
+      info@open-works.net
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#if defined(VGP_arm64_linux)
+
+#include "pub_core_basics.h"
+#include "pub_core_vki.h"
+//ZZ #include "pub_core_vkiscnums.h"
+#include "pub_core_libcsetjmp.h"    // to keep _threadstate.h happy
+#include "pub_core_threadstate.h"
+#include "pub_core_aspacemgr.h"
+#include "pub_core_libcbase.h"
+#include "pub_core_libcassert.h"
+#include "pub_core_libcprint.h"
+#include "pub_core_machine.h"
+#include "pub_core_options.h"
+#include "pub_core_sigframe.h"
+#include "pub_core_signals.h"
+#include "pub_core_tooliface.h"
+#include "pub_core_trampoline.h"
+//ZZ #include "pub_core_transtab.h"      // VG_(discard_translations)
+
+
+/* This uses the hack of dumping the vex guest state along with both
+   shadows in the frame, and restoring it afterwards from there,
+   rather than pulling it out of the ucontext.  That means that signal
+   handlers which modify the ucontext and then return, expecting their
+   modifications to take effect, will have those modifications
+   ignored.  This could be fixed properly with an hour or so more
+   effort. */
+
+/* This also always does the 'has siginfo' behaviour whether or
+   not it is requested. */
+
+struct vg_sig_private {
+   UInt magicPI;                   /* 0x31415927; asserted at frame destroy */
+   UInt sigNo_private;             /* signal number the frame was built for */
+   VexGuestARM64State vex;         /* guest state saved at frame creation   */
+   VexGuestARM64State vex_shadow1; /* saved copy of the first shadow state  */
+   VexGuestARM64State vex_shadow2; /* saved copy of the second shadow state */
+};
+
+struct sigframe {
+   struct vki_ucontext uc;    /* filled in by synth_ucontext() */
+   unsigned long retcode[2];  /* not referenced elsewhere in this file */
+   struct vg_sig_private vp;  /* private saved state, set by build_sigframe() */
+};
+
+struct rt_sigframe {
+   vki_siginfo_t info;   /* copy of the siginfo given to VG_(sigframe_create) */
+   struct sigframe sig;  /* synthetic ucontext plus private saved state */
+};
+
+/* Extend the client stack down to 'addr' for a signal frame of 'size' bytes.
+   On failure, complain, force a default-action SIGSEGV and return False. */
+static Bool extend ( ThreadState *tst, Addr addr, SizeT size )
+{
+   ThreadId        tid = tst->tid;
+   NSegment const* stackseg = NULL;
+
+   if (VG_(extend_stack)(addr, tst->client_stack_szB)) {
+      stackseg = VG_(am_find_nsegment)(addr);
+      if (0 && stackseg)
+         VG_(printf)("frame=%#lx seg=%#lx-%#lx\n",
+                     addr, stackseg->start, stackseg->end);
+   }
+
+   if (stackseg == NULL || !stackseg->hasR || !stackseg->hasW) {
+      VG_(message)(Vg_UserMsg,
+         "Can't extend stack to %#lx during signal delivery for thread %d:\n",
+         addr, tid);
+      if (stackseg == NULL)
+         VG_(message)(Vg_UserMsg, "  no stack segment\n");
+      else
+         VG_(message)(Vg_UserMsg, "  too small or bad protection modes\n");
+
+      /* set SIGSEGV to default handler */
+      VG_(set_default_handler)(VKI_SIGSEGV);
+      VG_(synth_fault_mapping)(tid, addr);
+
+      /* The whole process should be about to die, since the default
+         action of SIGSEGV is to kill the whole process. */
+      return False;
+   }
+
+   /* Tell tools that the whole frame, plus the redzone below it, now exists. */
+   VG_TRACK( new_mem_stack_signal, addr - VG_STACK_REDZONE_SZB,
+             size + VG_STACK_REDZONE_SZB, tid );
+
+   return True;
+}
+
+/* Build a synthetic ucontext in *uc from the thread's current guest state. */
+static void synth_ucontext( ThreadId tid, const vki_siginfo_t *si,
+                            UWord trapno, UWord err, const vki_sigset_t *set,
+                            struct vki_ucontext *uc)
+{
+   ThreadState           *thr  = VG_(get_ThreadState)(tid);
+   struct vki_sigcontext *mctx = &uc->uc_mcontext;
+
+   /* Start from all zeroes, then fill in what we know. */
+   VG_(memset)(uc, 0, sizeof(*uc));
+   uc->uc_stack   = thr->altstack;
+   uc->uc_sigmask = *set;
+   uc->uc_link    = 0;
+   uc->uc_flags   = 0;
+
+   /* Integer registers X0 .. X30, then SP and PC. */
+#  define COPY_X(n)  mctx->regs[n] = thr->arch.vex.guest_X##n
+   COPY_X(0);   COPY_X(1);   COPY_X(2);   COPY_X(3);
+   COPY_X(4);   COPY_X(5);   COPY_X(6);   COPY_X(7);
+   COPY_X(8);   COPY_X(9);   COPY_X(10);  COPY_X(11);
+   COPY_X(12);  COPY_X(13);  COPY_X(14);  COPY_X(15);
+   COPY_X(16);  COPY_X(17);  COPY_X(18);  COPY_X(19);
+   COPY_X(20);  COPY_X(21);  COPY_X(22);  COPY_X(23);
+   COPY_X(24);  COPY_X(25);  COPY_X(26);  COPY_X(27);
+   COPY_X(28);  COPY_X(29);  COPY_X(30);
+#  undef COPY_X
+   mctx->sp     = thr->arch.vex.guest_SP;
+   mctx->pc     = thr->arch.vex.guest_PC;
+   mctx->pstate = 0; /* slack .. could do better */
+
+   /* trapno and err are not recorded anywhere at present. */
+   mctx->fault_address = (ULong)si->_sifields._sigfault._addr;
+}
+
+
+/* Fill in *frame: a synthetic ucontext (via synth_ucontext), followed
+   by a private snapshot of the guest state and both shadows for
+   VG_(sigframe_destroy) to restore.  handler, flags and restorer are
+   unused; siguc currently has no effect on the result. */
+static void build_sigframe(ThreadState *tst,
+                           struct sigframe *frame,
+                           const vki_siginfo_t *siginfo,
+                           const struct vki_ucontext *siguc,
+                           void *handler, UInt flags,
+                           const vki_sigset_t *mask,
+                           void *restorer)
+{
+   const Int sigNo = siginfo->si_signo;
+   struct vg_sig_private *saved = &frame->vp;
+
+   /* These would come from siguc->uc_mcontext, but are zero for now. */
+   const UWord trapno = 0;
+   const UWord err    = 0;
+
+   /* Bracket the write to everything in the frame ahead of 'vp'. */
+   VG_TRACK( pre_mem_write, Vg_CoreSignal, tst->tid, "signal handler frame",
+             (Addr)frame, offsetof(struct sigframe, vp));
+
+   synth_ucontext(tst->tid, siginfo, trapno, err, mask, &frame->uc);
+
+   VG_TRACK( post_mem_write, Vg_CoreSignal, tst->tid,
+             (Addr)frame, offsetof(struct sigframe, vp));
+
+   /* Private area: sentinel, signal number, then the three guest states. */
+   saved->magicPI       = 0x31415927;
+   saved->sigNo_private = sigNo;
+   saved->vex           = tst->arch.vex;
+   saved->vex_shadow1   = tst->arch.vex_shadow1;
+   saved->vex_shadow2   = tst->arch.vex_shadow2;
+}
+
+
+/* EXPORTED: build a signal frame below sp_top_of_frame and enter 'handler'. */
+void VG_(sigframe_create)( ThreadId tid, 
+                           Addr sp_top_of_frame,
+                           const vki_siginfo_t *siginfo,
+                           const struct vki_ucontext *siguc,
+                           void *handler, 
+                           UInt flags,
+                           const vki_sigset_t *mask,
+                           void *restorer )
+{
+   ThreadState *tst;
+   Addr sp    = sp_top_of_frame;
+   Int  sigNo = siginfo->si_signo;
+   UInt size;
+
+   tst = VG_(get_ThreadState)(tid);
+
+   size = sizeof(struct rt_sigframe);
+   /* Make room for the frame, keeping the new SP 16-aligned. */
+   sp -= size;
+   sp = VG_ROUNDDN(sp, 16);
+
+   if (!extend(tst, sp, size))
+      return; // Give up.  No idea if this is correct
+   /* The frame sits at the new, lower stack pointer. */
+   struct rt_sigframe *rsf = (struct rt_sigframe *)sp;
+
+   /* Track our writes to siginfo */
+   VG_TRACK( pre_mem_write, Vg_CoreSignal, tst->tid,  /* VVVVV */
+             "signal handler siginfo", (Addr)rsf, 
+             offsetof(struct rt_sigframe, sig));
+
+   VG_(memcpy)(&rsf->info, siginfo, sizeof(vki_siginfo_t));
+   /* For SIGILL with a positive si_code, report the guest PC as the address. */
+   if (sigNo == VKI_SIGILL && siginfo->si_code > 0) {
+      rsf->info._sifields._sigfault._addr
+        = (Addr*)(tst)->arch.vex.guest_PC;
+   }
+   VG_TRACK( post_mem_write, Vg_CoreSignal, tst->tid, /* ^^^^^ */
+         (Addr)rsf, offsetof(struct rt_sigframe, sig));
+
+   build_sigframe(tst, &rsf->sig, siginfo, siguc,
+                       handler, flags, mask, restorer);
+   tst->arch.vex.guest_X1 = (Addr)&rsf->info;
+   tst->arch.vex.guest_X2 = (Addr)&rsf->sig.uc;
+
+   VG_(set_SP)(tid, sp);
+   VG_TRACK( post_reg_write, Vg_CoreSignal, tid, VG_O_STACK_PTR,
+             sizeof(Addr));
+   tst->arch.vex.guest_X0 = sigNo; 
+   /* X30: the caller's restorer if SA_RESTORER is set, else our substitute. */
+   if (flags & VKI_SA_RESTORER)
+       tst->arch.vex.guest_X30 = (Addr)restorer; 
+   else
+       tst->arch.vex.guest_X30
+          = (Addr)&VG_(arm64_linux_SUBST_FOR_rt_sigreturn);
+   /* Point the guest PC at the handler. */
+   tst->arch.vex.guest_PC = (Addr)handler;
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Destroying signal frames                             ---*/
+/*------------------------------------------------------------*/
+
+/* EXPORTED: tear down the frame at the guest SP and restore saved state. */
+void VG_(sigframe_destroy)( ThreadId tid, Bool isRT )
+{
+   ThreadState *tst;
+   struct vg_sig_private *priv;
+   Addr sp;
+   UInt frame_size;
+//ZZ    struct vki_sigcontext *mc;
+   Int sigNo;
+   Bool has_siginfo = isRT;
+
+   vg_assert(VG_(is_valid_tid)(tid));
+   tst = VG_(get_ThreadState)(tid);
+   sp = tst->arch.vex.guest_SP;
+   /* The frame is always read as an rt_sigframe, whatever isRT says. */
+//ZZ    if (has_siginfo) {
+      struct rt_sigframe *frame = (struct rt_sigframe *)sp;
+      frame_size = sizeof(*frame);
+      //mc = &frame->sig.uc.uc_mcontext;
+      priv = &frame->sig.vp;
+      vg_assert(priv->magicPI == 0x31415927);
+      tst->sig_mask = frame->sig.uc.uc_sigmask;
+//ZZ    } else {
+//ZZ       struct sigframe *frame = (struct sigframe *)sp;
+//ZZ       frame_size = sizeof(*frame);
+//ZZ       mc = &frame->uc.uc_mcontext;
+//ZZ       priv = &frame->vp;
+//ZZ       vg_assert(priv->magicPI == 0x31415927);
+//ZZ       tst->sig_mask = frame->uc.uc_sigmask;
+//ZZ       //VG_(printf)("Setting signmask to %08x%08x\n",
+//ZZ       //            tst->sig_mask[0],tst->sig_mask[1]);
+//ZZ    }
+   tst->tmp_sig_mask = tst->sig_mask;
+
+   sigNo = priv->sigNo_private;
+
+//ZZ     //XXX: restore regs
+//ZZ #  define REST(reg,REG)  tst->arch.vex.guest_##REG = mc->arm_##reg;
+//ZZ    REST(r0,R0);
+//ZZ    REST(r1,R1);
+//ZZ    REST(r2,R2);
+//ZZ    REST(r3,R3);
+//ZZ    REST(r4,R4);
+//ZZ    REST(r5,R5);
+//ZZ    REST(r6,R6);
+//ZZ    REST(r7,R7);
+//ZZ    REST(r8,R8);
+//ZZ    REST(r9,R9);
+//ZZ    REST(r10,R10);
+//ZZ    REST(fp,R11);
+//ZZ    REST(ip,R12);
+//ZZ    REST(sp,R13);
+//ZZ    REST(lr,R14);
+//ZZ    REST(pc,R15T);
+//ZZ #  undef REST
+
+   /* Uh, the next line makes all the REST() above pointless. */
+   tst->arch.vex         = priv->vex;
+
+   tst->arch.vex_shadow1 = priv->vex_shadow1;
+   tst->arch.vex_shadow2 = priv->vex_shadow2;
+   /* Tell tools the frame, and the redzone below it, is gone. */
+   VG_TRACK( die_mem_stack_signal, sp - VG_STACK_REDZONE_SZB,
+             frame_size + VG_STACK_REDZONE_SZB );
+
+   if (VG_(clo_trace_signals))
+      VG_(message)(Vg_DebugMsg,
+                   "vg_pop_signal_frame (thread %d): "
+                   "isRT=%d valid magic; PC=%#llx\n",
+                   tid, has_siginfo, tst->arch.vex.guest_PC);
+
+   /* tell the tools */
+   VG_TRACK( post_deliver_signal, tid, sigNo );
+}
+
+#endif // defined(VGP_arm64_linux)
+
+/*--------------------------------------------------------------------*/
+/*--- end                                   sigframe-arm64-linux.c ---*/
+/*--------------------------------------------------------------------*/
diff --git a/coregrind/m_signals.c b/coregrind/m_signals.c
index f4f04eb..40bd7ea 100644
--- a/coregrind/m_signals.c
+++ b/coregrind/m_signals.c
@@ -386,6 +386,19 @@
         (srP)->misc.ARM.r7  = (uc)->uc_mcontext.arm_r7; \
       }
 
+#elif defined(VGP_arm64_linux)
+#  define VG_UCONTEXT_INSTR_PTR(uc)       ((UWord)((uc)->uc_mcontext.pc))
+#  define VG_UCONTEXT_STACK_PTR(uc)       ((UWord)((uc)->uc_mcontext.sp))
+#  define VG_UCONTEXT_SYSCALL_SYSRES(uc)                        \
+      /* Convert the value in uc_mcontext.regs[0] into a SysRes. */ \
+      VG_(mk_SysRes_arm64_linux)( (uc)->uc_mcontext.regs[0] )
+#  define VG_UCONTEXT_TO_UnwindStartRegs(srP, uc)           \
+      { (srP)->r_pc = (uc)->uc_mcontext.pc;                 \
+        (srP)->r_sp = (uc)->uc_mcontext.sp;                 \
+        (srP)->misc.ARM64.x29 = (uc)->uc_mcontext.regs[29]; \
+        (srP)->misc.ARM64.x30 = (uc)->uc_mcontext.regs[30]; \
+      }
+
 #elif defined(VGP_x86_darwin)
 
    static inline Addr VG_UCONTEXT_INSTR_PTR( void* ucV ) {
@@ -862,6 +875,15 @@
    "    svc  0x00000000\n" \
    ".previous\n"
 
+#elif defined(VGP_arm64_linux)
+#  define _MY_SIGRETURN(name) \
+   ".text\n" \
+   ".globl my_sigreturn\n" \
+   "my_sigreturn:\n\t" \
+   "    mov  x8, #" #name "\n\t" \
+   "    svc  0x0\n" \
+   ".previous\n"
+
 #elif defined(VGP_x86_darwin)
 #  define _MY_SIGRETURN(name) \
    ".text\n" \
@@ -980,8 +1002,7 @@
 #        if !defined(VGP_ppc32_linux) && \
             !defined(VGP_x86_darwin) && !defined(VGP_amd64_darwin) && \
             !defined(VGP_mips32_linux) && !defined(VGP_mips64_linux)
-         vg_assert(ksa_old.sa_restorer 
-                   == my_sigreturn);
+         vg_assert(ksa_old.sa_restorer == my_sigreturn);
 #        endif
          VG_(sigaddset)( &ksa_old.sa_mask, VKI_SIGKILL );
          VG_(sigaddset)( &ksa_old.sa_mask, VKI_SIGSTOP );
diff --git a/coregrind/m_stacktrace.c b/coregrind/m_stacktrace.c
index 53022d3..f998f4c 100644
--- a/coregrind/m_stacktrace.c
+++ b/coregrind/m_stacktrace.c
@@ -1034,6 +1034,24 @@
 
 #endif
 
+/* ------------------------ arm64 ------------------------- */
+
+#if defined(VGP_arm64_linux)
+
+/* Only the starting frame is recorded; no unwinding is attempted. */
+UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
+                               /*OUT*/Addr* ips, UInt max_n_ips,
+                               /*OUT*/Addr* sps, /*OUT*/Addr* fps,
+                               UnwindStartRegs* startRegs, Addr fp_max_orig )
+{
+   ips[0] = startRegs->r_pc;
+   if (sps) { sps[0] = startRegs->r_sp; }
+   if (fps) { fps[0] = startRegs->misc.ARM64.x29; }
+   return 1;
+}
+
+#endif
+
 /* ------------------------ s390x ------------------------- */
 
 #if defined(VGP_s390x_linux)
diff --git a/coregrind/m_syscall.c b/coregrind/m_syscall.c
index 6f6c4cb..161be15 100644
--- a/coregrind/m_syscall.c
+++ b/coregrind/m_syscall.c
@@ -128,6 +128,18 @@
    return res;
 }
 
+/* Convert a raw arm64-linux syscall return value into a SysRes: values
+   in -4095 .. -1 are errors, stored negated; anything else is success. */
+SysRes VG_(mk_SysRes_arm64_linux) ( Long val ) {
+   SysRes res;
+
+   res._valEx   = 0; /* unused except on mips-linux */
+   res._isError = val >= -4095 && val <= -1;
+   /* Keep the positive error number for failures, the value itself otherwise. */
+   res._val     = res._isError ? (ULong)(-val) : (ULong)val;
+   return res;
+}
+
 /* MIPS uses a3 != 0 to flag an error */
 SysRes VG_(mk_SysRes_mips32_linux) ( UWord v0, UWord v1, UWord a3 ) {
    SysRes res;
@@ -437,6 +449,34 @@
 ".previous\n"
 );
 
+#elif defined(VGP_arm64_linux)
+/* I think the conventions are:
+   args  in x0 x1 x2 x3 x4 x5
+   sysno in x8
+   return value in x0, w/ same conventions as x86-linux, viz x0 in
+   -4095 .. -1 is an error value.  All other values are success
+   values.
+
+   x0 to x5 remain unchanged, but syscall_no is in x6 and needs
+   to be moved to x8 (??)
+*/
+extern UWord do_syscall_WRK (
+          UWord a1, UWord a2, UWord a3,
+          UWord a4, UWord a5, UWord a6,
+          UWord syscall_no
+       );
+asm(
+".text\n"
+".globl do_syscall_WRK\n"
+"do_syscall_WRK:\n"
+"        mov x8, x6\n"
+"        mov x6, 0\n"
+"        mov x7, 0\n"
+"        svc 0\n"
+"        ret\n"
+".previous\n"
+);
+
 #elif defined(VGP_x86_darwin)
 
 /* Incoming args (syscall number + up to 8 args) come in on the stack
@@ -696,6 +736,10 @@
    UWord val = do_syscall_WRK(a1,a2,a3,a4,a5,a6,sysno);
    return VG_(mk_SysRes_arm_linux)( val );
 
+#  elif defined(VGP_arm64_linux)
+   UWord val = do_syscall_WRK(a1,a2,a3,a4,a5,a6,sysno);
+   return VG_(mk_SysRes_arm64_linux)( val );
+
 #  elif defined(VGP_x86_darwin)
    UInt  wLO = 0, wHI = 0, err = 0;
    ULong u64;
diff --git a/coregrind/m_syswrap/priv_types_n_macros.h b/coregrind/m_syswrap/priv_types_n_macros.h
index e2edf95..325c53a 100644
--- a/coregrind/m_syswrap/priv_types_n_macros.h
+++ b/coregrind/m_syswrap/priv_types_n_macros.h
@@ -92,7 +92,7 @@
 #     if defined(VGP_x86_linux) || defined(VGP_amd64_linux) \
          || defined(VGP_ppc32_linux) || defined(VGP_ppc64_linux) \
          || defined(VGP_arm_linux) || defined(VGP_s390x_linux) \
-         || defined(VGP_mips64_linux)
+         || defined(VGP_mips64_linux) || defined(VGP_arm64_linux)
       Int o_arg1;
       Int o_arg2;
       Int o_arg3;
diff --git a/coregrind/m_syswrap/syscall-arm-linux.S b/coregrind/m_syswrap/syscall-arm-linux.S
index 1d1b266..3839299 100644
--- a/coregrind/m_syswrap/syscall-arm-linux.S
+++ b/coregrind/m_syswrap/syscall-arm-linux.S
@@ -112,7 +112,7 @@
    ldr r3, [sp, #32] /* nsigwords */
    svc 0x00000000
 
-  cmp r0, #0
+   cmp r0, #0
    blt 7f
    add sp, sp, #4 /* r0 contains return value */
 
diff --git a/coregrind/m_syswrap/syscall-arm64-linux.S b/coregrind/m_syswrap/syscall-arm64-linux.S
new file mode 100644
index 0000000..3fbf219
--- /dev/null
+++ b/coregrind/m_syswrap/syscall-arm64-linux.S
@@ -0,0 +1,180 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Support for doing system calls.        syscall-arm64-linux.S ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+  This file is part of Valgrind, a dynamic binary instrumentation
+  framework.
+
+  Copyright (C) 2013-2013 OpenWorks
+     info@open-works.net
+
+  This program is free software; you can redistribute it and/or
+  modify it under the terms of the GNU General Public License as
+  published by the Free Software Foundation; either version 2 of the
+  License, or (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+  02111-1307, USA.
+
+  The GNU General Public License is contained in the file COPYING.
+*/
+
+#if defined(VGP_arm64_linux)
+
+#include "pub_core_basics_asm.h"
+#include "pub_core_vkiscnums_asm.h"
+#include "libvex_guest_offsets.h"
+      
+
+/*----------------------------------------------------------------*/
+/*
+        Perform a syscall for the client.  This will run a syscall
+        with the client's specific per-thread signal mask.
+
+        The structure of this function is such that, if the syscall is
+        interrupted by a signal, we can determine exactly what
+        execution state we were in with respect to the execution of
+        the syscall by examining the value of IP in the signal
+        handler.  This means that we can always do the appropriate
+        thing to precisely emulate the kernel's signal/syscall
+        interactions.
+
+        The syscall number is taken from the argument, even though it
+        should also be in guest_state->guest_X8.  The syscall result
+        is written back to guest_state->guest_X0 on completion.
+
+        Returns 0 if the syscall was successfully called (even if the
+        syscall itself failed), or a nonzero error code in the lowest
+        8 bits if one of the sigprocmasks failed (there's no way to
+        determine which one failed).  And there's no obvious way to
+        recover from that either, but nevertheless we want to know.
+
+        VG_(fixup_guest_state_after_syscall_interrupted) does the
+        thread state fixup in the case where we were interrupted by a
+        signal.
+
+        Prototype:
+
+   UWord ML_(do_syscall_for_client_WRK)(
+              Int syscallno,                 // x0
+              void* guest_state,             // x1
+              const vki_sigset_t *sysmask,   // x2
+              const vki_sigset_t *postmask,  // x3
+              Int nsigwords)                 // x4
+*/
+/* from vki-arm64-linux.h */
+#define VKI_SIG_SETMASK 2
+
+.globl ML_(do_syscall_for_client_WRK)
+ML_(do_syscall_for_client_WRK):
+
+   /* Stash callee-saves and our args on the stack.  Afterwards:
+      [sp,#0] syscallno, [sp,#8] guest_state, [sp,#16] sysmask,
+      [sp,#24] postmask, [sp,#32] nsigwords, [sp,#40] incoming x5 */
+   stp  x29, x30, [sp, #-16]!
+   stp  x27, x28, [sp, #-16]!
+   stp  x25, x26, [sp, #-16]!
+   stp  x23, x24, [sp, #-16]!
+   stp  x21, x22, [sp, #-16]!
+   stp  x19, x20, [sp, #-16]!
+   stp  x4,  x5,  [sp, #-16]!
+   stp  x2,  x3,  [sp, #-16]!
+   stp  x0,  x1,  [sp, #-16]!
+
+1: /* Install sysmask for the syscall; old mask is saved in postmask */
+   mov x8, #__NR_rt_sigprocmask
+   mov x0, #VKI_SIG_SETMASK
+   mov x1, x2 /* sysmask */
+   mov x2, x3 /* postmask */
+   mov x3, x4 /* nsigwords */
+   svc 0x00000000
+   cmp x0, #0 /* this sigprocmask can fail too: report it if so */
+   blt 7f
+
+   ldr x5, [sp, #8] /* saved x1 == guest_state */
+   ldr x8, [sp, #0] /* saved x0 == syscall# */
+   ldr x0, [x5, #OFFSET_arm64_X0]
+   ldr x1, [x5, #OFFSET_arm64_X1]
+   ldr x2, [x5, #OFFSET_arm64_X2]
+   ldr x3, [x5, #OFFSET_arm64_X3]
+   ldr x4, [x5, #OFFSET_arm64_X4]
+   ldr x5, [x5, #OFFSET_arm64_X5] /* last: overwrites the base register */
+
+2: svc 0x00000000
+3:
+   ldr x5, [sp, #8] /* saved x1 == guest_state */
+   str x0, [x5, #OFFSET_arm64_X0]
+
+4: /* Put the signal mask back to postmask; old mask is not wanted */
+   mov x8, #__NR_rt_sigprocmask
+   mov x0, #VKI_SIG_SETMASK
+   ldr x1, [sp, #24] /* saved x3 == postmask */
+   mov x2, #0
+   ldr x3, [sp, #32] /* saved x4 == nsigwords */
+   svc 0x00000000
+   cmp x0, #0
+   blt 7f
+
+5: /* Success: return zero */
+   mov  x0, #0
+   ldp  xzr, x1,  [sp], #16 /* discard saved x0, keeping the result */
+   ldp  x2,  x3,  [sp], #16
+   ldp  x4,  x5,  [sp], #16
+   ldp  x19, x20, [sp], #16
+   ldp  x21, x22, [sp], #16
+   ldp  x23, x24, [sp], #16
+   ldp  x25, x26, [sp], #16
+   ldp  x27, x28, [sp], #16
+   ldp  x29, x30, [sp], #16
+   ret
+
+7: /* Failure: x0 is a negative sigprocmask result; return it | 0x8000 */
+   orr  x0, x0, #0x8000
+   ldp  xzr, x1,  [sp], #16 /* discard saved x0, keeping the result */
+   ldp  x2,  x3,  [sp], #16
+   ldp  x4,  x5,  [sp], #16
+   ldp  x19, x20, [sp], #16
+   ldp  x21, x22, [sp], #16
+   ldp  x23, x24, [sp], #16
+   ldp  x25, x26, [sp], #16
+   ldp  x27, x28, [sp], #16
+   ldp  x29, x30, [sp], #16
+   ret
+
+
+
+.section .rodata
+/* Export the addresses of local labels 1..5 above, so that
+   VG_(fixup_guest_state_after_syscall_interrupted) can tell from an
+   interrupted PC how far the client syscall had progressed */
+
+.globl ML_(blksys_setup)
+.globl ML_(blksys_restart)
+.globl ML_(blksys_complete)
+.globl ML_(blksys_committed)
+.globl ML_(blksys_finished)
+ML_(blksys_setup):      .quad 1b   /* 1: before the first sigprocmask */
+ML_(blksys_restart):    .quad 2b   /* 2: the client's svc itself */
+ML_(blksys_complete):   .quad 3b   /* 3: immediately after that svc */
+ML_(blksys_committed):  .quad 4b   /* 4: result stored, mask not yet restored */
+ML_(blksys_finished):   .quad 5b   /* 5: start of the success return path */
+
+/* Let the linker know we don't need an executable stack */
+.section .note.GNU-stack,"",%progbits
+
+.previous
+
+#endif // defined(VGP_arm64_linux)
+   
+/*--------------------------------------------------------------------*/
+/*--- end                                                          ---*/
+/*--------------------------------------------------------------------*/
diff --git a/coregrind/m_syswrap/syswrap-arm64-linux.c b/coregrind/m_syswrap/syswrap-arm64-linux.c
new file mode 100644
index 0000000..ce948f7
--- /dev/null
+++ b/coregrind/m_syswrap/syswrap-arm64-linux.c
@@ -0,0 +1,1341 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Platform-specific syscalls stuff.    syswrap-arm64-linux.c -----*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, a dynamic binary instrumentation
+   framework.
+
+   Copyright (C) 2013-2013 OpenWorks
+      info@open-works.net
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#if defined(VGP_arm64_linux)
+
+#include "pub_core_basics.h"
+#include "pub_core_vki.h"
+#include "pub_core_vkiscnums.h"
+#include "pub_core_libcsetjmp.h"    // to keep _threadstate.h happy
+#include "pub_core_threadstate.h"
+#include "pub_core_aspacemgr.h"
+//ZZ #include "pub_core_debuglog.h"
+//ZZ #include "pub_core_libcbase.h"
+#include "pub_core_libcassert.h"
+#include "pub_core_libcprint.h"
+//ZZ #include "pub_core_libcproc.h"
+//ZZ #include "pub_core_libcsignal.h"
+#include "pub_core_options.h"
+//ZZ #include "pub_core_scheduler.h"
+#include "pub_core_sigframe.h"      // For VG_(sigframe_destroy)()
+//ZZ #include "pub_core_signals.h"
+#include "pub_core_syscall.h"
+#include "pub_core_syswrap.h"
+#include "pub_core_tooliface.h"
+//ZZ #include "pub_core_stacks.h"        // VG_(register_stack)
+//ZZ #include "pub_core_transtab.h"      // VG_(discard_translations)
+
+#include "priv_types_n_macros.h"
+#include "priv_syswrap-generic.h"   /* for decls of generic wrappers */
+#include "priv_syswrap-linux.h"     /* for decls of linux-ish wrappers */
+//ZZ #include "priv_syswrap-main.h"
+
+
+/* ---------------------------------------------------------------------
+   clone() handling
+   ------------------------------------------------------------------ */
+
+/* Call f(arg1), but first switch stacks, using 'stack' as the new
+   stack, and use 'retaddr' as f's return-to address.  Also, clear the
+   integer registers x1..x28 (bar x9, which carries f) before entering f.*/
+__attribute__((noreturn))
+void ML_(call_on_new_stack_0_1) ( Addr stack,
+                                  Addr retaddr,
+                                  void (*f)(Word),
+                                  Word arg1 );
+//    x0 = stack
+//    x1 = retaddr
+//    x2 = f
+//    x3 = arg1
+asm(
+".text\n"
+".globl vgModuleLocal_call_on_new_stack_0_1\n"
+"vgModuleLocal_call_on_new_stack_0_1:\n"
+"   mov    sp, x0\n\t" /* Switch to the new stack */
+"   mov    x30, x1\n\t" /* Return address (x30 is LR) */
+"   mov    x0, x3\n\t" /* First (and only) argument for f */
+"   mov    x9, x2\n\t" /* 'f': parked in x9, so x9 is not zeroed on entry to f */
+"   mov    x1, #0\n\t" /* Clear our GPRs */
+"   mov    x2, #0\n\t"
+"   mov    x3, #0\n\t"
+"   mov    x4, #0\n\t"
+"   mov    x5, #0\n\t"
+"   mov    x6, #0\n\t"
+"   mov    x7, #0\n\t"
+"   mov    x8, #0\n\t"
+/* don't zero out x9: it holds the branch target */
+"   mov    x10, #0\n\t"
+"   mov    x11, #0\n\t"
+"   mov    x12, #0\n\t"
+"   mov    x13, #0\n\t"
+"   mov    x14, #0\n\t"
+"   mov    x15, #0\n\t"
+"   mov    x16, #0\n\t"
+"   mov    x17, #0\n\t"
+"   mov    x18, #0\n\t"
+"   mov    x19, #0\n\t"
+"   mov    x20, #0\n\t"
+"   mov    x21, #0\n\t"
+"   mov    x22, #0\n\t"
+"   mov    x23, #0\n\t"
+"   mov    x24, #0\n\t"
+"   mov    x25, #0\n\t"
+"   mov    x26, #0\n\t"
+"   mov    x27, #0\n\t"
+"   mov    x28, #0\n\t"
+"   mov    x29, sp\n\t" /* FP = SP, in the absence of better suggestions */
+"   br     x9\n\t" /* Jump (not call) to f; LR already holds retaddr */
+".previous\n"
+);
+
+
+//ZZ #define __NR_CLONE        VG_STRINGIFY(__NR_clone)
+//ZZ #define __NR_EXIT         VG_STRINGIFY(__NR_exit)
+//ZZ 
+//ZZ extern
+//ZZ ULong do_syscall_clone_arm_linux   ( Word (*fn)(void *), 
+//ZZ                                      void* stack, 
+//ZZ                                      Int   flags, 
+//ZZ                                      void* arg,
+//ZZ                                      Int*  child_tid,
+//ZZ                                      Int*  parent_tid,
+//ZZ                                      void* tls );
+//ZZ asm(
+//ZZ ".text\n"
+//ZZ ".globl do_syscall_clone_arm_linux\n"
+//ZZ "do_syscall_clone_arm_linux:\n"
+//ZZ 
+//ZZ /*Setup child stack */
+//ZZ "   str     r0, [r1, #-4]!\n"
+//ZZ "   str     r3, [r1, #-4]!\n"
+//ZZ "   push {r4,r7}\n" 
+//ZZ "   mov r0, r2\n" /* arg1: flags */
+//ZZ /* r1 (arg2) is already our child's stack */
+//ZZ "   ldr r2, [sp, #12]\n" // parent tid
+//ZZ "   ldr r3, [sp, #16]\n" // tls
+//ZZ "   ldr r4, [sp, #8]\n" // Child tid
+//ZZ "   mov r7, #"__NR_CLONE"\n"
+//ZZ "   svc 0x00000000\n"
+//ZZ "   cmp r0, #0\n"
+//ZZ "   beq 1f\n"
+//ZZ 
+//ZZ /* Parent */
+//ZZ "   pop {r4,r7}\n"
+//ZZ "   bx lr\n"
+//ZZ 
+//ZZ "1:\n" /*child*/
+//ZZ "   mov     lr, pc\n"
+//ZZ "   pop     {r0,pc}\n"
+//ZZ /* Retval from child is already in r0 */
+//ZZ "   mov r7, #"__NR_EXIT"\n"
+//ZZ "   svc 0x00000000\n"
+//ZZ /* Urh.. why did exit return? */
+//ZZ "   .long 0\n"
+//ZZ "   .previous\n"
+//ZZ );
+//ZZ 
+//ZZ #undef __NR_CLONE
+//ZZ #undef __NR_EXIT
+//ZZ 
+//ZZ // forward declarations
+//ZZ static void setup_child ( ThreadArchState*, ThreadArchState* );
+//ZZ static void assign_guest_tls(ThreadId ctid, Addr tlsptr);
+//ZZ static SysRes sys_set_tls ( ThreadId tid, Addr tlsptr );
+//ZZ             
+//ZZ /* 
+//ZZ    When a client clones, we need to keep track of the new thread.  This means:
+//ZZ    1. allocate a ThreadId+ThreadState+stack for the thread
+//ZZ 
+//ZZ    2. initialize the thread's new VCPU state
+//ZZ 
+//ZZ    3. create the thread using the same args as the client requested,
+//ZZ    but using the scheduler entrypoint for IP, and a separate stack
+//ZZ    for SP.
+//ZZ  */
+//ZZ static SysRes do_clone ( ThreadId ptid, 
+//ZZ                          UInt flags, Addr sp, 
+//ZZ                          Int *parent_tidptr, 
+//ZZ                          Int *child_tidptr, 
+//ZZ                          Addr child_tls)
+//ZZ {
+//ZZ    const Bool debug = False;
+//ZZ 
+//ZZ    ThreadId ctid = VG_(alloc_ThreadState)();
+//ZZ    ThreadState* ptst = VG_(get_ThreadState)(ptid);
+//ZZ    ThreadState* ctst = VG_(get_ThreadState)(ctid);
+//ZZ    UInt r0;
+//ZZ    UWord *stack;
+//ZZ    NSegment const* seg;
+//ZZ    SysRes res;
+//ZZ    vki_sigset_t blockall, savedmask;
+//ZZ 
+//ZZ    VG_(sigfillset)(&blockall);
+//ZZ 
+//ZZ    vg_assert(VG_(is_running_thread)(ptid));
+//ZZ    vg_assert(VG_(is_valid_tid)(ctid));
+//ZZ 
+//ZZ    stack = (UWord*)ML_(allocstack)(ctid);
+//ZZ 
+//ZZ    if(stack == NULL) {
+//ZZ       res = VG_(mk_SysRes_Error)( VKI_ENOMEM );
+//ZZ       goto out;
+//ZZ    }
+//ZZ 
+//ZZ    setup_child( &ctst->arch, &ptst->arch );
+//ZZ 
+//ZZ    ctst->arch.vex.guest_R0 = 0;
+//ZZ    if(sp != 0)
+//ZZ       ctst->arch.vex.guest_R13 = sp;
+//ZZ 
+//ZZ    ctst->os_state.parent = ptid;
+//ZZ 
+//ZZ    ctst->sig_mask = ptst->sig_mask;
+//ZZ    ctst->tmp_sig_mask = ptst->sig_mask;
+//ZZ 
+//ZZ    /* Start the child with its threadgroup being the same as the
+//ZZ       parent's.  This is so that any exit_group calls that happen
+//ZZ       after the child is created but before it sets its
+//ZZ       os_state.threadgroup field for real (in thread_wrapper in
+//ZZ       syswrap-linux.c), really kill the new thread.  a.k.a this avoids
+//ZZ       a race condition in which the thread is unkillable (via
+//ZZ       exit_group) because its threadgroup is not set.  The race window
+//ZZ       is probably only a few hundred or a few thousand cycles long.
+//ZZ       See #226116. */
+//ZZ    ctst->os_state.threadgroup = ptst->os_state.threadgroup;
+//ZZ 
+//ZZ    seg = VG_(am_find_nsegment)((Addr)sp);
+//ZZ    if (seg && seg->kind != SkResvn) {
+//ZZ       ctst->client_stack_highest_word = (Addr)VG_PGROUNDUP(sp);
+//ZZ       ctst->client_stack_szB = ctst->client_stack_highest_word - seg->start;
+//ZZ    
+//ZZ       VG_(register_stack)(seg->start, ctst->client_stack_highest_word);
+//ZZ    
+//ZZ       if (debug)
+//ZZ          VG_(printf)("tid %d: guessed client stack range %#lx-%#lx\n",
+//ZZ          ctid, seg->start, VG_PGROUNDUP(sp));
+//ZZ    } else {
+//ZZ       VG_(message)(Vg_UserMsg, "!? New thread %d starts with sp+%#lx) unmapped\n", ctid, sp);
+//ZZ       ctst->client_stack_szB  = 0;
+//ZZ    }
+//ZZ 
+//ZZ    vg_assert(VG_(owns_BigLock_LL)(ptid));
+//ZZ    VG_TRACK ( pre_thread_ll_create, ptid, ctid );
+//ZZ 
+//ZZ    if (flags & VKI_CLONE_SETTLS) {
+//ZZ       /* Just assign the tls pointer in the guest TPIDRURO. */
+//ZZ       assign_guest_tls(ctid, child_tls);
+//ZZ    }
+//ZZ     
+//ZZ    flags &= ~VKI_CLONE_SETTLS;
+//ZZ 
+//ZZ    VG_(sigprocmask)(VKI_SIG_SETMASK, &blockall, &savedmask);
+//ZZ 
+//ZZ    r0 = do_syscall_clone_arm_linux(
+//ZZ       ML_(start_thread_NORETURN), stack, flags, &VG_(threads)[ctid],
+//ZZ       child_tidptr, parent_tidptr, NULL
+//ZZ    );
+//ZZ    //VG_(printf)("AFTER SYSCALL, %x and %x  CHILD: %d PARENT: %d\n",child_tidptr, parent_tidptr,*child_tidptr,*parent_tidptr);
+//ZZ     
+//ZZ    res = VG_(mk_SysRes_arm_linux)( r0 );
+//ZZ 
+//ZZ    VG_(sigprocmask)(VKI_SIG_SETMASK, &savedmask, NULL);
+//ZZ 
+//ZZ out:
+//ZZ    if (sr_isError(res)) {
+//ZZ       VG_(cleanup_thread)(&ctst->arch);
+//ZZ       ctst->status = VgTs_Empty;
+//ZZ       VG_TRACK( pre_thread_ll_exit, ctid );
+//ZZ    }
+//ZZ 
+//ZZ    return res;
+//ZZ }
+
+
+
+/* ---------------------------------------------------------------------
+   More thread stuff
+   ------------------------------------------------------------------ */
+
+// Nothing architecture-specific is attached to a thread on ARM64, so
+// there is nothing to clean up: this is intentionally an empty function.
+void VG_(cleanup_thread) ( ThreadArchState* arch )
+{
+}  
+
+//ZZ void setup_child ( /*OUT*/ ThreadArchState *child,
+//ZZ                    /*IN*/  ThreadArchState *parent )
+//ZZ {
+//ZZ    child->vex = parent->vex;
+//ZZ    child->vex_shadow1 = parent->vex_shadow1;
+//ZZ    child->vex_shadow2 = parent->vex_shadow2;
+//ZZ }
+//ZZ 
+//ZZ static void assign_guest_tls(ThreadId tid, Addr tlsptr)
+//ZZ {
+//ZZ    VG_(threads)[tid].arch.vex.guest_TPIDRURO = tlsptr;
+//ZZ }
+//ZZ 
+//ZZ /* Assigns tlsptr to the guest TPIDRURO.
+//ZZ    If needed for the specific hardware, really executes
+//ZZ    the set_tls syscall.
+//ZZ */
+//ZZ static SysRes sys_set_tls ( ThreadId tid, Addr tlsptr )
+//ZZ {
+//ZZ    assign_guest_tls(tid, tlsptr);
+//ZZ #if defined(ANDROID_HARDWARE_emulator)
+//ZZ    /* Android emulator does not provide an hw tls register.
+//ZZ       So, the tls register is emulated by the kernel.
+//ZZ       This emulated value is set by the __NR_ARM_set_tls syscall.
+//ZZ       The emulated value must be read by the kernel helper function
+//ZZ       located at 0xffff0fe0.
+//ZZ       
+//ZZ       The emulated tlsptr is located at 0xffff0ff0
+//ZZ       (so slightly after the kernel helper function).
+//ZZ       Note that applications are not supposed to read this directly.
+//ZZ       
+//ZZ       For compatibility : if there is a hw tls register, the kernel
+//ZZ       will put at 0xffff0fe0 the instructions to read it, so
+//ZZ       as to have old applications calling the kernel helper
+//ZZ       working properly.
+//ZZ 
+//ZZ       For having emulated guest TLS working correctly with
+//ZZ       Valgrind, it is needed to execute the syscall to set
+//ZZ       the emulated TLS value in addition to the assignment
+//ZZ       of TPIDRURO.
+//ZZ 
+//ZZ       Note: the below means that if we need thread local storage
+//ZZ       for Valgrind host, then there will be a conflict between
+//ZZ       the need of the guest tls and of the host tls.
+//ZZ       If all the guest code would cleanly call 0xffff0fe0,
+//ZZ       then we might maybe intercept this. However, at least
+//ZZ       __libc_preinit reads directly 0xffff0ff0.
+//ZZ    */
+//ZZ    /* ??? might call the below if auxv->u.a_val & VKI_HWCAP_TLS ???
+//ZZ       Unclear if real hardware having tls hw register sets
+//ZZ       VKI_HWCAP_TLS. */
+//ZZ    return VG_(do_syscall1) (__NR_ARM_set_tls, tlsptr);
+//ZZ #else
+//ZZ    return VG_(mk_SysRes_Success)( 0 );
+//ZZ #endif
+//ZZ }
+
+/* ---------------------------------------------------------------------
+   PRE/POST wrappers for arm64/Linux-specific syscalls
+   ------------------------------------------------------------------ */
+
+#define PRE(name)       DEFN_PRE_TEMPLATE(arm64_linux, name)
+#define POST(name)      DEFN_POST_TEMPLATE(arm64_linux, name)
+
+//ZZ /* Add prototypes for the wrappers declared here, so that gcc doesn't
+//ZZ    harass us for not having prototypes.  Really this is a kludge --
+//ZZ    the right thing to do is to make these wrappers 'static' since they
+//ZZ    aren't visible outside this file, but that requires even more macro
+//ZZ    magic. */
+//ZZ 
+DECL_TEMPLATE(arm64_linux, sys_mmap);
+//ZZ DECL_TEMPLATE(arm_linux, sys_stat64);
+//ZZ DECL_TEMPLATE(arm_linux, sys_lstat64);
+//ZZ DECL_TEMPLATE(arm_linux, sys_fstatat64);
+//ZZ DECL_TEMPLATE(arm_linux, sys_fstat64);
+DECL_TEMPLATE(arm64_linux, sys_clone);
+//ZZ DECL_TEMPLATE(arm_linux, sys_sigreturn);
+DECL_TEMPLATE(arm64_linux, sys_rt_sigreturn);
+//ZZ DECL_TEMPLATE(arm_linux, sys_sigsuspend);
+//ZZ DECL_TEMPLATE(arm_linux, sys_set_tls);
+//ZZ DECL_TEMPLATE(arm_linux, sys_cacheflush);
+//ZZ DECL_TEMPLATE(arm_linux, sys_ptrace);
+//ZZ 
+//ZZ PRE(sys_mmap2)
+//ZZ {
+//ZZ    SysRes r;
+//ZZ 
+//ZZ    // Exactly like old_mmap() except:
+//ZZ    //  - all 6 args are passed in regs, rather than in a memory-block.
+//ZZ    //  - the file offset is specified in pagesize units rather than bytes,
+//ZZ    //    so that it can be used for files bigger than 2^32 bytes.
+//ZZ    // pagesize or 4K-size units in offset?  For ppc32/64-linux, this is
+//ZZ    // 4K-sized.  Assert that the page size is 4K here for safety.
+//ZZ    vg_assert(VKI_PAGE_SIZE == 4096);
+//ZZ    PRINT("sys_mmap2 ( %#lx, %llu, %ld, %ld, %ld, %ld )",
+//ZZ          ARG1, (ULong)ARG2, ARG3, ARG4, ARG5, ARG6 );
+//ZZ    PRE_REG_READ6(long, "mmap2",
+//ZZ                  unsigned long, start, unsigned long, length,
+//ZZ                  unsigned long, prot,  unsigned long, flags,
+//ZZ                  unsigned long, fd,    unsigned long, offset);
+//ZZ 
+//ZZ    r = ML_(generic_PRE_sys_mmap)( tid, ARG1, ARG2, ARG3, ARG4, ARG5, 
+//ZZ                                        4096 * (Off64T)ARG6 );
+//ZZ    SET_STATUS_from_SysRes(r);
+//ZZ }
+
+// ARM64 FIXME is this correct?  All six args arrive in registers and
+// the offset (ARG6) is forwarded to the generic handler unscaled.
+PRE(sys_mmap)
+{
+   PRINT("sys_mmap ( %#lx, %llu, %ld, %ld, %d, %ld )",
+         ARG1, (ULong)ARG2, ARG3, ARG4, (Int)ARG5, ARG6 );
+   PRE_REG_READ6(long, "mmap",
+                 unsigned long, start, unsigned long, length,
+                 unsigned long, prot,  unsigned long, flags,
+                 unsigned long, fd,    unsigned long, offset);
+
+   /* Delegate to the generic mmap handler and adopt its result. */
+   SET_STATUS_from_SysRes(
+      ML_(generic_PRE_sys_mmap)( tid, ARG1, ARG2, ARG3, ARG4, ARG5, ARG6 ) );
+}
+
+//ZZ 
+//ZZ // XXX: lstat64/fstat64/stat64 are generic, but not necessarily
+//ZZ // applicable to every architecture -- I think only to 32-bit archs.
+//ZZ // We're going to need something like linux/core_os32.h for such
+//ZZ // things, eventually, I think.  --njn
+//ZZ PRE(sys_lstat64)
+//ZZ {
+//ZZ    PRINT("sys_lstat64 ( %#lx(%s), %#lx )",ARG1,(char*)ARG1,ARG2);
+//ZZ    PRE_REG_READ2(long, "lstat64", char *, file_name, struct stat64 *, buf);
+//ZZ    PRE_MEM_RASCIIZ( "lstat64(file_name)", ARG1 );
+//ZZ    PRE_MEM_WRITE( "lstat64(buf)", ARG2, sizeof(struct vki_stat64) );
+//ZZ }
+//ZZ 
+//ZZ POST(sys_lstat64)
+//ZZ {
+//ZZ    vg_assert(SUCCESS);
+//ZZ    if (RES == 0) {
+//ZZ       POST_MEM_WRITE( ARG2, sizeof(struct vki_stat64) );
+//ZZ    }
+//ZZ }
+//ZZ 
+//ZZ PRE(sys_stat64)
+//ZZ {
+//ZZ    PRINT("sys_stat64 ( %#lx(%s), %#lx )",ARG1,(char*)ARG1,ARG2);
+//ZZ    PRE_REG_READ2(long, "stat64", char *, file_name, struct stat64 *, buf);
+//ZZ    PRE_MEM_RASCIIZ( "stat64(file_name)", ARG1 );
+//ZZ    PRE_MEM_WRITE( "stat64(buf)", ARG2, sizeof(struct vki_stat64) );
+//ZZ }
+//ZZ 
+//ZZ POST(sys_stat64)
+//ZZ {
+//ZZ    POST_MEM_WRITE( ARG2, sizeof(struct vki_stat64) );
+//ZZ }
+//ZZ 
+//ZZ PRE(sys_fstatat64)
+//ZZ {
+//ZZ    PRINT("sys_fstatat64 ( %ld, %#lx(%s), %#lx )",ARG1,ARG2,(char*)ARG2,ARG3);
+//ZZ    PRE_REG_READ3(long, "fstatat64",
+//ZZ                  int, dfd, char *, file_name, struct stat64 *, buf);
+//ZZ    PRE_MEM_RASCIIZ( "fstatat64(file_name)", ARG2 );
+//ZZ    PRE_MEM_WRITE( "fstatat64(buf)", ARG3, sizeof(struct vki_stat64) );
+//ZZ }
+//ZZ 
+//ZZ POST(sys_fstatat64)
+//ZZ {
+//ZZ    POST_MEM_WRITE( ARG3, sizeof(struct vki_stat64) );
+//ZZ }
+//ZZ 
+//ZZ PRE(sys_fstat64)
+//ZZ {
+//ZZ    PRINT("sys_fstat64 ( %ld, %#lx )",ARG1,ARG2);
+//ZZ    PRE_REG_READ2(long, "fstat64", unsigned long, fd, struct stat64 *, buf);
+//ZZ    PRE_MEM_WRITE( "fstat64(buf)", ARG2, sizeof(struct vki_stat64) );
+//ZZ }
+//ZZ 
+//ZZ POST(sys_fstat64)
+//ZZ {
+//ZZ    POST_MEM_WRITE( ARG2, sizeof(struct vki_stat64) );
+//ZZ }
+
+PRE(sys_clone)
+{
+   UInt cloneflags;
+
+   /* Only fork-like and vfork-like clones are serviced (vfork being
+      treated as fork).  The thread-creation case is still disabled (ZZ
+      below), so every other flag combination ends in VG_(unimplemented). */
+   PRINT("sys_clone ( %lx, %#lx, %#lx, %#lx, %#lx )",ARG1,ARG2,ARG3,ARG4,ARG5);
+   PRE_REG_READ5(int, "clone",
+                 unsigned long, flags,
+                 void *, child_stack,
+                 int *, parent_tidptr,
+                 void *, child_tls,
+                 int *, child_tidptr);
+
+   if (ARG1 & VKI_CLONE_PARENT_SETTID) {
+      PRE_MEM_WRITE("clone(parent_tidptr)", ARG3, sizeof(Int));
+      if (!VG_(am_is_valid_for_client)(ARG3, sizeof(Int), VKI_PROT_WRITE)) {
+         SET_STATUS_Failure( VKI_EFAULT );
+         return;
+      }
+   }
+   if (ARG1 & (VKI_CLONE_CHILD_SETTID | VKI_CLONE_CHILD_CLEARTID)) {
+      PRE_MEM_WRITE("clone(child_tidptr)", ARG5, sizeof(Int));
+      if (!VG_(am_is_valid_for_client)(ARG5, sizeof(Int), VKI_PROT_WRITE)) {
+         SET_STATUS_Failure( VKI_EFAULT );
+         return;
+      }
+   }
+//ZZ    if (ARG1 & VKI_CLONE_SETTLS) {
+//ZZ       PRE_MEM_READ("clone(tls_user_desc)", ARG4, sizeof(vki_modify_ldt_t));
+//ZZ       if (!VG_(am_is_valid_for_client)(ARG4, sizeof(vki_modify_ldt_t), 
+//ZZ                                              VKI_PROT_READ)) {
+//ZZ          SET_STATUS_Failure( VKI_EFAULT );
+//ZZ          return;
+//ZZ       }
+//ZZ    }
+
+   cloneflags = ARG1;
+
+   if (!ML_(client_signal_OK)(ARG1 & VKI_CSIGNAL)) {
+      SET_STATUS_Failure( VKI_EINVAL );
+      return;
+   }
+
+   /* Only look at the flags we really care about */
+   switch (cloneflags & (VKI_CLONE_VM | VKI_CLONE_FS
+                         | VKI_CLONE_FILES | VKI_CLONE_VFORK)) {
+//ZZ    case VKI_CLONE_VM | VKI_CLONE_FS | VKI_CLONE_FILES:
+//ZZ       /* thread creation */
+//ZZ       SET_STATUS_from_SysRes(
+//ZZ          do_clone(tid,
+//ZZ                   ARG1,         /* flags */
+//ZZ                   (Addr)ARG2,   /* child ESP */
+//ZZ                   (Int *)ARG3,  /* parent_tidptr */
+//ZZ                   (Int *)ARG5,  /* child_tidptr */
+//ZZ                   (Addr)ARG4)); /* set_tls */
+//ZZ       break;
+
+   case VKI_CLONE_VFORK | VKI_CLONE_VM: /* vfork */
+      /* Assume vfork == fork: strip the vfork-only flags, then fork */
+      cloneflags &= ~(VKI_CLONE_VFORK | VKI_CLONE_VM);
+      /* FALLTHROUGH */
+   case 0: /* plain fork */
+      SET_STATUS_from_SysRes(
+         ML_(do_fork_clone)(tid,
+                       cloneflags,      /* flags */
+                       (Int *)ARG3,     /* parent_tidptr */
+                       (Int *)ARG5));   /* child_tidptr */
+      break;
+
+   default:
+      /* should we just ENOSYS?  NB: each message supplies its own '\n'. */
+      VG_(message)(Vg_UserMsg, "\n");
+      VG_(message)(Vg_UserMsg, "Unsupported clone() flags: 0x%lx\n", ARG1);
+      VG_(message)(Vg_UserMsg, "\n");
+      VG_(message)(Vg_UserMsg, "The only supported clone() uses are:\n");
+      VG_(message)(Vg_UserMsg, " - via a threads library (LinuxThreads or NPTL)\n");
+      VG_(message)(Vg_UserMsg, " - via the implementation of fork or vfork\n");
+      VG_(message)(Vg_UserMsg, " - for the Quadrics Elan3 user-space driver\n");
+      VG_(unimplemented)("Valgrind does not support general clone().");
+   }
+
+   if (SUCCESS) {
+      if (ARG1 & VKI_CLONE_PARENT_SETTID)
+         POST_MEM_WRITE(ARG3, sizeof(Int));
+      if (ARG1 & (VKI_CLONE_CHILD_SETTID | VKI_CLONE_CHILD_CLEARTID))
+         POST_MEM_WRITE(ARG5, sizeof(Int));
+
+      /* Thread creation was successful; let the child have the chance
+         to run */
+      *flags |= SfYieldAfter;
+   }
+}
+
+//ZZ PRE(sys_sigreturn)
+//ZZ {
+//ZZ    /* See comments on PRE(sys_rt_sigreturn) in syswrap-amd64-linux.c for
+//ZZ      an explanation of what follows. */
+//ZZ 
+//ZZ    PRINT("sys_sigreturn ( )");
+//ZZ 
+//ZZ    vg_assert(VG_(is_valid_tid)(tid));
+//ZZ    vg_assert(tid >= 1 && tid < VG_N_THREADS);
+//ZZ    vg_assert(VG_(is_running_thread)(tid));
+//ZZ 
+//ZZ    /* Restore register state from frame and remove it */
+//ZZ    VG_(sigframe_destroy)(tid, False);
+//ZZ 
+//ZZ    /* Tell the driver not to update the guest state with the "result",
+//ZZ       and set a bogus result to keep it happy. */
+//ZZ    *flags |= SfNoWriteResult;
+//ZZ    SET_STATUS_Success(0);
+//ZZ 
+//ZZ    /* Check to see if any signals arose as a result of this. */
+//ZZ    *flags |= SfPollAfter;
+//ZZ }
+
+PRE(sys_rt_sigreturn)
+{
+   /* The rationale for this sequence is given in the comments on
+      PRE(sys_rt_sigreturn) in syswrap-amd64-linux.c. */
+
+   PRINT("rt_sigreturn ( )");
+
+   vg_assert(VG_(is_valid_tid)(tid));
+   vg_assert(tid >= 1 && tid < VG_N_THREADS);
+   vg_assert(VG_(is_running_thread)(tid));
+
+   /* Reload the register state from the signal frame, then discard
+      the frame. */
+   VG_(sigframe_destroy)(tid, True);
+
+   /* The driver must not update the guest state with a syscall
+      "result", so say so, handing it a bogus success value merely to
+      keep it happy.  Also request a check afterwards for any signals
+      that arose as a result of this. */
+   SET_STATUS_Success(0);
+   *flags |= SfNoWriteResult | SfPollAfter;
+}
+
+//ZZ /* NB: clone of x86-linux version, and ppc32-linux has an almost
+//ZZ    identical one. */
+//ZZ PRE(sys_sigsuspend)
+//ZZ {
+//ZZ    /* The C library interface to sigsuspend just takes a pointer to
+//ZZ       a signal mask but this system call has three arguments - the first
+//ZZ       two don't appear to be used by the kernel and are always passed as
+//ZZ       zero by glibc and the third is the first word of the signal mask
+//ZZ       so only 32 signals are supported.
+//ZZ      
+//ZZ       In fact glibc normally uses rt_sigsuspend if it is available as
+//ZZ       that takes a pointer to the signal mask so supports more signals.
+//ZZ     */
+//ZZ    *flags |= SfMayBlock;
+//ZZ    PRINT("sys_sigsuspend ( %ld, %ld, %ld )", ARG1,ARG2,ARG3 );
+//ZZ    PRE_REG_READ3(int, "sigsuspend",
+//ZZ                  int, history0, int, history1,
+//ZZ                  vki_old_sigset_t, mask);
+//ZZ }
+//ZZ 
+//ZZ /* Very much ARM specific */
+//ZZ 
+//ZZ PRE(sys_set_tls)
+//ZZ {
+//ZZ    PRINT("set_tls (%lx)",ARG1);
+//ZZ    PRE_REG_READ1(long, "set_tls", unsigned long, addr);
+//ZZ 
+//ZZ    SET_STATUS_from_SysRes( sys_set_tls( tid, ARG1 ) );
+//ZZ }
+//ZZ 
+//ZZ PRE(sys_cacheflush)
+//ZZ {
+//ZZ    PRINT("cacheflush (%lx, %#lx, %#lx)",ARG1,ARG2,ARG3);
+//ZZ    PRE_REG_READ3(long, "cacheflush", void*, addrlow,void*, addrhigh,int, flags);
+//ZZ    VG_(discard_translations)( (Addr64)ARG1,
+//ZZ                               ((ULong)ARG2) - ((ULong)ARG1) + 1ULL/*paranoia*/,
+//ZZ                               "PRE(sys_cacheflush)" );
+//ZZ    SET_STATUS_Success(0);
+//ZZ }
+//ZZ 
+//ZZ // ARG3 is only used for pointers into the traced process's address
+//ZZ // space and for offsets into the traced process's struct
+//ZZ // user_regs_struct. It is never a pointer into this process's memory
+//ZZ // space, and we should therefore not check anything it points to.
+//ZZ PRE(sys_ptrace)
+//ZZ {
+//ZZ    PRINT("sys_ptrace ( %ld, %ld, %#lx, %#lx )", ARG1,ARG2,ARG3,ARG4);
+//ZZ    PRE_REG_READ4(int, "ptrace", 
+//ZZ                  long, request, long, pid, long, addr, long, data);
+//ZZ    switch (ARG1) {
+//ZZ    case VKI_PTRACE_PEEKTEXT:
+//ZZ    case VKI_PTRACE_PEEKDATA:
+//ZZ    case VKI_PTRACE_PEEKUSR:
+//ZZ       PRE_MEM_WRITE( "ptrace(peek)", ARG4, 
+//ZZ 		     sizeof (long));
+//ZZ       break;
+//ZZ    case VKI_PTRACE_GETREGS:
+//ZZ       PRE_MEM_WRITE( "ptrace(getregs)", ARG4, 
+//ZZ 		     sizeof (struct vki_user_regs_struct));
+//ZZ       break;
+//ZZ    case VKI_PTRACE_GETFPREGS:
+//ZZ       PRE_MEM_WRITE( "ptrace(getfpregs)", ARG4, 
+//ZZ 		     sizeof (struct vki_user_fp));
+//ZZ       break;
+//ZZ    case VKI_PTRACE_GETWMMXREGS:
+//ZZ       PRE_MEM_WRITE( "ptrace(getwmmxregs)", ARG4, 
+//ZZ 		     VKI_IWMMXT_SIZE);
+//ZZ       break;
+//ZZ    case VKI_PTRACE_GETCRUNCHREGS:
+//ZZ       PRE_MEM_WRITE( "ptrace(getcrunchregs)", ARG4, 
+//ZZ 		     VKI_CRUNCH_SIZE);
+//ZZ       break;
+//ZZ    case VKI_PTRACE_GETVFPREGS:
+//ZZ       PRE_MEM_WRITE( "ptrace(getvfpregs)", ARG4, 
+//ZZ                      sizeof (struct vki_user_vfp) );
+//ZZ       break;
+//ZZ    case VKI_PTRACE_GETHBPREGS:
+//ZZ       PRE_MEM_WRITE( "ptrace(gethbpregs)", ARG4, 
+//ZZ                      sizeof (unsigned long) );
+//ZZ       break;
+//ZZ    case VKI_PTRACE_SETREGS:
+//ZZ       PRE_MEM_READ( "ptrace(setregs)", ARG4, 
+//ZZ 		     sizeof (struct vki_user_regs_struct));
+//ZZ       break;
+//ZZ    case VKI_PTRACE_SETFPREGS:
+//ZZ       PRE_MEM_READ( "ptrace(setfpregs)", ARG4, 
+//ZZ 		     sizeof (struct vki_user_fp));
+//ZZ       break;
+//ZZ    case VKI_PTRACE_SETWMMXREGS:
+//ZZ       PRE_MEM_READ( "ptrace(setwmmxregs)", ARG4, 
+//ZZ 		     VKI_IWMMXT_SIZE);
+//ZZ       break;
+//ZZ    case VKI_PTRACE_SETCRUNCHREGS:
+//ZZ       PRE_MEM_READ( "ptrace(setcrunchregs)", ARG4, 
+//ZZ 		     VKI_CRUNCH_SIZE);
+//ZZ       break;
+//ZZ    case VKI_PTRACE_SETVFPREGS:
+//ZZ       PRE_MEM_READ( "ptrace(setvfpregs)", ARG4, 
+//ZZ                      sizeof (struct vki_user_vfp));
+//ZZ       break;
+//ZZ    case VKI_PTRACE_SETHBPREGS:
+//ZZ       PRE_MEM_READ( "ptrace(sethbpregs)", ARG4, sizeof(unsigned long));
+//ZZ       break;
+//ZZ    case VKI_PTRACE_GET_THREAD_AREA:
+//ZZ       PRE_MEM_WRITE( "ptrace(get_thread_area)", ARG4, sizeof(unsigned long));
+//ZZ       break;
+//ZZ    case VKI_PTRACE_GETEVENTMSG:
+//ZZ       PRE_MEM_WRITE( "ptrace(geteventmsg)", ARG4, sizeof(unsigned long));
+//ZZ       break;
+//ZZ    case VKI_PTRACE_GETSIGINFO:
+//ZZ       PRE_MEM_WRITE( "ptrace(getsiginfo)", ARG4, sizeof(vki_siginfo_t));
+//ZZ       break;
+//ZZ    case VKI_PTRACE_SETSIGINFO:
+//ZZ       PRE_MEM_READ( "ptrace(setsiginfo)", ARG4, sizeof(vki_siginfo_t));
+//ZZ       break;
+//ZZ    case VKI_PTRACE_GETREGSET:
+//ZZ       ML_(linux_PRE_getregset)(tid, ARG3, ARG4);
+//ZZ       break;
+//ZZ    case VKI_PTRACE_SETREGSET:
+//ZZ       ML_(linux_PRE_setregset)(tid, ARG3, ARG4);
+//ZZ       break;
+//ZZ    default:
+//ZZ       break;
+//ZZ    }
+//ZZ }
+//ZZ 
+//ZZ POST(sys_ptrace)
+//ZZ {
+//ZZ    switch (ARG1) {
+//ZZ    case VKI_PTRACE_PEEKTEXT:
+//ZZ    case VKI_PTRACE_PEEKDATA:
+//ZZ    case VKI_PTRACE_PEEKUSR:
+//ZZ       POST_MEM_WRITE( ARG4, sizeof (long));
+//ZZ       break;
+//ZZ    case VKI_PTRACE_GETREGS:
+//ZZ       POST_MEM_WRITE( ARG4, sizeof (struct vki_user_regs_struct));
+//ZZ       break;
+//ZZ    case VKI_PTRACE_GETFPREGS:
+//ZZ       POST_MEM_WRITE( ARG4, sizeof (struct vki_user_fp));
+//ZZ       break;
+//ZZ    case VKI_PTRACE_GETWMMXREGS:
+//ZZ       POST_MEM_WRITE( ARG4, VKI_IWMMXT_SIZE);
+//ZZ       break;
+//ZZ    case VKI_PTRACE_GETCRUNCHREGS:
+//ZZ       POST_MEM_WRITE( ARG4, VKI_CRUNCH_SIZE);
+//ZZ       break;
+//ZZ    case VKI_PTRACE_GETVFPREGS:
+//ZZ       POST_MEM_WRITE( ARG4, sizeof(struct vki_user_vfp));
+//ZZ       break;
+//ZZ    case VKI_PTRACE_GET_THREAD_AREA:
+//ZZ    case VKI_PTRACE_GETHBPREGS:
+//ZZ    case VKI_PTRACE_GETEVENTMSG:
+//ZZ       POST_MEM_WRITE( ARG4, sizeof(unsigned long));
+//ZZ       break;
+//ZZ    case VKI_PTRACE_GETSIGINFO:
+//ZZ       /* XXX: This is a simplification. Different parts of the
+//ZZ        * siginfo_t are valid depending on the type of signal.
+//ZZ        */
+//ZZ       POST_MEM_WRITE( ARG4, sizeof(vki_siginfo_t));
+//ZZ       break;
+//ZZ    case VKI_PTRACE_GETREGSET:
+//ZZ       ML_(linux_POST_getregset)(tid, ARG3, ARG4);
+//ZZ       break;
+//ZZ    default:
+//ZZ       break;
+//ZZ    }
+//ZZ }
+//ZZ 
+//ZZ #undef PRE
+//ZZ #undef POST
+
+/* ---------------------------------------------------------------------
+   The arm64/Linux syscall table
+   ------------------------------------------------------------------ */
+
+//ZZ #if 0
+//ZZ #define __NR_OABI_SYSCALL_BASE 0x900000
+//ZZ #else
+//ZZ #define __NR_OABI_SYSCALL_BASE 0x0
+//ZZ #endif
+
+#define PLAX_(sysno, name)    WRAPPER_ENTRY_X_(arm64_linux, sysno, name) 
+#define PLAXY(sysno, name)    WRAPPER_ENTRY_XY(arm64_linux, sysno, name)
+
+// This table maps from __NR_xxx syscall numbers (from
+// linux/include/uapi/asm-generic/unistd.h) to the appropriate PRE/POST
+// sys_foo() wrappers on arm64 (sys_call_table: linux/arch/arm64/kernel/sys.c).
+//
+// For those syscalls not handled by Valgrind, the annotation indicates the
+// arch/OS combination, eg. */* (generic), */Linux (Linux only), ?/?
+// (unknown).
+
+static SyscallTableEntry syscall_main_table[] = {
+   LINXY(__NR_getxattr,          sys_getxattr),          // 8
+   LINXY(__NR_lgetxattr,         sys_lgetxattr),         // 9
+   GENXY(__NR_getcwd,            sys_getcwd),            // 17
+   LINXY(__NR_dup3,              sys_dup3),              // 24
+
+   // FIXME IS THIS CORRECT?
+   LINXY(__NR3264_fcntl,         sys_fcntl),             // 25
+
+   LINXY(__NR_ioctl,             sys_ioctl),             // 29
+   LINX_(__NR_mkdirat,           sys_mkdirat),           // 34
+   LINX_(__NR_unlinkat,          sys_unlinkat),          // 35
+
+   // FIXME IS THIS CORRECT?  it may well not be.
+   GENXY(__NR3264_statfs,        sys_statfs),            // 43
+
+   LINX_(__NR_faccessat,         sys_faccessat),         // 48
+   GENX_(__NR_chdir,             sys_chdir),             // 49
+   LINXY(__NR_openat,            sys_openat),            // 56
+   GENXY(__NR_close,             sys_close),             // 57
+   LINXY(__NR_pipe2,             sys_pipe2),             // 59
+   GENXY(__NR_getdents64,        sys_getdents64),        // 61
+
+   // FIXME IS THIS CORRECT?
+   LINX_(__NR3264_lseek,         sys_lseek),             // 62
+
+   GENXY(__NR_read,              sys_read),              // 63
+   GENX_(__NR_write,             sys_write),             // 64
+   GENX_(__NR_writev,            sys_writev),            // 66
+
+   LINX_(__NR_readlinkat,        sys_readlinkat),        // 78
+
+   // FIXME IS THIS CORRECT?
+   LINXY(__NR3264_fstatat,       sys_newfstatat),        // 79
+   GENXY(__NR3264_fstat,         sys_newfstat),          // 80
+
+   LINX_(__NR_exit_group,        sys_exit_group),        // 94
+   LINX_(__NR_set_tid_address,   sys_set_tid_address),   // 96
+   LINXY(__NR_futex,             sys_futex),             // 98
+   LINX_(__NR_set_robust_list,   sys_set_robust_list),   // 99
+   LINXY(__NR_clock_gettime,     sys_clock_gettime),     // 113
+   LINX_(__NR_tgkill,            sys_tgkill),            // 131 */Linux
+   LINXY(__NR_rt_sigaction,      sys_rt_sigaction),      // 134
+   LINXY(__NR_rt_sigprocmask,    sys_rt_sigprocmask),    // 135
+   PLAX_(__NR_rt_sigreturn,      sys_rt_sigreturn),      // 139
+   GENX_(__NR_getpgid,           sys_getpgid),           // 155
+   GENXY(__NR_uname,             sys_newuname),          // 160
+   GENXY(__NR_getrlimit,         sys_old_getrlimit),     // 163
+   GENXY(__NR_getrusage,         sys_getrusage),         // 165
+   GENXY(__NR_gettimeofday,      sys_gettimeofday),      // 169
+   GENX_(__NR_getpid,            sys_getpid),            // 172
+   GENX_(__NR_getppid,           sys_getppid),           // 173
+   GENX_(__NR_getuid,            sys_getuid),            // 174
+   GENX_(__NR_geteuid,           sys_geteuid),           // 175
+   GENX_(__NR_getgid,            sys_getgid),            // 176
+   GENX_(__NR_getegid,           sys_getegid),           // 177
+   LINX_(__NR_gettid,            sys_gettid),            // 178
+   LINXY(__NR_socket,            sys_socket),            // 198
+   LINX_(__NR_connect,           sys_connect),           // 203
+   GENX_(__NR_brk,               sys_brk),               // 214
+   GENXY(__NR_munmap,            sys_munmap),            // 215
+   PLAX_(__NR_clone,             sys_clone),             // 220
+   GENX_(__NR_execve,            sys_execve),            // 221
+
+   // FIXME IS THIS CORRECT?
+   PLAX_(__NR3264_mmap,          sys_mmap),              // 222
+
+   GENXY(__NR_mprotect,          sys_mprotect),          // 226
+   GENXY(__NR_wait4,             sys_wait4),             // 260
+
+// The numbers below are bogus.  (See comment further down.)
+// When pulling entries above this line, change the numbers
+// to be correct.
+
+//ZZ    GENX_(__NR_exit,              sys_exit),           // 93
+//ZZ //zz    //   (restart_syscall)                             // 0
+//ZZ    GENX_(__NR_fork,              sys_fork),           // 2
+//ZZ 
+//ZZ    GENXY(__NR_open,              sys_open),           // 5
+//ZZ //   GENXY(__NR_waitpid,           sys_waitpid),        // 7
+//ZZ    GENXY(__NR_creat,             sys_creat),          // 8
+//ZZ    GENX_(__NR_link,              sys_link),           // 9
+//ZZ 
+//ZZ    GENX_(__NR_unlink,            sys_unlink),         // 10
+//ZZ    GENXY(__NR_time,              sys_time),           // 13
+//ZZ    GENX_(__NR_mknod,             sys_mknod),          // 14
+//ZZ 
+//ZZ    GENX_(__NR_chmod,             sys_chmod),          // 15
+//ZZ //zz    LINX_(__NR_lchown,            sys_lchown16),       // 16
+//ZZ //   GENX_(__NR_break,             sys_ni_syscall),     // 17
+//ZZ //zz    //   (__NR_oldstat,           sys_stat),           // 18 (obsolete)
+//ZZ    LINX_(__NR_lseek,             sys_lseek),          // 19
+//ZZ 
+//ZZ    GENX_(__NR_getpid,            sys_getpid),         // 20
+//ZZ    LINX_(__NR_mount,             sys_mount),          // 21
+//ZZ    LINX_(__NR_umount,            sys_oldumount),      // 22
+//ZZ    LINX_(__NR_setuid,            sys_setuid16),       // 23 ## P
+//ZZ    LINX_(__NR_getuid,            sys_getuid16),       // 24 ## P
+//ZZ //zz 
+//ZZ //zz    //   (__NR_stime,             sys_stime),          // 25 * (SVr4,SVID,X/OPEN)
+//ZZ    PLAXY(__NR_ptrace,            sys_ptrace),         // 26
+//ZZ    GENX_(__NR_alarm,             sys_alarm),          // 27
+//ZZ //zz    //   (__NR_oldfstat,          sys_fstat),          // 28 * L -- obsolete
+//ZZ    GENX_(__NR_pause,             sys_pause),          // 29
+//ZZ 
+//ZZ    LINX_(__NR_utime,             sys_utime),          // 30
+//ZZ //   GENX_(__NR_stty,              sys_ni_syscall),     // 31
+//ZZ //   GENX_(__NR_gtty,              sys_ni_syscall),     // 32
+//ZZ    GENX_(__NR_access,            sys_access),         // 33
+//ZZ    GENX_(__NR_nice,              sys_nice),           // 34
+//ZZ 
+//ZZ //   GENX_(__NR_ftime,             sys_ni_syscall),     // 35
+//ZZ    GENX_(__NR_sync,              sys_sync),           // 36
+//ZZ    GENX_(__NR_kill,              sys_kill),           // 37
+//ZZ    GENX_(__NR_rename,            sys_rename),         // 38
+//ZZ    GENX_(__NR_mkdir,             sys_mkdir),          // 39
+//ZZ 
+//ZZ    GENX_(__NR_rmdir,             sys_rmdir),          // 40
+//ZZ    GENXY(__NR_dup,               sys_dup),            // 41
+//ZZ    LINXY(__NR_pipe,              sys_pipe),           // 42
+//ZZ    GENXY(__NR_times,             sys_times),          // 43
+//ZZ //   GENX_(__NR_prof,              sys_ni_syscall),     // 44
+
+//ZZ    LINX_(__NR_setgid,            sys_setgid16),       // 46
+//ZZ    LINX_(__NR_getgid,            sys_getgid16),       // 47
+//ZZ //zz    //   (__NR_signal,            sys_signal),         // 48 */* (ANSI C)
+//ZZ    LINX_(__NR_geteuid,           sys_geteuid16),      // 49
+//ZZ 
+//ZZ    LINX_(__NR_getegid,           sys_getegid16),      // 50
+//ZZ    GENX_(__NR_acct,              sys_acct),           // 51
+//ZZ    LINX_(__NR_umount2,           sys_umount),         // 52
+//ZZ //   GENX_(__NR_lock,              sys_ni_syscall),     // 53
+//ZZ 
+//ZZ    LINXY(__NR_fcntl,             sys_fcntl),          // 55
+//ZZ //   GENX_(__NR_mpx,               sys_ni_syscall),     // 56
+//ZZ    GENX_(__NR_setpgid,           sys_setpgid),        // 57
+//ZZ //   GENX_(__NR_ulimit,            sys_ni_syscall),     // 58
+//ZZ //zz    //   (__NR_oldolduname,       sys_olduname),       // 59 Linux -- obsolete
+//ZZ //zz 
+//ZZ    GENX_(__NR_umask,             sys_umask),          // 60
+//ZZ    GENX_(__NR_chroot,            sys_chroot),         // 61
+//ZZ //zz    //   (__NR_ustat,             sys_ustat)           // 62 SVr4 -- deprecated
+//ZZ    GENXY(__NR_dup2,              sys_dup2),           // 63
+//ZZ    GENX_(__NR_getppid,           sys_getppid),        // 64
+//ZZ 
+//ZZ    GENX_(__NR_getpgrp,           sys_getpgrp),        // 65
+//ZZ    GENX_(__NR_setsid,            sys_setsid),         // 66
+//ZZ    LINXY(__NR_sigaction,         sys_sigaction),      // 67
+//ZZ //zz    //   (__NR_sgetmask,          sys_sgetmask),       // 68 */* (ANSI C)
+//ZZ //zz    //   (__NR_ssetmask,          sys_ssetmask),       // 69 */* (ANSI C)
+//ZZ //zz 
+//ZZ    LINX_(__NR_setreuid,          sys_setreuid16),     // 70
+//ZZ    LINX_(__NR_setregid,          sys_setregid16),     // 71
+//ZZ    PLAX_(__NR_sigsuspend,        sys_sigsuspend),     // 72
+//ZZ    LINXY(__NR_sigpending,        sys_sigpending),     // 73
+//ZZ //zz    //   (__NR_sethostname,       sys_sethostname),    // 74 */*
+//ZZ //zz 
+//ZZ    GENX_(__NR_setrlimit,         sys_setrlimit),      // 75
+//ZZ    GENXY(__NR_getrlimit,         sys_old_getrlimit),  // 76
+//ZZ    GENX_(__NR_settimeofday,      sys_settimeofday),   // 79
+//ZZ 
+//ZZ    LINXY(__NR_getgroups,         sys_getgroups16),    // 80
+//ZZ    LINX_(__NR_setgroups,         sys_setgroups16),    // 81
+//ZZ //   PLAX_(__NR_select,            old_select),         // 82
+//ZZ    GENX_(__NR_symlink,           sys_symlink),        // 83
+//ZZ //zz    //   (__NR_oldlstat,          sys_lstat),          // 84 -- obsolete
+//ZZ //zz 
+//ZZ    GENX_(__NR_readlink,          sys_readlink),       // 85
+//ZZ //zz    //   (__NR_uselib,            sys_uselib),         // 86 */Linux
+//ZZ //zz    //   (__NR_swapon,            sys_swapon),         // 87 */Linux
+//ZZ //zz    //   (__NR_reboot,            sys_reboot),         // 88 */Linux
+//ZZ //zz    //   (__NR_readdir,           old_readdir),        // 89 -- superseded
+//ZZ //zz 
+//ZZ //   _____(__NR_mmap,              old_mmap),           // 90
+//ZZ    GENXY(__NR_munmap,            sys_munmap),         // 91
+//ZZ    GENX_(__NR_truncate,          sys_truncate),       // 92
+//ZZ    GENX_(__NR_ftruncate,         sys_ftruncate),      // 93
+//ZZ    GENX_(__NR_fchmod,            sys_fchmod),         // 94
+//ZZ 
+//ZZ    LINX_(__NR_fchown,            sys_fchown16),       // 95
+//ZZ    GENX_(__NR_getpriority,       sys_getpriority),    // 96
+//ZZ    GENX_(__NR_setpriority,       sys_setpriority),    // 97
+//ZZ //   GENX_(__NR_profil,            sys_ni_syscall),     // 98
+//ZZ    GENXY(__NR_statfs,            sys_statfs),         // 99
+//ZZ 
+//ZZ    GENXY(__NR_fstatfs,           sys_fstatfs),        // 100
+//ZZ //   LINX_(__NR_ioperm,            sys_ioperm),         // 101
+//ZZ    LINXY(__NR_socketcall,        sys_socketcall),     // 102
+//ZZ    LINXY(__NR_syslog,            sys_syslog),         // 103
+//ZZ    GENXY(__NR_setitimer,         sys_setitimer),      // 104
+//ZZ 
+//ZZ    GENXY(__NR_getitimer,         sys_getitimer),      // 105
+//ZZ    GENXY(__NR_stat,              sys_newstat),        // 106
+//ZZ    GENXY(__NR_lstat,             sys_newlstat),       // 107
+//ZZ    GENXY(__NR_fstat,             sys_newfstat),       // 108
+//ZZ //zz    //   (__NR_olduname,          sys_uname),          // 109 -- obsolete
+//ZZ //zz 
+//ZZ //   GENX_(__NR_iopl,              sys_iopl),           // 110
+//ZZ    LINX_(__NR_vhangup,           sys_vhangup),        // 111
+//ZZ //   GENX_(__NR_idle,              sys_ni_syscall),     // 112
+//ZZ // PLAXY(__NR_vm86old,           sys_vm86old),        // 113 __NR_syscall... weird
+//ZZ //zz 
+//ZZ //zz    //   (__NR_swapoff,           sys_swapoff),        // 115 */Linux 
+//ZZ    LINXY(__NR_sysinfo,           sys_sysinfo),        // 116
+//ZZ //   _____(__NR_ipc,               sys_ipc),            // 117
+//ZZ    GENX_(__NR_fsync,             sys_fsync),          // 118
+//ZZ    PLAX_(__NR_sigreturn,         sys_sigreturn),      // 119 ?/Linux
+//ZZ 
+//ZZ //zz    //   (__NR_setdomainname,     sys_setdomainname),  // 121 */*(?)
+//ZZ //   PLAX_(__NR_modify_ldt,        sys_modify_ldt),     // 123
+//ZZ //zz    LINXY(__NR_adjtimex,          sys_adjtimex),       // 124
+//ZZ //zz 
+//ZZ    LINXY(__NR_sigprocmask,       sys_sigprocmask),    // 126
+//ZZ //zz    // Nb: create_module() was removed 2.4-->2.6
+//ZZ //   GENX_(__NR_create_module,     sys_ni_syscall),     // 127
+//ZZ    LINX_(__NR_init_module,       sys_init_module),    // 128
+//ZZ    LINX_(__NR_delete_module,     sys_delete_module),  // 129
+//ZZ //zz 
+//ZZ //zz    // Nb: get_kernel_syms() was removed 2.4-->2.6
+//ZZ //   GENX_(__NR_get_kernel_syms,   sys_ni_syscall),     // 130
+//ZZ    LINX_(__NR_quotactl,          sys_quotactl),       // 131
+//ZZ    GENX_(__NR_getpgid,           sys_getpgid),        // 132
+//ZZ    GENX_(__NR_fchdir,            sys_fchdir),         // 133
+//ZZ //zz    //   (__NR_bdflush,           sys_bdflush),        // 134 */Linux
+//ZZ //zz 
+//ZZ //zz    //   (__NR_sysfs,             sys_sysfs),          // 135 SVr4
+//ZZ    LINX_(__NR_personality,       sys_personality),    // 136
+//ZZ //   GENX_(__NR_afs_syscall,       sys_ni_syscall),     // 137
+//ZZ    LINX_(__NR_setfsuid,          sys_setfsuid16),     // 138
+//ZZ    LINX_(__NR_setfsgid,          sys_setfsgid16),     // 139
+//ZZ  
+//ZZ    LINXY(__NR__llseek,           sys_llseek),         // 140
+//ZZ    GENXY(__NR_getdents,          sys_getdents),       // 141
+//ZZ    GENX_(__NR__newselect,        sys_select),         // 142
+//ZZ    GENX_(__NR_flock,             sys_flock),          // 143
+//ZZ    GENX_(__NR_msync,             sys_msync),          // 144
+//ZZ 
+//ZZ    GENXY(__NR_readv,             sys_readv),          // 145
+//ZZ    GENX_(__NR_getsid,            sys_getsid),         // 147
+//ZZ    GENX_(__NR_fdatasync,         sys_fdatasync),      // 148
+//ZZ    LINXY(__NR__sysctl,           sys_sysctl),         // 149
+//ZZ 
+//ZZ    GENX_(__NR_mlock,             sys_mlock),          // 150
+//ZZ    GENX_(__NR_munlock,           sys_munlock),        // 151
+//ZZ    GENX_(__NR_mlockall,          sys_mlockall),       // 152
+//ZZ    LINX_(__NR_munlockall,        sys_munlockall),     // 153
+//ZZ    LINXY(__NR_sched_setparam,    sys_sched_setparam), // 154
+//ZZ 
+//ZZ    LINXY(__NR_sched_getparam,         sys_sched_getparam),        // 155
+//ZZ    LINX_(__NR_sched_setscheduler,     sys_sched_setscheduler),    // 156
+//ZZ    LINX_(__NR_sched_getscheduler,     sys_sched_getscheduler),    // 157
+//ZZ    LINX_(__NR_sched_yield,            sys_sched_yield),           // 158
+//ZZ    LINX_(__NR_sched_get_priority_max, sys_sched_get_priority_max),// 159
+//ZZ 
+//ZZ    LINX_(__NR_sched_get_priority_min, sys_sched_get_priority_min),// 160
+//ZZ //zz    //LINX?(__NR_sched_rr_get_interval,  sys_sched_rr_get_interval), // 161 */*
+//ZZ    GENXY(__NR_nanosleep,         sys_nanosleep),      // 162
+//ZZ    GENX_(__NR_mremap,            sys_mremap),         // 163
+//ZZ    LINX_(__NR_setresuid,         sys_setresuid16),    // 164
+//ZZ 
+//ZZ    LINXY(__NR_getresuid,         sys_getresuid16),    // 165
+//ZZ //   PLAXY(__NR_vm86,              sys_vm86),           // 166 x86/Linux-only
+//ZZ //   GENX_(__NR_query_module,      sys_ni_syscall),     // 167
+//ZZ    GENXY(__NR_poll,              sys_poll),           // 168
+//ZZ //zz    //   (__NR_nfsservctl,        sys_nfsservctl),     // 169 */Linux
+//ZZ //zz 
+//ZZ    LINX_(__NR_setresgid,         sys_setresgid16),    // 170
+//ZZ    LINXY(__NR_getresgid,         sys_getresgid16),    // 171
+//ZZ    LINXY(__NR_prctl,             sys_prctl),          // 172
+//ZZ    LINXY(__NR_rt_sigaction,      sys_rt_sigaction),   // 174
+//ZZ 
+//ZZ    LINXY(__NR_rt_sigpending,     sys_rt_sigpending),  // 176
+//ZZ    LINXY(__NR_rt_sigtimedwait,   sys_rt_sigtimedwait),// 177
+//ZZ    LINXY(__NR_rt_sigqueueinfo,   sys_rt_sigqueueinfo),// 178
+//ZZ    LINX_(__NR_rt_sigsuspend,     sys_rt_sigsuspend),  // 179
+//ZZ 
+//ZZ    GENXY(__NR_pread64,           sys_pread64),        // 180
+//ZZ    GENX_(__NR_pwrite64,          sys_pwrite64),       // 181
+//ZZ    LINX_(__NR_chown,             sys_chown16),        // 182
+//ZZ    LINXY(__NR_capget,            sys_capget),         // 184
+//ZZ 
+//ZZ    LINX_(__NR_capset,            sys_capset),         // 185
+//ZZ    GENXY(__NR_sigaltstack,       sys_sigaltstack),    // 186
+//ZZ    LINXY(__NR_sendfile,          sys_sendfile),       // 187
+//ZZ //   GENXY(__NR_getpmsg,           sys_getpmsg),        // 188
+//ZZ //   GENX_(__NR_putpmsg,           sys_putpmsg),        // 189
+//ZZ 
+//ZZ    // Nb: we treat vfork as fork
+//ZZ    GENX_(__NR_vfork,             sys_fork),           // 190
+//ZZ    GENXY(__NR_ugetrlimit,        sys_getrlimit),      // 191
+//ZZ    GENX_(__NR_truncate64,        sys_truncate64),     // 193
+//ZZ    GENX_(__NR_ftruncate64,       sys_ftruncate64),    // 194
+//ZZ    
+//ZZ    PLAXY(__NR_stat64,            sys_stat64),         // 195
+//ZZ    PLAXY(__NR_lstat64,           sys_lstat64),        // 196
+//ZZ    PLAXY(__NR_fstat64,           sys_fstat64),        // 197
+//ZZ    GENX_(__NR_lchown32,          sys_lchown),         // 198
+//ZZ    GENX_(__NR_getuid32,          sys_getuid),         // 199
+//ZZ 
+//ZZ    GENX_(__NR_getgid32,          sys_getgid),         // 200
+//ZZ    GENX_(__NR_geteuid32,         sys_geteuid),        // 201
+//ZZ    GENX_(__NR_getegid32,         sys_getegid),        // 202
+//ZZ    GENX_(__NR_setreuid32,        sys_setreuid),       // 203
+//ZZ    GENX_(__NR_setregid32,        sys_setregid),       // 204
+//ZZ 
+//ZZ    GENXY(__NR_getgroups32,       sys_getgroups),      // 205
+//ZZ    GENX_(__NR_setgroups32,       sys_setgroups),      // 206
+//ZZ    GENX_(__NR_fchown32,          sys_fchown),         // 207
+//ZZ    LINX_(__NR_setresuid32,       sys_setresuid),      // 208
+//ZZ    LINXY(__NR_getresuid32,       sys_getresuid),      // 209
+//ZZ 
+//ZZ    LINX_(__NR_setresgid32,       sys_setresgid),      // 210
+//ZZ    LINXY(__NR_getresgid32,       sys_getresgid),      // 211
+//ZZ    GENX_(__NR_chown32,           sys_chown),          // 212
+//ZZ    GENX_(__NR_setuid32,          sys_setuid),         // 213
+//ZZ    GENX_(__NR_setgid32,          sys_setgid),         // 214
+//ZZ 
+//ZZ    LINX_(__NR_setfsuid32,        sys_setfsuid),       // 215
+//ZZ    LINX_(__NR_setfsgid32,        sys_setfsgid),       // 216
+//ZZ //zz    //   (__NR_pivot_root,        sys_pivot_root),     // 217 */Linux
+//ZZ    GENXY(__NR_mincore,           sys_mincore),        // 218
+//ZZ    GENX_(__NR_madvise,           sys_madvise),        // 219
+//ZZ 
+//ZZ    LINXY(__NR_fcntl64,           sys_fcntl64),        // 221
+//ZZ //   GENX_(222,                    sys_ni_syscall),     // 222
+//ZZ //   PLAXY(223,                    sys_syscall223),     // 223 // sys_bproc?
+//ZZ 
+//ZZ    LINX_(__NR_readahead,         sys_readahead),      // 225 */Linux
+//ZZ    LINX_(__NR_setxattr,          sys_setxattr),       // 226
+//ZZ    LINX_(__NR_lsetxattr,         sys_lsetxattr),      // 227
+//ZZ    LINX_(__NR_fsetxattr,         sys_fsetxattr),      // 228
+//ZZ 
+//ZZ    LINXY(__NR_fgetxattr,         sys_fgetxattr),      // 231
+//ZZ    LINXY(__NR_listxattr,         sys_listxattr),      // 232
+//ZZ    LINXY(__NR_llistxattr,        sys_llistxattr),     // 233
+//ZZ    LINXY(__NR_flistxattr,        sys_flistxattr),     // 234
+//ZZ 
+//ZZ    LINX_(__NR_removexattr,       sys_removexattr),    // 235
+//ZZ    LINX_(__NR_lremovexattr,      sys_lremovexattr),   // 236
+//ZZ    LINX_(__NR_fremovexattr,      sys_fremovexattr),   // 237
+//ZZ    LINXY(__NR_tkill,             sys_tkill),          // 238 */Linux
+//ZZ    LINXY(__NR_sendfile64,        sys_sendfile64),     // 239
+//ZZ 
+//ZZ    LINXY(__NR_futex,             sys_futex),             // 240
+//ZZ    LINX_(__NR_sched_setaffinity, sys_sched_setaffinity), // 241
+//ZZ    LINXY(__NR_sched_getaffinity, sys_sched_getaffinity), // 242
+//ZZ //   PLAX_(__NR_set_thread_area,   sys_set_thread_area),   // 243
+//ZZ //   PLAX_(__NR_get_thread_area,   sys_get_thread_area),   // 244
+//ZZ 
+//ZZ    LINXY(__NR_io_setup,          sys_io_setup),       // 245
+//ZZ    LINX_(__NR_io_destroy,        sys_io_destroy),     // 246
+//ZZ    LINXY(__NR_io_getevents,      sys_io_getevents),   // 247
+//ZZ    LINX_(__NR_io_submit,         sys_io_submit),      // 248
+//ZZ    LINXY(__NR_io_cancel,         sys_io_cancel),      // 249
+//ZZ 
+//ZZ //   LINX_(__NR_fadvise64,         sys_fadvise64),      // 250 */(Linux?)
+//ZZ    GENX_(251,                    sys_ni_syscall),     // 251
+//ZZ //   GENXY(__NR_lookup_dcookie,    sys_lookup_dcookie), // 253
+//ZZ    LINXY(__NR_epoll_create,      sys_epoll_create),   // 254
+//ZZ 
+//ZZ    LINX_(__NR_epoll_ctl,         sys_epoll_ctl),         // 255
+//ZZ    LINXY(__NR_epoll_wait,        sys_epoll_wait),        // 256
+//ZZ //zz    //   (__NR_remap_file_pages,  sys_remap_file_pages),  // 257 */Linux
+//ZZ    LINX_(__NR_set_tid_address,   sys_set_tid_address),   // 258
+//ZZ    LINXY(__NR_timer_create,      sys_timer_create),      // 259
+//ZZ 
+//ZZ    LINXY(__NR_timer_settime,     sys_timer_settime),  // (timer_create+1)
+//ZZ    LINXY(__NR_timer_gettime,     sys_timer_gettime),  // (timer_create+2)
+//ZZ    LINX_(__NR_timer_getoverrun,  sys_timer_getoverrun),//(timer_create+3)
+//ZZ    LINX_(__NR_timer_delete,      sys_timer_delete),   // (timer_create+4)
+//ZZ    LINX_(__NR_clock_settime,     sys_clock_settime),  // (timer_create+5)
+//ZZ 
+//ZZ    LINXY(__NR_clock_getres,      sys_clock_getres),   // (timer_create+7)
+//ZZ    LINXY(__NR_clock_nanosleep,   sys_clock_nanosleep),// (timer_create+8) */*
+//ZZ    GENXY(__NR_statfs64,          sys_statfs64),       // 268
+//ZZ    GENXY(__NR_fstatfs64,         sys_fstatfs64),      // 269
+//ZZ 
+//ZZ    GENX_(__NR_utimes,            sys_utimes),         // 271
+//ZZ //   LINX_(__NR_fadvise64_64,      sys_fadvise64_64),   // 272 */(Linux?)
+//ZZ    GENX_(__NR_vserver,           sys_ni_syscall),     // 273
+//ZZ    LINX_(__NR_mbind,             sys_mbind),          // 274 ?/?
+//ZZ 
+//ZZ    LINXY(__NR_get_mempolicy,     sys_get_mempolicy),  // 275 ?/?
+//ZZ    LINX_(__NR_set_mempolicy,     sys_set_mempolicy),  // 276 ?/?
+//ZZ    LINXY(__NR_mq_open,           sys_mq_open),        // 277
+//ZZ    LINX_(__NR_mq_unlink,         sys_mq_unlink),      // (mq_open+1)
+//ZZ    LINX_(__NR_mq_timedsend,      sys_mq_timedsend),   // (mq_open+2)
+//ZZ 
+//ZZ    LINXY(__NR_mq_timedreceive,   sys_mq_timedreceive),// (mq_open+3)
+//ZZ    LINX_(__NR_mq_notify,         sys_mq_notify),      // (mq_open+4)
+//ZZ    LINXY(__NR_mq_getsetattr,     sys_mq_getsetattr),  // (mq_open+5)
+//ZZ    LINXY(__NR_waitid,            sys_waitid),         // 280
+//ZZ 
+//ZZ    LINX_(__NR_bind,              sys_bind),           // 282
+//ZZ    LINX_(__NR_listen,            sys_listen),         // 284
+//ZZ    LINXY(__NR_accept,            sys_accept),         // 285
+//ZZ    LINXY(__NR_getsockname,       sys_getsockname),    // 286
+//ZZ    LINXY(__NR_getpeername,       sys_getpeername),    // 287
+//ZZ    LINXY(__NR_socketpair,        sys_socketpair),     // 288
+//ZZ    LINX_(__NR_send,              sys_send),
+//ZZ    LINX_(__NR_sendto,            sys_sendto),         // 290
+//ZZ    LINXY(__NR_recv,              sys_recv),
+//ZZ    LINXY(__NR_recvfrom,          sys_recvfrom),       // 292
+//ZZ    LINX_(__NR_shutdown,          sys_shutdown),       // 293
+//ZZ    LINX_(__NR_setsockopt,        sys_setsockopt),     // 294
+//ZZ    LINXY(__NR_getsockopt,        sys_getsockopt),     // 295
+//ZZ    LINX_(__NR_sendmsg,           sys_sendmsg),        // 296
+//ZZ    LINXY(__NR_recvmsg,           sys_recvmsg),        // 297
+//ZZ    LINX_(__NR_semop,             sys_semop),          // 298 
+//ZZ    LINX_(__NR_semget,            sys_semget),         // 299
+//ZZ    LINXY(__NR_semctl,            sys_semctl),         // 300
+//ZZ    LINX_(__NR_msgget,            sys_msgget),         
+//ZZ    LINX_(__NR_msgsnd,            sys_msgsnd),          
+//ZZ    LINXY(__NR_msgrcv,            sys_msgrcv),         
+//ZZ    LINXY(__NR_msgctl,            sys_msgctl),         // 304
+//ZZ    LINX_(__NR_semtimedop,        sys_semtimedop),     // 312
+//ZZ 
+//ZZ    LINX_(__NR_add_key,           sys_add_key),        // 286
+//ZZ    LINX_(__NR_request_key,       sys_request_key),    // 287
+//ZZ    LINXY(__NR_keyctl,            sys_keyctl),         // not 288...
+//ZZ //   LINX_(__NR_ioprio_set,        sys_ioprio_set),     // 289
+//ZZ 
+//ZZ //   LINX_(__NR_ioprio_get,        sys_ioprio_get),     // 290
+//ZZ    LINX_(__NR_inotify_init,    sys_inotify_init),   // 291
+//ZZ    LINX_(__NR_inotify_add_watch, sys_inotify_add_watch), // 292
+//ZZ    LINX_(__NR_inotify_rm_watch,    sys_inotify_rm_watch), // 293
+//ZZ //   LINX_(__NR_migrate_pages,    sys_migrate_pages),    // 294
+//ZZ 
+//ZZ    LINX_(__NR_mknodat,       sys_mknodat),          // 297
+//ZZ    LINX_(__NR_fchownat,       sys_fchownat),         // 298
+//ZZ    LINX_(__NR_futimesat,    sys_futimesat),        // 326 on arm
+//ZZ 
+//ZZ    PLAXY(__NR_fstatat64,    sys_fstatat64),        // 300
+//ZZ    LINX_(__NR_renameat,       sys_renameat),         // 302
+//ZZ    LINX_(__NR_linkat,       sys_linkat),           // 303
+//ZZ    LINX_(__NR_symlinkat,    sys_symlinkat),        // 304
+//ZZ 
+//ZZ    LINX_(__NR_fchmodat,       sys_fchmodat),         //
+//ZZ    LINXY(__NR_shmat,         wrap_sys_shmat),       //305
+//ZZ    LINXY(__NR_shmdt,             sys_shmdt),          //306 
+//ZZ    LINX_(__NR_shmget,            sys_shmget),         //307 
+//ZZ    LINXY(__NR_shmctl,            sys_shmctl),         // 308 
+//ZZ //   LINX_(__NR_pselect6,       sys_pselect6),         //
+//ZZ 
+//ZZ //   LINX_(__NR_unshare,       sys_unshare),          // 310
+//ZZ    LINX_(__NR_set_robust_list,    sys_set_robust_list),  // 311
+//ZZ    LINXY(__NR_get_robust_list,    sys_get_robust_list),  // 312
+//ZZ //   LINX_(__NR_splice,            sys_ni_syscall),       // 313
+//ZZ //   LINX_(__NR_sync_file_range,   sys_sync_file_range),  // 314
+//ZZ 
+//ZZ //   LINX_(__NR_tee,               sys_ni_syscall),       // 315
+//ZZ //   LINX_(__NR_vmsplice,          sys_ni_syscall),       // 316
+//ZZ    LINXY(__NR_move_pages,        sys_move_pages),       // 317
+//ZZ //   LINX_(__NR_getcpu,            sys_ni_syscall),       // 318
+//ZZ 
+//ZZ    LINX_(__NR_utimensat,         sys_utimensat),        // 320
+//ZZ    LINXY(__NR_signalfd,          sys_signalfd),         // 321
+//ZZ    LINXY(__NR_timerfd_create,    sys_timerfd_create),   // 322
+//ZZ    LINX_(__NR_eventfd,           sys_eventfd),          // 323
+//ZZ 
+//ZZ    LINXY(__NR_timerfd_settime,   sys_timerfd_settime),  // 325
+//ZZ    LINXY(__NR_timerfd_gettime,   sys_timerfd_gettime),   // 326
+//ZZ 
+//ZZ    ///////////////
+//ZZ 
+//ZZ    // JRS 2010-Jan-03: I believe that all the numbers listed 
+//ZZ    // in comments in the table prior to this point (eg "// 326",
+//ZZ    // etc) are bogus since it looks to me like they are copied
+//ZZ    // verbatim from syswrap-x86-linux.c and they certainly do not
+//ZZ    // correspond to what's in include/vki/vki-scnums-arm-linux.h.
+//ZZ    // From here onwards, please ensure the numbers are correct.
+//ZZ 
+//ZZ    LINX_(__NR_pselect6,          sys_pselect6),         // 335
+//ZZ    LINXY(__NR_ppoll,             sys_ppoll),            // 336
+//ZZ 
+//ZZ    LINXY(__NR_epoll_pwait,       sys_epoll_pwait),      // 346
+//ZZ 
+//ZZ    LINX_(__NR_fallocate,         sys_fallocate),        // 352
+//ZZ 
+//ZZ    LINXY(__NR_signalfd4,         sys_signalfd4),        // 355
+//ZZ    LINX_(__NR_eventfd2,          sys_eventfd2),         // 356
+//ZZ    LINXY(__NR_epoll_create1,     sys_epoll_create1),    // 357
+//ZZ    LINXY(__NR_inotify_init1,     sys_inotify_init1),    // 360
+//ZZ    LINXY(__NR_preadv,            sys_preadv),           // 361
+//ZZ    LINX_(__NR_pwritev,           sys_pwritev),          // 362
+//ZZ    LINXY(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo),// 363
+//ZZ    LINXY(__NR_perf_event_open,   sys_perf_event_open),  // 364
+//ZZ 
+//ZZ    LINXY(__NR_accept4,           sys_accept4),          // 366
+//ZZ 
+//ZZ    LINXY(__NR_name_to_handle_at, sys_name_to_handle_at),// 370
+//ZZ    LINXY(__NR_open_by_handle_at, sys_open_by_handle_at) // 371
+};
+
+
+//ZZ /* These are not in the main table because their indexes are not small
+//ZZ    integers, but rather values close to one million.  So their
+//ZZ    inclusion would force the main table to be huge (about 8 MB). */
+//ZZ 
+//ZZ static SyscallTableEntry ste___ARM_set_tls
+//ZZ    = { WRAPPER_PRE_NAME(arm_linux,sys_set_tls), NULL };
+//ZZ 
+//ZZ static SyscallTableEntry ste___ARM_cacheflush
+//ZZ    = { WRAPPER_PRE_NAME(arm_linux,sys_cacheflush), NULL };
+
+/* Find the wrapper table entry for syscall 'sysno'; NULL if there is none. */
+SyscallTableEntry* ML_(get_linux_syscall_entry) ( UInt sysno )
+{
+   /* Slot count of the main table, which is indexed directly by sysno. */
+   const UInt n_slots
+      = sizeof(syscall_main_table) / sizeof(syscall_main_table[0]);
+   SyscallTableEntry* found = NULL;
+
+   /* Only a sysno that lies inside the table can have a wrapper
+      there, and a slot whose 'before' handler is NULL counts as
+      "no entry". */
+   if (sysno < n_slots && syscall_main_table[sysno].before != NULL)
+      found = &syscall_main_table[sysno];
+
+//ZZ    /* Check if it's one of the out-of-line entries. */
+//ZZ    switch (sysno) {
+//ZZ       case __NR_ARM_set_tls:    return &ste___ARM_set_tls;
+//ZZ       case __NR_ARM_cacheflush: return &ste___ARM_cacheflush;
+//ZZ       default: break;
+//ZZ    }
+
+   /* Still NULL here means we can't find a wrapper */
+   return found;
+}
+
+#endif // defined(VGP_arm64_linux)
+
+/*--------------------------------------------------------------------*/
+/*--- end                                    syswrap-arm64-linux.c ---*/
+/*--------------------------------------------------------------------*/
diff --git a/coregrind/m_syswrap/syswrap-linux.c b/coregrind/m_syswrap/syswrap-linux.c
index e8b516c..c0b3367 100644
--- a/coregrind/m_syswrap/syswrap-linux.c
+++ b/coregrind/m_syswrap/syswrap-linux.c
@@ -209,6 +209,8 @@
       /* This releases the run lock */
       VG_(exit_thread)(tid);
       vg_assert(tst->status == VgTs_Zombie);
+      vg_assert(sizeof(tst->status) == 4);
+      vg_assert(sizeof(tst->os_state.exitcode) == sizeof(Word));
 
       INNER_REQUEST (VALGRIND_STACK_DEREGISTER (registered_vgstack_id));
 
@@ -264,6 +266,16 @@
          : "r" (VgTs_Empty), "n" (__NR_exit), "m" (tst->os_state.exitcode)
          : "r0", "r7"
       );
+#elif defined(VGP_arm64_linux)
+      asm volatile (
+         "str  %w1, %0\n"     /* set tst->status = VgTs_Empty (32-bit store) */
+         "mov  x8,  %2\n"     /* set x8 = __NR_exit */
+         "ldr  x0,  %3\n"     /* set x0 = tst->os_state.exitcode */
+         "svc  0x00000000\n"  /* exit(tst->os_state.exitcode) */
+         : "=m" (tst->status)
+         : "r" (VgTs_Empty), "n" (__NR_exit), "m" (tst->os_state.exitcode)
+         : "x0", "x8"         /* the two registers written above */
+      );
 #elif defined(VGP_s390x_linux)
       asm volatile (
          "st   %1, %0\n"        /* set tst->status = VgTs_Empty */
@@ -276,7 +288,7 @@
 #elif defined(VGP_mips32_linux) || defined(VGP_mips64_linux)
       asm volatile (
          "sw   %1, %0\n\t"     /* set tst->status = VgTs_Empty */
-         "li  	$2, %2\n\t"     /* set v0 = __NR_exit */
+         "li   $2, %2\n\t"     /* set v0 = __NR_exit */
          "lw   $4, %3\n\t"     /* set a0 = tst->os_state.exitcode */
          "syscall\n\t"         /* exit(tst->os_state.exitcode) */
          "nop"
@@ -428,7 +440,7 @@
 #if defined(VGP_x86_linux) \
     || defined(VGP_ppc32_linux) || defined(VGP_ppc64_linux) \
     || defined(VGP_arm_linux) || defined(VGP_mips32_linux) \
-    || defined(VGP_mips64_linux)
+    || defined(VGP_mips64_linux) || defined(VGP_arm64_linux)
    res = VG_(do_syscall5)( __NR_clone, flags, 
                            (UWord)NULL, (UWord)parent_tidptr, 
                            (UWord)NULL, (UWord)child_tidptr );
diff --git a/coregrind/m_syswrap/syswrap-main.c b/coregrind/m_syswrap/syswrap-main.c
index 3cb20e1..292723c 100644
--- a/coregrind/m_syswrap/syswrap-main.c
+++ b/coregrind/m_syswrap/syswrap-main.c
@@ -70,6 +70,7 @@
    arm    r7    r0   r1   r2   r3   r4   r5   n/a  n/a  r0        (== ARG1)
    mips32 v0    a0   a1   a2   a3 stack stack n/a  n/a  v0        (== NUM)
    mips64 v0    a0   a1   a2   a3   a4   a5   a6   a7   v0        (== NUM)
+   arm64  x8    x0   x1   x2   x3   x4   x5   n/a  n/a  x0        (== ARG1)
 
    On s390x the svc instruction is used for system calls. The system call
    number is encoded in the instruction (8 bit immediate field). Since Linux
@@ -464,6 +465,18 @@
    canonical->arg7  = 0;
    canonical->arg8  = 0;
 
+#elif defined(VGP_arm64_linux)
+   VexGuestARM64State* gst = (VexGuestARM64State*)gst_vanilla;
+   canonical->sysno = gst->guest_X8;
+   canonical->arg1  = gst->guest_X0;
+   canonical->arg2  = gst->guest_X1;
+   canonical->arg3  = gst->guest_X2;
+   canonical->arg4  = gst->guest_X3;
+   canonical->arg5  = gst->guest_X4;
+   canonical->arg6  = gst->guest_X5;
+   canonical->arg7  = 0;
+   canonical->arg8  = 0;
+
 #elif defined(VGP_mips32_linux)
    VexGuestMIPS32State* gst = (VexGuestMIPS32State*)gst_vanilla;
    canonical->sysno = gst->guest_r2;    // v0
@@ -694,6 +707,16 @@
    gst->guest_R4 = canonical->arg5;
    gst->guest_R5 = canonical->arg6;
 
+#elif defined(VGP_arm64_linux)
+   VexGuestARM64State* gst = (VexGuestARM64State*)gst_vanilla;
+   gst->guest_X8 = canonical->sysno;
+   gst->guest_X0 = canonical->arg1;
+   gst->guest_X1 = canonical->arg2;
+   gst->guest_X2 = canonical->arg3;
+   gst->guest_X3 = canonical->arg4;
+   gst->guest_X4 = canonical->arg5;
+   gst->guest_X5 = canonical->arg6;
+
 #elif defined(VGP_x86_darwin)
    VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
    UWord *stack = (UWord *)gst->guest_ESP;
@@ -746,8 +769,8 @@
       gst->guest_r5 = canonical->arg2;
       gst->guest_r6 = canonical->arg3;
       gst->guest_r7 = canonical->arg4;
-      *((UInt*) (gst->guest_r29 + 16)) = canonical->arg5;    // 16(guest_GPR29/sp)
-      *((UInt*) (gst->guest_r29 + 20)) = canonical->arg6;    // 20(sp)
+      *((UInt*) (gst->guest_r29 + 16)) = canonical->arg5; // 16(guest_GPR29/sp)
+      *((UInt*) (gst->guest_r29 + 20)) = canonical->arg6; // 20(sp)
    } else {
       canonical->arg8 = 0;
       gst->guest_r2 = __NR_syscall;
@@ -755,9 +778,9 @@
       gst->guest_r5 = canonical->arg1;
       gst->guest_r6 = canonical->arg2;
       gst->guest_r7 = canonical->arg3;
-      *((UInt*) (gst->guest_r29 + 16)) = canonical->arg4;    // 16(guest_GPR29/sp)
-      *((UInt*) (gst->guest_r29 + 20)) = canonical->arg5;    // 20(sp)
-      *((UInt*) (gst->guest_r29 + 24)) = canonical->arg6;    // 24(sp)
+      *((UInt*) (gst->guest_r29 + 16)) = canonical->arg4; // 16(guest_GPR29/sp)
+      *((UInt*) (gst->guest_r29 + 20)) = canonical->arg5; // 20(sp)
+      *((UInt*) (gst->guest_r29 + 24)) = canonical->arg6; // 24(sp)
    }
 
 #elif defined(VGP_mips64_linux)
@@ -807,6 +830,11 @@
    canonical->sres = VG_(mk_SysRes_arm_linux)( gst->guest_R0 );
    canonical->what = SsComplete;
 
+#  elif defined(VGP_arm64_linux)
+   VexGuestARM64State* gst = (VexGuestARM64State*)gst_vanilla;
+   canonical->sres = VG_(mk_SysRes_arm64_linux)( gst->guest_X0 );
+   canonical->what = SsComplete;
+
 #  elif defined(VGP_mips32_linux)
    VexGuestMIPS32State* gst = (VexGuestMIPS32State*)gst_vanilla;
    UInt                v0 = gst->guest_r2;    // v0
@@ -980,6 +1008,20 @@
    VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, 
              OFFSET_arm_R0, sizeof(UWord) );
 
+#  elif defined(VGP_arm64_linux)
+   VexGuestARM64State* gst = (VexGuestARM64State*)gst_vanilla;
+   vg_assert(canonical->what == SsComplete);
+   if (sr_isError(canonical->sres)) {
+      /* This isn't exactly right, in that really a Failure with res
+         not in the range 1 .. 4095 is unrepresentable in the
+         Linux-arm64 scheme.  Oh well. */
+      gst->guest_X0 = - (Long)sr_Err(canonical->sres);
+   } else {
+      gst->guest_X0 = sr_Res(canonical->sres);
+   }
+   VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, 
+             OFFSET_arm64_X0, sizeof(UWord) );
+
 #elif defined(VGP_x86_darwin)
    VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
    SysRes sres = canonical->sres;
@@ -1104,6 +1146,8 @@
 static
 void getSyscallArgLayout ( /*OUT*/SyscallArgLayout* layout )
 {
+   VG_(bzero_inline)(layout, sizeof(*layout));
+
 #if defined(VGP_x86_linux)
    layout->o_sysno  = OFFSET_x86_EAX;
    layout->o_arg1   = OFFSET_x86_EBX;
@@ -1159,6 +1203,17 @@
    layout->uu_arg7  = -1; /* impossible value */
    layout->uu_arg8  = -1; /* impossible value */
 
+#elif defined(VGP_arm64_linux)
+   layout->o_sysno  = OFFSET_arm64_X8;
+   layout->o_arg1   = OFFSET_arm64_X0;
+   layout->o_arg2   = OFFSET_arm64_X1;
+   layout->o_arg3   = OFFSET_arm64_X2;
+   layout->o_arg4   = OFFSET_arm64_X3;
+   layout->o_arg5   = OFFSET_arm64_X4;
+   layout->o_arg6   = OFFSET_arm64_X5;
+   layout->uu_arg7  = -1; /* impossible value */
+   layout->uu_arg8  = -1; /* impossible value */
+
 #elif defined(VGP_mips32_linux)
    layout->o_sysno  = OFFSET_mips32_r2;
    layout->o_arg1   = OFFSET_mips32_r4;
@@ -1990,6 +2045,10 @@
       vg_assert(valid);
    }
 
+#elif defined(VGP_arm64_linux)
+   // probably simplest to copy the ppc version
+   I_die_here;
+
 #elif defined(VGP_x86_darwin)
    arch->vex.guest_EIP = arch->vex.guest_IP_AT_SYSCALL; 
 
diff --git a/coregrind/m_trampoline.S b/coregrind/m_trampoline.S
index aac7282..475133f 100644
--- a/coregrind/m_trampoline.S
+++ b/coregrind/m_trampoline.S
@@ -519,9 +519,9 @@
 #	undef UD2_1024
 #	undef UD2_PAGE
 
-/*---------------- ppc32-linux ----------------*/
-
-#elif defined(VGP_arm_linux)
+/*---------------- arm-linux ----------------*/
+#else
+#if defined(VGP_arm_linux)
 
 #       define UD2_4      .word 0xFFFFFFFF
 #	define UD2_16     UD2_4    ; UD2_4    ; UD2_4    ; UD2_4
@@ -680,6 +680,45 @@
 #	undef UD2_1024
 #	undef UD2_PAGE
         
+/*---------------- arm64-linux ----------------*/
+#else
+#if defined(VGP_arm64_linux)
+
+#       define UD2_4      .word 0xFFFFFFFF
+#	define UD2_16     UD2_4    ; UD2_4    ; UD2_4    ; UD2_4
+#	define UD2_64     UD2_16   ; UD2_16   ; UD2_16   ; UD2_16
+#	define UD2_256    UD2_64   ; UD2_64   ; UD2_64   ; UD2_64
+#	define UD2_1024   UD2_256  ; UD2_256  ; UD2_256  ; UD2_256
+#	define UD2_PAGE   UD2_1024 ; UD2_1024 ; UD2_1024 ; UD2_1024  
+
+	/* a leading page of unexecutable code */
+	UD2_PAGE
+
+.global VG_(trampoline_stuff_start)
+VG_(trampoline_stuff_start):
+
+.global VG_(arm64_linux_SUBST_FOR_rt_sigreturn)
+.type   VG_(arm64_linux_SUBST_FOR_rt_sigreturn),#function
+VG_(arm64_linux_SUBST_FOR_rt_sigreturn):
+	mov x8, # __NR_rt_sigreturn
+        svc #0
+        .long 0xFFFFFFFF /*illegal insn*/
+.size VG_(arm64_linux_SUBST_FOR_rt_sigreturn), \
+        .-VG_(arm64_linux_SUBST_FOR_rt_sigreturn)
+
+.global VG_(trampoline_stuff_end)
+VG_(trampoline_stuff_end):
+
+	/* and a trailing page of unexecutable code */
+	UD2_PAGE
+
+#	undef UD2_4
+#	undef UD2_16
+#	undef UD2_64
+#	undef UD2_256
+#	undef UD2_1024
+#	undef UD2_PAGE
+        
 /*---------------- x86-darwin ----------------*/
 #else
 #if defined(VGP_x86_darwin)
@@ -1148,6 +1187,8 @@
 #endif
 #endif
 #endif
+#endif
+#endif
 
 #if defined(VGO_linux)
 /* Let the linker know we don't need an executable stack */
diff --git a/coregrind/m_vki.c b/coregrind/m_vki.c
index e67c24b..1563623 100644
--- a/coregrind/m_vki.c
+++ b/coregrind/m_vki.c
@@ -42,7 +42,8 @@
 /* ppc32/64-linux determines page size at startup, hence m_vki is
    the logical place to store that info. */
 
-#if defined(VGP_ppc32_linux) || defined(VGP_ppc64_linux)
+#if defined(VGP_ppc32_linux) || defined(VGP_ppc64_linux) \
+    || defined(VGP_arm64_linux)
 unsigned long VKI_PAGE_SHIFT = 12;
 unsigned long VKI_PAGE_SIZE  = 1UL << 12;
 #endif
diff --git a/coregrind/pub_core_aspacemgr.h b/coregrind/pub_core_aspacemgr.h
index 1708fbe..b6b4e4f 100644
--- a/coregrind/pub_core_aspacemgr.h
+++ b/coregrind/pub_core_aspacemgr.h
@@ -345,7 +345,8 @@
 // protects such stacks.
 
 #if defined(VGP_ppc32_linux) || defined(VGP_ppc64_linux) \
-    || defined(VGP_mips32_linux) || defined(VGP_mips64_linux)
+    || defined(VGP_mips32_linux) || defined(VGP_mips64_linux) \
+    || defined(VGP_arm64_linux)
 # define VG_STACK_GUARD_SZB  65536  // 1 or 16 pages
 # define VG_STACK_ACTIVE_SZB (4096 * 256) // 1Mb
 #else
diff --git a/coregrind/pub_core_basics.h b/coregrind/pub_core_basics.h
index a0b9508..4e50517 100644
--- a/coregrind/pub_core_basics.h
+++ b/coregrind/pub_core_basics.h
@@ -58,6 +58,8 @@
 #  include "libvex_guest_ppc64.h"
 #elif defined(VGA_arm)
 #  include "libvex_guest_arm.h"
+#elif defined(VGA_arm64)
+#  include "libvex_guest_arm64.h"
 #elif defined(VGA_s390x)
 #  include "libvex_guest_s390x.h"
 #elif defined(VGA_mips32)
@@ -109,6 +111,11 @@
             UInt r7;
          } ARM;
          struct {
+            // FIXME ARM64 is this correct?
+            ULong x29; /* FP */
+            ULong x30; /* LR */
+         } ARM64;
+         struct {
             ULong r_fp;
             ULong r_lr;
          } S390X;
diff --git a/coregrind/pub_core_debuginfo.h b/coregrind/pub_core_debuginfo.h
index 100f2c7..ed78c5c 100644
--- a/coregrind/pub_core_debuginfo.h
+++ b/coregrind/pub_core_debuginfo.h
@@ -112,6 +112,10 @@
 typedef
    struct { Addr r15; Addr r14; Addr r13; Addr r12; Addr r11; Addr r7; }
    D3UnwindRegs;
+#elif defined(VGA_arm64)
+typedef
+   struct { Addr pc; Addr sp; Addr lr; Addr fp; } /* PC, 31, 30, 29 */
+   D3UnwindRegs;
 #elif defined(VGA_ppc32) || defined(VGA_ppc64)
 typedef
    UChar  /* should be void, but gcc complains at use points */
diff --git a/coregrind/pub_core_machine.h b/coregrind/pub_core_machine.h
index 3666d28..3443061 100644
--- a/coregrind/pub_core_machine.h
+++ b/coregrind/pub_core_machine.h
@@ -66,6 +66,11 @@
 #  define VG_ELF_MACHINE      EM_ARM
 #  define VG_ELF_CLASS        ELFCLASS32
 #  undef  VG_PLAT_USES_PPCTOC
+#elif defined(VGP_arm64_linux)
+#  define VG_ELF_DATA2XXX     ELFDATA2LSB
+#  define VG_ELF_MACHINE      EM_AARCH64
+#  define VG_ELF_CLASS        ELFCLASS64
+#  undef  VG_PLAT_USES_PPCTOC
 #elif defined(VGO_darwin)
 #  undef  VG_ELF_DATA2XXX
 #  undef  VG_ELF_MACHINE
@@ -122,6 +127,10 @@
 #  define VG_INSTR_PTR        guest_R15T
 #  define VG_STACK_PTR        guest_R13
 #  define VG_FRAME_PTR        guest_R11
+#elif defined(VGA_arm64)
+#  define VG_INSTR_PTR        guest_PC
+#  define VG_STACK_PTR        guest_SP
+#  define VG_FRAME_PTR        guest_X29  // x29 is the frame pointer (FP)
 #elif defined(VGA_s390x)
 #  define VG_INSTR_PTR        guest_IA
 #  define VG_STACK_PTR        guest_SP
diff --git a/coregrind/pub_core_mallocfree.h b/coregrind/pub_core_mallocfree.h
index 657ec7f..3267c8f 100644
--- a/coregrind/pub_core_mallocfree.h
+++ b/coregrind/pub_core_mallocfree.h
@@ -77,7 +77,8 @@
       defined(VGP_s390x_linux)  || \
       defined(VGP_mips64_linux) || \
       defined(VGP_x86_darwin)   || \
-      defined(VGP_amd64_darwin)
+      defined(VGP_amd64_darwin) || \
+      defined(VGP_arm64_linux)
 #  define VG_MIN_MALLOC_SZB       16
 #else
 #  error Unknown platform
diff --git a/coregrind/pub_core_syscall.h b/coregrind/pub_core_syscall.h
index 8774049..e35ff44 100644
--- a/coregrind/pub_core_syscall.h
+++ b/coregrind/pub_core_syscall.h
@@ -76,6 +76,7 @@
 extern SysRes VG_(mk_SysRes_ppc32_linux) ( UInt  val, UInt  cr0so );
 extern SysRes VG_(mk_SysRes_ppc64_linux) ( ULong val, ULong cr0so );
 extern SysRes VG_(mk_SysRes_arm_linux)   ( Int val );
+extern SysRes VG_(mk_SysRes_arm64_linux) ( Long val );
 extern SysRes VG_(mk_SysRes_x86_darwin)  ( UChar scclass, Bool isErr,
                                            UInt wHI, UInt wLO );
 extern SysRes VG_(mk_SysRes_amd64_darwin)( UChar scclass, Bool isErr,
diff --git a/coregrind/pub_core_threadstate.h b/coregrind/pub_core_threadstate.h
index 7a9611b..694dddc 100644
--- a/coregrind/pub_core_threadstate.h
+++ b/coregrind/pub_core_threadstate.h
@@ -88,6 +88,8 @@
    typedef VexGuestPPC64State VexGuestArchState;
 #elif defined(VGA_arm)
    typedef VexGuestARMState   VexGuestArchState;
+#elif defined(VGA_arm64)
+   typedef VexGuestARM64State VexGuestArchState;
 #elif defined(VGA_s390x)
    typedef VexGuestS390XState VexGuestArchState;
 #elif defined(VGA_mips32)
diff --git a/coregrind/pub_core_trampoline.h b/coregrind/pub_core_trampoline.h
index 5134184..39baac6 100644
--- a/coregrind/pub_core_trampoline.h
+++ b/coregrind/pub_core_trampoline.h
@@ -103,6 +103,10 @@
 extern void* VG_(arm_linux_REDIR_FOR_memcpy)( void*, void*, Int );
 #endif
 
+#if defined(VGP_arm64_linux)
+extern Addr  VG_(arm64_linux_SUBST_FOR_rt_sigreturn);
+#endif
+
 #if defined(VGP_x86_darwin)
 extern Addr  VG_(x86_darwin_SUBST_FOR_sigreturn);
 extern SizeT VG_(x86_darwin_REDIR_FOR_strlen)( void* );
diff --git a/coregrind/pub_core_transtab_asm.h b/coregrind/pub_core_transtab_asm.h
index ab3acc1..2a3f0f1 100644
--- a/coregrind/pub_core_transtab_asm.h
+++ b/coregrind/pub_core_transtab_asm.h
@@ -37,10 +37,10 @@
    On x86/amd64, the cache index is computed as
    'address[VG_TT_FAST_BITS-1 : 0]'.
 
-   On ppc32/ppc64, the bottom two bits of instruction addresses are
-   zero, which means that function causes only 1/4 of the entries to
-   ever be used.  So instead the function is '(address >>u
-   2)[VG_TT_FAST_BITS-1 : 0]' on those targets.
+   On ppc32/ppc64/mips32/mips64/arm64, the bottom two bits of
+   instruction addresses are zero, which means that function causes
+   only 1/4 of the entries to ever be used.  So instead the function
+   is '(address >>u 2)[VG_TT_FAST_BITS-1 : 0]' on those targets.
 
    On ARM we shift by 1, since Thumb insns can be of size 2, hence to
    minimise collisions and maximise cache utilisation we need to take
@@ -63,7 +63,7 @@
 #  define VG_TT_FAST_HASH(_addr)  ((((UWord)(_addr)) >> 1) & VG_TT_FAST_MASK)
 
 #elif defined(VGA_ppc32) || defined(VGA_ppc64) || defined(VGA_mips32) \
-      || defined(VGA_mips64)
+      || defined(VGA_mips64) || defined(VGA_arm64)
 #  define VG_TT_FAST_HASH(_addr)  ((((UWord)(_addr)) >> 2) & VG_TT_FAST_MASK)
 
 #else
diff --git a/docs/internals/register-uses.txt b/docs/internals/register-uses.txt
index 16f7e60..9e946e2 100644
--- a/docs/internals/register-uses.txt
+++ b/docs/internals/register-uses.txt
@@ -134,6 +134,46 @@
 often isn't enough.
 
 
+arm64-linux
+~~~~~~~~~~~
+
+Reg        Callee     Arg
+Name       Saves?     Reg?       Comment              Vex-uses?
+---------------------------------------------------------------
+r0                    int#0      ret#0 (??)
+r1                    int#1      ret#1 (??)
+r2-7                  int#2..7 
+r8                              "Indirect res loc reg"
+r9                              "Temporary regs"      chaining scratch
+r10-15                          "Temporary regs"      avail
+r16(IP0)
+r17(IP1)
+r18                             "Platform reg"
+r19-20     maybe                "Temporary regs"
+r21        y                    "Callee saved"        GSP
+r22-28     y                    "Callee saved"
+r29(FP)    y
+r30(LR)    y
+
+NZCV                            "Status register"
+
+Is there a TLS register?
+
+x21 is the GSP.  x9 is a scratch chaining/spill temp.  Neither
+is available to the register allocator.
+
+Q registers:
+It's a little awkward. Basically, D registers are the same as ARM,
+so d0-d7 and d16-d31 are caller-saved, but d8-d15 are callee-saved.
+
+Q registers are the same, except that the upper 64 bits of q8-q15
+are caller-saved.
+
+The idea is that you only need to preserve D registers, not Q
+registers.
+
+
+
 s390x-linux
 ~~~~~~~~~~~
 
diff --git a/drd/drd_bitmap.h b/drd/drd_bitmap.h
index fa5d54e..939afe4 100644
--- a/drd/drd_bitmap.h
+++ b/drd/drd_bitmap.h
@@ -140,7 +140,7 @@
     || defined(VGA_mips32)
 #define BITS_PER_BITS_PER_UWORD 5
 #elif defined(VGA_amd64) || defined(VGA_ppc64) || defined(VGA_s390x) \
-      || defined(VGA_mips64)
+      || defined(VGA_mips64) || defined(VGA_arm64)
 #define BITS_PER_BITS_PER_UWORD 6
 #else
 #error Unknown platform.
diff --git a/drd/drd_load_store.c b/drd/drd_load_store.c
index 3ae0b34..005f728 100644
--- a/drd/drd_load_store.c
+++ b/drd/drd_load_store.c
@@ -47,6 +47,8 @@
 #define STACK_POINTER_OFFSET OFFSET_ppc64_GPR1
 #elif defined(VGA_arm)
 #define STACK_POINTER_OFFSET OFFSET_arm_R13
+#elif defined(VGA_arm64)
+#define STACK_POINTER_OFFSET OFFSET_arm64_SP
 #elif defined(VGA_s390x)
 #define STACK_POINTER_OFFSET OFFSET_s390x_r15
 #elif defined(VGA_mips32)
diff --git a/include/pub_tool_basics.h b/include/pub_tool_basics.h
index ce977f3..2236d00 100644
--- a/include/pub_tool_basics.h
+++ b/include/pub_tool_basics.h
@@ -269,7 +269,8 @@
 #undef VG_LITTLEENDIAN
 
 #if defined(VGA_x86) || defined(VGA_amd64) || defined (VGA_arm) \
-    || ((defined(VGA_mips32) || defined(VGA_mips64)) && defined (_MIPSEL))
+    || ((defined(VGA_mips32) || defined(VGA_mips64)) && defined (_MIPSEL)) \
+    || defined(VGA_arm64)
 #  define VG_LITTLEENDIAN 1
 #elif defined(VGA_ppc32) || defined(VGA_ppc64) || defined(VGA_s390x) \
       || ((defined(VGA_mips32) || defined(VGA_mips64)) && defined (_MIPSEB))
@@ -283,7 +284,8 @@
 #  define VG_REGPARM(n)            __attribute__((regparm(n)))
 #elif defined(VGA_amd64) || defined(VGA_ppc32) \
       || defined(VGA_ppc64) || defined(VGA_arm) || defined(VGA_s390x) \
-      || defined(VGA_mips32) || defined(VGA_mips64)
+      || defined(VGA_mips32) || defined(VGA_mips64) \
+      || defined(VGA_arm64)
 #  define VG_REGPARM(n)            /* */
 #else
 #  error Unknown arch
diff --git a/include/pub_tool_machine.h b/include/pub_tool_machine.h
index 410d347..e956419 100644
--- a/include/pub_tool_machine.h
+++ b/include/pub_tool_machine.h
@@ -67,6 +67,12 @@
 #  define VG_CLREQ_SZB             20
 #  define VG_STACK_REDZONE_SZB      0
 
+#elif defined(VGP_arm64_linux)
+#  define VG_MIN_INSTR_SZB          4
+#  define VG_MAX_INSTR_SZB          4 
+#  define VG_CLREQ_SZB             20
+#  define VG_STACK_REDZONE_SZB      0
+
 #elif defined(VGP_s390x_linux)
 #  define VG_MIN_INSTR_SZB          2
 #  define VG_MAX_INSTR_SZB          6
@@ -124,14 +130,6 @@
                             /*DST*/Int shadowNo, PtrdiffT offset, SizeT size,
                             /*SRC*/const UChar* src );
 
-// Sets the shadow values for the syscall return value register(s).
-// This is platform specific.
-void VG_(set_syscall_return_shadows) ( ThreadId tid,
-                                       /* shadow vals for the result */
-                                       UWord s1res, UWord s2res,
-                                       /* shadow vals for the error val */
-                                       UWord s1err, UWord s2err );
-
 // Apply a function 'f' to all the general purpose registers in all the
 // current threads. This is all live threads, or (when the process is exiting)
 // all threads that were instructed to die by the thread calling exit.
diff --git a/include/pub_tool_vkiscnums_asm.h b/include/pub_tool_vkiscnums_asm.h
index eccb664..6f84651 100644
--- a/include/pub_tool_vkiscnums_asm.h
+++ b/include/pub_tool_vkiscnums_asm.h
@@ -51,6 +51,9 @@
 #elif defined(VGP_arm_linux)
 #  include "vki/vki-scnums-arm-linux.h"
 
+#elif defined(VGP_arm64_linux)
+#  include "vki/vki-scnums-arm64-linux.h"
+
 #elif defined(VGP_mips32_linux)
 #  include "vki/vki-scnums-mips32-linux.h"
 
diff --git a/include/valgrind.h b/include/valgrind.h
index 3c2098c..9c86548 100644
--- a/include/valgrind.h
+++ b/include/valgrind.h
@@ -117,6 +117,7 @@
 #undef PLAT_ppc32_linux
 #undef PLAT_ppc64_linux
 #undef PLAT_arm_linux
+#undef PLAT_arm64_linux
 #undef PLAT_s390x_linux
 #undef PLAT_mips32_linux
 #undef PLAT_mips64_linux
@@ -139,16 +140,16 @@
 #  define PLAT_ppc32_linux 1
 #elif defined(__linux__) && defined(__powerpc__) && defined(__powerpc64__)
 #  define PLAT_ppc64_linux 1
-#elif defined(__linux__) && defined(__arm__)
+#elif defined(__linux__) && defined(__arm__) && !defined(__aarch64__)
 #  define PLAT_arm_linux 1
+#elif defined(__linux__) && defined(__aarch64__) && !defined(__arm__)
+#  define PLAT_arm64_linux 1
 #elif defined(__linux__) && defined(__s390__) && defined(__s390x__)
 #  define PLAT_s390x_linux 1
-#elif defined(__linux__) && defined(__mips__)
-#if (__mips==64)
+#elif defined(__linux__) && defined(__mips__) && (__mips==64)
 #  define PLAT_mips64_linux 1
-#else
+#elif defined(__linux__) && defined(__mips__) && (__mips!=64)
 #  define PLAT_mips32_linux 1
-#endif
 #else
 /* If we're not compiling for our target platform, don't generate
    any inline asms.  */
@@ -654,6 +655,74 @@
 
 #endif /* PLAT_arm_linux */
 
+/* ------------------------ arm64-linux ------------------------- */
+
+#if defined(PLAT_arm64_linux)
+
+typedef
+   struct { 
+      unsigned long long int nraddr; /* where's the code? */
+   }
+   OrigFn;
+
+#define __SPECIAL_INSTRUCTION_PREAMBLE                            \
+            "ror x12, x12, #3  ;  ror x12, x12, #13 \n\t"         \
+            "ror x12, x12, #51 ;  ror x12, x12, #61 \n\t"
+
+#define VALGRIND_DO_CLIENT_REQUEST_EXPR(                          \
+        _zzq_default, _zzq_request,                               \
+        _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5)    \
+                                                                  \
+  __extension__                                                   \
+  ({volatile unsigned long long int  _zzq_args[6];                \
+    volatile unsigned long long int  _zzq_result;                 \
+    _zzq_args[0] = (unsigned long long int)(_zzq_request);        \
+    _zzq_args[1] = (unsigned long long int)(_zzq_arg1);           \
+    _zzq_args[2] = (unsigned long long int)(_zzq_arg2);           \
+    _zzq_args[3] = (unsigned long long int)(_zzq_arg3);           \
+    _zzq_args[4] = (unsigned long long int)(_zzq_arg4);           \
+    _zzq_args[5] = (unsigned long long int)(_zzq_arg5);           \
+    __asm__ volatile("mov x3, %1\n\t" /*default*/                 \
+                     "mov x4, %2\n\t" /*ptr*/                     \
+                     __SPECIAL_INSTRUCTION_PREAMBLE               \
+                     /* X3 = client_request ( X4 ) */             \
+                     "orr x10, x10, x10\n\t"                      \
+                     "mov %0, x3"     /*result*/                  \
+                     : "=r" (_zzq_result)                         \
+                     : "r" (_zzq_default), "r" (&_zzq_args[0])    \
+                     : "cc","memory", "x3", "x4");                \
+    _zzq_result;                                                  \
+  })
+
+#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval)                       \
+  { volatile OrigFn* _zzq_orig = &(_zzq_rlval);                   \
+    unsigned long long int __addr;                                \
+    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE               \
+                     /* X3 = guest_NRADDR */                      \
+                     "orr x11, x11, x11\n\t"                      \
+                     "mov %0, x3"                                 \
+                     : "=r" (__addr)                              \
+                     :                                            \
+                     : "cc", "memory", "x3"                       \
+                    );                                            \
+    _zzq_orig->nraddr = __addr;                                   \
+  }
+
+#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8                    \
+                     __SPECIAL_INSTRUCTION_PREAMBLE               \
+                     /* branch-and-link-to-noredir X8 */          \
+                     "orr x12, x12, x12\n\t"
+
+#define VALGRIND_VEX_INJECT_IR()                                 \
+ do {                                                            \
+    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE              \
+                     "orr x9, x9, x9\n\t"                        \
+                     : : : "cc", "memory"                        \
+                    );                                           \
+ } while (0)
+
+#endif /* PLAT_arm64_linux */
+
 /* ------------------------ s390x-linux ------------------------ */
 
 #if defined(PLAT_s390x_linux)
@@ -3470,6 +3539,143 @@
 
 #endif /* PLAT_arm_linux */
 
+/* ------------------------ arm64-linux ------------------------ */
+
+#if defined(PLAT_arm64_linux)
+
+/* Regs trashed by the hidden call; x30 (LR) is written by branch-and-link. */
+#define __CALLER_SAVED_REGS \
+     "x0", "x1", "x2", "x3","x4", "x5", "x6", "x7", "x8", "x9",   \
+     "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17",      \
+     "x18", "x19", "x20", "x30",                                  \
+     "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",  \
+     "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",      \
+     "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",      \
+     "v26", "v27", "v28", "v29", "v30", "v31"
+
+#define VALGRIND_ALIGN_STACK   /* FIXME! */
+#define VALGRIND_RESTORE_STACK /* FIXME! */
+
+/* These CALL_FN_ macros assume that on arm64-linux,
+   sizeof(unsigned long) == 8. */
+
+#define CALL_FN_W_v(lval, orig)                                   \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[1];                          \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      __asm__ volatile(                                           \
+         VALGRIND_ALIGN_STACK                                     \
+         "ldr x8, [%1] \n\t"  /* target->x8 */                    \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8                   \
+         VALGRIND_RESTORE_STACK                                   \
+         "mov %0, x0\n"                                           \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "0" (&_argvec[0])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS/*, "r10"*/     \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_W(lval, orig, arg1)                             \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[2];                          \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      __asm__ volatile(                                           \
+         VALGRIND_ALIGN_STACK                                     \
+         "ldr x0, [%1, #8] \n\t"                                  \
+         "ldr x8, [%1] \n\t"  /* target->x8 */                    \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8                   \
+         VALGRIND_RESTORE_STACK                                   \
+         "mov %0, x0\n"                                           \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "0" (&_argvec[0])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS/*, "r10"*/     \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_WW(lval, orig, arg1,arg2)                       \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[3];                          \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      _argvec[2] = (unsigned long)(arg2);                         \
+      __asm__ volatile(                                           \
+         VALGRIND_ALIGN_STACK                                     \
+         "ldr x0, [%1, #8] \n\t"                                  \
+         "ldr x1, [%1, #16] \n\t"                                 \
+         "ldr x8, [%1] \n\t"  /* target->x8 */                    \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8                   \
+         VALGRIND_RESTORE_STACK                                   \
+         "mov %0, x0\n"                                           \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "0" (&_argvec[0])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS/*, "r10"*/     \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3)                 \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[4];                          \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      _argvec[2] = (unsigned long)(arg2);                         \
+      _argvec[3] = (unsigned long)(arg3);                         \
+      __asm__ volatile(                                           \
+         VALGRIND_ALIGN_STACK                                     \
+         "ldr x0, [%1, #8] \n\t"                                  \
+         "ldr x1, [%1, #16] \n\t"                                 \
+         "ldr x2, [%1, #24] \n\t"                                 \
+         "ldr x8, [%1] \n\t"  /* target->x8 */                    \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8                   \
+         VALGRIND_RESTORE_STACK                                   \
+         "mov %0, x0\n"                                           \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "0" (&_argvec[0])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS/*, "r10"*/     \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4)           \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[5];                          \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      _argvec[2] = (unsigned long)(arg2);                         \
+      _argvec[3] = (unsigned long)(arg3);                         \
+      _argvec[4] = (unsigned long)(arg4);                         \
+      __asm__ volatile(                                           \
+         VALGRIND_ALIGN_STACK                                     \
+         "ldr x0, [%1, #8] \n\t"                                  \
+         "ldr x1, [%1, #16] \n\t"                                 \
+         "ldr x2, [%1, #24] \n\t"                                 \
+         "ldr x3, [%1, #32] \n\t"                                 \
+         "ldr x8, [%1] \n\t"  /* target->x8 */                    \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8                   \
+         VALGRIND_RESTORE_STACK                                   \
+         "mov %0, x0"                                             \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "0" (&_argvec[0])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS/*, "r10"*/     \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#endif /* PLAT_arm64_linux */
+
 /* ------------------------- s390x-linux ------------------------- */
 
 #if defined(PLAT_s390x_linux)
diff --git a/include/vki/vki-arm64-linux.h b/include/vki/vki-arm64-linux.h
new file mode 100644
index 0000000..277e632
--- /dev/null
+++ b/include/vki/vki-arm64-linux.h
@@ -0,0 +1,678 @@
+
+/*--------------------------------------------------------------------*/
+/*--- ARM64/Linux-specific kernel interface.     vki-arm64-linux.h ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, a dynamic binary instrumentation
+   framework.
+
+   Copyright (C) 2013-2013 OpenWorks
+      info@open-works.net
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#ifndef __VKI_ARM64_LINUX_H
+#define __VKI_ARM64_LINUX_H
+
+// ARM64 is little-endian.
+#define VKI_LITTLE_ENDIAN  1
+
+//----------------------------------------------------------------------
+// From linux-3.9.9/include/uapi/asm-generic/int-ll64.h
+//----------------------------------------------------------------------
+
+typedef unsigned char __vki_u8;
+
+typedef __signed__ short __vki_s16;
+typedef unsigned short __vki_u16;
+
+typedef __signed__ int __vki_s32;
+typedef unsigned int __vki_u32;
+
+typedef __signed__ long long __vki_s64;
+typedef unsigned long long __vki_u64;
+
+typedef unsigned short vki_u16;
+
+typedef unsigned int vki_u32;
+
+//----------------------------------------------------------------------
+// From linux-3.9.9/arch/arm64/include/asm/page.h
+//----------------------------------------------------------------------
+
+/* Looks like arm64 can do both 4k and 64k pages, so we
+   use the at-startup detection scheme that ppc32/64 do. */
+extern UWord VKI_PAGE_SHIFT;
+extern UWord VKI_PAGE_SIZE;
+#define VKI_MAX_PAGE_SHIFT	16
+#define VKI_MAX_PAGE_SIZE	(1UL << VKI_MAX_PAGE_SHIFT)
+
+//----------------------------------------------------------------------
+// From linux-3.10.5/arch/arm64/include/asm/shmparam.h
+//----------------------------------------------------------------------
+
+// Trying to make sense of this .. it seems that, for shared memory
+// between 64 bit processes, VKI_PAGE_SIZE alignment is good enough,
+// but when sharing with a 32 bit process we need the old-style value
+// of 4 * VKI_PAGE_SIZE (16k with 4k pages) to be safe.
+// (From reading between the lines of arch/arm64/include/asm/shmparam.h)
+#define VKI_SHMLBA  (4 * VKI_PAGE_SIZE)
+
+//----------------------------------------------------------------------
+// From linux-3.10.5/include/uapi/asm-generic/signal.h
+//----------------------------------------------------------------------
+
+#define _VKI_NSIG	64
+#define _VKI_NSIG_BPW	64
+#define _VKI_NSIG_WORDS	(_VKI_NSIG / _VKI_NSIG_BPW)
+
+typedef unsigned long vki_old_sigset_t;
+
+typedef struct { /* signal set, stored as an array of words */
+	unsigned long sig[_VKI_NSIG_WORDS]; /* _VKI_NSIG / _VKI_NSIG_BPW = 64 / 64 = 1 word */
+} vki_sigset_t;
+
+#define VKI_SIGHUP		 1
+#define VKI_SIGINT		 2
+#define VKI_SIGQUIT		 3
+#define VKI_SIGILL		 4
+#define VKI_SIGTRAP		 5
+#define VKI_SIGABRT		 6
+#define VKI_SIGBUS		 7
+#define VKI_SIGFPE		 8
+#define VKI_SIGKILL		 9
+#define VKI_SIGUSR1		10
+#define VKI_SIGSEGV		11
+#define VKI_SIGUSR2		12
+#define VKI_SIGPIPE		13
+#define VKI_SIGALRM		14
+#define VKI_SIGTERM		15
+#define VKI_SIGSTKFLT		16
+#define VKI_SIGCHLD		17
+#define VKI_SIGCONT		18
+#define VKI_SIGSTOP		19
+#define VKI_SIGTSTP		20
+#define VKI_SIGTTIN		21
+#define VKI_SIGTTOU		22
+#define VKI_SIGURG		23
+#define VKI_SIGXCPU		24
+#define VKI_SIGXFSZ		25
+#define VKI_SIGVTALRM		26
+#define VKI_SIGPROF		27
+#define VKI_SIGWINCH		28
+#define VKI_SIGIO		29
+#define VKI_SIGPWR		30
+#define VKI_SIGSYS		31
+#define	VKI_SIGUNUSED		31
+
+#define VKI_SIGRTMIN		32
+#define VKI_SIGRTMAX		_VKI_NSIG
+
+#define VKI_SA_NOCLDSTOP	0x00000001
+#define VKI_SA_NOCLDWAIT	0x00000002
+#define VKI_SA_SIGINFO		0x00000004
+#define VKI_SA_ONSTACK		0x08000000
+#define VKI_SA_RESTART		0x10000000
+#define VKI_SA_NODEFER		0x40000000
+#define VKI_SA_RESETHAND	0x80000000
+
+#define VKI_SA_NOMASK	VKI_SA_NODEFER
+#define VKI_SA_ONESHOT	VKI_SA_RESETHAND
+
+// This is obsolete and should not be defined for new archs
+#define VKI_SA_RESTORER	0x04000000
+
+#define VKI_SS_ONSTACK	1
+#define VKI_SS_DISABLE	2
+
+#define VKI_MINSIGSTKSZ	2048
+
+#define VKI_SIG_BLOCK          0	/* for blocking signals */
+#define VKI_SIG_UNBLOCK        1	/* for unblocking signals */
+#define VKI_SIG_SETMASK        2	/* for setting the signal mask */
+
+typedef void __vki_signalfn_t(int);
+typedef __vki_signalfn_t __user *__vki_sighandler_t;
+
+typedef void __vki_restorefn_t(void);
+typedef __vki_restorefn_t __user *__vki_sigrestore_t;
+
+#define VKI_SIG_DFL	((__vki_sighandler_t)0)	/* default signal handling */
+#define VKI_SIG_IGN	((__vki_sighandler_t)1)	/* ignore signal */
+
+struct vki_sigaction_base {
+        // [[Nb: a 'k' prefix is added to "sa_handler" because
+        // bits/sigaction.h (which gets dragged in somehow via signal.h)
+        // #defines it as something else.  Since that is done for glibc's
+        // purposes, which we don't care about here, we use our own name.]]
+	__vki_sighandler_t ksa_handler;
+	unsigned long sa_flags;
+        __vki_sigrestore_t sa_restorer; // FIXME: may not exist on arm64 (would shift sa_mask); kept so m_signals.c builds, see ARM64_TIDYUPS.txt -- TODO confirm vs kernel
+	vki_sigset_t sa_mask;		/* mask last for extensibility */
+};
+
+/* On Linux we use the same type for passing sigactions to
+   and from the kernel.  Hence: */
+typedef  struct vki_sigaction_base  vki_sigaction_toK_t;
+typedef  struct vki_sigaction_base  vki_sigaction_fromK_t;
+
+
+typedef struct vki_sigaltstack { /* also named vki_stack_t; used as uc_stack in vki_ucontext */
+	void __user *ss_sp;
+	int ss_flags;	/* NOTE(review): presumably VKI_SS_ONSTACK / VKI_SS_DISABLE -- confirm */
+	vki_size_t ss_size;
+} vki_stack_t;
+
+//----------------------------------------------------------------------
+// From linux-3.10.5/arch/arm64/include/uapi/asm/sigcontext.h
+//----------------------------------------------------------------------
+
+struct vki_sigcontext { /* embedded in vki_ucontext as uc_mcontext */
+        __vki_u64 fault_address;
+        /* AArch64 registers */
+        __vki_u64 regs[31]; /* NOTE(review): presumably x0..x30 -- confirm */
+        __vki_u64 sp;
+        __vki_u64 pc;
+        __vki_u64 pstate;
+        /* 4K reserved for FP/SIMD state and future expansion */
+        __vki_u8 __reserved[4096] __attribute__((__aligned__(16))); /* forces 16-byte alignment of this member */
+};
+
+//----------------------------------------------------------------------
+// From linux-3.10.5/include/uapi/asm-generic/mman-common.h
+//----------------------------------------------------------------------
+
+#define VKI_PROT_READ	0x1		/* page can be read */
+#define VKI_PROT_WRITE	0x2		/* page can be written */
+#define VKI_PROT_EXEC	0x4		/* page can be executed */
+#define VKI_PROT_NONE	0x0		/* page can not be accessed */
+#define VKI_PROT_GROWSDOWN	0x01000000	/* mprotect flag: extend change to start of growsdown vma */
+#define VKI_PROT_GROWSUP	0x02000000	/* mprotect flag: extend change to end of growsup vma */
+
+#define VKI_MAP_SHARED	0x01		/* Share changes */
+#define VKI_MAP_PRIVATE	0x02		/* Changes are private */
+#define VKI_MAP_FIXED	0x10		/* Interpret addr exactly */
+#define VKI_MAP_ANONYMOUS	0x20	/* don't use a file */
+#define VKI_MAP_NORESERVE       0x4000  /* don't check for reservations */
+
+//----------------------------------------------------------------------
+// From linux-3.10.5/include/uapi/asm-generic/fcntl.h
+//----------------------------------------------------------------------
+
+#define VKI_O_ACCMODE	     03
+#define VKI_O_RDONLY	     00
+#define VKI_O_WRONLY	     01
+#define VKI_O_RDWR	     02
+#define VKI_O_CREAT	   0100	/* not fcntl */
+#define VKI_O_EXCL	   0200	/* not fcntl */
+#define VKI_O_TRUNC	  01000	/* not fcntl */
+#define VKI_O_APPEND	  02000
+#define VKI_O_NONBLOCK	  04000
+#define VKI_O_LARGEFILE	0100000
+
+#define VKI_AT_FDCWD            -100
+
+#define VKI_F_DUPFD		0	/* dup */
+#define VKI_F_GETFD		1	/* get close_on_exec */
+#define VKI_F_SETFD		2	/* set/clear close_on_exec */
+#define VKI_F_GETFL		3	/* get file->f_flags */
+#define VKI_F_SETFL		4	/* set file->f_flags */
+#define VKI_F_GETLK		5
+#define VKI_F_SETLK		6
+#define VKI_F_SETLKW		7
+
+#define VKI_F_SETOWN		8	/*  for sockets. */
+#define VKI_F_GETOWN		9	/*  for sockets. */
+#define VKI_F_SETSIG		10	/*  for sockets. */
+#define VKI_F_GETSIG		11	/*  for sockets. */
+
+#define VKI_F_SETOWN_EX		15
+#define VKI_F_GETOWN_EX		16
+
+#define VKI_F_OWNER_TID		0
+#define VKI_F_OWNER_PID		1
+#define VKI_F_OWNER_PGRP	2
+
+struct vki_f_owner_ex { /* NOTE(review): presumably the argument of VKI_F_SETOWN_EX / VKI_F_GETOWN_EX -- confirm */
+	int	type;	/* NOTE(review): presumably one of VKI_F_OWNER_TID / _PID / _PGRP -- confirm */
+	__vki_kernel_pid_t	pid;
+};
+
+#define VKI_FD_CLOEXEC	1	/* actually anything with low bit set goes */
+
+#define VKI_F_LINUX_SPECIFIC_BASE	1024
+
+//----------------------------------------------------------------------
+// From linux-3.10.5/include/uapi/asm-generic/resource.h
+//----------------------------------------------------------------------
+
+#define VKI_RLIMIT_DATA		2	/* max data size */
+#define VKI_RLIMIT_STACK	3	/* max stack size */
+#define VKI_RLIMIT_CORE		4	/* max core file size */
+#define VKI_RLIMIT_NOFILE	7	/* max number of open files */
+
+//----------------------------------------------------------------------
+// From linux-3.10.5/include/uapi/asm-generic/socket.h
+//----------------------------------------------------------------------
+
+#define VKI_SOL_SOCKET	1
+
+#define VKI_SO_TYPE	3
+
+#define VKI_SO_ATTACH_FILTER	26
+
+//----------------------------------------------------------------------
+// From linux-3.10.5/include/uapi/asm-generic/sockios.h
+//----------------------------------------------------------------------
+
+#define VKI_SIOCSPGRP		0x8902
+#define VKI_SIOCGPGRP		0x8904
+#define VKI_SIOCGSTAMP		0x8906		/* Get stamp (timeval) */
+#define VKI_SIOCGSTAMPNS	0x8907		/* Get stamp (timespec) */
+
+//----------------------------------------------------------------------
+// From linux-3.10.5/include/uapi/asm-generic/stat.h
+//----------------------------------------------------------------------
+
+struct vki_stat { /* kernel interface struct; layout taken from asm-generic/stat.h */
+        unsigned long   st_dev;
+        unsigned long   st_ino;
+        unsigned int    st_mode;
+        unsigned int    st_nlink;
+        unsigned int    st_uid;
+        unsigned int    st_gid;
+        unsigned long   st_rdev;
+        unsigned long   __pad1;         /* padding word, not a real field */
+        long            st_size;
+        int             st_blksize;
+        int             __pad2;         /* padding after the int-sized st_blksize */
+        long            st_blocks;
+        long            st_atime;       /* each timestamp is a (value, _nsec) pair */
+        unsigned long   st_atime_nsec;
+        long            st_mtime;
+        unsigned long   st_mtime_nsec;
+        long            st_ctime;
+        unsigned long   st_ctime_nsec;
+        unsigned int    __unused4;      /* unused trailing words */
+        unsigned int    __unused5;
+};
+
+//----------------------------------------------------------------------
+// From linux-3.10.5/include/uapi/asm-generic/statfs.h
+//----------------------------------------------------------------------
+
+struct vki_statfs { /* kernel interface struct; layout taken from asm-generic/statfs.h */
+	long f_type;
+	long f_bsize;
+	long f_blocks;
+	long f_bfree;
+	long f_bavail;
+	long f_files;
+	long f_ffree;
+	__vki_kernel_fsid_t f_fsid;	/* struct of two ints, see vki-posixtypes-arm64-linux.h */
+	long f_namelen;
+	long f_frsize;
+	long f_flags;
+	long f_spare[4];	/* spare words at the end of the struct */
+};
+
+//----------------------------------------------------------------------
+// From linux-3.10.5/include/uapi/asm-generic/termios.h
+//----------------------------------------------------------------------
+
+struct vki_winsize { /* NOTE(review): presumably the payload of VKI_TIOCGWINSZ / VKI_TIOCSWINSZ -- confirm */
+	unsigned short ws_row;
+	unsigned short ws_col;
+	unsigned short ws_xpixel;
+	unsigned short ws_ypixel;
+};
+
+#define VKI_NCC 8
+struct vki_termio { /* 16-bit-flag variant; compare vki_termios, which uses vki_tcflag_t flags */
+	unsigned short c_iflag;		/* input mode flags */
+	unsigned short c_oflag;		/* output mode flags */
+	unsigned short c_cflag;		/* control mode flags */
+	unsigned short c_lflag;		/* local mode flags */
+	unsigned char c_line;		/* line discipline */
+	unsigned char c_cc[VKI_NCC];	/* control characters (VKI_NCC = 8 entries) */
+};
+
+//----------------------------------------------------------------------
+// From linux-3.10.5/include/uapi/asm-generic/termbits.h
+//----------------------------------------------------------------------
+
+typedef unsigned char	vki_cc_t;
+typedef unsigned int	vki_tcflag_t;
+
+#define VKI_NCCS 19
+struct vki_termios { /* flag fields are vki_tcflag_t (unsigned int) */
+	vki_tcflag_t c_iflag;		/* input mode flags */
+	vki_tcflag_t c_oflag;		/* output mode flags */
+	vki_tcflag_t c_cflag;		/* control mode flags */
+	vki_tcflag_t c_lflag;		/* local mode flags */
+	vki_cc_t c_line;		/* line discipline */
+	vki_cc_t c_cc[VKI_NCCS];	/* control characters (VKI_NCCS = 19 entries) */
+};
+
+//----------------------------------------------------------------------
+// From linux-3.9.9/include/uapi/asm-generic/ioctl.h
+//----------------------------------------------------------------------
+
+#define _VKI_IOC_NRBITS		8
+#define _VKI_IOC_TYPEBITS	8
+#define _VKI_IOC_SIZEBITS	14
+#define _VKI_IOC_DIRBITS	2
+
+#define _VKI_IOC_SIZEMASK	((1 << _VKI_IOC_SIZEBITS)-1)
+#define _VKI_IOC_DIRMASK	((1 << _VKI_IOC_DIRBITS)-1)
+
+#define _VKI_IOC_NRSHIFT	0
+#define _VKI_IOC_TYPESHIFT	(_VKI_IOC_NRSHIFT+_VKI_IOC_NRBITS)
+#define _VKI_IOC_SIZESHIFT	(_VKI_IOC_TYPESHIFT+_VKI_IOC_TYPEBITS)
+#define _VKI_IOC_DIRSHIFT	(_VKI_IOC_SIZESHIFT+_VKI_IOC_SIZEBITS)
+
+#define _VKI_IOC_NONE	0U
+#define _VKI_IOC_WRITE	1U
+#define _VKI_IOC_READ	2U
+
+#define _VKI_IOC(dir,type,nr,size) /* pack fields into one request number; arguments are not masked */ \
+	(((dir)  << _VKI_IOC_DIRSHIFT) |  /* 2 bits at shift 30: bits 30-31 */ \
+	 ((type) << _VKI_IOC_TYPESHIFT) | /* 8 bits at shift 8: bits 8-15 */ \
+	 ((nr)   << _VKI_IOC_NRSHIFT) |   /* 8 bits at shift 0: bits 0-7 */ \
+	 ((size) << _VKI_IOC_SIZESHIFT))  /* 14 bits at shift 16: bits 16-29 */
+
+#define _VKI_IO(type,nr)	_VKI_IOC(_VKI_IOC_NONE,(type),(nr),0)
+#define _VKI_IOR(type,nr,size)	_VKI_IOC(_VKI_IOC_READ,(type),(nr),sizeof(size))
+#define _VKI_IOW(type,nr,size)	_VKI_IOC(_VKI_IOC_WRITE,(type),(nr),sizeof(size))
+#define _VKI_IOWR(type,nr,size)	_VKI_IOC(_VKI_IOC_READ|_VKI_IOC_WRITE,(type),(nr),sizeof(size))
+
+#define _VKI_IOC_DIR(nr)		(((nr) >> _VKI_IOC_DIRSHIFT) & _VKI_IOC_DIRMASK)
+#define _VKI_IOC_SIZE(nr)		(((nr) >> _VKI_IOC_SIZESHIFT) & _VKI_IOC_SIZEMASK)
+
+//----------------------------------------------------------------------
+// From linux-3.10.5/include/uapi/asm-generic/ioctls.h
+//----------------------------------------------------------------------
+
+#define VKI_TCGETS	0x5401
+#define VKI_TCSETS	0x5402
+#define VKI_TCSETSW	0x5403
+#define VKI_TCSETSF	0x5404
+#define VKI_TCGETA	0x5405
+#define VKI_TCSETA	0x5406
+#define VKI_TCSETAW	0x5407
+#define VKI_TCSETAF	0x5408
+#define VKI_TCSBRK	0x5409
+#define VKI_TCXONC	0x540A
+#define VKI_TCFLSH	0x540B
+#define VKI_TIOCSCTTY	0x540E
+#define VKI_TIOCGPGRP	0x540F
+#define VKI_TIOCSPGRP	0x5410
+#define VKI_TIOCOUTQ	0x5411
+#define VKI_TIOCGWINSZ	0x5413
+#define VKI_TIOCSWINSZ	0x5414
+#define VKI_TIOCMGET	0x5415
+#define VKI_TIOCMBIS	0x5416
+#define VKI_TIOCMBIC	0x5417
+#define VKI_TIOCMSET	0x5418
+#define VKI_FIONREAD	0x541B
+#define VKI_TIOCLINUX	0x541C
+#define VKI_FIONBIO	0x5421
+#define VKI_TCSBRKP	0x5425
+#define VKI_TIOCGPTN	_VKI_IOR('T',0x30, unsigned int)
+#define VKI_TIOCSPTLCK	_VKI_IOW('T',0x31, int)
+
+#define VKI_FIONCLEX    0x5450
+#define VKI_FIOCLEX     0x5451
+#define VKI_FIOASYNC	0x5452
+#define VKI_TIOCSERGETLSR   0x5459
+
+#define VKI_TIOCGICOUNT	0x545D
+
+//----------------------------------------------------------------------
+// From linux-3.9.9/include/uapi/asm-generic/poll.h
+//----------------------------------------------------------------------
+
+#define VKI_POLLIN		0x0001
+
+struct vki_pollfd { /* kernel interface struct; layout taken from asm-generic/poll.h */
+	int fd;
+	short events;	/* NOTE(review): presumably a mask of VKI_POLL* bits such as VKI_POLLIN -- confirm */
+	short revents;
+};
+
+//----------------------------------------------------------------------
+// From linux-3.10.5/arch/arm64/include/uapi/asm/ptrace.h
+//----------------------------------------------------------------------
+
+//ZZ struct vki_user_i387_struct {
+//ZZ 	unsigned short	cwd;
+//ZZ 	unsigned short	swd;
+//ZZ 	unsigned short	twd; /* Note this is not the same as the 32bit/x87/FSAVE twd */
+//ZZ 	unsigned short	fop;
+//ZZ 	__vki_u64	rip;
+//ZZ 	__vki_u64	rdp;
+//ZZ 	__vki_u32	mxcsr;
+//ZZ 	__vki_u32	mxcsr_mask;
+//ZZ 	__vki_u32	st_space[32];	/* 8*16 bytes for each FP-reg = 128 bytes */
+//ZZ 	__vki_u32	xmm_space[64];	/* 16*16 bytes for each XMM-reg = 256 bytes */
+//ZZ 	__vki_u32	padding[24];
+//ZZ };
+//ZZ 
+//ZZ struct vki_user_regs_struct {
+//ZZ 	unsigned long r15,r14,r13,r12,rbp,rbx,r11,r10;
+//ZZ 	unsigned long r9,r8,rax,rcx,rdx,rsi,rdi,orig_rax;
+//ZZ 	unsigned long rip,cs,eflags;
+//ZZ 	unsigned long rsp,ss;
+//ZZ   	unsigned long fs_base, gs_base;
+//ZZ 	unsigned long ds,es,fs,gs; 
+//ZZ }; 
+
+struct vki_user_pt_regs { /* also reachable as vki_user_regs_struct; sizes VKI_ELF_NGREG */
+        __vki_u64           regs[31]; /* NOTE(review): presumably x0..x30 -- confirm */
+        __vki_u64           sp;
+        __vki_u64           pc;
+        __vki_u64           pstate;
+};
+
+/* I think that the new name in the kernel for these is "user_pt_regs"
+   and the old name is "user_regs_struct".  Unfortunately can't clone
+   a 'struct' type using 'typedef' and still have a 'struct' type, so
+   use a blunter instrument instead. */
+#define vki_user_regs_struct vki_user_pt_regs
+
+struct vki_user_fpsimd_state { /* typedef'd below as vki_elf_fpregset_t */
+        __uint128_t     vregs[32]; /* not a __vki_ type yet; ARM64_TIDYUPS.txt lists this as a tidy-up */
+        __vki_u32           fpsr;
+        __vki_u32           fpcr;
+};
+
+//----------------------------------------------------------------------
+// From linux-3.9.9/arch/arm64/include/asm/elf.h
+//----------------------------------------------------------------------
+
+typedef unsigned long vki_elf_greg_t;
+
+#define VKI_ELF_NGREG (sizeof (struct vki_user_pt_regs) / sizeof(vki_elf_greg_t))
+typedef vki_elf_greg_t vki_elf_gregset_t[VKI_ELF_NGREG];
+
+typedef struct vki_user_fpsimd_state vki_elf_fpregset_t;
+
+//----------------------------------------------------------------------
+// From linux-3.10.5/include/uapi/asm-generic/ucontext.h
+//----------------------------------------------------------------------
+
+struct vki_ucontext { /* kernel interface struct; layout taken from asm-generic/ucontext.h */
+	unsigned long		uc_flags;
+	struct vki_ucontext    *uc_link;
+	vki_stack_t		uc_stack;
+	struct vki_sigcontext	uc_mcontext;	/* register state, see struct vki_sigcontext */
+	vki_sigset_t		uc_sigmask;
+};
+
+//ZZ //----------------------------------------------------------------------
+//ZZ // From linux-2.6.9/include/asm-x86_64/segment.h
+//ZZ //----------------------------------------------------------------------
+//ZZ 
+//ZZ #define VKI_GDT_ENTRY_TLS_ENTRIES 3
+//ZZ 
+//ZZ #define VKI_GDT_ENTRY_TLS_MIN 11
+//ZZ #define VKI_GDT_ENTRY_TLS_MAX 13
+//ZZ 
+//ZZ //----------------------------------------------------------------------
+//ZZ // From linux-2.6.11.9/include/asm-x86_64/prctl.h
+//ZZ //----------------------------------------------------------------------
+//ZZ 
+//ZZ #define VKI_ARCH_SET_GS 0x1001
+//ZZ #define VKI_ARCH_SET_FS 0x1002
+//ZZ #define VKI_ARCH_GET_FS 0x1003
+//ZZ #define VKI_ARCH_GET_GS 0x1004
+//ZZ 
+//ZZ //----------------------------------------------------------------------
+//ZZ // From linux-2.6.9/include/asm-x86_64/ldt.h
+//ZZ //----------------------------------------------------------------------
+//ZZ 
+//ZZ // I think this LDT stuff will have to be reinstated for amd64, but I'm not
+//ZZ // certain.  (Nb: The sys_arch_prctl seems to have replaced
+//ZZ // [gs]et_thread_area syscalls.)
+//ZZ //
+//ZZ // Note that the type here is very slightly different to the
+//ZZ // type for x86 (the final 'lm' field is added);  I'm not sure about the
+//ZZ // significance of that... --njn
+//ZZ 
+//ZZ /* [[Nb: This is the structure passed to the modify_ldt syscall.  Just so as
+//ZZ    to confuse and annoy everyone, this is _not_ the same as an
+//ZZ    VgLdtEntry and has to be translated into such.  The logic for doing
+//ZZ    so, in vg_ldt.c, is copied from the kernel sources.]] */
+//ZZ /* Note also that a comment in ldt.h indicates that the below
+//ZZ    contains several fields ignored on 64bit, and that modify_ldt
+//ZZ    is rather for 32bit. */
+//ZZ struct vki_user_desc {
+//ZZ 	unsigned int  entry_number;
+//ZZ 	unsigned long base_addr;
+//ZZ 	unsigned int  limit;
+//ZZ 	unsigned int  seg_32bit:1;
+//ZZ 	unsigned int  contents:2;
+//ZZ 	unsigned int  read_exec_only:1;
+//ZZ 	unsigned int  limit_in_pages:1;
+//ZZ 	unsigned int  seg_not_present:1;
+//ZZ 	unsigned int  useable:1;
+//ZZ         unsigned int  lm:1;
+//ZZ };
+//ZZ 
+//ZZ // [[Nb: for our convenience within Valgrind, use a more specific name]]
+//ZZ typedef struct vki_user_desc vki_modify_ldt_t;
+
+//----------------------------------------------------------------------
+// From linux-3.10.5/include/uapi/asm-generic/ipcbuf.h
+//----------------------------------------------------------------------
+
+struct vki_ipc64_perm /* first member of vki_semid64_ds, vki_msqid64_ds and vki_shmid64_ds */
+{
+	__vki_kernel_key_t	key;
+	__vki_kernel_uid32_t	uid;
+	__vki_kernel_gid32_t	gid;
+	__vki_kernel_uid32_t	cuid;
+	__vki_kernel_gid32_t	cgid;
+	__vki_kernel_mode_t	mode;
+        unsigned char           __pad1[4 - sizeof(__vki_kernel_mode_t)]; /* pads mode to 4 bytes; empty when mode_t is already 4 bytes */
+	unsigned short		seq;
+	unsigned short		__pad2;
+	unsigned long		__unused1;
+	unsigned long		__unused2;
+};
+
+//----------------------------------------------------------------------
+// From linux-3.10.5/include/uapi/asm-generic/sembuf.h
+//----------------------------------------------------------------------
+
+struct vki_semid64_ds { /* no padding words between the time fields; cf. the sembuf.h note in ARM64_TIDYUPS.txt */
+	struct vki_ipc64_perm sem_perm;		/* permissions .. see ipc.h */
+	__vki_kernel_time_t	sem_otime;		/* last semop time */
+	__vki_kernel_time_t	sem_ctime;		/* last change time */
+	unsigned long	sem_nsems;		/* no. of semaphores in array */
+	unsigned long	__unused3;		/* unused trailing words */
+	unsigned long	__unused4;
+};
+
+//----------------------------------------------------------------------
+// From linux-3.10.5/include/uapi/asm-generic/msgbuf.h
+//----------------------------------------------------------------------
+
+struct vki_msqid64_ds { /* per-message-queue state; layout taken from asm-generic/msgbuf.h */
+	struct vki_ipc64_perm msg_perm;	/* permissions, see struct vki_ipc64_perm */
+	__vki_kernel_time_t msg_stime;	/* last msgsnd time */
+	__vki_kernel_time_t msg_rtime;	/* last msgrcv time */
+	__vki_kernel_time_t msg_ctime;	/* last change time */
+	unsigned long  msg_cbytes;	/* current number of bytes on queue */
+	unsigned long  msg_qnum;	/* number of messages in queue */
+	unsigned long  msg_qbytes;	/* max number of bytes on queue */
+	__vki_kernel_pid_t msg_lspid;	/* pid of last msgsnd */
+	__vki_kernel_pid_t msg_lrpid;	/* last receive pid */
+	unsigned long  __unused4;	/* unused trailing words */
+	unsigned long  __unused5;
+};
+
+//----------------------------------------------------------------------
+// From linux-3.10.5/include/uapi/asm-generic/shmbuf.h
+//----------------------------------------------------------------------
+
+struct vki_shmid64_ds { /* per-segment state; layout taken from asm-generic/shmbuf.h */
+	struct vki_ipc64_perm	shm_perm;	/* operation perms */
+	vki_size_t		shm_segsz;	/* size of segment (bytes) */
+	__vki_kernel_time_t	shm_atime;	/* last attach time */
+	__vki_kernel_time_t	shm_dtime;	/* last detach time */
+	__vki_kernel_time_t	shm_ctime;	/* last change time */
+	__vki_kernel_pid_t	shm_cpid;	/* pid of creator */
+	__vki_kernel_pid_t	shm_lpid;	/* pid of last operator */
+	unsigned long		shm_nattch;	/* no. of current attaches */
+	unsigned long		__unused4;	/* unused trailing words */
+	unsigned long		__unused5;
+};
+
+struct vki_shminfo64 { /* nine unsigned long words; layout taken from asm-generic/shmbuf.h */
+	unsigned long	shmmax;	/* NOTE(review): presumably system-wide shm limits -- confirm meanings against shmbuf.h */
+	unsigned long	shmmin;
+	unsigned long	shmmni;
+	unsigned long	shmseg;
+	unsigned long	shmall;
+	unsigned long	__unused1;	/* unused trailing words */
+	unsigned long	__unused2;
+	unsigned long	__unused3;
+	unsigned long	__unused4;
+};
+
+//----------------------------------------------------------------------
+// From linux-3.9.9/arch/arm64/include/asm/ptrace.h
+//----------------------------------------------------------------------
+
+#define VKI_PTRACE_GETREGS            12
+#define VKI_PTRACE_SETREGS            13
+//#define VKI_PTRACE_GETFPREGS          14
+//#define VKI_PTRACE_SETFPREGS          15
+
+//----------------------------------------------------------------------
+// And that's it!
+//----------------------------------------------------------------------
+ 
+#endif // __VKI_ARM64_LINUX_H
+
+/*--------------------------------------------------------------------*/
+/*--- end                                                          ---*/
+/*--------------------------------------------------------------------*/
diff --git a/include/vki/vki-linux.h b/include/vki/vki-linux.h
index b78c9f5..14bb8bf 100644
--- a/include/vki/vki-linux.h
+++ b/include/vki/vki-linux.h
@@ -89,6 +89,8 @@
 #  include "vki-posixtypes-ppc64-linux.h"
 #elif defined(VGA_arm)
 #  include "vki-posixtypes-arm-linux.h"
+#elif defined(VGA_arm64)
+#  include "vki-posixtypes-arm64-linux.h"
 #elif defined(VGA_s390x)
 #  include "vki-posixtypes-s390x-linux.h"
 #elif defined(VGA_mips32)
@@ -213,6 +215,8 @@
 #  include "vki-ppc64-linux.h"
 #elif defined(VGA_arm)
 #  include "vki-arm-linux.h"
+#elif defined(VGA_arm64)
+#  include "vki-arm64-linux.h"
 #elif defined(VGA_s390x)
 #  include "vki-s390x-linux.h"
 #elif defined(VGA_mips32)
diff --git a/include/vki/vki-posixtypes-arm64-linux.h b/include/vki/vki-posixtypes-arm64-linux.h
new file mode 100644
index 0000000..da33bca
--- /dev/null
+++ b/include/vki/vki-posixtypes-arm64-linux.h
@@ -0,0 +1,68 @@
+
+/*--------------------------------------------------------------------*/
+/*--- arm64/Linux-specific kernel interface: posix types.          ---*/
+/*---                                 vki-posixtypes-arm64-linux.h ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, a dynamic binary instrumentation
+   framework.
+
+   Copyright (C) 2013-2013 OpenWorks
+      info@open-works.net
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#ifndef __VKI_POSIXTYPES_ARM64_LINUX_H
+#define __VKI_POSIXTYPES_ARM64_LINUX_H
+
+//----------------------------------------------------------------------
+// From linux-3.9.9/include/uapi/asm-generic/posix_types.h
+//----------------------------------------------------------------------
+
+typedef unsigned int	__vki_kernel_mode_t;
+typedef long		__vki_kernel_off_t;
+typedef int		__vki_kernel_pid_t;
+typedef int		__vki_kernel_ipc_pid_t;
+typedef unsigned int	__vki_kernel_uid_t;
+typedef unsigned int	__vki_kernel_gid_t;
+typedef unsigned long	__vki_kernel_size_t;
+typedef long		__vki_kernel_time_t;
+typedef long		__vki_kernel_suseconds_t;
+typedef long		__vki_kernel_clock_t;
+typedef int		__vki_kernel_timer_t;
+typedef int		__vki_kernel_clockid_t;
+typedef char *		__vki_kernel_caddr_t;
+typedef unsigned int	__vki_kernel_uid32_t;
+typedef unsigned int	__vki_kernel_gid32_t;
+
+typedef unsigned int	__vki_kernel_old_uid_t;
+typedef unsigned int	__vki_kernel_old_gid_t;
+
+typedef long long	__vki_kernel_loff_t;
+
+typedef struct { /* type of the f_fsid member of struct vki_statfs */
+	int	val[2];
+} __vki_kernel_fsid_t;
+
+#endif // __VKI_POSIXTYPES_ARM64_LINUX_H
+
+/*--------------------------------------------------------------------*/
+/*--- end                             vki-posixtypes-arm64-linux.h ---*/
+/*--------------------------------------------------------------------*/
diff --git a/include/vki/vki-ppc32-linux.h b/include/vki/vki-ppc32-linux.h
index 481ee2c..999c481 100644
--- a/include/vki/vki-ppc32-linux.h
+++ b/include/vki/vki-ppc32-linux.h
@@ -63,8 +63,8 @@
 
 /* PAGE_SHIFT determines the page size, unfortunately
    page size might vary between 32-bit and 64-bit ppc kernels */
-extern unsigned long VKI_PAGE_SHIFT;
-extern unsigned long VKI_PAGE_SIZE;
+extern UWord VKI_PAGE_SHIFT;
+extern UWord VKI_PAGE_SIZE;
 #define VKI_MAX_PAGE_SHIFT	16
 #define VKI_MAX_PAGE_SIZE	(1UL << VKI_MAX_PAGE_SHIFT)
 
diff --git a/include/vki/vki-ppc64-linux.h b/include/vki/vki-ppc64-linux.h
index b72039c..1206924 100644
--- a/include/vki/vki-ppc64-linux.h
+++ b/include/vki/vki-ppc64-linux.h
@@ -64,8 +64,8 @@
 
 /* PAGE_SHIFT determines the page size, unfortunately
    page size might vary between 32-bit and 64-bit ppc kernels */
-extern unsigned long VKI_PAGE_SHIFT;
-extern unsigned long VKI_PAGE_SIZE;
+extern UWord VKI_PAGE_SHIFT;
+extern UWord VKI_PAGE_SIZE;
 #define VKI_MAX_PAGE_SHIFT	16
 #define VKI_MAX_PAGE_SIZE	(1UL << VKI_MAX_PAGE_SHIFT)
 
diff --git a/include/vki/vki-scnums-arm64-linux.h b/include/vki/vki-scnums-arm64-linux.h
new file mode 100644
index 0000000..f682a86
--- /dev/null
+++ b/include/vki/vki-scnums-arm64-linux.h
@@ -0,0 +1,510 @@
+
+/*--------------------------------------------------------------------*/
+/*--- System call numbers for arm64-linux.                         ---*/
+/*---                                     vki-scnums-arm64-linux.h ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, a dynamic binary instrumentation
+   framework.
+
+   Copyright (C) 2013-2013 OpenWorks
+      info@open-works.net
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#ifndef __VKI_SCNUMS_ARM64_LINUX_H
+#define __VKI_SCNUMS_ARM64_LINUX_H
+
+// Derived from linux-3.10.5/arch/arm64/include/uapi/asm/unistd.h,
+// which is a #include of
+//      linux-3.10.5/include/uapi/asm-generic/unistd.h
+
+#define __NR_io_setup 0
+#define __NR_io_destroy 1
+#define __NR_io_submit 2
+#define __NR_io_cancel 3
+#define __NR_io_getevents 4
+#define __NR_setxattr 5
+#define __NR_lsetxattr 6
+#define __NR_fsetxattr 7
+#define __NR_getxattr 8
+#define __NR_lgetxattr 9
+#define __NR_fgetxattr 10
+#define __NR_listxattr 11
+#define __NR_llistxattr 12
+#define __NR_flistxattr 13
+#define __NR_removexattr 14
+#define __NR_lremovexattr 15
+#define __NR_fremovexattr 16
+#define __NR_getcwd 17
+#define __NR_lookup_dcookie 18
+#define __NR_eventfd2 19
+#define __NR_epoll_create1 20
+#define __NR_epoll_ctl 21
+#define __NR_epoll_pwait 22
+#define __NR_dup 23
+#define __NR_dup3 24
+#define __NR3264_fcntl 25
+#define __NR_inotify_init1 26
+#define __NR_inotify_add_watch 27
+#define __NR_inotify_rm_watch 28
+#define __NR_ioctl 29
+#define __NR_ioprio_set 30
+#define __NR_ioprio_get 31
+#define __NR_flock 32
+#define __NR_mknodat 33
+#define __NR_mkdirat 34
+#define __NR_unlinkat 35
+#define __NR_symlinkat 36
+#define __NR_linkat 37
+#define __NR_renameat 38
+#define __NR_umount2 39
+#define __NR_mount 40
+#define __NR_pivot_root 41
+#define __NR_nfsservctl 42
+#define __NR3264_statfs 43
+#define __NR3264_fstatfs 44
+#define __NR3264_truncate 45
+#define __NR3264_ftruncate 46
+#define __NR_fallocate 47
+#define __NR_faccessat 48
+#define __NR_chdir 49
+#define __NR_fchdir 50
+#define __NR_chroot 51
+#define __NR_fchmod 52
+#define __NR_fchmodat 53
+#define __NR_fchownat 54
+#define __NR_fchown 55
+#define __NR_openat 56
+#define __NR_close 57
+#define __NR_vhangup 58
+#define __NR_pipe2 59
+#define __NR_quotactl 60
+#define __NR_getdents64 61
+#define __NR3264_lseek 62
+#define __NR_read 63
+#define __NR_write 64
+#define __NR_readv 65
+#define __NR_writev 66
+#define __NR_pread64 67
+#define __NR_pwrite64 68
+#define __NR_preadv 69
+#define __NR_pwritev 70
+#define __NR3264_sendfile 71
+#define __NR_pselect6 72
+#define __NR_ppoll 73
+#define __NR_signalfd4 74
+#define __NR_vmsplice 75
+#define __NR_splice 76
+#define __NR_tee 77
+#define __NR_readlinkat 78
+#define __NR3264_fstatat 79
+#define __NR3264_fstat 80
+#define __NR_sync 81
+#define __NR_fsync 82
+#define __NR_fdatasync 83
+
+//#ifdef __ARCH_WANT_SYNC_FILE_RANGE2
+//#define __NR_sync_file_range2 84
+//#else
+#define __NR_sync_file_range 84
+//#endif
+
+#define __NR_timerfd_create 85
+#define __NR_timerfd_settime 86
+#define __NR_timerfd_gettime 87
+#define __NR_utimensat 88
+#define __NR_acct 89
+#define __NR_capget 90
+#define __NR_capset 91
+#define __NR_personality 92
+#define __NR_exit 93
+#define __NR_exit_group 94
+#define __NR_waitid 95
+#define __NR_set_tid_address 96
+#define __NR_unshare 97
+#define __NR_futex 98
+#define __NR_set_robust_list 99
+#define __NR_get_robust_list 100
+#define __NR_nanosleep 101
+#define __NR_getitimer 102
+#define __NR_setitimer 103
+#define __NR_kexec_load 104
+#define __NR_init_module 105
+#define __NR_delete_module 106
+#define __NR_timer_create 107
+#define __NR_timer_gettime 108
+#define __NR_timer_getoverrun 109
+#define __NR_timer_settime 110
+#define __NR_timer_delete 111
+#define __NR_clock_settime 112
+#define __NR_clock_gettime 113
+#define __NR_clock_getres 114
+#define __NR_clock_nanosleep 115
+#define __NR_syslog 116
+#define __NR_ptrace 117
+#define __NR_sched_setparam 118
+#define __NR_sched_setscheduler 119
+#define __NR_sched_getscheduler 120
+#define __NR_sched_getparam 121
+#define __NR_sched_setaffinity 122
+#define __NR_sched_getaffinity 123
+#define __NR_sched_yield 124
+#define __NR_sched_get_priority_max 125
+#define __NR_sched_get_priority_min 126
+#define __NR_sched_rr_get_interval 127
+#define __NR_restart_syscall 128
+#define __NR_kill 129
+#define __NR_tkill 130
+#define __NR_tgkill 131
+#define __NR_sigaltstack 132
+#define __NR_rt_sigsuspend 133
+#define __NR_rt_sigaction 134
+#define __NR_rt_sigprocmask 135
+#define __NR_rt_sigpending 136
+#define __NR_rt_sigtimedwait 137
+#define __NR_rt_sigqueueinfo 138
+#define __NR_rt_sigreturn 139
+#define __NR_setpriority 140
+#define __NR_getpriority 141
+#define __NR_reboot 142
+#define __NR_setregid 143
+#define __NR_setgid 144
+#define __NR_setreuid 145
+#define __NR_setuid 146
+#define __NR_setresuid 147
+#define __NR_getresuid 148
+#define __NR_setresgid 149
+#define __NR_getresgid 150
+#define __NR_setfsuid 151
+#define __NR_setfsgid 152
+#define __NR_times 153
+#define __NR_setpgid 154
+#define __NR_getpgid 155
+#define __NR_getsid 156
+#define __NR_setsid 157
+#define __NR_getgroups 158
+#define __NR_setgroups 159
+#define __NR_uname 160
+#define __NR_sethostname 161
+#define __NR_setdomainname 162
+#define __NR_getrlimit 163
+#define __NR_setrlimit 164
+#define __NR_getrusage 165
+#define __NR_umask 166
+#define __NR_prctl 167
+#define __NR_getcpu 168
+#define __NR_gettimeofday 169
+#define __NR_settimeofday 170
+#define __NR_adjtimex 171
+#define __NR_getpid 172
+#define __NR_getppid 173
+#define __NR_getuid 174
+#define __NR_geteuid 175
+#define __NR_getgid 176
+#define __NR_getegid 177
+#define __NR_gettid 178
+#define __NR_sysinfo 179
+#define __NR_mq_open 180
+#define __NR_mq_unlink 181
+#define __NR_mq_timedsend 182
+#define __NR_mq_timedreceive 183
+#define __NR_mq_notify 184
+#define __NR_mq_getsetattr 185
+#define __NR_msgget 186
+#define __NR_msgctl 187
+#define __NR_msgrcv 188
+#define __NR_msgsnd 189
+#define __NR_semget 190
+#define __NR_semctl 191
+#define __NR_semtimedop 192
+#define __NR_semop 193
+#define __NR_shmget 194
+#define __NR_shmctl 195
+#define __NR_shmat 196
+#define __NR_shmdt 197
+#define __NR_socket 198
+#define __NR_socketpair 199
+#define __NR_bind 200
+#define __NR_listen 201
+#define __NR_accept 202
+#define __NR_connect 203
+#define __NR_getsockname 204
+#define __NR_getpeername 205
+#define __NR_sendto 206
+#define __NR_recvfrom 207
+#define __NR_setsockopt 208
+#define __NR_getsockopt 209
+#define __NR_shutdown 210
+#define __NR_sendmsg 211
+#define __NR_recvmsg 212
+#define __NR_readahead 213
+#define __NR_brk 214
+#define __NR_munmap 215
+#define __NR_mremap 216
+#define __NR_add_key 217
+#define __NR_request_key 218
+#define __NR_keyctl 219
+#define __NR_clone 220
+#define __NR_execve 221
+#define __NR3264_mmap 222
+#define __NR3264_fadvise64 223
+#define __NR_swapon 224
+#define __NR_swapoff 225
+#define __NR_mprotect 226
+#define __NR_msync 227
+#define __NR_mlock 228
+#define __NR_munlock 229
+#define __NR_mlockall 230
+#define __NR_munlockall 231
+#define __NR_mincore 232
+#define __NR_madvise 233
+#define __NR_remap_file_pages 234
+#define __NR_mbind 235
+#define __NR_get_mempolicy 236
+#define __NR_set_mempolicy 237
+#define __NR_migrate_pages 238
+#define __NR_move_pages 239
+#define __NR_rt_tgsigqueueinfo 240
+#define __NR_perf_event_open 241
+#define __NR_accept4 242
+#define __NR_recvmmsg 243
+
+///*
+// * Architectures may provide up to 16 syscalls of their own
+// * starting with this value.
+// */
+//#define __NR_arch_specific_syscall 244
+
+#define __NR_wait4 260
+#define __NR_prlimit64 261
+#define __NR_fanotify_init 262
+#define __NR_fanotify_mark 263
+#define __NR_name_to_handle_at         264
+#define __NR_open_by_handle_at         265
+#define __NR_clock_adjtime 266
+#define __NR_syncfs 267
+#define __NR_setns 268
+#define __NR_sendmmsg 269
+#define __NR_process_vm_readv 270
+#define __NR_process_vm_writev 271
+#define __NR_kcmp 272
+#define __NR_finit_module 273
+
+#undef __NR_syscalls
+#define __NR_syscalls 274
+
+///*
+// * All syscalls below here should go away really,
+// * these are provided for both review and as a porting
+// * help for the C library version.
+// *
+// * Last chance: are any of these important enough to
+// * enable by default?
+// */
+//#ifdef __ARCH_WANT_SYSCALL_NO_AT
+//ZZZZ#define __NR_open 1024
+//#define __NR_link 1025
+//__SYSCALL(__NR_link, sys_link)
+//#define __NR_unlink 1026
+#define __NR_mknod 1027
+//#define __NR_chmod 1028
+//__SYSCALL(__NR_chmod, sys_chmod)
+//#define __NR_chown 1029
+//__SYSCALL(__NR_chown, sys_chown)
+//#define __NR_mkdir 1030
+//__SYSCALL(__NR_mkdir, sys_mkdir)
+//#define __NR_rmdir 1031
+//__SYSCALL(__NR_rmdir, sys_rmdir)
+//#define __NR_lchown 1032
+//__SYSCALL(__NR_lchown, sys_lchown)
+#define __NR_access 1033
+#define __NR_rename 1034
+//#define __NR_readlink 1035
+//#define __NR_symlink 1036
+//__SYSCALL(__NR_symlink, sys_symlink)
+//#define __NR_utimes 1037
+//__SYSCALL(__NR_utimes, sys_utimes)
+//#define __NR3264_stat 1038
+//__SC_3264(__NR3264_stat, sys_stat64, sys_newstat)
+//#define __NR3264_lstat 1039
+//__SC_3264(__NR3264_lstat, sys_lstat64, sys_newlstat)
+//
+//#undef __NR_syscalls
+//#define __NR_syscalls (__NR3264_lstat+1)
+//#endif /* __ARCH_WANT_SYSCALL_NO_AT */
+//
+//#ifdef __ARCH_WANT_SYSCALL_NO_FLAGS
+#define __NR_pipe 1040
+#define __NR_dup2 1041
+//#define __NR_epoll_create 1042
+//__SYSCALL(__NR_epoll_create, sys_epoll_create)
+//#define __NR_inotify_init 1043
+//__SYSCALL(__NR_inotify_init, sys_inotify_init)
+//#define __NR_eventfd 1044
+//__SYSCALL(__NR_eventfd, sys_eventfd)
+//#define __NR_signalfd 1045
+//__SYSCALL(__NR_signalfd, sys_signalfd)
+//
+//#undef __NR_syscalls
+//#define __NR_syscalls (__NR_signalfd+1)
+//#endif /* __ARCH_WANT_SYSCALL_NO_FLAGS */
+//
+/* #if (__BITS_PER_LONG == 32 || defined(__SYSCALL_COMPAT)) &&   \
+       defined(__ARCH_WANT_SYSCALL_OFF_T)
+*/
+//#define __NR_sendfile 1046
+//__SYSCALL(__NR_sendfile, sys_sendfile)
+//#define __NR_ftruncate 1047
+//__SYSCALL(__NR_ftruncate, sys_ftruncate)
+//#define __NR_truncate 1048
+//__SYSCALL(__NR_truncate, sys_truncate)
+#define __NR_stat 1049
+//#define __NR_lstat 1050
+//__SYSCALL(__NR_lstat, sys_newlstat)
+//ZZ#define __NR_fstat 1051
+//#define __NR_fcntl 1052
+//#define __NR_fadvise64 1053
+//#define __ARCH_WANT_SYS_FADVISE64
+//__SYSCALL(__NR_fadvise64, sys_fadvise64)
+//#define __NR_newfstatat 1054
+//#define __ARCH_WANT_SYS_NEWFSTATAT
+//__SYSCALL(__NR_newfstatat, sys_newfstatat)
+//#define __NR_fstatfs 1055
+//__SYSCALL(__NR_fstatfs, sys_fstatfs)
+//#define __NR_statfs 1056
+//__SYSCALL(__NR_statfs, sys_statfs)
+#define __NR_lseek __NR3264_lseek /* 62; legacy off_t slot 1057 is above __NR_syscalls */
+#define __NR_mmap __NR3264_mmap   /* 222; legacy off_t slot 1058 is above __NR_syscalls */
+//
+//#undef __NR_syscalls
+//#define __NR_syscalls (__NR_mmap+1)
+//#endif /* 32 bit off_t syscalls */
+//
+//#ifdef __ARCH_WANT_SYSCALL_DEPRECATED
+//#define __NR_alarm 1059
+//#define __ARCH_WANT_SYS_ALARM
+//__SYSCALL(__NR_alarm, sys_alarm)
+#define __NR_getpgrp 1060
+//#define __ARCH_WANT_SYS_GETPGRP
+//__SYSCALL(__NR_getpgrp, sys_getpgrp)
+//#define __NR_pause 1061
+//#define __ARCH_WANT_SYS_PAUSE
+//__SYSCALL(__NR_pause, sys_pause)
+//#define __NR_time 1062
+//#define __ARCH_WANT_SYS_TIME
+//#define __ARCH_WANT_COMPAT_SYS_TIME
+//__SYSCALL(__NR_time, sys_time)
+//#define __NR_utime 1063
+//#define __ARCH_WANT_SYS_UTIME
+//__SYSCALL(__NR_utime, sys_utime)
+//
+//#define __NR_creat 1064
+//__SYSCALL(__NR_creat, sys_creat)
+#define __NR_getdents 1065
+//#define __NR_futimesat 1066
+//__SYSCALL(__NR_futimesat, sys_futimesat)
+//#define __NR_select 1067
+//#define __ARCH_WANT_SYS_SELECT
+//__SYSCALL(__NR_select, sys_select)
+#define __NR_poll 1068
+//#define __NR_epoll_wait 1069
+//__SYSCALL(__NR_epoll_wait, sys_epoll_wait)
+//#define __NR_ustat 1070
+//__SYSCALL(__NR_ustat, sys_ustat)
+//#define __NR_vfork 1071
+//__SYSCALL(__NR_vfork, sys_vfork)
+//#define __NR_oldwait4 1072
+//__SYSCALL(__NR_oldwait4, sys_wait4)
+//#define __NR_recv 1073
+//__SYSCALL(__NR_recv, sys_recv)
+//#define __NR_send 1074
+//__SYSCALL(__NR_send, sys_send)
+//#define __NR_bdflush 1075
+//__SYSCALL(__NR_bdflush, sys_bdflush)
+//#define __NR_umount 1076
+//__SYSCALL(__NR_umount, sys_oldumount)
+//#define __ARCH_WANT_SYS_OLDUMOUNT
+//#define __NR_uselib 1077
+//__SYSCALL(__NR_uselib, sys_uselib)
+//#define __NR__sysctl 1078
+//__SYSCALL(__NR__sysctl, sys_sysctl)
+//
+#define __NR_fork 1079
+//#ifdef CONFIG_MMU
+//__SYSCALL(__NR_fork, sys_fork)
+//#else
+//__SYSCALL(__NR_fork, sys_ni_syscall)
+//#endif /* CONFIG_MMU */
+//
+//#undef __NR_syscalls
+//#define __NR_syscalls (__NR_fork+1)
+//
+//#endif /* __ARCH_WANT_SYSCALL_DEPRECATED */
+//
+///*
+// * 32 bit systems traditionally used different
+// * syscalls for off_t and loff_t arguments, while
+// * 64 bit systems only need the off_t version.
+// * For new 32 bit platforms, there is no need to
+// * implement the old 32 bit off_t syscalls, so
+// * they take different names.
+// * Here we map the numbers so that both versions
+// * use the same syscall table layout.
+// */
+//#if __BITS_PER_LONG == 64 && !defined(__SYSCALL_COMPAT)
+#define __NR_fcntl __NR3264_fcntl
+//#define __NR_statfs __NR3264_statfs
+//#define __NR_fstatfs __NR3264_fstatfs
+//#define __NR_truncate __NR3264_truncate
+//#define __NR_ftruncate __NR3264_ftruncate
+//#define __NR_lseek __NR3264_lseek
+//#define __NR_sendfile __NR3264_sendfile
+//#define __NR_newfstatat __NR3264_fstatat
+#define __NR_fstat __NR3264_fstat
+//#define __NR_mmap __NR3264_mmap
+//#define __NR_fadvise64 __NR3264_fadvise64
+//#ifdef __NR3264_stat
+//#define __NR_stat __NR3264_stat
+//#define __NR_lstat __NR3264_lstat
+//#endif
+//#else
+//#define __NR_fcntl64 __NR3264_fcntl
+//#define __NR_statfs64 __NR3264_statfs
+//#define __NR_fstatfs64 __NR3264_fstatfs
+//#define __NR_truncate64 __NR3264_truncate
+//#define __NR_ftruncate64 __NR3264_ftruncate
+//#define __NR_llseek __NR3264_lseek
+//#define __NR_sendfile64 __NR3264_sendfile
+//#define __NR_fstatat64 __NR3264_fstatat
+//#define __NR_fstat64 __NR3264_fstat
+//#define __NR_mmap2 __NR3264_mmap
+//#define __NR_fadvise64_64 __NR3264_fadvise64
+//#ifdef __NR3264_stat
+//#define __NR_stat64 __NR3264_stat
+//#define __NR_lstat64 __NR3264_lstat
+//#endif
+//#endif
+
+#endif /* __VKI_SCNUMS_ARM64_LINUX_H */
+
+/*--------------------------------------------------------------------*/
+/*--- end                                 vki-scnums-arm64-linux.h ---*/
+/*--------------------------------------------------------------------*/
diff --git a/memcheck/mc_machine.c b/memcheck/mc_machine.c
index accc780..d04d542 100644
--- a/memcheck/mc_machine.c
+++ b/memcheck/mc_machine.c
@@ -76,6 +76,11 @@
 # define MC_SIZEOF_GUEST_STATE sizeof(VexGuestARMState)
 #endif
 
+#if defined(VGA_arm64)
+# include "libvex_guest_arm64.h"
+# define MC_SIZEOF_GUEST_STATE sizeof(VexGuestARM64State)
+#endif
+
 #if defined(VGA_mips32)
 # include "libvex_guest_mips32.h"
 # define MC_SIZEOF_GUEST_STATE sizeof(VexGuestMIPS32State)
@@ -957,6 +962,27 @@
 #  undef GOF
 #  undef SZB
 
+   /* --------------------- arm64 --------------------- */
+
+#  elif defined(VGA_arm64)
+
+#  define GOF(_fieldname) \
+      (offsetof(VexGuestARM64State,guest_##_fieldname))
+#  define SZB(_fieldname) \
+      (sizeof(((VexGuestARM64State*)0)->guest_##_fieldname))
+
+   Int  o     = offset;
+   Int  sz    = szB;
+   tl_assert(sz > 0);
+   tl_assert(host_is_little_endian());
+   (void)o; // RMME -- just to stop gcc warning that o is unused
+
+   VG_(printf)("MC_(get_otrack_shadow_offset)(arm64)(off=%d,sz=%d)\n",
+               offset,szB);
+   tl_assert(0);
+#  undef GOF
+#  undef SZB
+
    /* --------------------- mips32 --------------------- */
 
 #  elif defined(VGA_mips32)
@@ -1243,6 +1269,13 @@
    VG_(printf)("\n");
    tl_assert(0);
 
+   /* --------------------- arm64 --------------------- */
+#  elif defined(VGA_arm64)
+   VG_(printf)("get_reg_array_equiv_int_type(arm64): unhandled: ");
+   ppIRRegArray(arr);
+   VG_(printf)("\n");
+   tl_assert(0);
+
    /* --------------------- s390x --------------------- */
 #  elif defined(VGA_s390x)
    /* Should never het here because s390x does not use Ist_PutI