Added the beginnings of an AMD64 port, hence lots of new files and directories.
It compiles, but aborts immediately if you try to run it.

I didn't include ldt.c;  I'm not sure how the LDT is used on AMD64.  It can be
added later if necessary.

While doing this, did some 64-bit cleanness fixes:
- Added the necessary intermediate casts to ULong to avoid warnings when
  converting ThreadId to void* and vice versa, in vg_scheduler.c (see the
  sketch after this list).
- Fixed VALGRIND_NON_SIMD_CALL[0123] to use 'long' as the return type.
- Fixed VALGRIND_PRINTF{,BACKTRACE} to use unsigned longs instead of unsigned
  ints, as needed.
- Converted some offsets in vg_symtab2.h from "Int" to "OffT".
- Made strlen, strncat, etc., use SizeT instead of 'unsigned int' for the
  length parameter.
- Couple of other minor things.
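
  A sketch of the ULong-cast and SizeT changes (illustrative only; the real
  call sites and prototypes in the tree differ slightly):

    ThreadId tid = 1;                      /* example value */
    void*    arg = (void*)(ULong)tid;      /* ThreadId -> void*, no warning on LP64 */
    ThreadId t2  = (ThreadId)(ULong)arg;   /* void* -> ThreadId, no warning */

    /* Length parameters now take SizeT rather than 'unsigned int', e.g.: */
    Char* VG_(strncat) ( Char* dest, const Char* src, SizeT n );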

I had to insert some "#ifdef __amd64__" and "#ifndef __amd64__" guards in
places.  In particular, some of our syscall wrappers in vg_mylibc.c aren't
appropriate for AMD64, because its syscall numbering differs from the other
archs in places.  This difference will have to be abstracted out somehow.
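
The guards look roughly like this (a hypothetical example; the functions
actually guarded in vg_mylibc.c are different, but this is the shape):

    #ifndef __amd64__
    /* __NR_stat64 doesn't exist on AMD64 (plain stat is already 64-bit
       there), so this wrapper is only built on the other archs. */
    Int VG_(stat64)( const Char* path, struct vki_stat64* buf )
    {
       Int res = VG_(do_syscall)( __NR_stat64, (UWord)path, (UWord)buf );
       return VG_(is_kerror)(res) ? -1 : res;
    }
    #endif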

Also rewrote the sys_fcntl and sys_fcntl64 wrappers, as required for AMD64.
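
The rewritten wrapper has roughly this shape (a sketch only, using the
PRE/PRINT/PRE_REG_READ macros; the flag argument and the exact command
lists here are illustrative, not the real code):

    PRE(sys_fcntl, 0)
    {
       switch (ARG2 /* cmd */) {
       /* Commands that ignore the third argument: */
       case VKI_F_GETFD:
       case VKI_F_GETFL:
          PRINT("sys_fcntl ( %d, %d )", ARG1, ARG2);
          PRE_REG_READ2(long, "fcntl", unsigned int, fd, unsigned int, cmd);
          break;
       /* Everything else takes an 'arg' in the third register: */
       default:
          PRINT("sys_fcntl ( %d, %d, %d )", ARG1, ARG2, ARG3);
          PRE_REG_READ3(long, "fcntl", unsigned int, fd,
                        unsigned int, cmd, unsigned long, arg);
          break;
       }
    }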

Also moved the ipc wrapper into x86, since it's not applicable to
AMD64.  However, it is (I think) applicable to ARM, so it would be nice
to work out a way to share syscall wrappers between some, but not all,
archs.  Hmm.  The wrapper also now uses the real IPC constants rather
than magic numbers.
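
The demux at the top of the (now x86-only) ipc wrapper now looks something
like this (a sketch; the exact VKI_* constant names are assumed here):

    switch (ARG1 /* call */) {
       case VKI_SEMOP:      /* was: case 1 */
          /* check the sem_ops buffer, etc. */
          break;
       case VKI_SHMAT:      /* was: case 21 */
          /* record the attached segment, etc. */
          break;
       /* ...and so on for the other IPC calls... */
       default:
          break;
    }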

Other non-AMD64-related fixes:
- ARM: fixed the syscall table lookup to account for the fact that ARM
  syscall numbers don't start at 0, but rather at 0x900000 (see the sketch
  after this list).
- Converted a few places to use ThreadId instead of 'int' or 'Int' for
  thread IDs.
- Added both AMD64 and ARM (which I'd forgotten) entries to valgrind.spec.in.
- Tweaked comments in various places.
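
For the ARM fix, the table lookup is now along these lines (a sketch; the
name of the base constant is assumed):

    /* ARM syscall numbers are biased by 0x900000, so index the table with
       the bias removed: */
    #define VGA_SYSCALL_BASE  0x900000
    UInt idx = sysno - VGA_SYSCALL_BASE;
    if (idx < VGA_(syscall_table_size))
       return &VGA_(syscall_table)[idx];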

git-svn-id: svn://svn.valgrind.org/valgrind/trunk@3136 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/cachegrind/amd64/Makefile.am b/cachegrind/amd64/Makefile.am
new file mode 100644
index 0000000..53d27a1
--- /dev/null
+++ b/cachegrind/amd64/Makefile.am
@@ -0,0 +1,7 @@
+include $(top_srcdir)/Makefile.tool-flags.am
+
+AM_CPPFLAGS += -I$(top_srcdir)/cachegrind
+
+noinst_LIBRARIES = libcgarch.a
+
+libcgarch_a_SOURCES = cg_arch.c
diff --git a/cachegrind/amd64/cg_arch.c b/cachegrind/amd64/cg_arch.c
new file mode 100644
index 0000000..c6693f1
--- /dev/null
+++ b/cachegrind/amd64/cg_arch.c
@@ -0,0 +1,362 @@
+
+/*--------------------------------------------------------------------*/
+/*--- AMD64-specific definitions.                  amd64/cg_arch.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Cachegrind, a Valgrind tool for cache
+   profiling programs.
+
+   Copyright (C) 2002-2004 Nicholas Nethercote
+      njn25@cam.ac.uk
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "tool.h"
+#include "cg_arch.h"
+
+// All CPUID info taken from sandpile.org/a32/cpuid.htm
+// Probably only works for Intel and AMD chips, and probably only for some of
+// them. 
+
+#if 0
+static void micro_ops_warn(Int actual_size, Int used_size, Int line_size)
+{
+    VG_(message)(Vg_DebugMsg, 
+       "warning: Pentium with %d K micro-op instruction trace cache", 
+       actual_size);
+    VG_(message)(Vg_DebugMsg, 
+       "         Simulating a %d KB cache with %d B lines", 
+       used_size, line_size);
+}
+
+/* Intel method is truly wretched.  We have to do an insane indexing into an
+ * array of pre-defined configurations for various parts of the memory
+ * hierarchy. 
+ */
+static
+Int Intel_cache_info(Int level, cache_t* I1c, cache_t* D1c, cache_t* L2c)
+{
+   UChar info[16];
+   Int   i, trials;
+   Bool  L2_found = False;
+
+   if (level < 2) {
+      VG_(message)(Vg_DebugMsg, 
+         "warning: CPUID level < 2 for Intel processor (%d)", 
+         level);
+      return -1;
+   }
+
+   VG_(cpuid)(2, (Int*)&info[0], (Int*)&info[4], 
+                 (Int*)&info[8], (Int*)&info[12]);
+   trials  = info[0] - 1;   /* AL register - bits 0..7 of %eax */
+   info[0] = 0x0;           /* reset AL */
+
+   if (0 != trials) {
+      VG_(message)(Vg_DebugMsg, 
+         "warning: non-zero CPUID trials for Intel processor (%d)",
+         trials);
+      return -1;
+   }
+
+   for (i = 0; i < 16; i++) {
+
+      switch (info[i]) {
+
+      case 0x0:       /* ignore zeros */
+          break;
+          
+      /* TLB info, ignore */
+      case 0x01: case 0x02: case 0x03: case 0x04:
+      case 0x50: case 0x51: case 0x52: case 0x5b: case 0x5c: case 0x5d:
+      case 0xb0: case 0xb3:
+          break;      
+
+      case 0x06: *I1c = (cache_t) {  8, 4, 32 }; break;
+      case 0x08: *I1c = (cache_t) { 16, 4, 32 }; break;
+      case 0x30: *I1c = (cache_t) { 32, 8, 64 }; break;
+
+      case 0x0a: *D1c = (cache_t) {  8, 2, 32 }; break;
+      case 0x0c: *D1c = (cache_t) { 16, 4, 32 }; break;
+      case 0x2c: *D1c = (cache_t) { 32, 8, 64 }; break;
+
+      /* IA-64 info -- panic! */
+      case 0x10: case 0x15: case 0x1a: 
+      case 0x88: case 0x89: case 0x8a: case 0x8d:
+      case 0x90: case 0x96: case 0x9b:
+         VG_(tool_panic)("IA-64 cache detected?!");
+
+      case 0x22: case 0x23: case 0x25: case 0x29: 
+          VG_(message)(Vg_DebugMsg, 
+             "warning: L3 cache detected but ignored\n");
+          break;
+
+      /* These are sectored, whatever that means */
+      case 0x39: *L2c = (cache_t) {  128, 4, 64 }; L2_found = True; break;
+      case 0x3c: *L2c = (cache_t) {  256, 4, 64 }; L2_found = True; break;
+
+      /* If a P6 core, this means "no L2 cache".  
+         If a P4 core, this means "no L3 cache".
+         We don't know what core it is, so don't issue a warning.  To detect
+         a missing L2 cache, we use 'L2_found'. */
+      case 0x40:
+          break;
+
+      case 0x41: *L2c = (cache_t) {  128, 4, 32 }; L2_found = True; break;
+      case 0x42: *L2c = (cache_t) {  256, 4, 32 }; L2_found = True; break;
+      case 0x43: *L2c = (cache_t) {  512, 4, 32 }; L2_found = True; break;
+      case 0x44: *L2c = (cache_t) { 1024, 4, 32 }; L2_found = True; break;
+      case 0x45: *L2c = (cache_t) { 2048, 4, 32 }; L2_found = True; break;
+
+      /* These are sectored, whatever that means */
+      case 0x60: *D1c = (cache_t) { 16, 8, 64 };  break;      /* sectored */
+      case 0x66: *D1c = (cache_t) {  8, 4, 64 };  break;      /* sectored */
+      case 0x67: *D1c = (cache_t) { 16, 4, 64 };  break;      /* sectored */
+      case 0x68: *D1c = (cache_t) { 32, 4, 64 };  break;      /* sectored */
+
+      /* HACK ALERT: Instruction trace cache -- capacity is micro-ops based.
+       * conversion to byte size is a total guess;  treat the 12K and 16K
+       * cases the same since the cache byte size must be a power of two for
+       * everything to work!.  Also guessing 32 bytes for the line size... 
+       */
+      case 0x70:    /* 12K micro-ops, 8-way */
+         *I1c = (cache_t) { 16, 8, 32 };  
+         micro_ops_warn(12, 16, 32);
+         break;  
+      case 0x71:    /* 16K micro-ops, 8-way */
+         *I1c = (cache_t) { 16, 8, 32 };  
+         micro_ops_warn(16, 16, 32); 
+         break;  
+      case 0x72:    /* 32K micro-ops, 8-way */
+         *I1c = (cache_t) { 32, 8, 32 };  
+         micro_ops_warn(32, 32, 32); 
+         break;  
+
+      /* These are sectored, whatever that means */
+      case 0x79: *L2c = (cache_t) {  128, 8,  64 }; L2_found = True;  break;
+      case 0x7a: *L2c = (cache_t) {  256, 8,  64 }; L2_found = True;  break;
+      case 0x7b: *L2c = (cache_t) {  512, 8,  64 }; L2_found = True;  break;
+      case 0x7c: *L2c = (cache_t) { 1024, 8,  64 }; L2_found = True;  break;
+      case 0x7e: *L2c = (cache_t) {  256, 8, 128 }; L2_found = True;  break;
+
+      case 0x81: *L2c = (cache_t) {  128, 8, 32 };  L2_found = True;  break;
+      case 0x82: *L2c = (cache_t) {  256, 8, 32 };  L2_found = True;  break;
+      case 0x83: *L2c = (cache_t) {  512, 8, 32 };  L2_found = True;  break;
+      case 0x84: *L2c = (cache_t) { 1024, 8, 32 };  L2_found = True;  break;
+      case 0x85: *L2c = (cache_t) { 2048, 8, 32 };  L2_found = True;  break;
+      case 0x86: *L2c = (cache_t) {  512, 4, 64 };  L2_found = True;  break;
+      case 0x87: *L2c = (cache_t) { 1024, 8, 64 };  L2_found = True;  break;
+
+      default:
+          VG_(message)(Vg_DebugMsg, 
+             "warning: Unknown Intel cache config value "
+             "(0x%x), ignoring", info[i]);
+          break;
+      }
+   }
+
+   if (!L2_found)
+      VG_(message)(Vg_DebugMsg, 
+         "warning: L2 cache not installed, ignore L2 results.");
+
+   return 0;
+}
+
+/* AMD method is straightforward, just extract appropriate bits from the
+ * result registers.
+ *
+ * Bits, for D1 and I1:
+ *  31..24  data L1 cache size in KBs    
+ *  23..16  data L1 cache associativity (FFh=full)    
+ *  15.. 8  data L1 cache lines per tag    
+ *   7.. 0  data L1 cache line size in bytes
+ *
+ * Bits, for L2:
+ *  31..16  unified L2 cache size in KBs
+ *  15..12  unified L2 cache associativity (0=off, FFh=full)
+ *  11.. 8  unified L2 cache lines per tag    
+ *   7.. 0  unified L2 cache line size in bytes
+ *
+ * #3  The AMD K7 processor's L2 cache must be configured prior to relying 
+ *     upon this information. (Whatever that means -- njn)
+ *
+ * Also, according to Cyrille Chepelov, Duron stepping A0 processors (model
+ * 0x630) have a bug and misreport their L2 size as 1KB (it's really 64KB),
+ * so we detect that.
+ * 
+ * Returns 0 on success, non-zero on failure.
+ */
+static
+Int AMD_cache_info(cache_t* I1c, cache_t* D1c, cache_t* L2c)
+{
+   UInt ext_level;
+   UInt dummy, model;
+   UInt I1i, D1i, L2i;
+   
+   VG_(cpuid)(0x80000000, &ext_level, &dummy, &dummy, &dummy);
+
+   if (0 == (ext_level & 0x80000000) || ext_level < 0x80000006) {
+      VG_(message)(Vg_UserMsg, 
+         "warning: ext_level < 0x80000006 for AMD processor (0x%x)", 
+         ext_level);
+      return -1;
+   }
+
+   VG_(cpuid)(0x80000005, &dummy, &dummy, &D1i, &I1i);
+   VG_(cpuid)(0x80000006, &dummy, &dummy, &L2i, &dummy);
+
+   VG_(cpuid)(0x1, &model, &dummy, &dummy, &dummy);
+
+   /* Check for Duron bug */
+   if (model == 0x630) {
+      VG_(message)(Vg_UserMsg,
+         "Buggy Duron stepping A0. Assuming L2 size=65536 bytes");
+      L2i = (64 << 16) | (L2i & 0xffff);
+   }
+
+   D1c->size      = (D1i >> 24) & 0xff;
+   D1c->assoc     = (D1i >> 16) & 0xff;
+   D1c->line_size = (D1i >>  0) & 0xff;
+
+   I1c->size      = (I1i >> 24) & 0xff;
+   I1c->assoc     = (I1i >> 16) & 0xff;
+   I1c->line_size = (I1i >>  0) & 0xff;
+
+   L2c->size      = (L2i >> 16) & 0xffff; /* Nb: different bits used for L2 */
+   L2c->assoc     = (L2i >> 12) & 0xf;
+   L2c->line_size = (L2i >>  0) & 0xff;
+
+   return 0;
+}
+
+static jmp_buf cpuid_jmpbuf;
+
+static
+void cpuid_SIGILL_handler(int signum)
+{
+   __builtin_longjmp(cpuid_jmpbuf, 1);
+}
+
+static 
+Int get_caches_from_CPUID(cache_t* I1c, cache_t* D1c, cache_t* L2c)
+{
+   Int  level, res, ret;
+   Char vendor_id[13];
+   struct vki_sigaction sigill_new, sigill_saved;
+
+   /* Install own SIGILL handler */
+   sigill_new.ksa_handler  = cpuid_SIGILL_handler;
+   sigill_new.sa_flags    = 0;
+   sigill_new.sa_restorer = NULL;
+   res = VG_(sigemptyset)( &sigill_new.sa_mask );
+   tl_assert(res == 0);
+
+   res = VG_(sigaction)( VKI_SIGILL, &sigill_new, &sigill_saved );
+   tl_assert(res == 0);
+
+   /* Trap for illegal instruction, in case it's a really old processor that
+    * doesn't support CPUID. */
+   if (__builtin_setjmp(cpuid_jmpbuf) == 0) {
+      VG_(cpuid)(0, &level, (int*)&vendor_id[0], 
+                            (int*)&vendor_id[8], (int*)&vendor_id[4]);    
+      vendor_id[12] = '\0';
+
+      /* Restore old SIGILL handler */
+      res = VG_(sigaction)( VKI_SIGILL, &sigill_saved, NULL );
+      tl_assert(res == 0);
+
+   } else  {
+      VG_(message)(Vg_DebugMsg, "CPUID instruction not supported");
+
+      /* Restore old SIGILL handler */
+      res = VG_(sigaction)( VKI_SIGILL, &sigill_saved, NULL );
+      tl_assert(res == 0);
+      return -1;
+   }
+
+   if (0 == level) {
+      VG_(message)(Vg_DebugMsg, "CPUID level is 0, early Pentium?\n");
+      return -1;
+   }
+
+   /* Only handling Intel and AMD chips... no Cyrix, Transmeta, etc */
+   if (0 == VG_(strcmp)(vendor_id, "GenuineIntel")) {
+      ret = Intel_cache_info(level, I1c, D1c, L2c);
+
+   } else if (0 == VG_(strcmp)(vendor_id, "AuthenticAMD")) {
+      ret = AMD_cache_info(I1c, D1c, L2c);
+
+   } else if (0 == VG_(strcmp)(vendor_id, "CentaurHauls")) {
+      /* Total kludge.  Pretend to be a VIA Nehemiah. */
+      D1c->size      = 64;
+      D1c->assoc     = 16;
+      D1c->line_size = 16;
+      I1c->size      = 64;
+      I1c->assoc     = 4;
+      I1c->line_size = 16;
+      L2c->size      = 64;
+      L2c->assoc     = 16;
+      L2c->line_size = 16;
+      ret = 0;
+
+   } else {
+      VG_(message)(Vg_DebugMsg, "CPU vendor ID not recognised (%s)",
+                   vendor_id);
+      return -1;
+   }
+
+   /* Successful!  Convert sizes from KB to bytes */
+   I1c->size *= 1024;
+   D1c->size *= 1024;
+   L2c->size *= 1024;
+      
+   return ret;
+}
+#endif
+
+
+void VGA_(configure_caches)(cache_t* I1c, cache_t* D1c, cache_t* L2c,
+                            Bool all_caches_clo_defined)
+{
+   VG_(printf)("VGA_(configure_caches)() not yet implemented for AMD64\n");
+   VG_(exit)(1);
+#if 0
+   Int res;
+   
+   // Set caches to default.
+   *I1c = (cache_t) {  65536, 2, 64 };
+   *D1c = (cache_t) {  65536, 2, 64 };
+   *L2c = (cache_t) { 262144, 8, 64 };
+
+   // Then replace with any info we can get from CPUID.
+   res = get_caches_from_CPUID(I1c, D1c, L2c);
+
+   // Warn if CPUID failed and config not completely specified from cmd line.
+   if (res != 0 && !all_caches_clo_defined) {
+      VG_(message)(Vg_DebugMsg, 
+                   "Warning: Couldn't auto-detect cache config, using one "
+                   "or more defaults ");
+   }
+#endif
+}
+
+/*--------------------------------------------------------------------*/
+/*--- end                                                          ---*/
+/*--------------------------------------------------------------------*/
diff --git a/cachegrind/arm/cg_arch.c b/cachegrind/arm/cg_arch.c
index e3e83dd..9085a8f 100644
--- a/cachegrind/arm/cg_arch.c
+++ b/cachegrind/arm/cg_arch.c
@@ -1,6 +1,6 @@
 
 /*--------------------------------------------------------------------*/
-/*--- Arch-specific definitions.                     arm/cg_arch.c ---*/
+/*--- ARM-specific definitions.                      arm/cg_arch.c ---*/
 /*--------------------------------------------------------------------*/
 
 /*
diff --git a/cachegrind/tests/amd64/Makefile.am b/cachegrind/tests/amd64/Makefile.am
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/cachegrind/tests/amd64/Makefile.am
diff --git a/cachegrind/x86/cg_arch.c b/cachegrind/x86/cg_arch.c
index 6e2b710..81117b8 100644
--- a/cachegrind/x86/cg_arch.c
+++ b/cachegrind/x86/cg_arch.c
@@ -1,6 +1,6 @@
 
 /*--------------------------------------------------------------------*/
-/*--- Arch-specific definitions.                     x86/cg_arch.c ---*/
+/*--- x86-specific definitions.                      x86/cg_arch.c ---*/
 /*--------------------------------------------------------------------*/
 
 /*
diff --git a/configure.in b/configure.in
index 237304a..437dcc3 100644
--- a/configure.in
+++ b/configure.in
@@ -85,7 +85,7 @@
 AC_MSG_CHECKING([for a supported CPU])
 AC_SUBST(VG_ARCH)
 AC_SUBST(VG_ARCH_ALL)
-VG_ARCH_ALL="arm x86"
+VG_ARCH_ALL="amd64 arm x86"
 AC_SUBST(KICKSTART_BASE)
 AC_SUBST(ARCH_CORE_AM_CFLAGS)
 AC_SUBST(ARCH_TOOL_AM_CFLAGS)
@@ -104,12 +104,18 @@
      x86_64) 
         AC_MSG_RESULT([ok (${host_cpu})])
         VG_ARCH="amd64"
-        # OOO: relocations under x86-64 small model are 32-bit signed
+        # XXX: relocations under amd64's "small model" are 32-bit signed
         # quantities; therefore going above 0x7fffffff doesn't work... this is
         # a problem.
         KICKSTART_BASE="0x70000000"
         ARCH_CORE_AM_CFLAGS="-fomit-frame-pointer @PREFERRED_STACK_BOUNDARY@ -DELFSZ=64" 
-        ARCH_TOOL_AM_CFLAGS="-fomit-frame-pointer @PREFERRED_STACK_BOUNDARY@"
+        # XXX: need to use -fpic, otherwise when linking tools I get this error
+        # message:
+        #   relocation R_X86_64_32 can not be used when making a shared object;
+        #   recompile with -fPIC
+        #
+        # I don't understand...  --njn
+        ARCH_TOOL_AM_CFLAGS="-fomit-frame-pointer @PREFERRED_STACK_BOUNDARY@ -fpic"
         ARCH_CORE_AM_CCASFLAGS=""
         ;;
 
@@ -186,7 +192,7 @@
 AC_MSG_CHECKING([for a supported CPU/OS combination])
 AC_SUBST(VG_PLATFORM)
 AC_SUBST(VG_PLATFORM_ALL)
-VG_PLATFORM_ALL="arm-linux x86-linux"
+VG_PLATFORM_ALL="amd64-linux arm-linux x86-linux"
 
 VG_PLATFORM="$VG_ARCH-$VG_OS"
 
@@ -355,34 +361,41 @@
    tests/unused/Makefile 
    include/valgrind.h 
    include/Makefile 
-   include/x86/Makefile 
+   include/amd64/Makefile 
    include/arm/Makefile 
+   include/x86/Makefile 
    include/linux/Makefile 
-   include/x86-linux/Makefile 
+   include/amd64-linux/Makefile 
    include/arm-linux/Makefile 
+   include/x86-linux/Makefile 
    auxprogs/Makefile
    coregrind/Makefile 
    coregrind/demangle/Makefile 
    coregrind/docs/Makefile
-   coregrind/x86/Makefile
+   coregrind/amd64/Makefile
    coregrind/arm/Makefile
+   coregrind/x86/Makefile
    coregrind/linux/Makefile
-   coregrind/x86-linux/Makefile
+   coregrind/amd64-linux/Makefile
    coregrind/arm-linux/Makefile
+   coregrind/x86-linux/Makefile
    addrcheck/Makefile
    addrcheck/tests/Makefile
    addrcheck/docs/Makefile
    memcheck/Makefile
    memcheck/tests/Makefile
-   memcheck/tests/x86/Makefile
+   memcheck/tests/amd64/Makefile
    memcheck/tests/arm/Makefile
+   memcheck/tests/x86/Makefile
    memcheck/docs/Makefile
    cachegrind/Makefile
-   cachegrind/x86/Makefile
+   cachegrind/amd64/Makefile
    cachegrind/arm/Makefile
+   cachegrind/x86/Makefile
    cachegrind/tests/Makefile
-   cachegrind/tests/x86/Makefile
+   cachegrind/tests/amd64/Makefile
    cachegrind/tests/arm/Makefile
+   cachegrind/tests/x86/Makefile
    cachegrind/docs/Makefile
    cachegrind/cg_annotate
    helgrind/Makefile
@@ -400,8 +413,9 @@
    lackey/docs/Makefile
    none/Makefile
    none/tests/Makefile
-   none/tests/x86/Makefile
+   none/tests/amd64/Makefile
    none/tests/arm/Makefile
+   none/tests/x86/Makefile
    none/docs/Makefile
 ) 
 
diff --git a/coregrind/amd64-linux/Makefile.am b/coregrind/amd64-linux/Makefile.am
new file mode 100644
index 0000000..cfd3b7a
--- /dev/null
+++ b/coregrind/amd64-linux/Makefile.am
@@ -0,0 +1,15 @@
+include $(top_srcdir)/Makefile.all.am
+include $(top_srcdir)/Makefile.core-AM_CPPFLAGS.am
+
+AM_CFLAGS = $(WERROR) -Winline -Wall -Wshadow -O -fomit-frame-pointer -g
+
+noinst_HEADERS = \
+	core_platform.h \
+	vki_unistd.h
+
+noinst_LIBRARIES = libplatform.a
+
+
+libplatform_a_SOURCES = \
+	syscall.S \
+	syscalls.c
diff --git a/coregrind/amd64-linux/core_platform.h b/coregrind/amd64-linux/core_platform.h
new file mode 100644
index 0000000..c6f5a68
--- /dev/null
+++ b/coregrind/amd64-linux/core_platform.h
@@ -0,0 +1,109 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Platform-specific stuff for the core.                        ---*/
+/*---                                  amd64-linux/core_platform.h ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, an extensible x86 protected-mode
+   emulator for monitoring program execution on x86-Unixes.
+
+   Copyright (C) 2000-2004 Nicholas Nethercote
+      njn25@cam.ac.uk
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#ifndef __AMD64_LINUX_CORE_PLATFORM_H
+#define __AMD64_LINUX_CORE_PLATFORM_H
+
+//#include "core_platform_asm.h"    // platform-specific asm  stuff
+//#include "platform_arch.h"        // platform-specific tool stuff
+
+/* ---------------------------------------------------------------------
+   Dealing with registers
+   ------------------------------------------------------------------ */
+
+// Accessors for the ThreadArchState
+#define PLATFORM_SYSCALL_NUM     guest_RAX
+#define PLATFORM_SYSCALL_ARG1    guest_RDI
+#define PLATFORM_SYSCALL_ARG2    guest_RSI
+#define PLATFORM_SYSCALL_ARG3    guest_RDX
+#define PLATFORM_SYSCALL_ARG4    guest_R10
+#define PLATFORM_SYSCALL_ARG5    guest_R8
+#define PLATFORM_SYSCALL_ARG6    guest_R9
+#define PLATFORM_SYSCALL_RET     guest_RAX
+
+// Setting a syscall result
+#define PLATFORM_SET_SYSCALL_RESULT(regs, val)     \
+   ((regs).vex.guest_RAX = (val))
+
+// Setting thread regs and shadow regs from within the core
+#define SET_SYSCALL_RETVAL(zztid, zzval) \
+   SET_THREAD_REG(zztid, zzval, SYSCALL_RET, post_reg_write, \
+                  Vg_CoreSysCall, zztid, O_SYSCALL_RET, sizeof(UWord))
+
+/* ---------------------------------------------------------------------
+   Exports of vg_ldt.c
+   ------------------------------------------------------------------ */
+
+// XXX: eventually all these should be x86-private, and not visible to the
+// core (except maybe do_useseg()?)
+
+#if 0
+/* Simulate the modify_ldt syscall. */
+extern Int VG_(sys_modify_ldt) ( ThreadId tid,
+                                 Int func, void* ptr, UInt bytecount );
+
+/* Simulate the {get,set}_thread_area syscalls. */
+extern Int VG_(sys_set_thread_area) ( ThreadId tid,
+                                      vki_modify_ldt_t* info );
+extern Int VG_(sys_get_thread_area) ( ThreadId tid,
+                                      vki_modify_ldt_t* info );
+
+/* Called from generated code.  Given a segment selector and a virtual
+   address, return a linear address, and do limit checks too. */
+extern Addr VG_(do_useseg) ( UInt seg_selector, Addr virtual_addr );
+#endif
+
+/* ---------------------------------------------------------------------
+   ucontext stuff
+   ------------------------------------------------------------------ */
+
+#define UCONTEXT_INSTR_PTR(uc)   ((uc)->uc_mcontext.rip)
+#define UCONTEXT_STACK_PTR(uc)   ((uc)->uc_mcontext.rsp)
+#define UCONTEXT_FRAME_PTR(uc)   ((uc)->uc_mcontext.rbp)
+#define UCONTEXT_SYSCALL_NUM(uc) ((uc)->uc_mcontext.rax)
+
+/* ---------------------------------------------------------------------
+   mmap() stuff
+   ------------------------------------------------------------------ */
+
+#define PLATFORM_DO_MMAP(ret, start, length, prot, flags, fd, offset) { \
+   I_die_here; \
+} while (0)
+
+#define PLATFORM_GET_MMAP_ARGS(tst, a1, a2, a3, a4, a5, a6) do {\
+   I_die_here; \
+} while (0)
+
+#endif   // __AMD64_LINUX_CORE_PLATFORM_H
+
+/*--------------------------------------------------------------------*/
+/*--- end                                                          ---*/
+/*--------------------------------------------------------------------*/
diff --git a/coregrind/amd64-linux/syscall.S b/coregrind/amd64-linux/syscall.S
new file mode 100644
index 0000000..af024b4
--- /dev/null
+++ b/coregrind/amd64-linux/syscall.S
@@ -0,0 +1,53 @@
+
+##--------------------------------------------------------------------##
+##--- Support for doing system calls.        amd64-linux/syscall.S ---##
+##--------------------------------------------------------------------##
+
+/*
+  This file is part of Valgrind, an extensible x86 protected-mode
+  emulator for monitoring program execution on x86-Unixes.
+
+  Copyright (C) 2000-2004 Julian Seward 
+     jseward@acm.org
+
+  This program is free software; you can redistribute it and/or
+  modify it under the terms of the GNU General Public License as
+  published by the Free Software Foundation; either version 2 of the
+  License, or (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+  02111-1307, USA.
+
+  The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "core_asm.h"
+#include "vki_unistd.h"
+
+# XXX: must reinstate comments also -- see x86-linux/syscall.S
+
+.globl	VG_(do_syscall)
+VG_(do_syscall):
+	ud2
+
+.globl VG_(clone)
+VG_(clone):
+	ud2
+
+.globl VG_(sigreturn)
+VG_(sigreturn):
+	ud2
+
+/* Let the linker know we don't need an executable stack */
+.section .note.GNU-stack,"",@progbits
+
+##--------------------------------------------------------------------##
+##--- end                                                          ---##
+##--------------------------------------------------------------------##
diff --git a/coregrind/amd64-linux/syscalls.c b/coregrind/amd64-linux/syscalls.c
new file mode 100644
index 0000000..389c281
--- /dev/null
+++ b/coregrind/amd64-linux/syscalls.c
@@ -0,0 +1,144 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Platform-specific syscalls stuff.     amd64-linux/syscalls.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, an extensible x86 protected-mode
+   emulator for monitoring program execution on x86-Unixes.
+
+   Copyright (C) 2000-2004 Nicholas Nethercote
+      njn25@cam.ac.uk
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "core.h"
+
+// See the comment accompanying the declaration of VGA_(thread_syscall)() in
+// coregrind/core.h for an explanation of what this does, and why.
+//
+// XXX: this function and these variables should be assembly code!  See the
+// x86 version.
+const Addr VGA_(sys_before), VGA_(sys_restarted),
+           VGA_(sys_after),  VGA_(sys_done);
+void VGA_(do_thread_syscall)(UWord sys,
+                             UWord arg1, UWord arg2, UWord arg3,
+                             UWord arg4, UWord arg5, UWord arg6,
+                             UWord *result, /*enum PXState*/Int *statep,
+                             /*enum PXState*/Int poststate)
+{
+   I_die_here;
+}
+
+
+// Back up to restart a system call.
+void VGA_(restart_syscall)(ThreadArchState *arch)
+{
+#if 0
+   arch->vex.guest_EIP -= 2;             // sizeof(int $0x80)
+
+   /* Make sure our caller is actually sane, and we're really backing
+      back over a syscall.
+
+      int $0x80 == CD 80 
+   */
+   {
+      UChar *p = (UChar *)arch->vex.guest_EIP;
+      
+      if (p[0] != 0xcd || p[1] != 0x80)
+         VG_(message)(Vg_DebugMsg,
+                      "?! restarting over syscall at %p %02x %02x\n",
+                      arch->vex.guest_EIP, p[0], p[1]); 
+
+      vg_assert(p[0] == 0xcd && p[1] == 0x80);
+   }
+#endif
+}
+
+/* ---------------------------------------------------------------------
+   PRE/POST wrappers for AMD64/Linux-specific syscalls
+   ------------------------------------------------------------------ */
+
+// Nb: See the comment above the generic PRE/POST wrappers in
+// coregrind/vg_syscalls.c for notes about how they work.
+
+#define PRE(name, f)     PRE_TEMPLATE(static, amd64_linux, name, f)
+#define POST(name)      POST_TEMPLATE(static, amd64_linux, name)
+
+PRE(sys_clone, Special)
+{
+   I_die_here;
+#if 0
+   PRINT("sys_clone ( %d, %p, %p, %p, %p )",ARG1,ARG2,ARG3,ARG4,ARG5);
+   // XXX: really not sure about the last two args... if they are really
+   // there, we should do PRE_MEM_READs for both of them...
+   PRE_REG_READ4(int, "clone",
+                 unsigned long, flags, void *, child_stack,
+                 int *, parent_tidptr, int *, child_tidptr);
+
+   if (ARG2 == 0 &&
+       (ARG1 == (VKI_CLONE_CHILD_CLEARTID|VKI_CLONE_CHILD_SETTID|VKI_SIGCHLD)
+     || ARG1 == (VKI_CLONE_PARENT_SETTID|VKI_SIGCHLD))) 
+   {
+      VGA_(gen_sys_fork_before)(tid, tst);
+      SET_RESULT( VG_(do_syscall)(SYSNO, ARG1, ARG2, ARG3, ARG4, ARG5) );
+      VGA_(gen_sys_fork_after) (tid, tst);
+   } else {
+      VG_(unimplemented)
+         ("clone(): not supported by Valgrind.\n   "
+          "We do support programs linked against\n   "
+          "libpthread.so, though.  Re-run with -v and ensure that\n   "
+          "you are picking up Valgrind's implementation of libpthread.so.");
+   }
+#endif
+}
+
+#undef PRE
+#undef POST
+
+/* ---------------------------------------------------------------------
+   The AMD64/Linux syscall table
+   ------------------------------------------------------------------ */
+
+// Macros for adding AMD64/Linux-specific wrappers to the syscall table.
+#define PLAX_(const, name)    SYS_WRAPPER_ENTRY_X_(amd64_linux, const, name) 
+#define PLAXY(const, name)    SYS_WRAPPER_ENTRY_XY(amd64_linux, const, name) 
+
+// This table maps from __NR_xxx syscall numbers (from
+// linux/include/asm-x86_64/unistd.h) to the appropriate PRE/POST sys_foo()
+// wrappers on AMD64 (as per sys_call_table in
+// linux/arch/x86_64/kernel/entry.S).
+//
+// For those syscalls not handled by Valgrind, the annotation indicates its
+// arch/OS combination, e.g. */* (generic), */Linux (Linux only), ?/?
+// (unknown).
+
+const struct SyscallTableEntry VGA_(syscall_table)[] = {
+   PLAX_(__NR_clone,             sys_clone),          // 56
+   GENX_(__NR_exit,              sys_exit),           // 60
+   LINX_(__NR_mount,             sys_mount),          // 165
+   //   (restart_syscall)                             // 219
+};
+
+const UInt VGA_(syscall_table_size) = 
+            sizeof(VGA_(syscall_table)) / sizeof(VGA_(syscall_table)[0]);
+
+/*--------------------------------------------------------------------*/
+/*--- end                                                          ---*/
+/*--------------------------------------------------------------------*/
diff --git a/coregrind/amd64-linux/vki_unistd.h b/coregrind/amd64-linux/vki_unistd.h
new file mode 100644
index 0000000..770b670
--- /dev/null
+++ b/coregrind/amd64-linux/vki_unistd.h
@@ -0,0 +1,329 @@
+
+/*
+   This file is part of Valgrind, an extensible x86 protected-mode
+   emulator for monitoring program execution on x86-Unixes.
+
+   Copyright (C) 2000-2004 Julian Seward 
+      jseward@acm.org
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#ifndef __AMD64_LINUX_VKI_UNISTD_H_
+#define __AMD64_LINUX_VKI_UNISTD_H_
+
+// From linux-2.6.9/include/asm-x86_64/unistd.h
+
+#define __NR_read                                0
+#define __NR_write                               1
+#define __NR_open                                2
+#define __NR_close                               3
+#define __NR_stat                                4
+#define __NR_fstat                               5
+#define __NR_lstat                               6
+#define __NR_poll                                7
+
+#define __NR_lseek                               8
+#define __NR_mmap                                9
+#define __NR_mprotect                           10
+#define __NR_munmap                             11
+#define __NR_brk                                12
+#define __NR_rt_sigaction                       13
+#define __NR_rt_sigprocmask                     14
+#define __NR_rt_sigreturn                       15
+
+#define __NR_ioctl                              16
+#define __NR_pread64                            17
+#define __NR_pwrite64                           18
+#define __NR_readv                              19
+#define __NR_writev                             20
+#define __NR_access                             21
+#define __NR_pipe                               22
+#define __NR_select                             23
+
+#define __NR_sched_yield                        24
+#define __NR_mremap                             25
+#define __NR_msync                              26
+#define __NR_mincore                            27
+#define __NR_madvise                            28
+#define __NR_shmget                             29
+#define __NR_shmat                              30
+#define __NR_shmctl                             31
+
+#define __NR_dup                                32
+#define __NR_dup2                               33
+#define __NR_pause                              34
+#define __NR_nanosleep                          35
+#define __NR_getitimer                          36
+#define __NR_alarm                              37
+#define __NR_setitimer                          38
+#define __NR_getpid                             39
+
+#define __NR_sendfile                           40
+#define __NR_socket                             41
+#define __NR_connect                            42
+#define __NR_accept                             43
+#define __NR_sendto                             44
+#define __NR_recvfrom                           45
+#define __NR_sendmsg                            46
+#define __NR_recvmsg                            47
+
+#define __NR_shutdown                           48
+#define __NR_bind                               49
+#define __NR_listen                             50
+#define __NR_getsockname                        51
+#define __NR_getpeername                        52
+#define __NR_socketpair                         53
+#define __NR_setsockopt                         54
+#define __NR_getsockopt                         55
+
+#define __NR_clone                              56
+#define __NR_fork                               57
+#define __NR_vfork                              58
+#define __NR_execve                             59
+#define __NR_exit                               60
+#define __NR_wait4                              61
+#define __NR_kill                               62
+#define __NR_uname                              63
+
+#define __NR_semget                             64
+#define __NR_semop                              65
+#define __NR_semctl                             66
+#define __NR_shmdt                              67
+#define __NR_msgget                             68
+#define __NR_msgsnd                             69
+#define __NR_msgrcv                             70
+#define __NR_msgctl                             71
+
+#define __NR_fcntl                              72
+#define __NR_flock                              73
+#define __NR_fsync                              74
+#define __NR_fdatasync                          75
+#define __NR_truncate                           76
+#define __NR_ftruncate                          77
+#define __NR_getdents                           78
+#define __NR_getcwd                             79
+
+#define __NR_chdir                              80
+#define __NR_fchdir                             81
+#define __NR_rename                             82
+#define __NR_mkdir                              83
+#define __NR_rmdir                              84
+#define __NR_creat                              85
+#define __NR_link                               86
+#define __NR_unlink                             87
+
+#define __NR_symlink                            88
+#define __NR_readlink                           89
+#define __NR_chmod                              90
+#define __NR_fchmod                             91
+#define __NR_chown                              92
+#define __NR_fchown                             93
+#define __NR_lchown                             94
+#define __NR_umask                              95
+
+#define __NR_gettimeofday                       96
+#define __NR_getrlimit                          97
+#define __NR_getrusage                          98
+#define __NR_sysinfo                            99
+#define __NR_times                             100
+#define __NR_ptrace                            101
+#define __NR_getuid                            102
+#define __NR_syslog                            103
+
+/* at the very end the stuff that never runs during the benchmarks */
+#define __NR_getgid                            104
+#define __NR_setuid                            105
+#define __NR_setgid                            106
+#define __NR_geteuid                           107
+#define __NR_getegid                           108
+#define __NR_setpgid                           109
+#define __NR_getppid                           110
+#define __NR_getpgrp                           111
+
+#define __NR_setsid                            112
+#define __NR_setreuid                          113
+#define __NR_setregid                          114
+#define __NR_getgroups                         115
+#define __NR_setgroups                         116
+#define __NR_setresuid                         117
+#define __NR_getresuid                         118
+#define __NR_setresgid                         119
+
+#define __NR_getresgid                         120
+#define __NR_getpgid                           121
+#define __NR_setfsuid                          122
+#define __NR_setfsgid                          123
+#define __NR_getsid                            124
+#define __NR_capget                            125
+#define __NR_capset                            126
+
+#define __NR_rt_sigpending                     127
+#define __NR_rt_sigtimedwait                   128
+#define __NR_rt_sigqueueinfo                   129
+#define __NR_rt_sigsuspend                     130
+#define __NR_sigaltstack                       131
+#define __NR_utime                             132
+#define __NR_mknod                             133
+
+#define __NR_uselib                            134
+#define __NR_personality                       135
+
+#define __NR_ustat                             136
+#define __NR_statfs                            137
+#define __NR_fstatfs                           138
+#define __NR_sysfs                             139
+
+#define __NR_getpriority                       140
+#define __NR_setpriority                       141
+#define __NR_sched_setparam                    142
+#define __NR_sched_getparam                    143
+#define __NR_sched_setscheduler                144
+#define __NR_sched_getscheduler                145
+#define __NR_sched_get_priority_max            146
+#define __NR_sched_get_priority_min            147
+#define __NR_sched_rr_get_interval             148
+
+#define __NR_mlock                             149
+#define __NR_munlock                           150
+#define __NR_mlockall                          151
+#define __NR_munlockall                        152
+
+#define __NR_vhangup                           153
+
+#define __NR_modify_ldt                        154
+
+#define __NR_pivot_root                        155
+
+#define __NR__sysctl                           156
+
+#define __NR_prctl                             157
+#define __NR_arch_prctl                        158
+
+#define __NR_adjtimex                          159
+
+#define __NR_setrlimit                         160
+
+#define __NR_chroot                            161
+
+#define __NR_sync                              162
+
+#define __NR_acct                              163
+
+#define __NR_settimeofday                      164
+
+#define __NR_mount                             165
+#define __NR_umount2                           166
+
+#define __NR_swapon                            167
+#define __NR_swapoff                           168
+
+#define __NR_reboot                            169
+
+#define __NR_sethostname                       170
+#define __NR_setdomainname                     171
+
+#define __NR_iopl                              172
+#define __NR_ioperm                            173
+
+#define __NR_create_module                     174
+#define __NR_init_module                       175
+#define __NR_delete_module                     176
+#define __NR_get_kernel_syms                   177
+#define __NR_query_module                      178
+
+#define __NR_quotactl                          179
+
+#define __NR_nfsservctl                        180
+
+#define __NR_getpmsg                           181	/* reserved for LiS/STREAMS */
+#define __NR_putpmsg                           182	/* reserved for LiS/STREAMS */
+
+#define __NR_afs_syscall                       183	/* reserved for AFS */ 
+
+#define __NR_tuxcall      		184 /* reserved for tux */
+
+#define __NR_security			185
+
+#define __NR_gettid		186
+
+#define __NR_readahead		187
+#define __NR_setxattr		188
+#define __NR_lsetxattr		189
+#define __NR_fsetxattr		190
+#define __NR_getxattr		191
+#define __NR_lgetxattr		192
+#define __NR_fgetxattr		193
+#define __NR_listxattr		194
+#define __NR_llistxattr		195
+#define __NR_flistxattr		196
+#define __NR_removexattr	197
+#define __NR_lremovexattr	198
+#define __NR_fremovexattr	199
+#define __NR_tkill	200
+#define __NR_time      201
+#define __NR_futex     202
+#define __NR_sched_setaffinity    203
+#define __NR_sched_getaffinity     204
+#define __NR_set_thread_area	205
+#define __NR_io_setup	206
+#define __NR_io_destroy	207
+#define __NR_io_getevents	208
+#define __NR_io_submit	209
+#define __NR_io_cancel	210
+#define __NR_get_thread_area	211
+#define __NR_lookup_dcookie	212
+#define __NR_epoll_create	213
+#define __NR_epoll_ctl_old	214
+#define __NR_epoll_wait_old	215
+#define __NR_remap_file_pages	216
+#define __NR_getdents64	217
+#define __NR_set_tid_address	218
+#define __NR_restart_syscall	219
+#define __NR_semtimedop		220
+#define __NR_fadvise64		221
+#define __NR_timer_create		222
+#define __NR_timer_settime		223
+#define __NR_timer_gettime		224
+#define __NR_timer_getoverrun		225
+#define __NR_timer_delete	226
+#define __NR_clock_settime	227
+#define __NR_clock_gettime	228
+#define __NR_clock_getres	229
+#define __NR_clock_nanosleep	230
+#define __NR_exit_group		231
+#define __NR_epoll_wait		232
+#define __NR_epoll_ctl		233
+#define __NR_tgkill		234
+#define __NR_utimes		235
+#define __NR_vserver		236
+#define __NR_mbind 		237
+#define __NR_set_mempolicy 	238
+#define __NR_get_mempolicy 	239
+#define __NR_mq_open 		240
+#define __NR_mq_unlink 		241
+#define __NR_mq_timedsend 	242
+#define __NR_mq_timedreceive	243
+#define __NR_mq_notify 		244
+#define __NR_mq_getsetattr 	245
+#define __NR_kexec_load 	246
+#define __NR_waitid		247
+
+
+#endif /* __AMD64_LINUX_VKI_UNISTD_H_ */
diff --git a/coregrind/amd64/Makefile.am b/coregrind/amd64/Makefile.am
new file mode 100644
index 0000000..2c39310
--- /dev/null
+++ b/coregrind/amd64/Makefile.am
@@ -0,0 +1,33 @@
+include $(top_srcdir)/Makefile.all.am
+include $(top_srcdir)/Makefile.core-AM_CPPFLAGS.am
+
+AM_CFLAGS = $(WERROR) -Winline -Wall -Wshadow -O -fomit-frame-pointer -g
+
+noinst_HEADERS = \
+	core_arch.h \
+	core_arch_asm.h \
+	amd64_private.h \
+	amd64_private_asm.h
+
+noinst_LIBRARIES = libarch.a
+
+EXTRA_DIST = \
+	jmp_with_stack.c \
+	libpthread.c
+
+BUILT_SOURCES = stage2.lds
+CLEANFILES = stage2.lds
+
+libarch_a_SOURCES = \
+	cpuid.S \
+	helpers.S \
+	dispatch.S \
+	signals.c \
+	state.c
+
+# Extract ld's default linker script and hack it to our needs
+stage2.lds: Makefile
+	$(CC) -Wl,--verbose -nostdlib 2>&1 | sed \
+		-e '1,/^=====\+$$/d' \
+		-e '/^=====\+$$/d' \
+		-e 's/0x00400000/kickstart_base/g' > $@ || rm -f $@
diff --git a/coregrind/amd64/amd64_private.h b/coregrind/amd64/amd64_private.h
new file mode 100644
index 0000000..da1714c
--- /dev/null
+++ b/coregrind/amd64/amd64_private.h
@@ -0,0 +1,53 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Private arch-specific header.          amd64/amd64_private.h ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, an extensible x86 protected-mode
+   emulator for monitoring program execution on x86-Unixes.
+
+   Copyright (C) 2000-2004 Nicholas Nethercote
+      njn25@cam.ac.uk
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#ifndef __AMD64_PRIVATE_H
+#define __AMD64_PRIVATE_H
+
+#include "core_arch_asm.h"       // arch-specific asm  stuff
+#include "amd64_private_asm.h"   // private arch-specific asm stuff
+#include "tool_arch.h"           // arch-specific tool stuff
+
+/* ---------------------------------------------------------------------
+   Exports of vg_ldt.c
+   ------------------------------------------------------------------ */
+
+#if 0
+/* Alloc & copy, and dealloc. */
+extern VgLdtEntry* VG_(allocate_LDT_for_thread)   ( VgLdtEntry* parent_ldt );
+extern void        VG_(deallocate_LDT_for_thread) ( VgLdtEntry* ldt );
+extern void        VG_(clear_TLS_for_thread)      ( VgLdtEntry* tls );
+#endif
+
+#endif   // __AMD64_PRIVATE_H
+
+/*--------------------------------------------------------------------*/
+/*--- end                                                          ---*/
+/*--------------------------------------------------------------------*/
diff --git a/coregrind/amd64/amd64_private_asm.h b/coregrind/amd64/amd64_private_asm.h
new file mode 100644
index 0000000..4649982
--- /dev/null
+++ b/coregrind/amd64/amd64_private_asm.h
@@ -0,0 +1,43 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Private arch-specific asm stuff.   amd64/amd64_private_asm.h ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, an extensible x86 protected-mode
+   emulator for monitoring program execution on x86-Unixes.
+
+   Copyright (C) 2000-2004 Nicholas Nethercote
+      njn25@cam.ac.uk
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#ifndef __AMD64_PRIVATE_ASM_H
+#define __AMD64_PRIVATE_ASM_H
+
+#if 0
+/* Client address space segment limit descriptor entry */
+#define VG_POINTERCHECK_SEGIDX  1
+#endif
+
+#endif   // __AMD64_PRIVATE_ASM_H
+
+/*--------------------------------------------------------------------*/
+/*--- end                                                          ---*/
+/*--------------------------------------------------------------------*/
diff --git a/coregrind/amd64/core_arch.h b/coregrind/amd64/core_arch.h
new file mode 100644
index 0000000..76f38c6
--- /dev/null
+++ b/coregrind/amd64/core_arch.h
@@ -0,0 +1,175 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Arch-specific stuff for the core.          amd64/core_arch.h ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, an extensible x86 protected-mode
+   emulator for monitoring program execution on x86-Unixes.
+
+   Copyright (C) 2000-2004 Nicholas Nethercote
+      njn25@cam.ac.uk
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#ifndef __AMD64_CORE_ARCH_H
+#define __AMD64_CORE_ARCH_H
+
+#include "core_arch_asm.h"    // arch-specific asm  stuff
+#include "tool_arch.h"        // arch-specific tool stuff
+
+#include "libvex_guest_amd64.h"
+
+/* ---------------------------------------------------------------------
+   Interesting registers
+   ------------------------------------------------------------------ */
+
+// Vex field names
+#define ARCH_INSTR_PTR        guest_RIP
+#define ARCH_STACK_PTR        guest_RSP
+#define ARCH_FRAME_PTR        guest_RBP
+
+#define ARCH_CLREQ_ARGS       guest_RAX
+#define ARCH_CLREQ_RET        guest_RDX
+#define ARCH_PTHREQ_RET       guest_RDX
+
+// Register numbers, for vg_symtab2.c
+#define R_STACK_PTR           4
+#define R_FRAME_PTR           5
+
+// Stack frame layout and linkage
+#define FIRST_STACK_FRAME(rbp)         (rbp)
+#define STACK_FRAME_RET(rbp)           (((UWord*)rbp)[1])
+#define STACK_FRAME_NEXT(rbp)          (((UWord*)rbp)[0])
+
+// Get stack pointer and frame pointer
+#define ARCH_GET_REAL_STACK_PTR(esp) do {   \
+   I_die_here; \
+} while (0)
+
+#define ARCH_GET_REAL_FRAME_PTR(ebp) do {   \
+   I_die_here; \
+} while (0)
+
+/* ---------------------------------------------------------------------
+   Elf stuff
+   ------------------------------------------------------------------ */
+
+#define VG_ELF_ENDIANNESS     ELFDATA2LSB
+#define VG_ELF_MACHINE        EM_X86_64
+#define VG_ELF_CLASS          ELFCLASS64
+
+/* ---------------------------------------------------------------------
+   LDT type             
+   ------------------------------------------------------------------ */
+
+// XXX: eventually this will be x86-private, not seen by the core(?)
+
+/* This is the hardware-format for a segment descriptor, ie what the
+   x86 actually deals with.  It is 8 bytes long.  It's ugly.  */
+
+typedef struct _LDT_ENTRY {
+    union {
+       struct {
+          UShort      LimitLow;
+          UShort      BaseLow;
+          unsigned    BaseMid         : 8;
+          unsigned    Type            : 5;
+          unsigned    Dpl             : 2;
+          unsigned    Pres            : 1;
+          unsigned    LimitHi         : 4;
+          unsigned    Sys             : 1;
+          unsigned    Reserved_0      : 1;
+          unsigned    Default_Big     : 1;
+          unsigned    Granularity     : 1;
+          unsigned    BaseHi          : 8;
+       } Bits;
+       struct {
+          UInt word1;
+          UInt word2;
+       } Words;
+    } 
+    LdtEnt;
+} VgLdtEntry;
+
+/* ---------------------------------------------------------------------
+   Architecture-specific part of a ThreadState
+   ------------------------------------------------------------------ */
+
+// Architecture-specific part of a ThreadState
+// XXX: eventually this should be made abstract, ie. the fields not visible
+//      to the core...  then VgLdtEntry can be made non-visible to the core
+//      also.
+typedef 
+   struct {
+      /* Pointer to this thread's Local (Segment) Descriptor Table.
+         Starts out as NULL, indicating there is no table, and we hope
+         to keep it that way.  If the thread does __NR_modify_ldt to
+         create entries, we allocate an 8192-entry table at that point.
+         This is a straight copy of the Linux kernel's scheme.  Don't
+         forget to deallocate this at thread exit. */
+      VgLdtEntry* ldt;
+
+      /* TLS table. This consists of a small number (currently 3) of
+         entries from the Global Descriptor Table. */
+      VgLdtEntry tls[VKI_GDT_ENTRY_TLS_ENTRIES];
+
+      /* --- BEGIN vex-mandated guest state --- */
+
+      /* Saved machine context. */
+      VexGuestAMD64State vex;
+
+      /* Saved shadow context. */
+      VexGuestAMD64State vex_shadow;
+
+      /* Spill area. */
+      UChar vex_spill[LibVEX_N_SPILL_BYTES];
+
+      /* --- END vex-mandated guest state --- */
+   } 
+   ThreadArchState;
+
+typedef VexGuestAMD64State VexGuestArchState;
+
+/* ---------------------------------------------------------------------
+   libpthread stuff
+   ------------------------------------------------------------------ */
+
+// ToDo XXX???  not at all sure about this...
+struct _ThreadArchAux {
+   void*         tls_data;
+   int           tls_segment;
+   unsigned long sysinfo;
+};
+
+/* ---------------------------------------------------------------------
+   Miscellaneous constants
+   ------------------------------------------------------------------ */
+
+// Valgrind's signal stack size, in words.
+#define VG_SIGSTACK_SIZE_W    10000
+
+// Base address of client address space.
+#define CLIENT_BASE	0x00000000ul
+
+#endif   // __AMD64_CORE_ARCH_H
+
+/*--------------------------------------------------------------------*/
+/*--- end                                                          ---*/
+/*--------------------------------------------------------------------*/
diff --git a/coregrind/amd64/core_arch_asm.h b/coregrind/amd64/core_arch_asm.h
new file mode 100644
index 0000000..a00b011
--- /dev/null
+++ b/coregrind/amd64/core_arch_asm.h
@@ -0,0 +1,37 @@
+/*--------------------------------------------------------------------*/
+/*---                                        amd64/core_arch_asm.h ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, an extensible x86 protected-mode
+   emulator for monitoring program execution on x86-Unixes.
+
+   Copyright (C) 2000-2004 Nicholas Nethercote
+      njn25@cam.ac.uk
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#ifndef __AMD64_CORE_ARCH_ASM_H
+#define __AMD64_CORE_ARCH_ASM_H
+
+#endif   // __AMD64_CORE_ARCH_ASM_H
+
+/*--------------------------------------------------------------------*/
+/*--- end                                                          ---*/
+/*--------------------------------------------------------------------*/
diff --git a/coregrind/amd64/cpuid.S b/coregrind/amd64/cpuid.S
new file mode 100644
index 0000000..cc88344
--- /dev/null
+++ b/coregrind/amd64/cpuid.S
@@ -0,0 +1,50 @@
+
+##--------------------------------------------------------------------##
+##--- Support for determining CPU characteristics.   amd64/cpuid.S ---##
+##--------------------------------------------------------------------##
+
+/*
+  This file is part of Valgrind, an extensible x86 protected-mode
+  emulator for monitoring program execution on x86-Unixes.
+
+  Copyright (C) 2000-2004 Julian Seward 
+     jseward@acm.org
+
+  This program is free software; you can redistribute it and/or
+  modify it under the terms of the GNU General Public License as
+  published by the Free Software Foundation; either version 2 of the
+  License, or (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+  02111-1307, USA.
+
+  The GNU General Public License is contained in the file COPYING.
+*/
+
+#if 0
+
+#include "core_asm.h"
+
+/*
+    int VG_(cpuid)(UInt eax,
+                   UInt *eax_ret, UInt *ebx_ret, UInt *ecx_ret, UInt *edx_ret)
+ */
+.globl VG_(cpuid)
+VG_(cpuid):
+    ud2
+        
+/* Let the linker know we don't need an executable stack */
+.section .note.GNU-stack,"",@progbits
+
+#endif /* 0 */
+
+##--------------------------------------------------------------------##
+##--- end                                    	                   ---##
+##--------------------------------------------------------------------##
diff --git a/coregrind/amd64/dispatch.S b/coregrind/amd64/dispatch.S
new file mode 100644
index 0000000..44c8d93
--- /dev/null
+++ b/coregrind/amd64/dispatch.S
@@ -0,0 +1,185 @@
+
+##--------------------------------------------------------------------##
+##--- The core dispatch loop, for jumping to a code address.       ---##
+##---                                             amd64/dispatch.S ---##
+##--------------------------------------------------------------------##
+
+/*
+  This file is part of Valgrind, an extensible x86 protected-mode
+  emulator for monitoring program execution on x86-Unixes.
+
+  Copyright (C) 2000-2004 Julian Seward 
+     jseward@acm.org
+
+  This program is free software; you can redistribute it and/or
+  modify it under the terms of the GNU General Public License as
+  published by the Free Software Foundation; either version 2 of the
+  License, or (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+  02111-1307, USA.
+
+  The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "core_asm.h"
+#include "amd64_private_asm.h"
+
+/*------------------------------------------------------------*/
+/*--- The normal-case dispatch machinery.                  ---*/
+/*------------------------------------------------------------*/
+	
+/* To transfer to an (original) code address, load it into %eax and
+   jump to vg_dispatch.  This fragment of code tries to find the
+   address of the corresponding translation by searching the translation
+   table.   If it fails, a new translation is made, added to the
+   translation table, and then jumped to.  Almost all the hard
+   work is done by C routines; this code simply handles the
+   common case fast -- when the translation address is found in
+   the translation cache.
+
+   At entry, %eax is the only live (real-machine) register; the
+   entire simulated state is tidily saved in vg_m_state.  
+*/
+
+.globl VG_(run_innerloop)
+VG_(run_innerloop):
+    ud2
+
+#if 0	
+#define TT_LOOKUP(reg, fail)				\
+	movl %eax, reg;					\
+	andl $VG_TT_FAST_MASK, reg;			\
+	movl VG_(tt_fast)(,reg,4), reg;			\
+	cmpl %eax, (reg);				\
+	jnz  fail
+	
+/* The C world needs a way to get started simulating.  So we provide
+   a function void vg_run_innerloop ( void ), which starts running
+   from vg_m_eip, and exits when the counter reaches zero.  This loop
+   can also exit if vg_oursignalhandler() catches a non-resumable
+   signal, for example SIGSEGV.  It then longjmp()s back past here.
+*/
+
+/* signature: UInt VG_(run_innerloop) ( void* guest_state ) */
+
+.globl VG_(run_innerloop)
+VG_(run_innerloop):
+	/* 4(%esp) holds guest_state */
+	
+	/* ----- entry point to VG_(run_innerloop) ----- */
+	pushl	%ebx
+	pushl	%ecx
+	pushl	%edx
+	pushl	%esi
+	pushl	%edi
+	pushl	%ebp
+	
+	/* 28(%esp) holds guest_state */
+
+	/* Set up the guest state pointer */
+	movl	28(%esp), %ebp
+	
+	/* fetch m_eip into %eax */
+	movl	VG_(instr_ptr_offset), %esi
+	movl	(%ebp, %esi, 1), %eax
+	
+	/* fall into main loop */
+
+dispatch_boring:
+	/* save the jump address in the guest state */
+	movl	VG_(instr_ptr_offset), %esi
+	movl	%eax, (%ebp, %esi, 1)
+
+	/* Are we out of timeslice?  If yes, defer to scheduler. */
+	subl $1, VG_(dispatch_ctr)
+	
+	jz	counter_is_zero
+	/* try a fast lookup in the translation cache */
+	TT_LOOKUP(%ebx, fast_lookup_failed)
+
+	/* Found a match.  Call the tce.payload field (+VG_CODE_OFFSET) */
+	addl	$VG_CODE_OFFSET, %ebx
+	call	*%ebx
+	
+	/* 
+	   %eax holds destination (original) address.
+	   %ebp indicates further details of the control transfer
+	   requested to the address in %eax.
+	
+	   If ebp is unchanged (== * 28(%esp)), just jump next to %eax.
+	 
+	   If ebp == VG_EBP_JMP_SYSCALL, do a system call before 
+	   continuing at eax.
+	
+	   If ebp == VG_EBP_JMP_CLIENTREQ, do a client request before 
+	   continuing at eax.
+	
+	   If %ebp has any other value, we panic.
+	*/
+
+	cmpl	28(%esp), %ebp
+	jz	dispatch_boring
+
+	jmp	dispatch_exceptional
+
+	
+fast_lookup_failed:
+	/* %EIP is up to date here since dispatch_boring dominates */
+	addl	$1, VG_(dispatch_ctr)
+	movl	$VG_TRC_INNER_FASTMISS, %eax
+	jmp	run_innerloop_exit
+
+counter_is_zero:
+	/* %EIP is up to date here since dispatch_boring dominates */
+	addl	$1, VG_(dispatch_ctr)
+	movl	$VG_TRC_INNER_COUNTERZERO, %eax
+	jmp	run_innerloop_exit
+	
+run_innerloop_exit:
+	popl	%ebp
+	popl	%edi
+	popl	%esi
+	popl	%edx
+	popl	%ecx
+	popl	%ebx
+	ret	
+
+
+
+/* Other ways of getting out of the inner loop.  Placed out-of-line to
+   make it look cleaner. 
+*/
+dispatch_exceptional:
+	/* this is jumped to only, not fallen-through from above */
+	cmpl	$VG_TRC_INNER_COUNTERZERO, %ebp
+	jz	counter_is_zero
+
+	/* save %eax in %EIP and defer to sched */
+	movl	VG_(instr_ptr_offset), %esi
+	movl	28(%esp), %edi
+	movl	%eax, (%edi, %esi, 1)
+	movl	%ebp, %eax
+	jmp	run_innerloop_exit
+
+	
+.data
+panic_msg_ebp:
+.ascii	"vg_dispatch: %ebp has invalid value!"
+.byte	0
+.text	
+        
+/* Let the linker know we don't need an executable stack */
+.section .note.GNU-stack,"",@progbits
+#endif
+
+##--------------------------------------------------------------------##
+##--- end                                                          ---##
+##--------------------------------------------------------------------##
diff --git a/coregrind/amd64/helpers.S b/coregrind/amd64/helpers.S
new file mode 100644
index 0000000..c2c83ab
--- /dev/null
+++ b/coregrind/amd64/helpers.S
@@ -0,0 +1,93 @@
+##--------------------------------------------------------------------##
+##--- Support routines for the JITter output.      amd64/helpers.S ---##
+##--------------------------------------------------------------------##
+
+/*
+  This file is part of Valgrind, an extensible x86 protected-mode
+  emulator for monitoring program execution on x86-Unixes.
+
+  Copyright (C) 2000-2004 Julian Seward 
+     jseward@acm.org
+
+  This program is free software; you can redistribute it and/or
+  modify it under the terms of the GNU General Public License as
+  published by the Free Software Foundation; either version 2 of the
+  License, or (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+  02111-1307, USA.
+
+  The GNU General Public License is contained in the file COPYING.
+*/
+
+#if 0
+#include "core_asm.h"
+
+/* ------------------ SIMULATED CPU HELPERS ------------------ */
+/* Stubs for returns which we want to catch: signal returns and
+   pthread returns.  In the latter case, the thread's return value is
+   in %EAX, so we pass this as the first argument to the request.  In
+   both cases we use the user request mechanism.  You need to read
+   the definition of VALGRIND_MAGIC_SEQUENCE
+   in valgrind.h to make sense of this.
+
+   This isn't used in-place.  It is copied into the client address space
+   at an arbitrary address.  Therefore, this code must be completely
+   position-independent.
+*/
+.global VG_(trampoline_code_start)
+.global VG_(trampoline_code_length)
+.global VG_(tramp_sigreturn_offset)
+.global VG_(tramp_syscall_offset)
+	
+VG_(trampoline_code_start):
+sigreturn_start:	
+	subl	$20, %esp	# allocate arg block
+	movl	%esp, %edx	# %edx == &_zzq_args[0]
+	movl	$VG_USERREQ__SIGNAL_RETURNS, 0(%edx)	# request
+	movl	$0, 4(%edx)	# arg1
+	movl	$0, 8(%edx)	# arg2
+	movl	$0, 12(%edx)	# arg3
+	movl	$0, 16(%edx)	# arg4
+	movl	%edx, %eax
+	# and now the magic sequence itself:
+	roll $29, %eax
+	roll $3, %eax
+	rorl $27, %eax
+	rorl $5, %eax
+	roll $13, %eax
+	roll $19, %eax
+	# should never get here
+	ud2
+
+	# We can point our sysinfo stuff here
+	.align 16
+syscall_start:	
+	int	$0x80
+	ret
+tramp_code_end:
+			
+.data
+VG_(trampoline_code_length):
+	.long tramp_code_end - VG_(trampoline_code_start)
+VG_(tramp_sigreturn_offset):
+	.long sigreturn_start - VG_(trampoline_code_start)
+VG_(tramp_syscall_offset):
+	.long syscall_start - VG_(trampoline_code_start)
+.text
+
+
+/* Let the linker know we don't need an executable stack */
+.section .note.GNU-stack,"",@progbits
+#endif
+		
+##--------------------------------------------------------------------##
+##--- end                                                          ---##
+##--------------------------------------------------------------------##
diff --git a/coregrind/amd64/jmp_with_stack.c b/coregrind/amd64/jmp_with_stack.c
new file mode 100644
index 0000000..b2f82f2
--- /dev/null
+++ b/coregrind/amd64/jmp_with_stack.c
@@ -0,0 +1,58 @@
+
+/*
+   This file is part of Valgrind, an extensible x86 protected-mode
+   emulator for monitoring program execution on x86-Unixes.
+
+   Copyright (C) 2000-2004 Julian Seward 
+      jseward@acm.org
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "ume.h"
+
+/* 
+   Jump to a particular IP with a particular SP.  This is intended
+   to simulate the initial CPU state when the kernel starts a program
+   after exec; it therefore also clears all the other registers.
+ */
+void jmp_with_stack(Addr eip, Addr esp)
+{
+   // XXX: temporary only
+   extern int printf (__const char *__restrict __format, ...);
+   extern void exit (int __status);
+   printf("jmp_with_stack: argh\n");
+   exit(1);
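+
+   /* A rough, untested sketch of what the AMD64 version might eventually
+      look like, mirroring the x86 version kept below under "#if 0":
+
+         asm volatile ("movq  %1, %%rsp;"      // set rsp
+                       "pushq %%rax;"          // push entry point
+                       "xorq  %%rax,%%rax;"    // clear registers
+                       "xorq  %%rbx,%%rbx;"
+                       "xorq  %%rcx,%%rcx;"
+                       "xorq  %%rdx,%%rdx;"
+                       "xorq  %%rsi,%%rsi;"
+                       "xorq  %%rdi,%%rdi;"
+                       "xorq  %%rbp,%%rbp;"
+                       // (r8-r15 would want clearing too)
+                       "ret"                   // return into entry point
+                       : : "a" (eip), "r" (esp));
+   */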
+#if 0
+   asm volatile ("movl %1, %%esp;"	/* set esp */
+		 "pushl %%eax;"		/* push esp */
+		 "xorl	%%eax,%%eax;"	/* clear registers */
+		 "xorl	%%ebx,%%ebx;"
+		 "xorl	%%ecx,%%ecx;"
+		 "xorl	%%edx,%%edx;"
+		 "xorl	%%esi,%%esi;"
+		 "xorl	%%edi,%%edi;"
+		 "xorl	%%ebp,%%ebp;"
+
+		 "ret"			/* return into entry */
+		 : : "a" (eip), "r" (esp));
+   /* we should never get here */
+   for(;;)
+	   asm volatile("ud2");
+#endif
+} 
diff --git a/coregrind/amd64/libpthread.c b/coregrind/amd64/libpthread.c
new file mode 100644
index 0000000..673b2b8
--- /dev/null
+++ b/coregrind/amd64/libpthread.c
@@ -0,0 +1,230 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Arch-specific libpthread code.            amd64/libpthread.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, an extensible x86 protected-mode
+   emulator for monitoring program execution on x86-Unixes.
+
+   Copyright (C) 2000-2004 Julian Seward 
+      jseward@acm.org
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+/* ALL THIS CODE RUNS ON THE SIMULATED CPU.
+
+   See the comments at the top of coregrind/vg_libpthread.c for some
+   caveats.
+*/
+
+#if 0
+#include "core.h"        /* For the VG_USERREQ__* constants */
+
+#define __USE_UNIX98
+#include <pthread.h>
+#undef __USE_UNIX98
+
+#define __USE_GNU
+#include <dlfcn.h>
+#undef __USE_GNU
+
+#include <errno.h>
+
+// Struct used to describe a TDB header, copied from glibc.
+typedef
+   struct {
+      void *tcb;
+      void *dtv;
+      void *self;                
+      int multiple_threads;
+      unsigned long sysinfo;
+   }
+   tcbhead_t;
+
+/* --------------------------------------------------- 
+   Helper functions for running a thread 
+   and for clearing up afterwards.
+   ------------------------------------------------ */
+
+typedef void *(*__attribute__ ((stdcall)) REGPARM(3) allocate_tls_t) (void *result);
+typedef void (*__attribute__ ((stdcall)) REGPARM(3) deallocate_tls_t) (void *tcb, int dealloc_tcb);
+
+static allocate_tls_t allocate_tls = NULL;
+static deallocate_tls_t deallocate_tls = NULL;
+
+static int get_gs()
+{
+   int gs;
+   asm volatile ("movw %%gs, %w0" : "=q" (gs));
+   return gs & 0xffff;
+}
+
+static void set_gs( int gs )
+{
+   asm volatile ("movw %w0, %%gs" :: "q" (gs));
+}
+
+static void *get_tcb()
+{
+   void *tcb;
+   asm volatile ("movl %%gs:0, %0" : "=r" (tcb));
+   return tcb;
+}
+
+
+Bool VGA_(has_tls)(void)
+{
+   return (get_gs() & 7) == 3;
+}  
+
+
+void VGA_(thread_create)(ThreadArchAux *aux)
+{
+   if (VGA_(has_tls)()) {
+      tcbhead_t *tcb = get_tcb();
+
+      if (allocate_tls == NULL || deallocate_tls == NULL) {
+         allocate_tls = (allocate_tls_t)dlsym(RTLD_DEFAULT, "_dl_allocate_tls");
+         deallocate_tls = (deallocate_tls_t)dlsym(RTLD_DEFAULT, "_dl_deallocate_tls");
+      }
+
+      my_assert(allocate_tls != NULL);
+      
+      aux->tls_data = allocate_tls(NULL);
+      aux->tls_segment = get_gs() >> 3;
+      aux->sysinfo = tcb->sysinfo;
+
+      tcb->multiple_threads = 1;
+   } else {
+      aux->tls_data = NULL;
+      aux->tls_segment = -1;
+      aux->sysinfo = 0;
+   }
+}
+   
+void VGA_(thread_wrapper)(ThreadArchAux *aux)
+{
+   void*         tls_data;
+   int           tls_segment;
+   unsigned long sysinfo;
+      
+   tls_data    = aux->tls_data;
+   tls_segment = aux->tls_segment;
+   sysinfo     = aux->sysinfo;
+
+   if (tls_data) {
+      tcbhead_t *tcb = tls_data;
+      vki_modify_ldt_t ldt_info;
+
+      /* Fill in the TCB header */
+      tcb->tcb = tcb;
+      tcb->self = tcb;
+      tcb->multiple_threads = 1;
+      tcb->sysinfo = sysinfo;
+      
+      /* Fill in an LDT descriptor */
+      ldt_info.entry_number = tls_segment;
+      ldt_info.base_addr = (unsigned long)tls_data;
+      ldt_info.limit = 0xfffff;
+      ldt_info.seg_32bit = 1;
+      ldt_info.contents = 0;
+      ldt_info.read_exec_only = 0;
+      ldt_info.limit_in_pages = 1;
+      ldt_info.seg_not_present = 0;
+      ldt_info.useable = 1;
+      ldt_info.reserved = 0;
+      
+      /* Install the thread area */
+      VG_(do_syscall)(__NR_set_thread_area, &ldt_info);
+      
+      /* Setup the GS segment register */
+      set_gs(ldt_info.entry_number * 8 + 3);
+   }
+}
+   
+void VGA_(thread_exit)(void)
+{
+   /* Free up any TLS data */
+   if ((get_gs() & 7) == 3 && pthread_self() > 1) {
+      my_assert(deallocate_tls != NULL);
+      deallocate_tls(get_tcb(), 1);
+   }
+}   
+
+/* POSIX spinlocks, taken from glibc linuxthreads/sysdeps/i386 */
+
+typedef volatile int pthread_spinlock_t; /* Huh?  Guarded by __USE_XOPEN2K */
+
+int pthread_spin_init(pthread_spinlock_t *lock, int pshared)
+{
+  /* We can ignore the `pshared' parameter.  Since we are busy-waiting
+     all processes which can access the memory location `lock' points
+     to can use the spinlock.  */
+  *lock = 1;
+  return 0;
+}
+
+int pthread_spin_lock(pthread_spinlock_t *lock)
+{
+  asm volatile
+    ("\n"
+     "1:\n\t"
+     "lock; decl %0\n\t"
+     "js 2f\n\t"
+     ".section .text.spinlock,\"ax\"\n"
+     "2:\n\t"
+     "cmpl $0,%0\n\t"
+     "rep; nop\n\t"
+     "jle 2b\n\t"
+     "jmp 1b\n\t"
+     ".previous"
+     : "=m" (*lock));
+  return 0;
+}
+
+int pthread_spin_unlock(pthread_spinlock_t *lock)
+{
+  asm volatile
+    ("movl $1,%0"
+     : "=m" (*lock));
+  return 0;
+}
+
+int pthread_spin_destroy(pthread_spinlock_t *lock)
+{
+  /* Nothing to do.  */
+  return 0;
+}
+
+int pthread_spin_trylock(pthread_spinlock_t *lock)
+{
+  int oldval;
+
+  asm volatile
+    ("xchgl %0,%1"
+     : "=r" (oldval), "=m" (*lock)
+     : "0" (0));
+  return oldval > 0 ? 0 : EBUSY;
+}
+#endif
+
+/*--------------------------------------------------------------------*/
+/*--- end                                                          ---*/
+/*--------------------------------------------------------------------*/
diff --git a/coregrind/amd64/signals.c b/coregrind/amd64/signals.c
new file mode 100644
index 0000000..7fefc92
--- /dev/null
+++ b/coregrind/amd64/signals.c
@@ -0,0 +1,312 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Arch-specific signals stuff.                 amd64/signals.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, an extensible x86 protected-mode
+   emulator for monitoring program execution on x86-Unixes.
+
+   Copyright (C) 2000-2004 Nicholas Nethercote
+      njn25@cam.ac.uk
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "core.h"
+
+#include "libvex_guest_amd64.h"
+
+/*------------------------------------------------------------*/
+/*--- Signal frame                                         ---*/
+/*------------------------------------------------------------*/
+
+// A structure in which to save the application's registers
+// during the execution of signal handlers.
+
+#if 0
+typedef
+   struct {
+      /* There are two different stack frame formats, depending on
+	 whether the client set the SA_SIGINFO flag for the handler.
+	 This structure is put onto the client's stack as part of
+	 signal delivery, and therefore appears as the signal
+	 handler's arguments.
+
+	 The first two words are common for both frame formats -
+	 they're the return address and the signal number. */
+
+      /* Sig handler's (bogus) return address */
+      Addr retaddr;
+      /* The arg to the sig handler.  We need to inspect this after
+         the handler returns, but it's unreasonable to assume that the
+         handler won't change it.  So we keep a second copy of it in
+         sigNo_private. */
+      Int  sigNo;
+
+      /* This is where the two frames start differing. */
+      union {
+	 struct {		/* set SA_SIGINFO */
+	    /* ptr to siginfo_t. */
+	    Addr psigInfo;
+
+	    /* ptr to ucontext */
+	    Addr puContext;
+	 } sigInfo;
+	 struct vki_sigcontext sigContext; /* did not set SA_SIGINFO */
+      } handlerArgs;
+
+      /* The rest are private fields which the handler is unaware of. */
+
+      /* Sanity check word. */
+      UInt magicPI;
+      /* pointed to by psigInfo */
+      vki_siginfo_t sigInfo;
+      /* pointed to by puContext */
+      struct vki_ucontext uContext;
+
+      /* Safely-saved version of sigNo, as described above. */
+      Int  sigNo_private;
+
+      /* Saved processor state. */
+      VexGuestX86State vex;
+      VexGuestX86State vex_shadow;
+
+      /* saved signal mask to be restored when handler returns */
+      vki_sigset_t	mask;
+
+      /* Scheduler-private stuff: what was the thread's status prior to
+         delivering this signal? */
+      ThreadStatus status;
+      void* /*pthread_mutex_t* */ associated_mx;
+      void* /*pthread_cond_t* */ associated_cv;
+
+      /* Sanity check word.  Is the highest-addressed word; do not
+         move!*/
+      UInt magicE;
+   }
+   VgSigFrame;
+#endif
+
+/*------------------------------------------------------------*/
+/*--- Signal operations                                    ---*/
+/*------------------------------------------------------------*/
+
+#if 0
+/* Make up a plausible-looking thread state from the thread's current state */
+static void synth_ucontext(ThreadId tid, const vki_siginfo_t *si, 
+			   const vki_sigset_t *set, struct vki_ucontext *uc)
+{
+   ThreadState *tst = VG_(get_ThreadState)(tid);
+   struct vki_sigcontext *sc = &uc->uc_mcontext;
+
+   VG_(memset)(uc, 0, sizeof(*uc));
+
+   uc->uc_flags = 0;
+   uc->uc_link = 0;
+   uc->uc_sigmask = *set;
+   uc->uc_stack = tst->altstack;
+
+#define SC2(reg,REG)  sc->reg = tst->arch.vex.guest_##REG
+   SC2(gs,GS);
+   SC2(fs,FS);
+   SC2(es,ES);
+   SC2(ds,DS);
+
+   SC2(edi,EDI);
+   SC2(esi,ESI);
+   SC2(ebp,EBP);
+   SC2(esp,ESP);
+   SC2(ebx,EBX);
+   SC2(edx,EDX);
+   SC2(ecx,ECX);
+   SC2(eax,EAX);
+
+   SC2(eip,EIP);
+   SC2(cs,CS);
+   sc->eflags = LibVEX_GuestX86_get_eflags(&tst->arch.vex);
+   SC2(ss,SS);
+   /* XXX esp_at_signal */
+   /* XXX trapno */
+   /* XXX err */
+#undef SC2
+
+   sc->cr2 = (UInt)si->_sifields._sigfault._addr;
+}
+
+#define SET_SIGNAL_ESP(zztid, zzval) \
+   SET_THREAD_REG(zztid, zzval, STACK_PTR, post_reg_write, \
+                  Vg_CoreSignal, zztid, O_STACK_PTR, sizeof(Addr))
+#endif
+
+void VGA_(push_signal_frame)(ThreadId tid, Addr esp_top_of_frame,
+                             const vki_siginfo_t *siginfo,
+                             void *handler, UInt flags,
+                             const vki_sigset_t *mask)
+{
+   I_die_here;
+#if 0
+   Addr		esp;
+   ThreadState* tst;
+   VgSigFrame*  frame;
+   Int		sigNo = siginfo->si_signo;
+
+   esp = esp_top_of_frame;
+   esp -= sizeof(VgSigFrame);
+   frame = (VgSigFrame*)esp;
+
+   tst = & VG_(threads)[tid];
+
+   /* For tracking memory events, indicate the entire frame has been
+    * allocated, but pretend that only the first four words are written */
+   VG_TRACK( new_mem_stack_signal, (Addr)frame, sizeof(VgSigFrame) );
+
+   /* Assert that the frame is placed correctly. */
+   vg_assert( (sizeof(VgSigFrame) & 0x3) == 0 );
+   vg_assert( ((Char*)(&frame->magicE)) + sizeof(UInt) 
+              == ((Char*)(esp_top_of_frame)) );
+
+   /* retaddr, sigNo, psigInfo, puContext fields are to be written */
+   VG_TRACK( pre_mem_write, Vg_CoreSignal, tid, "signal handler frame", 
+                            (Addr)frame, offsetof(VgSigFrame, handlerArgs) );
+   frame->retaddr    = (UInt)VG_(client_trampoline_code)+VG_(tramp_sigreturn_offset);
+   frame->sigNo      = sigNo;
+   frame->sigNo_private = sigNo;
+   VG_TRACK( post_mem_write, Vg_CoreSignal, tid,
+             (Addr)frame, offsetof(VgSigFrame, handlerArgs) );
+
+   if (flags & VKI_SA_SIGINFO) {
+      /* if the client asked for a siginfo delivery, then build the stack that way */
+      VG_TRACK( pre_mem_write, Vg_CoreSignal, tid, "signal handler frame (siginfo)", 
+		(Addr)&frame->handlerArgs, sizeof(frame->handlerArgs.sigInfo) );
+      frame->handlerArgs.sigInfo.psigInfo   = (Addr)&frame->sigInfo;
+      frame->handlerArgs.sigInfo.puContext = (Addr)&frame->uContext;
+      VG_TRACK( post_mem_write, Vg_CoreSignal, tid,
+                (Addr)&frame->handlerArgs, sizeof(frame->handlerArgs.sigInfo) );
+
+      VG_TRACK( pre_mem_write, Vg_CoreSignal, tid, "signal handler frame (siginfo)", 
+		(Addr)&frame->sigInfo, sizeof(frame->sigInfo) );
+      VG_(memcpy)(&frame->sigInfo, siginfo, sizeof(vki_siginfo_t));
+      VG_TRACK( post_mem_write, Vg_CoreSignal, tid, 
+                (Addr)&frame->sigInfo, sizeof(frame->sigInfo) );
+
+      VG_TRACK( pre_mem_write, Vg_CoreSignal, tid, "signal handler frame (siginfo)", 
+		(Addr)&frame->uContext, sizeof(frame->uContext) );
+      synth_ucontext(tid, siginfo, mask, &frame->uContext);
+      VG_TRACK( post_mem_write, Vg_CoreSignal, tid, 
+                (Addr)&frame->uContext, sizeof(frame->uContext) );
+   } else {
+      struct vki_ucontext uc;
+
+      /* otherwise just put the sigcontext there */
+
+      synth_ucontext(tid, siginfo, mask, &uc);
+
+      VG_TRACK( pre_mem_write, Vg_CoreSignal, tid, "signal handler frame (sigcontext)", 
+		(Addr)&frame->handlerArgs, sizeof(frame->handlerArgs.sigContext) );
+      VG_(memcpy)(&frame->handlerArgs.sigContext, &uc.uc_mcontext, 
+		  sizeof(struct vki_sigcontext));
+      VG_TRACK( post_mem_write, Vg_CoreSignal, tid,
+                (Addr)&frame->handlerArgs, sizeof(frame->handlerArgs.sigContext) );
+      
+      frame->handlerArgs.sigContext.oldmask = tst->sig_mask.sig[0];
+   }
+
+   frame->magicPI    = 0x31415927;
+
+   frame->vex        = tst->arch.vex;
+   frame->vex_shadow = tst->arch.vex_shadow;
+
+   frame->mask = tst->sig_mask;
+
+   /* If the thread is currently blocked in a syscall, we want it to
+      resume as runnable. */
+   if (tst->status == VgTs_WaitSys)
+      frame->status = VgTs_Runnable;
+   else
+      frame->status = tst->status;
+ 
+   frame->associated_mx = tst->associated_mx;
+   frame->associated_cv = tst->associated_cv;
+
+   frame->magicE     = 0x27182818;
+
+   /* Ensure 'tid' and 'tst' correspond */
+   vg_assert(& VG_(threads)[tid] == tst);
+   /* Set the thread so it will next run the handler. */
+   /* tst->m_esp  = esp; */
+   SET_SIGNAL_ESP(tid, esp);
+
+   tst->arch.vex.guest_EIP = (Addr) handler;
+   /* This thread needs to be marked runnable, but we leave that to
+      the caller. */
+
+   if (0)
+      VG_(printf)("pushed signal frame; %%ESP now = %p, next %%EBP = %p, status=%d\n", 
+		  esp, tst->arch.vex.guest_EIP, tst->status);
+#endif
+}
+
+Int VGA_(pop_signal_frame)(ThreadId tid)
+{
+   I_die_here;
+#if 0
+   Addr          esp;
+   VgSigFrame*   frame;
+   ThreadState*  tst;
+
+   vg_assert(VG_(is_valid_tid)(tid));
+   tst = & VG_(threads)[tid];
+
+   /* Correctly reestablish the frame base address. */
+   esp   = tst->arch.vex.guest_ESP;
+   frame = (VgSigFrame*)
+              (esp -4 /* because the handler's RET pops the RA */
+                  +20 /* because signalreturn_bogusRA pushes 5 words */);
+
+   vg_assert(frame->magicPI == 0x31415927);
+   vg_assert(frame->magicE  == 0x27182818);
+   if (VG_(clo_trace_signals))
+      VG_(message)(Vg_DebugMsg, 
+         "vg_pop_signal_frame (thread %d): valid magic; EIP=%p", tid, frame->vex.guest_EIP);
+
+   /* Mark the frame structure as nonaccessible. */
+   VG_TRACK( die_mem_stack_signal, (Addr)frame, sizeof(VgSigFrame) );
+
+   /* restore machine state */
+   tst->arch.vex        = frame->vex;
+   tst->arch.vex_shadow = frame->vex_shadow;
+
+   /* And restore the thread's status to what it was before the signal
+      was delivered. */
+   tst->status    = frame->status;
+
+   tst->associated_mx = frame->associated_mx;
+   tst->associated_cv = frame->associated_cv;
+
+   tst->sig_mask  = frame->mask;
+
+   /* don't use the copy exposed to the handler; the handler might
+      have changed it. */
+   return frame->sigNo_private; 
+#endif
+}
+
+/*--------------------------------------------------------------------*/
+/*--- end                                                          ---*/
+/*--------------------------------------------------------------------*/
diff --git a/coregrind/amd64/state.c b/coregrind/amd64/state.c
new file mode 100644
index 0000000..0ddfffa
--- /dev/null
+++ b/coregrind/amd64/state.c
@@ -0,0 +1,293 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Arch-specific registers, etc.                  amd64/state.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, an extensible x86 protected-mode
+   emulator for monitoring program execution on x86-Unixes.
+
+   Copyright (C) 2000-2004 Nicholas Nethercote
+      njn25@cam.ac.uk
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "core.h"
+#include "amd64_private.h"
+#include <sys/ptrace.h>
+
+#include "libvex_guest_amd64.h"
+
+/*------------------------------------------------------------*/
+/*--- Initialising the first thread                        ---*/
+/*------------------------------------------------------------*/
+
+/* Given a pointer to the ThreadArchState for thread 1 (the root
+   thread), initialise the VEX guest state, and copy in essential
+   starting values.
+*/
+void VGA_(init_thread1state) ( Addr client_eip, 
+                               Addr esp_at_startup,
+			       /*MOD*/ ThreadArchState* arch )
+{
+   I_die_here;
+#if 0
+   vg_assert(0 == sizeof(VexGuestX86State) % 8);
+
+   /* Zero out the initial state, and set up the simulated FPU in a
+      sane way. */
+   LibVEX_GuestX86_initialise(&arch->vex);
+
+   /* Zero out the shadow area. */
+   VG_(memset)(&arch->vex_shadow, 0, sizeof(VexGuestX86State));
+
+   /* Put essential stuff into the new state. */
+   /* initialise %cs, %ds and %ss to point at the operating system's
+      default code, data and stack segments */
+   arch->vex.guest_ESP = esp_at_startup;
+   arch->vex.guest_EIP = client_eip;
+
+   asm volatile("movw %%cs, %0"
+                :
+                : "m" (arch->vex.guest_CS));
+   asm volatile("movw %%ds, %0"
+                :
+                : "m" (arch->vex.guest_DS));
+   asm volatile("movw %%ss, %0"
+                :
+                : "m" (arch->vex.guest_SS));
+
+   VG_TRACK( post_reg_write, Vg_CoreStartup, /*tid*/1, /*offset*/0,
+             sizeof(VexGuestArchState));
+
+   /* I assume that if we have SSE2 we also have SSE */
+   VG_(have_ssestate) = False;
+   //      VG_(cpu_has_feature)(VG_X86_FEAT_FXSR) &&
+   //   VG_(cpu_has_feature)(VG_X86_FEAT_SSE);
+
+   if (0) {
+      if (VG_(have_ssestate))
+         VG_(printf)("Looks like a SSE-capable CPU\n");
+      else
+         VG_(printf)("Looks like a MMX-only CPU\n");
+   }
+#endif
+}
+
+/*------------------------------------------------------------*/
+/*--- Thread stuff                                         ---*/
+/*------------------------------------------------------------*/
+
+void VGA_(clear_thread)( ThreadArchState *arch )
+{
+   I_die_here;
+#if 0
+   arch->ldt = NULL;
+   VG_(clear_TLS_for_thread)(arch->tls);
+#endif
+}  
+
+void VGA_(cleanup_thread) ( ThreadArchState *arch )
+{  
+   I_die_here;
+#if 0
+   /* Deallocate its LDT, if it ever had one. */
+   VG_(deallocate_LDT_for_thread)( arch->ldt ); 
+   arch->ldt = NULL;
+   
+   /* Clear its TLS array. */
+   VG_(clear_TLS_for_thread)( arch->tls );
+#endif
+}  
+
+void VGA_(setup_child) ( ThreadArchState *arch, ThreadArchState *parent_arch )
+{  
+   I_die_here;
+#if 0
+   /* We inherit our parent's LDT. */
+   if (parent_arch->ldt == NULL) {
+      /* We hope this is the common case. */
+      arch->ldt = NULL;
+   } else {
+      /* No luck .. we have to take a copy of the parent's. */
+      arch->ldt = VG_(allocate_LDT_for_thread)( parent_arch->ldt );
+   }
+
+   /* Initialise the thread's TLS array */
+   VG_(clear_TLS_for_thread)( arch->tls );
+#endif
+}  
+
+void VGA_(set_arg_and_bogus_ret)( ThreadId tid, UWord arg, Addr ret )
+{
+   I_die_here;
+#if 0
+   /* Push the arg, and mark it as readable. */
+   SET_PTHREQ_ESP(tid, VG_(threads)[tid].arch.vex.guest_ESP - sizeof(UWord));
+   * (UInt*)(VG_(threads)[tid].arch.vex.guest_ESP) = arg;
+   VG_TRACK( post_mem_write, Vg_CoreSignal, tid, 
+             VG_(threads)[tid].arch.vex.guest_ESP, sizeof(void*) );
+
+   /* Don't mark the pushed return address as readable; any attempt to read
+      this is an internal valgrind bug since thread_exit_wrapper() should not
+      return. */
+   SET_PTHREQ_ESP(tid, VG_(threads)[tid].arch.vex.guest_ESP - sizeof(UWord));
+   * (UInt*)(VG_(threads)[tid].arch.vex.guest_ESP) = ret;
+#endif
+}
+
+void VGA_(thread_initial_stack)(ThreadId tid, UWord arg, Addr ret)
+{
+   I_die_here;
+#if 0
+   Addr esp = (Addr)STACK_PTR(VG_(threads)[tid].arch);
+
+   /* push two args */
+   esp -= 2 * sizeof(UWord);
+   SET_PTHREQ_ESP(tid, esp);
+   
+   VG_TRACK ( new_mem_stack, esp, 2 * sizeof(UWord) );
+   VG_TRACK ( pre_mem_write, Vg_CorePThread, tid, "new thread: stack",
+                             esp, 2 * sizeof(UWord) );
+
+   /* push arg and (bogus) return address */
+   *(UWord*)(esp+sizeof(UWord)) = arg;
+   *(UWord*)(esp)               = ret;
+
+   VG_TRACK ( post_mem_write, Vg_CoreSignal, tid, esp, 2 * sizeof(UWord) );
+#endif
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Symtab stuff                                         ---*/
+/*------------------------------------------------------------*/
+
+#if 0
+/* This is the AMD64 register encoding -- integer regs. */
+#define R_RAX 0
+#define R_RCX 1
+#define R_RDX 2
+#define R_RBX 3
+#define R_RSP 4
+#define R_RBP 5
+#define R_RSI 6
+#define R_RDI 7
+#define R_R8  8
+#define R_R9  9
+#define R_R10 10
+#define R_R11 11
+#define R_R12 12
+#define R_R13 13
+#define R_R14 14
+#define R_R15 15
+#endif
+
+UInt *VGA_(reg_addr_from_tst)(Int regno, ThreadArchState *arch)
+{
+   I_die_here;
+#if 0
+   switch (regno) {
+   case R_RAX: return &arch->vex.guest_RAX;
+   case R_RCX: return &arch->vex.guest_RCX;
+   case R_RDX: return &arch->vex.guest_RDX;
+   case R_RBX: return &arch->vex.guest_RBX;
+   case R_RSP: return &arch->vex.guest_RSP;
+   case R_RBP: return &arch->vex.guest_RBP;
+   case R_RSI: return &arch->vex.guest_RSI;
+   case R_RDI: return &arch->vex.guest_RDI;
+   case R_R8 : return &arch->vex.guest_R8 ;
+   case R_R9 : return &arch->vex.guest_R9 ;
+   case R_R10: return &arch->vex.guest_R10;
+   case R_R11: return &arch->vex.guest_R11;
+   case R_R12: return &arch->vex.guest_R12;
+   case R_R13: return &arch->vex.guest_R13;
+   case R_R14: return &arch->vex.guest_R14;
+   case R_R15: return &arch->vex.guest_R15;
+   default:    return NULL;
+   }
+#endif
+}
+
+/*------------------------------------------------------------*/
+/*--- pointercheck                                         ---*/
+/*------------------------------------------------------------*/
+
+Bool VGA_(setup_pointercheck)(void)
+{
+   I_die_here;
+#if 0
+   vki_modify_ldt_t ldt = { 
+      VG_POINTERCHECK_SEGIDX,    // entry_number
+      VG_(client_base),          // base_addr
+      (VG_(client_end)-VG_(client_base)) / VKI_PAGE_SIZE, // limit
+      1,                         // seg_32bit
+      0,                         // contents: data, RW, non-expanding
+      0,                         // ! read_exec_only
+      1,                         // limit_in_pages
+      0,                         // ! seg not present
+      1,                         // useable
+   };
+   int ret = VG_(do_syscall)(__NR_modify_ldt, 1, &ldt, sizeof(ldt));
+   if (ret < 0) {
+      VG_(message)(Vg_UserMsg,
+                   "Warning: ignoring --pointercheck=yes, "
+                   "because modify_ldt failed (errno=%d)", -ret);
+      return False;
+   } else {
+      return True;
+   }
+#endif
+}
+
+/*------------------------------------------------------------*/
+/*--- Debugger-related operations                          ---*/
+/*------------------------------------------------------------*/
+
+Int VGA_(ptrace_setregs_from_tst)(Int pid, ThreadArchState* arch)
+{
+   I_die_here;
+#if 0
+   struct vki_user_regs_struct regs;
+
+   regs.cs     = arch->vex.guest_CS;
+   regs.ss     = arch->vex.guest_SS;
+   regs.ds     = arch->vex.guest_DS;
+   regs.es     = arch->vex.guest_ES;
+   regs.fs     = arch->vex.guest_FS;
+   regs.gs     = arch->vex.guest_GS;
+   regs.eax    = arch->vex.guest_EAX;
+   regs.ebx    = arch->vex.guest_EBX;
+   regs.ecx    = arch->vex.guest_ECX;
+   regs.edx    = arch->vex.guest_EDX;
+   regs.esi    = arch->vex.guest_ESI;
+   regs.edi    = arch->vex.guest_EDI;
+   regs.ebp    = arch->vex.guest_EBP;
+   regs.esp    = arch->vex.guest_ESP;
+   regs.eflags = LibVEX_GuestX86_get_eflags(&arch->vex);
+   regs.eip    = arch->vex.guest_EIP;
+
+   return ptrace(PTRACE_SETREGS, pid, NULL, &regs);
+#endif
+}
+
+/*--------------------------------------------------------------------*/
+/*--- end                                                          ---*/
+/*--------------------------------------------------------------------*/
+
diff --git a/coregrind/arm-linux/core_platform.h b/coregrind/arm-linux/core_platform.h
index 43a0929..33da12b 100644
--- a/coregrind/arm-linux/core_platform.h
+++ b/coregrind/arm-linux/core_platform.h
@@ -1,6 +1,6 @@
 
 /*--------------------------------------------------------------------*/
-/*--- ARM-Linux-specific stuff for the core.                       ---*/
+/*--- Platform-specific stuff for the core.                        ---*/
 /*---                                    arm-linux/core_platform.h ---*/
 /*--------------------------------------------------------------------*/
 
@@ -73,7 +73,7 @@
 
-#define PLATFORM_DO_MMAP(ret, start, length, prot, flags, fd, offset) { \
+#define PLATFORM_DO_MMAP(ret, start, length, prot, flags, fd, offset) do { \
    I_die_here; \
-}
+} while (0)
 
 #define PLATFORM_GET_MMAP_ARGS(tst, a1, a2, a3, a4, a5, a6) do { \
    I_die_here; \
diff --git a/coregrind/arm-linux/syscalls.c b/coregrind/arm-linux/syscalls.c
index f2c4601..3c18e62 100644
--- a/coregrind/arm-linux/syscalls.c
+++ b/coregrind/arm-linux/syscalls.c
@@ -1,6 +1,6 @@
 
 /*--------------------------------------------------------------------*/
-/*--- ARM/Linux-specific syscalls, etc.       arm-linux/syscalls.c ---*/
+/*--- Platform-specific syscalls stuff.       arm-linux/syscalls.c ---*/
 /*--------------------------------------------------------------------*/
 
 /*
@@ -123,6 +123,15 @@
 #endif
 }
 
+PRE(sys_ipc, Special)
+{
+   // XXX: the situation is complicated by the fact that ARM's ipc
+   // super-syscall, which encompasses shmdt, shmat, semget, etc, seems to
+   // be the same as (or at least similar to) x86's, and so we want to
+   // avoid duplicating the x86 wrapper here, since it's so big...
+   I_die_here;
+}
+
 #undef PRE
 #undef POST
 
@@ -130,23 +139,23 @@
    The ARM/Linux syscall table
    ------------------------------------------------------------------ */
 
-// Macros for adding ARM/Linux-specific wrappers to the syscall table.
-#define PLAX_(const, name)    SYS_WRAPPER_ENTRY_X_(arm_linux, const, name) 
-#define PLAXY(const, name)    SYS_WRAPPER_ENTRY_XY(arm_linux, const, name) 
+// Macros for adding ARM/Linux-specific wrappers to the syscall table.  Note
+// that ARM syscall numbers start at __NR_SYSCALL_BASE.
+#define PLAX_(const, name) \
+   SYS_WRAPPER_ENTRY_X_(arm_linux, const - __NR_SYSCALL_BASE, name) 
+#define PLAXY(const, name) \
+   SYS_WRAPPER_ENTRY_XY(arm_linux, const - __NR_SYSCALL_BASE, name) 
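+// For example, ARM's __NR_ipc is (__NR_SYSCALL_BASE + 117); subtracting
+// the base gives the plain number 117 used for the sys_ipc entry below.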
 
 // This table maps from __NR_xxx syscall numbers (from
 // linux/include/asm-arm/unistd.h) to the appropriate PRE/POST sys_foo()
 // wrappers on ARM (as per sys_call_table in linux/arch/arm/kernel/entry.S).
-//
-// For those syscalls not handled by Valgrind, the annotation indicate its
-// arch/OS combination, eg. */* (generic), */Linux (Linux only), ?/?
-// (unknown).
 
 const struct SyscallTableEntry VGA_(syscall_table)[] = {
    //   (restart_syscall)                             // 0
    GENX_(__NR_exit,              sys_exit),           // 1
    LINX_(__NR_mount,             sys_mount),          // 21
    PLAX_(__NR_syscall,           sys_syscall),        // 113
+   PLAXY(__NR_ipc,               sys_ipc),            // 117
    PLAX_(__NR_clone,             sys_clone),          // 120
 };
 
diff --git a/coregrind/arm/core_arch.h b/coregrind/arm/core_arch.h
index ee0124c..e8405a4 100644
--- a/coregrind/arm/core_arch.h
+++ b/coregrind/arm/core_arch.h
@@ -1,6 +1,6 @@
 
 /*--------------------------------------------------------------------*/
-/*--- ARM-specific stuff for the core.             arm/core_arch.h ---*/
+/*--- Arch-specific stuff for the core.            arm/core_arch.h ---*/
 /*--------------------------------------------------------------------*/
 
 /*
diff --git a/coregrind/arm/signals.c b/coregrind/arm/signals.c
index 27b4837..61a8bfe 100644
--- a/coregrind/arm/signals.c
+++ b/coregrind/arm/signals.c
@@ -1,6 +1,6 @@
 
 /*--------------------------------------------------------------------*/
-/*--- ARM signals, etc.                              arm/signals.c ---*/
+/*--- Arch-specific signals stuff.                   arm/signals.c ---*/
 /*--------------------------------------------------------------------*/
 
 /*
diff --git a/coregrind/arm/state.c b/coregrind/arm/state.c
index 70a34f7..82c3272 100644
--- a/coregrind/arm/state.c
+++ b/coregrind/arm/state.c
@@ -1,6 +1,6 @@
 
 /*--------------------------------------------------------------------*/
-/*--- x86 registers, etc.                              x86/state.c ---*/
+/*--- Arch-specific registers, etc.                    x86/state.c ---*/
 /*--------------------------------------------------------------------*/
 
 /*
diff --git a/coregrind/core.h b/coregrind/core.h
index 9af2d7f..235d79b 100644
--- a/coregrind/core.h
+++ b/coregrind/core.h
@@ -1230,7 +1230,7 @@
             __attribute__((__noreturn__));
 
 /* Something of a function looking for a home ... start up debugger. */
-extern void VG_(start_debugger) ( Int tid );
+extern void VG_(start_debugger) ( ThreadId tid );
 
 /* Counts downwards in vg_run_innerloop. */
 extern UInt VG_(dispatch_ctr);
@@ -1316,7 +1316,7 @@
    ------------------------------------------------------------------ */
 
 /* Issue a syscall for thread tid */
-extern Int  VG_(sys_issue)(int tid);
+extern Int  VG_(sys_issue)(ThreadId tid);
 
 extern void VG_(proxy_init)     ( void );
 extern void VG_(proxy_create)   ( ThreadId tid );
@@ -1369,7 +1369,7 @@
 // Return true if we're allowed to use or create this fd.
 Bool VG_(fd_allowed)(Int fd, const Char *syscallname, ThreadId tid, Bool soft);
 
-void VG_(record_fd_open)(Int tid, Int fd, char *pathname);
+void VG_(record_fd_open)(ThreadId tid, Int fd, char *pathname);
    
 // Flags describing syscall wrappers
 #define Special    (1 << 0)
diff --git a/coregrind/ume.c b/coregrind/ume.c
index 4a1362d..0035724 100644
--- a/coregrind/ume.c
+++ b/coregrind/ume.c
@@ -103,7 +103,7 @@
 
    while(bufptr && bufptr < buf+ret) {
       char perm[5];
-      off_t offset;
+      ULong offset;
       int maj, min;
       int ino;
       void *segstart, *segend;
diff --git a/coregrind/vg_main.c b/coregrind/vg_main.c
index 3c002f6..3e17a5a 100644
--- a/coregrind/vg_main.c
+++ b/coregrind/vg_main.c
@@ -211,7 +211,7 @@
    poke around and look at parameters, memory, etc.  You can't
    meaningfully get the debugger to continue the program, though; to
    continue, quit the debugger.  */
-void VG_(start_debugger) ( Int tid )
+void VG_(start_debugger) ( ThreadId tid )
 {
    Int pid;
 
@@ -1110,6 +1110,8 @@
    *auxv = *orig_auxv;
    vg_assert(auxv->a_type == AT_NULL);
 
+// XXX: what architectures is this necessary for?  x86 yes, PPC no, others ?
+// Perhaps a per-arch VGA_NEEDS_TRAMPOLINE constant is necessary?
 #ifdef __x86__
    /* --- trampoline page --- */
    VG_(memcpy)( (void *)VG_(client_trampoline_code),
@@ -1671,6 +1673,7 @@
      config_error("Please use absolute paths in "
                   "./configure --prefix=... or --libdir=...");
 
+// XXX: what architectures is this necessary for?  x86 yes, PPC no, others ?
 #ifdef __x86__
    {
       Int *auxp;
diff --git a/coregrind/vg_mylibc.c b/coregrind/vg_mylibc.c
index 9f47407..11ee814 100644
--- a/coregrind/vg_mylibc.c
+++ b/coregrind/vg_mylibc.c
@@ -195,7 +195,7 @@
 }
 
 
-Int VG_(tkill)( Int tid, Int signo )
+Int VG_(tkill)( ThreadId tid, Int signo )
 {
    Int ret = -VKI_ENOSYS;
 
@@ -216,8 +216,16 @@
 
 Int VG_(sigpending) ( vki_sigset_t* set )
 {
+// Nb: AMD64/Linux doesn't have __NR_sigpending;  it only provides
+// __NR_rt_sigpending.  This function will have to be abstracted in some
+// way to account for this.  In the meantime, the easy option is to forget
+// about it for AMD64 until it's needed.
+#ifdef __amd64__
+   I_die_here;
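+   // (Presumably, once the syscall-number differences are sorted out, this
+   // will become something like
+   //    res = VG_(do_syscall)(__NR_rt_sigpending, (UWord)set, sizeof(vki_sigset_t));
+   // but that's untested, so just abort for now.)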
+#else
    Int res = VG_(do_syscall)(__NR_sigpending, (UWord)set);
    return VG_(is_kerror)(res) ? -1 : 0;
+#endif
 }
 
 Int VG_(waitpid)(Int pid, Int *status, Int options)
@@ -1314,7 +1322,7 @@
    buffer if buf==NULL, because we don't want Linux calling malloc() */
 Char* VG_(getcwd) ( Char* buf, SizeT size )
 {
-   Int res;
+   Word res;
    vg_assert(buf != NULL);
    res = VG_(do_syscall)(__NR_getcwd, (UWord)buf, size);
    return VG_(is_kerror)(res) ? ((Char*)NULL) : (Char*)res;
@@ -1936,6 +1944,11 @@
 static
 Int my_socket ( Int domain, Int type, Int protocol )
 {
+// AMD64/Linux doesn't define __NR_socketcall... see comment above
+// VG_(sigpending)() for more details.
+#ifdef __amd64__
+   I_die_here;
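+   // (Presumably this becomes a direct call, something like
+   //    res = VG_(do_syscall)(__NR_socket, domain, type, protocol);
+   // rather than going via sys_socketcall; untested, so abort for now.)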
+#else
    Int res;
    UWord args[3];
    args[0] = domain;
@@ -1945,12 +1958,18 @@
    if (VG_(is_kerror)(res)) 
       res = -1;
    return res;
+#endif
 }
 
 static
 Int my_connect ( Int sockfd, struct vki_sockaddr_in* serv_addr, 
                  Int addrlen )
 {
+// AMD64/Linux doesn't define __NR_socketcall... see comment above
+// VG_(sigpending)() for more details.
+#ifdef __amd64__
+   I_die_here;
+#else
    Int res;
    UWord args[3];
    args[0] = sockfd;
@@ -1960,10 +1979,16 @@
    if (VG_(is_kerror)(res)) 
       res = -1;
    return res;
+#endif
 }
 
 Int VG_(write_socket)( Int sd, void *msg, Int count )
 {
+// AMD64/Linux doesn't define __NR_socketcall... see comment above
+// VG_(sigpending)() for more details.
+#ifdef __amd64__
+   I_die_here;
+#else
    /* This is actually send(). */
 
    /* Requests not to send SIGPIPE on errors on stream oriented
@@ -1981,10 +2006,16 @@
    if (VG_(is_kerror)(res)) 
       res = -1;
    return res;
+#endif
 }
 
 Int VG_(getsockname) ( Int sd, struct vki_sockaddr *name, Int *namelen)
 {
+// AMD64/Linux doesn't define __NR_socketcall... see comment above
+// VG_(sigpending)() for more details.
+#ifdef __amd64__
+   I_die_here;
+#else
    Int res;
    UWord args[3];
    args[0] = sd;
@@ -1994,10 +2025,16 @@
    if(VG_(is_kerror)(res))
       res = -1;
    return res;
+#endif
 }
 
 Int VG_(getpeername) ( Int sd, struct vki_sockaddr *name, Int *namelen)
 {
+// AMD64/Linux doesn't define __NR_socketcall... see comment above
+// VG_(sigpending)() for more details.
+#ifdef __amd64__
+   I_die_here;
+#else
    Int res;
    UWord args[3];
    args[0] = sd;
@@ -2007,11 +2044,17 @@
    if(VG_(is_kerror)(res))
       res = -1;
    return res;
+#endif
 }
 
 Int VG_(getsockopt) ( Int sd, Int level, Int optname, void *optval,
                       Int *optlen)
 {
+// AMD64/Linux doesn't define __NR_socketcall... see comment above
+// VG_(sigpending)() for more details.
+#ifdef __amd64__
+   I_die_here;
+#else
    Int res;
    UWord args[5];
    args[0] = sd;
@@ -2023,9 +2066,9 @@
    if(VG_(is_kerror)(res))
       res = -1;
    return res;
+#endif
 }
 
-
 /*--------------------------------------------------------------------*/
-/*--- end                                              vg_mylibc.c ---*/
+/*--- end                                                          ---*/
 /*--------------------------------------------------------------------*/
diff --git a/coregrind/vg_proxylwp.c b/coregrind/vg_proxylwp.c
index f3b235f..6ef4bf6 100644
--- a/coregrind/vg_proxylwp.c
+++ b/coregrind/vg_proxylwp.c
@@ -1215,7 +1215,7 @@
 }
 
 /* Issue a syscall to the thread's ProxyLWP */
-Int VG_(sys_issue)(int tid)
+Int VG_(sys_issue)(ThreadId tid)
 {
    ThreadState *tst = VG_(get_ThreadState)(tid);
    ProxyLWP *proxy = tst->proxy;
@@ -1249,7 +1249,7 @@
    res = VG_(write)(proxy->topx, &req, sizeof(req));
 
    if (res != sizeof(req)) {
-      VG_(message)(Vg_DebugMsg, "sys_issue: write to tid %d failed %d (not %d)\n",
+      VG_(message)(Vg_DebugMsg, "sys_issue: write to tid %u failed %d (not %d)\n",
 		   tid, res, sizeof(req));
    }
    return 0;
@@ -1258,7 +1258,7 @@
 /* Relatively expensive sanity tests for the syscall machinery */
 void VG_(sanity_check_proxy)(void)
 {
-   Int tid;
+   ThreadId tid;
    Bool sane = True;
    static const struct PX_Request req = { .request = PX_Ping };
 
diff --git a/coregrind/vg_scheduler.c b/coregrind/vg_scheduler.c
index 5bde927..9097888 100644
--- a/coregrind/vg_scheduler.c
+++ b/coregrind/vg_scheduler.c
@@ -465,7 +465,7 @@
        VG_(threads)[tid].associated_mx->__vg_m_count == 0) {
       vg_pthread_mutex_t* mutex = VG_(threads)[tid].associated_mx;
       mutex->__vg_m_count = 1;
-      mutex->__vg_m_owner = (/*_pthread_descr*/void*)tid;
+      mutex->__vg_m_owner = (/*_pthread_descr*/void*)(ULong)tid;
       VG_(threads)[tid].status        = VgTs_Runnable;
       VG_(threads)[tid].associated_mx = NULL;
       /* m_edx already holds pth_mx_lock() success (0) */
@@ -1227,7 +1227,7 @@
          VG_(threads)[tid].status        = VgTs_Runnable;
          VG_(threads)[tid].associated_cv = NULL;
          VG_(threads)[tid].associated_mx = NULL;
-         mx->__vg_m_owner = (/*_pthread_descr*/void*)tid;
+         mx->__vg_m_owner = (/*_pthread_descr*/void*)(ULong)tid;
          mx->__vg_m_count = 1;
          /* .m_edx already holds pth_cond_wait success value (0) */
 
@@ -1944,7 +1944,7 @@
          break;
    }
 
-   VG_TRACK( post_mutex_unlock, (ThreadId)mutex->__vg_m_owner, mutex );
+   VG_TRACK( post_mutex_unlock, (ThreadId)(ULong)mutex->__vg_m_owner, mutex );
 
    vg_assert(i <= VG_N_THREADS);
    if (i == VG_N_THREADS) {
@@ -1956,7 +1956,7 @@
          pthread_mutex_lock() call now returns with 0 (success). */
       /* The .count is already == 1. */
       vg_assert(VG_(threads)[i].associated_mx == mutex);
-      mutex->__vg_m_owner = (/*_pthread_descr*/void*)i;
+      mutex->__vg_m_owner = (/*_pthread_descr*/void*)(ULong)i;
       VG_(threads)[i].status        = VgTs_Runnable;
       VG_(threads)[i].associated_mx = NULL;
       /* m_edx already holds pth_mx_lock() success (0) */
@@ -2024,7 +2024,7 @@
    }
 
    if (mutex->__vg_m_count > 0) {
-      if (!VG_(is_valid_tid)((ThreadId)mutex->__vg_m_owner)) {
+      if (!VG_(is_valid_tid)((ThreadId)(ULong)mutex->__vg_m_owner)) {
          VG_(record_pthread_error)( tid, 
             "pthread_mutex_lock/trylock: mutex has invalid owner");
          SET_PTHREQ_RETVAL(tid, VKI_EINVAL);
@@ -2032,7 +2032,7 @@
       }	 
 
       /* Someone has it already. */
-      if ((ThreadId)mutex->__vg_m_owner == tid && ms_end == 0xFFFFFFFF) {
+      if ((ThreadId)(ULong)mutex->__vg_m_owner == tid && ms_end == 0xFFFFFFFF) {
          /* It's locked -- by me! */
          if (mutex->__vg_m_kind == PTHREAD_MUTEX_RECURSIVE_NP) {
             /* return 0 (success). */
@@ -2082,7 +2082,7 @@
 
       /* We get it! [for the first time]. */
       mutex->__vg_m_count = 1;
-      mutex->__vg_m_owner = (/*_pthread_descr*/void*)tid;
+      mutex->__vg_m_owner = (/*_pthread_descr*/void*)(ULong)tid;
 
       /* return 0 (success). */
       SET_PTHREQ_RETVAL(tid, 0);
@@ -2143,7 +2143,7 @@
       return;
    }
 
-   if ((ThreadId)mutex->__vg_m_owner != tid) {
+   if ((ThreadId)(ULong)mutex->__vg_m_owner != tid) {
       /* we don't hold it */
       VG_(record_pthread_error)( tid, 
          "pthread_mutex_unlock: mutex is locked by a different thread");
@@ -2163,7 +2163,7 @@
    /* Now we're sure it is locked exactly once, and by the thread who
       is now doing an unlock on it.  */
    vg_assert(mutex->__vg_m_count == 1);
-   vg_assert((ThreadId)mutex->__vg_m_owner == tid);
+   vg_assert((ThreadId)(ULong)mutex->__vg_m_owner == tid);
 
    /* Release at max one thread waiting on this mutex. */
    release_one_thread_waiting_on_mutex ( mutex, "pthread_mutex_lock" );
@@ -2209,7 +2209,7 @@
       SET_PTHREQ_RETVAL(tid, ETIMEDOUT);  /* pthread_cond_wait return value */
       VG_(threads)[tid].associated_cv = NULL;
       VG_(threads)[tid].associated_mx = NULL;
-      mx->__vg_m_owner = (/*_pthread_descr*/void*)tid;
+      mx->__vg_m_owner = (/*_pthread_descr*/void*)(ULong)tid;
       mx->__vg_m_count = 1;
 
       VG_TRACK( post_mutex_lock, tid, mx );
@@ -2278,7 +2278,7 @@
          VG_(threads)[i].status        = VgTs_Runnable;
          VG_(threads)[i].associated_cv = NULL;
          VG_(threads)[i].associated_mx = NULL;
-         mx->__vg_m_owner = (/*_pthread_descr*/void*)i;
+         mx->__vg_m_owner = (/*_pthread_descr*/void*)(ULong)i;
          mx->__vg_m_count = 1;
          /* .m_edx already holds pth_cond_wait success value (0) */
 
@@ -2375,7 +2375,7 @@
       return;
    }
 
-   if ((ThreadId)mutex->__vg_m_owner != tid /* we don't hold it */) {
+   if ((ThreadId)(ULong)mutex->__vg_m_owner != tid /* we don't hold it */) {
          VG_(record_pthread_error)( tid, 
             "pthread_cond_wait/timedwait: mutex is locked by another thread");
       SET_PTHREQ_RETVAL(tid, VKI_EPERM);
@@ -2575,7 +2575,7 @@
    }
 
    specifics_ptr = VG_(threads)[tid].specifics_ptr;
-   vg_assert(specifics_ptr == NULL || IS_ALIGNED4_ADDR(specifics_ptr));
+   vg_assert(specifics_ptr == NULL || IS_WORD_ALIGNED_ADDR(specifics_ptr));
 
    SET_PTHREQ_RETVAL(tid, (UWord)specifics_ptr);
 }
@@ -3334,8 +3334,8 @@
          vg_assert(cv == NULL);
          /* 1 */ vg_assert(mx != NULL);
 	 /* 2 */ vg_assert(mx->__vg_m_count > 0);
-         /* 3 */ vg_assert(VG_(is_valid_tid)((ThreadId)mx->__vg_m_owner));
-         /* 4 */ vg_assert((UInt)i != (ThreadId)mx->__vg_m_owner ||
+         /* 3 */ vg_assert(VG_(is_valid_tid)((ThreadId)(ULong)mx->__vg_m_owner));
+         /* 4 */ vg_assert((UInt)i != (ThreadId)(ULong)mx->__vg_m_owner ||
                            VG_(threads)[i].awaken_at != 0xFFFFFFFF); 
       } else 
       if (VG_(threads)[i].status == VgTs_WaitCV) {
diff --git a/coregrind/vg_symtab2.c b/coregrind/vg_symtab2.c
index 29648c8..50469c6 100644
--- a/coregrind/vg_symtab2.c
+++ b/coregrind/vg_symtab2.c
@@ -2506,6 +2506,7 @@
                        "soname:libpthread.so.0", redirects[i].to);
    }
 
+// XXX: which architectures is this necessary for?  x86 yes, PPC no, others?
 #ifdef __x86__
    /* Redirect _dl_sysinfo_int80, which is glibc's default system call
       routine, to the routine in our trampoline page so that the
diff --git a/coregrind/vg_symtab2.h b/coregrind/vg_symtab2.h
index 09c75bb..7f442d6 100644
--- a/coregrind/vg_symtab2.h
+++ b/coregrind/vg_symtab2.h
@@ -94,7 +94,7 @@
 
    /* a value, depending on kind */
    union {
-      Int	offset;		/* offset on stack (-ve -> ebp; +ve -> esp) */
+      OffT	offset;		/* offset on stack (-ve -> ebp; +ve -> esp) */
       Int	regno;		/* register number */
       Addr	addr;		/* static or global address */
    } u;
@@ -129,7 +129,7 @@
    Addr   start;
    UInt   size;
    Char*  filename; /* in mallocville */
-   UInt   foffset;
+   OffT   foffset;
    Char*  soname;
 
    /* An expandable array of symbols. */
@@ -157,7 +157,7 @@
    /* offset    is what we need to add to symbol table entries
       to get the real location of that symbol in memory.
    */
-   UInt   offset;
+   OffT   offset;
 
    /* Bounds of data, BSS, PLT and GOT, so that tools can see what
       section an address is in */
diff --git a/coregrind/vg_syscalls.c b/coregrind/vg_syscalls.c
index 2e847db..b4f80f6 100644
--- a/coregrind/vg_syscalls.c
+++ b/coregrind/vg_syscalls.c
@@ -400,7 +400,7 @@
 /* Note the fact that a file descriptor was just closed. */
 
 static
-void record_fd_close(Int tid, Int fd)
+void record_fd_close(ThreadId tid, Int fd)
 {
    OpenFd *i = allocated_fds;
 
@@ -432,7 +432,7 @@
    already open, then we're probably doing a dup2() to an existing fd,
    so just overwrite the existing one. */
 
-void VG_(record_fd_open)(Int tid, Int fd, char *pathname)
+void VG_(record_fd_open)(ThreadId tid, Int fd, char *pathname)
 {
    OpenFd *i;
 
@@ -647,33 +647,6 @@
 }
 
 static
-UInt get_shm_size ( Int shmid )
-{
-   struct vki_shmid_ds buf;
-   long __res = VG_(do_syscall)(__NR_ipc, 24 /* IPCOP_shmctl */, shmid, VKI_IPC_STAT, 0, &buf);
-    if ( VG_(is_kerror) ( __res ) )
-       return 0;
- 
-   return buf.shm_segsz;
-}
-
-static
-UInt get_sem_count( Int semid )
-{
-  struct vki_semid_ds buf;
-  union vki_semun arg;
-  long res;
-
-  arg.buf = &buf;
-  
-  res = VG_(do_syscall)(__NR_ipc, 3 /* IPCOP_semctl */, semid, 0, VKI_IPC_STAT, &arg);
-  if ( VG_(is_kerror)(res) )
-    return 0;
-
-  return buf.sem_nsems;
-}
- 
-static
 Char *strdupcat ( const Char *s1, const Char *s2, ArenaId aid )
 {
    UInt len = VG_(strlen) ( s1 ) + VG_(strlen) ( s2 ) + 1;
@@ -746,7 +719,7 @@
                      (Addr)msg->msg_control, msg->msg_controllen );
 }
 
-void check_cmsg_for_fds(Int tid, struct vki_msghdr *msg)
+void check_cmsg_for_fds(ThreadId tid, struct vki_msghdr *msg)
 {
    struct vki_cmsghdr *cm = VKI_CMSG_FIRSTHDR(msg);
 
@@ -1862,7 +1835,7 @@
    PRE_REG_READ2(long, "fchmod", unsigned int, fildes, vki_mode_t, mode);
 }
 
-void common_pre_fcntl(Bool is64, ThreadId tid, ThreadState* tst)
+PRE(sys_fcntl, 0)
 {
    switch (ARG2) {
    // These ones ignore ARG3.
@@ -1873,13 +1846,8 @@
    case VKI_F_GETSIG:
    case VKI_F_SETSIG:
    case VKI_F_GETLEASE:
-      if (is64) {
-         PRINT("sys_fcntl64 ( %d, %d )", ARG1,ARG2);
-         PRE_REG_READ2(long, "fcntl64", unsigned int, fd, unsigned int, cmd);
-      } else {
-         PRINT("sys_fcntl ( %d, %d )", ARG1,ARG2);
-         PRE_REG_READ2(long, "fcntl", unsigned int, fd, unsigned int, cmd);
-      }
+      PRINT("sys_fcntl ( %d, %d )", ARG1,ARG2);
+      PRE_REG_READ2(long, "fcntl", unsigned int, fd, unsigned int, cmd);
       break;
 
    // These ones use ARG3 as "arg".
@@ -1888,42 +1856,27 @@
    case VKI_F_SETFL:
    case VKI_F_SETLEASE:
    case VKI_F_NOTIFY:
-      if (is64) {
-         PRINT("sys_fcntl64[ARG3=='arg'] ( %d, %d, %d )", ARG1,ARG2,ARG3);
-         PRE_REG_READ3(long, "fcntl64",
-                       unsigned int, fd, unsigned int, cmd, unsigned long, arg);
-      } else {
-         PRINT("sys_fcntl[ARG3=='arg'] ( %d, %d, %d )", ARG1,ARG2,ARG3);
-         PRE_REG_READ3(long, "fcntl",
-                       unsigned int, fd, unsigned int, cmd, unsigned long, arg);
-      }
+      PRINT("sys_fcntl[ARG3=='arg'] ( %d, %d, %d )", ARG1,ARG2,ARG3);
+      PRE_REG_READ3(long, "fcntl",
+                    unsigned int, fd, unsigned int, cmd, unsigned long, arg);
       break;
 
    // These ones use ARG3 as "lock".
    case VKI_F_GETLK:
    case VKI_F_SETLK:
    case VKI_F_SETLKW:
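+   // The F_*LK64 commands don't exist on AMD64, where the plain F_*LK
+   // commands already operate on a 64-bit struct flock.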
+#ifndef __amd64__
    case VKI_F_GETLK64:
    case VKI_F_SETLK64:
    case VKI_F_SETLKW64:
-      if (is64) {
-         PRINT("sys_fcntl64[ARG3=='lock'] ( %d, %d, %p )", ARG1,ARG2,ARG3);
-         PRE_REG_READ3(long, "fcntl64",
-                       unsigned int, fd, unsigned int, cmd,
-                       struct flock64 *, lock);
-      } else {
-         PRINT("sys_fcntl[ARG3=='lock'] ( %d, %d, %p )", ARG1,ARG2,ARG3);
-         PRE_REG_READ3(long, "fcntl",
-                       unsigned int, fd, unsigned int, cmd,
-                       struct flock64 *, lock);
-      }
+#endif
+      PRINT("sys_fcntl[ARG3=='lock'] ( %d, %d, %p )", ARG1,ARG2,ARG3);
+      PRE_REG_READ3(long, "fcntl",
+                    unsigned int, fd, unsigned int, cmd,
+                    struct flock64 *, lock);
       break;
    }
-}
-
-PRE(sys_fcntl, 0)
-{
-   common_pre_fcntl(/*is64*/False, tid, tst);
 
    if (ARG2 == VKI_F_SETLKW)
       tst->sys_flags |= MayBlock;
@@ -1945,10 +1898,53 @@
 // XXX: wrapper only suitable for 32-bit systems
 PRE(sys_fcntl64, 0)
 {
-   common_pre_fcntl(/*is64*/True, tid, tst);
+   switch (ARG2) {
+   // These ones ignore ARG3.
+   case VKI_F_GETFD:
+   case VKI_F_GETFL:
+   case VKI_F_GETOWN:
+   case VKI_F_SETOWN:
+   case VKI_F_GETSIG:
+   case VKI_F_SETSIG:
+   case VKI_F_GETLEASE:
+      PRINT("sys_fcntl64 ( %d, %d )", ARG1,ARG2);
+      PRE_REG_READ2(long, "fcntl64", unsigned int, fd, unsigned int, cmd);
+      break;
+
+   // These ones use ARG3 as "arg".
+   case VKI_F_DUPFD:
+   case VKI_F_SETFD:
+   case VKI_F_SETFL:
+   case VKI_F_SETLEASE:
+   case VKI_F_NOTIFY:
+      PRINT("sys_fcntl64[ARG3=='arg'] ( %d, %d, %d )", ARG1,ARG2,ARG3);
+      PRE_REG_READ3(long, "fcntl64",
+                    unsigned int, fd, unsigned int, cmd, unsigned long, arg);
+      break;
+
+   // These ones use ARG3 as "lock".
+   case VKI_F_GETLK:
+   case VKI_F_SETLK:
+   case VKI_F_SETLKW:
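+   // As in sys_fcntl above: the F_*LK64 commands don't exist on AMD64.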
+#ifndef __amd64__
+   case VKI_F_GETLK64:
+   case VKI_F_SETLK64:
+   case VKI_F_SETLKW64:
+#endif
+      PRINT("sys_fcntl64[ARG3=='lock'] ( %d, %d, %p )", ARG1,ARG2,ARG3);
+      PRE_REG_READ3(long, "fcntl64",
+                    unsigned int, fd, unsigned int, cmd,
+                    struct flock64 *, lock);
+      break;
+   }
    
+#ifndef __amd64__
    if (ARG2 == VKI_F_SETLKW || ARG2 == VKI_F_SETLKW64)
       tst->sys_flags |= MayBlock;
+#else
+   if (ARG2 == VKI_F_SETLKW)
+      tst->sys_flags |= MayBlock;
+#endif
 }
 
 POST(sys_fcntl64)
@@ -2291,450 +2287,6 @@
    PRE_REG_READ0(long, "getuid");
 }
 
-// XXX: x86-specific
-// XXX: should use the constants here (eg. SHMAT), not the numbers directly!
-PRE(sys_ipc, 0)
-{
-   PRINT("sys_ipc ( %d, %d, %d, %d, %p, %d )", ARG1,ARG2,ARG3,ARG4,ARG5,ARG6);
-   // XXX: this is simplistic -- some args are not used in all circumstances.
-   PRE_REG_READ6(int, "ipc",
-                 vki_uint, call, int, first, int, second, int, third,
-                 void *, ptr, long, fifth)
-
-   switch (ARG1 /* call */) {
-   case 1: /* IPCOP_semop */
-      PRE_MEM_READ( "semop(sops)", ARG5, ARG3 * sizeof(struct vki_sembuf) );
-      tst->sys_flags |= MayBlock;
-      break;
-   case 2: /* IPCOP_semget */
-      break;
-   case 3: /* IPCOP_semctl */
-   {
-      union vki_semun *arg = (union vki_semun *)ARG5;
-      switch (ARG4 /* cmd */) {
-      case VKI_IPC_INFO:
-      case VKI_SEM_INFO:
-      {
-         Addr buf = deref_Addr( tid, (Addr)&arg->__buf, "semctl(IPC_INFO, arg)" );
-	 PRE_MEM_WRITE( "semctl(IPC_INFO, arg->buf)", buf, 
-			sizeof(struct vki_seminfo) );
-	 break;
-      }
-      case VKI_IPC_STAT:
-      case VKI_SEM_STAT:
-      {
-         Addr buf = deref_Addr( tid, (Addr)&arg->buf, "semctl(IPC_STAT, arg)" );
-	 PRE_MEM_WRITE( "semctl(IPC_STAT, arg->buf)", buf, 
-			sizeof(struct vki_semid_ds) );
-	 break;
-      }
-      case VKI_IPC_SET:
-      {
-         Addr buf = deref_Addr( tid, (Addr)&arg->buf, "semctl(IPC_SET, arg)" );
-	 PRE_MEM_READ( "semctl(IPC_SET, arg->buf)", buf, 
-			sizeof(struct vki_semid_ds) );
-	 break;
-      }
-      case VKI_GETALL:
-      {
-         Addr array = deref_Addr( tid, (Addr)&arg->array, "semctl(IPC_GETALL, arg)" );
-         UInt nsems = get_sem_count( ARG2 );
-	 PRE_MEM_WRITE( "semctl(IPC_GETALL, arg->array)", array, 
-			sizeof(short) * nsems );
-	 break;
-      }
-      case VKI_SETALL:
-      {
-         Addr array = deref_Addr( tid, (Addr)&arg->array, "semctl(IPC_SETALL, arg)" );
-         UInt nsems = get_sem_count( ARG2 );
-	 PRE_MEM_READ( "semctl(IPC_SETALL, arg->array)", array, 
-			sizeof(short) * nsems );
-	 break;
-      }
-      case VKI_SETVAL:
-      {
-	 PRE_MEM_READ( "semctl(IPC_SETVAL, arg->array)",
-                        (Addr)&arg->val, sizeof(arg->val) );
-	 break;
-      }
-      case VKI_IPC_INFO|VKI_IPC_64:
-      case VKI_SEM_INFO|VKI_IPC_64:
-      {
-         Addr buf = deref_Addr( tid, (Addr)&arg->__buf, "semctl(IPC_INFO, arg)" );
-	 PRE_MEM_WRITE( "semctl(IPC_INFO, arg->buf)", buf, 
-			sizeof(struct vki_seminfo) );
-	 break;
-      }
-      case VKI_IPC_STAT|VKI_IPC_64:
-      case VKI_SEM_STAT|VKI_IPC_64:
-      {
-         Addr buf = deref_Addr( tid, (Addr)&arg->buf, "semctl(IPC_STAT, arg)" );
-	 PRE_MEM_WRITE( "semctl(IPC_STAT, arg->buf)", buf, 
-			sizeof(struct vki_semid64_ds) );
-	 break;
-      }
-      case VKI_IPC_SET|VKI_IPC_64:
-      {
-         Addr buf = deref_Addr( tid, (Addr)&arg->buf, "semctl(IPC_SET, arg)" );
-	 PRE_MEM_READ( "semctl(IPC_SET, arg->buf)", buf, 
-			sizeof(struct vki_semid64_ds) );
-	 break;
-      }
-      case VKI_GETALL|VKI_IPC_64:
-      {
-         Addr array = deref_Addr( tid, (Addr)&arg->array, "semctl(IPC_GETALL, arg)" );
-         UInt nsems = get_sem_count( ARG2 );
-	 PRE_MEM_WRITE( "semctl(IPC_GETALL, arg->array)", array, 
-			sizeof(short) * nsems );
-	 break;
-      }
-      case VKI_SETALL|VKI_IPC_64:
-      {
-         Addr array = deref_Addr( tid, (Addr)&arg->array, "semctl(IPC_SETALL, arg)" );
-         UInt nsems = get_sem_count( ARG2 );
-	 PRE_MEM_READ( "semctl(IPC_SETALL, arg->array)", array, 
-			sizeof(short) * nsems );
-	 break;
-      }
-      case VKI_SETVAL|VKI_IPC_64:
-      {
-	 PRE_MEM_READ( "semctl(IPC_SETVAL, arg->array)",
-                        (Addr)&arg->val, sizeof(arg->val) );
-	 break;
-      }
-      default:
-	 break;
-      }
-      break;
-   }
-   case 4: /* IPCOP_semtimedop */
-      PRE_MEM_READ( "semtimedop(sops)", ARG5, 
-		     ARG3 * sizeof(struct vki_sembuf) );
-      if (ARG6 != 0)
-         PRE_MEM_READ( "semtimedop(timeout)", ARG6, 
-                        sizeof(struct vki_timespec) );
-      tst->sys_flags |= MayBlock;
-      break;
-   case 11: /* IPCOP_msgsnd */
-   {
-      struct vki_msgbuf *msgp = (struct vki_msgbuf *)ARG5;
-      Int msgsz = ARG3;
-
-      PRE_MEM_READ( "msgsnd(msgp->mtype)", 
-		     (Addr)&msgp->mtype, sizeof(msgp->mtype) );
-      PRE_MEM_READ( "msgsnd(msgp->mtext)", 
-		     (Addr)msgp->mtext, msgsz );
-
-      if ((ARG4 & VKI_IPC_NOWAIT) == 0)
-         tst->sys_flags |= MayBlock;
-      break;
-   }
-   case 12: /* IPCOP_msgrcv */
-   {
-      struct vki_msgbuf *msgp;
-      Int msgsz = ARG3;
- 
-      msgp = (struct vki_msgbuf *)deref_Addr( tid,
-					  (Addr) (&((struct vki_ipc_kludge *)ARG5)->msgp),
-					  "msgrcv(msgp)" );
-
-      PRE_MEM_WRITE( "msgrcv(msgp->mtype)", 
-		     (Addr)&msgp->mtype, sizeof(msgp->mtype) );
-      PRE_MEM_WRITE( "msgrcv(msgp->mtext)", 
-		     (Addr)msgp->mtext, msgsz );
-
-      if ((ARG4 & VKI_IPC_NOWAIT) == 0)
-         tst->sys_flags |= MayBlock;
-      break;
-   }
-   case 13: /* IPCOP_msgget */
-      break;
-   case 14: /* IPCOP_msgctl */
-   {
-      switch (ARG3 /* cmd */) {
-      case VKI_IPC_INFO:
-      case VKI_MSG_INFO:
-	 PRE_MEM_WRITE( "msgctl(IPC_INFO, buf)", ARG5, 
-			sizeof(struct vki_msginfo) );
-	 break;
-      case VKI_IPC_STAT:
-      case VKI_MSG_STAT:
-	 PRE_MEM_WRITE( "msgctl(IPC_STAT, buf)", ARG5, 
-			sizeof(struct vki_msqid_ds) );
-	 break;
-      case VKI_IPC_SET:
-	 PRE_MEM_READ( "msgctl(IPC_SET, buf)", ARG5, 
-			sizeof(struct vki_msqid_ds) );
-	 break;
-      case VKI_IPC_INFO|VKI_IPC_64:
-      case VKI_MSG_INFO|VKI_IPC_64:
-	 PRE_MEM_WRITE( "msgctl(IPC_INFO, buf)", ARG5, 
-			sizeof(struct vki_msginfo) );
-	 break;
-      case VKI_IPC_STAT|VKI_IPC_64:
-      case VKI_MSG_STAT|VKI_IPC_64:
-	 PRE_MEM_WRITE( "msgctl(IPC_STAT, buf)", ARG5, 
-			sizeof(struct vki_msqid64_ds) );
-	 break;
-      case VKI_IPC_SET|VKI_IPC_64:
-	 PRE_MEM_READ( "msgctl(IPC_SET, buf)", ARG5, 
-			sizeof(struct vki_msqid64_ds) );
-	 break;
-      default:
-	 break;
-      }
-      break;
-   }
-   case 21: /* IPCOP_shmat */
-   {
-      UInt shmid = ARG2;
-      UInt segmentSize = get_shm_size ( shmid );
-      
-      /* If they didn't ask for a particular address, then place it
-	 like an mmap. */
-      if (ARG5 == 0)
-	 ARG5 = VG_(find_map_space)(0, segmentSize, True);
-      else if (!VG_(valid_client_addr)(ARG5, segmentSize, tid, "shmat"))
-	 SET_RESULT( -VKI_EINVAL );
-      break;
-   }
-   case 22: /* IPCOP_shmdt */
-      if (!VG_(valid_client_addr)(ARG5, 1, tid, "shmdt"))
-	 SET_RESULT( -VKI_EINVAL );
-      break;
-   case 23: /* IPCOP_shmget */
-      break;
-   case 24: /* IPCOP_shmctl */
-   {
-      switch (ARG3 /* cmd */) {
-      case VKI_IPC_INFO:
-	 PRE_MEM_WRITE( "shmctl(IPC_INFO, buf)", ARG5, 
-			sizeof(struct vki_shminfo) );
-	 break;
-      case VKI_SHM_INFO:
-	 PRE_MEM_WRITE( "shmctl(SHM_INFO, buf)", ARG5, 
-			sizeof(struct vki_shm_info) );
-	 break;
-      case VKI_IPC_STAT:
-      case VKI_SHM_STAT:
-	 PRE_MEM_WRITE( "shmctl(IPC_STAT, buf)", ARG5, 
-			sizeof(struct vki_shmid_ds) );
-	 break;
-      case VKI_IPC_SET:
-	 PRE_MEM_READ( "shmctl(IPC_SET, buf)", ARG5, 
-			sizeof(struct vki_shmid_ds) );
-	 break;
-      case VKI_IPC_INFO|VKI_IPC_64:
-	 PRE_MEM_WRITE( "shmctl(IPC_INFO, buf)", ARG5, 
-			sizeof(struct vki_shminfo64) );
-	 break;
-      case VKI_SHM_INFO|VKI_IPC_64:
-	 PRE_MEM_WRITE( "shmctl(SHM_INFO, buf)", ARG5, 
-			sizeof(struct vki_shm_info) );
-	 break;
-      case VKI_IPC_STAT|VKI_IPC_64:
-      case VKI_SHM_STAT|VKI_IPC_64:
-	 PRE_MEM_WRITE( "shmctl(IPC_STAT, buf)", ARG5, 
-			sizeof(struct vki_shmid64_ds) );
-	 break;
-      case VKI_IPC_SET|VKI_IPC_64:
-	 PRE_MEM_READ( "shmctl(IPC_SET, buf)", ARG5, 
-			sizeof(struct vki_shmid_ds) );
-	 break;
-      default:
-	 break;
-      }
-      break;
-   }
-   default:
-      VG_(message)(Vg_DebugMsg,
-		   "FATAL: unhandled syscall(ipc) %d",
-		   ARG1 );
-      VG_(core_panic)("... bye!\n");
-      break; /*NOTREACHED*/
-   }   
-}
-
-POST(sys_ipc)
-{
-   switch (ARG1 /* call */) {
-   case 1: /* IPCOP_semop */
-   case 2: /* IPCOP_semget */
-      break;
-   case 3: /* IPCOP_semctl */
-   {
-      union vki_semun *arg = (union vki_semun *)ARG5;
-      switch (ARG4 /* cmd */) {
-      case VKI_IPC_INFO:
-      case VKI_SEM_INFO:
-      {
-         Addr buf = deref_Addr( tid, (Addr)&arg->__buf, "semctl(arg)" );
-	 POST_MEM_WRITE( buf, sizeof(struct vki_seminfo) );
-	 break;
-      }
-      case VKI_IPC_STAT:
-      case VKI_SEM_STAT:
-      {
-         Addr buf = deref_Addr( tid, (Addr)&arg->buf, "semctl(arg)" );
-	 POST_MEM_WRITE( buf, sizeof(struct vki_semid_ds) );
-	 break;
-      }
-      case VKI_GETALL:
-      {
-         Addr array = deref_Addr( tid, (Addr)&arg->array, "semctl(arg)" );
-         UInt nsems = get_sem_count( ARG2 );
-	 POST_MEM_WRITE( array, sizeof(short) * nsems );
-	 break;
-      }
-      case VKI_IPC_INFO|VKI_IPC_64:
-      case VKI_SEM_INFO|VKI_IPC_64:
-      {
-         Addr buf = deref_Addr( tid, (Addr)&arg->__buf, "semctl(arg)" );
-	 POST_MEM_WRITE( buf, sizeof(struct vki_seminfo) );
-	 break;
-      }
-      case VKI_IPC_STAT|VKI_IPC_64:
-      case VKI_SEM_STAT|VKI_IPC_64:
-      {
-         Addr buf = deref_Addr( tid, (Addr)&arg->buf, "semctl(arg)" );
-	 POST_MEM_WRITE( buf, sizeof(struct vki_semid64_ds) );
-	 break;
-      }
-      case VKI_GETALL|VKI_IPC_64:
-      {
-         Addr array = deref_Addr( tid, (Addr)&arg->array, "semctl(arg)" );
-         UInt nsems = get_sem_count( ARG2 );
-	 POST_MEM_WRITE( array, sizeof(short) * nsems );
-	 break;
-      }
-      default:
-	 break;
-      }
-      break;
-   }
-   case 4: /* IPCOP_semtimedop */
-      break;
-   case 11: /* IPCOP_msgsnd */
-      break;
-   case 12: /* IPCOP_msgrcv */
-   {
-      struct vki_msgbuf *msgp;
- 
-      msgp = (struct vki_msgbuf *)deref_Addr( tid,
-					  (Addr) (&((struct vki_ipc_kludge *)ARG5)->msgp),
-					  "msgrcv(msgp)" );
-      if ( RES > 0 ) {
-	 POST_MEM_WRITE( (Addr)&msgp->mtype, sizeof(msgp->mtype) );
-	 POST_MEM_WRITE( (Addr)msgp->mtext, RES );
-      }
-      break;
-   }
-   case 13: /* IPCOP_msgget */
-      break;
-   case 14: /* IPCOP_msgctl */
-   {
-      switch (ARG3 /* cmd */) {
-      case VKI_IPC_INFO:
-      case VKI_MSG_INFO:
-	 POST_MEM_WRITE( ARG5, sizeof(struct vki_msginfo) );
-	 break;
-      case VKI_IPC_STAT:
-      case VKI_MSG_STAT:
-	 POST_MEM_WRITE( ARG5, sizeof(struct vki_msqid_ds) );
-	 break;
-      case VKI_IPC_SET:
-	 break;
-      case VKI_IPC_INFO|VKI_IPC_64:
-      case VKI_MSG_INFO|VKI_IPC_64:
-	 POST_MEM_WRITE( ARG5, sizeof(struct vki_msginfo) );
-	 break;
-      case VKI_IPC_STAT|VKI_IPC_64:
-      case VKI_MSG_STAT|VKI_IPC_64:
-	 POST_MEM_WRITE( ARG5, sizeof(struct vki_msqid64_ds) );
-	 break;
-      case VKI_IPC_SET|VKI_IPC_64:
-	 break;
-      default:
-	 break;
-      }
-      break;
-   }
-   case 21: /* IPCOP_shmat */
-   {
-      Int shmid = ARG2;
-      Int shmflag = ARG3;
-      Addr addr;
-
-      /* force readability. before the syscall it is
-       * indeed uninitialized, as can be seen in
-       * glibc/sysdeps/unix/sysv/linux/shmat.c */
-      POST_MEM_WRITE( ARG4, sizeof( ULong ) );
-
-      addr = deref_Addr ( tid, ARG4, "shmat(addr)" );
-      if ( addr > 0 ) { 
-	 UInt segmentSize = get_shm_size ( shmid );
-	 if ( segmentSize > 0 ) {
-	    UInt prot = VKI_PROT_READ|VKI_PROT_WRITE;
-	    /* we don't distinguish whether it's read-only or
-	     * read-write -- it doesn't matter really. */
-	    VG_TRACK( new_mem_mmap, addr, segmentSize, True, True, False );
-
-	    if (!(shmflag & 010000)) /* = SHM_RDONLY */
-	       prot &= ~VKI_PROT_WRITE;
-	    VG_(map_segment)(addr, segmentSize, prot, SF_SHARED|SF_SHM);
-	 }
-      }
-      break;
-   }
-   case 22: /* IPCOP_shmdt */
-   {
-      Segment *s = VG_(find_segment)(ARG5);
-
-      if (s != NULL && (s->flags & SF_SHM) && VG_(seg_contains)(s, ARG5, 1)) {
-	 VG_TRACK( die_mem_munmap, s->addr, s->len );
-	 VG_(unmap_range)(s->addr, s->len);
-      }
-      break;
-   }
-   case 23: /* IPCOP_shmget */
-      break;
-   case 24: /* IPCOP_shmctl */
-   {
-      switch (ARG3 /* cmd */) {
-      case VKI_IPC_INFO:
-	 POST_MEM_WRITE( ARG5, sizeof(struct vki_shminfo) );
-	 break;
-      case VKI_SHM_INFO:
-	 POST_MEM_WRITE( ARG5, sizeof(struct vki_shm_info) );
-	 break;
-      case VKI_IPC_STAT:
-      case VKI_SHM_STAT:
-	 POST_MEM_WRITE( ARG5, sizeof(struct vki_shmid_ds) );
-	 break;
-      case VKI_IPC_INFO|VKI_IPC_64:
-	 POST_MEM_WRITE( ARG5, sizeof(struct vki_shminfo64) );
-	 break;
-      case VKI_SHM_INFO|VKI_IPC_64:
-	 POST_MEM_WRITE( ARG5, sizeof(struct vki_shm_info) );
-	 break;
-      case VKI_IPC_STAT|VKI_IPC_64:
-      case VKI_SHM_STAT|VKI_IPC_64:
-	 POST_MEM_WRITE( ARG5, sizeof(struct vki_shmid64_ds) );
-	 break;
-      default:
-	 break;
-      }
-      break;
-   }
-   default:
-      VG_(message)(Vg_DebugMsg,
-		   "FATAL: unhandled syscall(ipc) %d",
-		   ARG1 );
-      VG_(core_panic)("... bye!\n");
-      break; /*NOTREACHED*/
-   }
-}
-
 // XXX: I reckon some of these cases must be x86-specific
 PRE(sys_ioctl, MayBlock)
 {
@@ -4062,6 +3614,11 @@
    }
 }
 
+// XXX: this syscall is generic, but not necessarily applicable to every
+// architecture -- probably only to 32-bit archs.  Eventually we're going to
+// need something like linux/core_os32.h to hold such things, I think.
+// --njn
+#ifndef __amd64__
 PRE(sys_lstat64, 0)
 {
    PRINT("sys_lstat64 ( %p(%s), %p )",ARG1,ARG1,ARG2);
@@ -4076,6 +3633,7 @@
       POST_MEM_WRITE( ARG2, sizeof(struct vki_stat64) );
    }
 }
+#endif
 
 PRE(sys_mkdir, MayBlock)
 {
@@ -5011,6 +4569,8 @@
    PRE_MEM_RASCIIZ( "symlink(newpath)", ARG2 );
 }
 
+// See comment above PRE(sys_lstat64) for an explanation of this #ifdef.
+#ifndef __amd64__
 PRE(sys_stat64, 0)
 {
    PRINT("sys_stat64 ( %p, %p )",ARG1,ARG2);
@@ -5035,6 +4595,7 @@
 {
    POST_MEM_WRITE( ARG2, sizeof(struct vki_stat64) );
 }
+#endif
 
 PRE(sys_time, 0)
 {
@@ -5315,28 +4876,6 @@
 }
 
 // XXX: x86-specific
-PRE(sys_sigaction, SIG_SIM)
-{
-   PRINT("sys_sigaction ( %d, %p, %p )", ARG1,ARG2,ARG3);
-   PRE_REG_READ3(int, "sigaction",
-                 int, signum, const struct old_sigaction *, act,
-                 struct old_sigaction *, oldact)
-   if (ARG2 != 0)
-      PRE_MEM_READ( "sigaction(act)", ARG2, sizeof(struct vki_old_sigaction));
-   if (ARG3 != 0)
-      PRE_MEM_WRITE( "sigaction(oldact)", ARG3, sizeof(struct vki_old_sigaction));
-
-   if (SIGNAL_SIMULATION)
-      VG_(do_sys_sigaction)(tid);
-}
-
-POST(sys_sigaction)
-{
-   if (RES == 0 && ARG3 != 0)
-      POST_MEM_WRITE( ARG3, sizeof(struct vki_old_sigaction));
-}
-
-// XXX: x86-specific
 PRE(sys_rt_sigaction, SIG_SIM)
 {
    PRINT("sys_rt_sigaction ( %d, %p, %p, %d )", ARG1,ARG2,ARG3,ARG4);
@@ -5360,6 +4899,10 @@
       POST_MEM_WRITE( ARG3, sizeof(struct vki_sigaction));
 }
 
+// XXX: This syscall is not available on amd64 -- amd64 only provides
+//      sys_rt_sigprocmask, which uses sigset_t rather than old_sigset_t.
+//      Hence this wrapper is only suitable for 32-bit architectures.
+#ifndef __amd64__
 PRE(sys_sigprocmask, SIG_SIM)
 {
    PRINT("sys_sigprocmask ( %d, %p, %p )",ARG1,ARG2,ARG3);
@@ -5396,6 +4939,7 @@
    if (RES == 0 && ARG3 != 0)
       POST_MEM_WRITE( ARG3, sizeof(vki_old_sigset_t));
 }
+#endif
 
 PRE(sys_rt_sigprocmask, SIG_SIM)
 {
diff --git a/coregrind/x86-linux/core_platform.h b/coregrind/x86-linux/core_platform.h
index 894a416..398d31b 100644
--- a/coregrind/x86-linux/core_platform.h
+++ b/coregrind/x86-linux/core_platform.h
@@ -1,6 +1,6 @@
 
 /*--------------------------------------------------------------------*/
-/*--- x86-Linux-specific stuff for the core.                       ---*/
+/*--- Platform-specific stuff for the core.                        ---*/
 /*---                                    x86-linux/core_platform.h ---*/
 /*--------------------------------------------------------------------*/
 
@@ -103,7 +103,7 @@
    __args[5] = (offset);                                                \
                                                                         \
    ret = VG_(do_syscall)(__NR_mmap, (UWord)(&(__args[0])) );            \
-}
+} while (0)
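+/* (The do..while(0) above makes the macro expand to a single statement, so
+   it composes safely with an unbraced 'if'.) */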
 
 #define PLATFORM_GET_MMAP_ARGS(tst, a1, a2, a3, a4, a5, a6) do {\
    UInt *arg_block = (UInt*)SYSCALL_ARG1(tst->arch);            \
diff --git a/coregrind/x86-linux/syscall.S b/coregrind/x86-linux/syscall.S
index ec862fb..c8f9c6e 100644
--- a/coregrind/x86-linux/syscall.S
+++ b/coregrind/x86-linux/syscall.S
@@ -31,8 +31,6 @@
 #include "core_asm.h"
 #include "vki_unistd.h"
 
-.globl	VG_(do_syscall)
-
 /*
 	Perform a Linux syscall with int 0x80
 	
@@ -43,6 +41,7 @@
 	that the syscall mechanism makes no useful changes to any
 	register except %eax, which is returned.
  */
+.globl	VG_(do_syscall)
 VG_(do_syscall):
 	push	%esi
 	push	%edi
diff --git a/coregrind/x86-linux/syscalls.c b/coregrind/x86-linux/syscalls.c
index 2f0ac0b..21fd2c2 100644
--- a/coregrind/x86-linux/syscalls.c
+++ b/coregrind/x86-linux/syscalls.c
@@ -1,6 +1,6 @@
 
 /*--------------------------------------------------------------------*/
-/*--- x86/Linux-specific syscalls, etc.       x86-linux/syscalls.c ---*/
+/*--- Platform-specific syscalls stuff.       x86-linux/syscalls.c ---*/
 /*--------------------------------------------------------------------*/
 
 /*
@@ -286,6 +286,494 @@
    }
 }
 
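+// Returns the size in bytes of the shared memory segment 'shmid', or 0 on
+// error.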
+static
+UInt get_shm_size ( Int shmid )
+{
+   struct vki_shmid_ds buf;
+   long res = VG_(do_syscall)(__NR_ipc, VKI_SHMCTL, shmid, VKI_IPC_STAT, 0, &buf);
+   if ( VG_(is_kerror)(res) )
+      return 0;
+
+   return buf.shm_segsz;
+}
+
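+// Returns the number of semaphores in the semaphore set 'semid', or 0 on
+// error.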
+static
+UInt get_sem_count( Int semid )
+{
+   struct vki_semid_ds buf;
+   union vki_semun arg;
+   long res;
+
+   arg.buf = &buf;
+
+   res = VG_(do_syscall)(__NR_ipc, VKI_SEMCTL, semid, 0, VKI_IPC_STAT, &arg);
+   if ( VG_(is_kerror)(res) )
+      return 0;
+
+   return buf.sem_nsems;
+}
+ 
+// XXX: should use a named constant for the SHM_RDONLY flag below, not the
+// octal number directly!
+PRE(sys_ipc, 0)
+{
+   PRINT("sys_ipc ( %d, %d, %d, %d, %p, %d )", ARG1,ARG2,ARG3,ARG4,ARG5,ARG6);
+   // XXX: this is simplistic -- some args are not used in all circumstances.
+   PRE_REG_READ6(int, "ipc",
+                 vki_uint, call, int, first, int, second, int, third,
+                 void *, ptr, long, fifth)
+
+   switch (ARG1 /* call */) {
+   case VKI_SEMOP:
+      PRE_MEM_READ( "semop(sops)", ARG5, ARG3 * sizeof(struct vki_sembuf) );
+      tst->sys_flags |= MayBlock;
+      break;
+   case VKI_SEMGET:
+      break;
+   case VKI_SEMCTL:
+   {
+      union vki_semun *arg = (union vki_semun *)ARG5;
+      switch (ARG4 /* cmd */) {
+      case VKI_IPC_INFO:
+      case VKI_SEM_INFO:
+      {
+         Addr buf = deref_Addr( tid, (Addr)&arg->__buf, "semctl(IPC_INFO, arg)" );
+	 PRE_MEM_WRITE( "semctl(IPC_INFO, arg->buf)", buf, 
+			sizeof(struct vki_seminfo) );
+	 break;
+      }
+      case VKI_IPC_STAT:
+      case VKI_SEM_STAT:
+      {
+         Addr buf = deref_Addr( tid, (Addr)&arg->buf, "semctl(IPC_STAT, arg)" );
+	 PRE_MEM_WRITE( "semctl(IPC_STAT, arg->buf)", buf, 
+			sizeof(struct vki_semid_ds) );
+	 break;
+      }
+      case VKI_IPC_SET:
+      {
+         Addr buf = deref_Addr( tid, (Addr)&arg->buf, "semctl(IPC_SET, arg)" );
+	 PRE_MEM_READ( "semctl(IPC_SET, arg->buf)", buf, 
+			sizeof(struct vki_semid_ds) );
+	 break;
+      }
+      case VKI_GETALL:
+      {
+         Addr array = deref_Addr( tid, (Addr)&arg->array, "semctl(IPC_GETALL, arg)" );
+         UInt nsems = get_sem_count( ARG2 );
+	 PRE_MEM_WRITE( "semctl(IPC_GETALL, arg->array)", array, 
+			sizeof(short) * nsems );
+	 break;
+      }
+      case VKI_SETALL:
+      {
+         Addr array = deref_Addr( tid, (Addr)&arg->array, "semctl(IPC_SETALL, arg)" );
+         UInt nsems = get_sem_count( ARG2 );
+	 PRE_MEM_READ( "semctl(IPC_SETALL, arg->array)", array, 
+			sizeof(short) * nsems );
+	 break;
+      }
+      case VKI_SETVAL:
+      {
+	 PRE_MEM_READ( "semctl(IPC_SETVAL, arg->val)",
+                        (Addr)&arg->val, sizeof(arg->val) );
+	 break;
+      }
+      case VKI_IPC_INFO|VKI_IPC_64:
+      case VKI_SEM_INFO|VKI_IPC_64:
+      {
+         Addr buf = deref_Addr( tid, (Addr)&arg->__buf, "semctl(IPC_INFO, arg)" );
+	 PRE_MEM_WRITE( "semctl(IPC_INFO, arg->buf)", buf, 
+			sizeof(struct vki_seminfo) );
+	 break;
+      }
+      case VKI_IPC_STAT|VKI_IPC_64:
+      case VKI_SEM_STAT|VKI_IPC_64:
+      {
+         Addr buf = deref_Addr( tid, (Addr)&arg->buf, "semctl(IPC_STAT, arg)" );
+	 PRE_MEM_WRITE( "semctl(IPC_STAT, arg->buf)", buf, 
+			sizeof(struct vki_semid64_ds) );
+	 break;
+      }
+      case VKI_IPC_SET|VKI_IPC_64:
+      {
+         Addr buf = deref_Addr( tid, (Addr)&arg->buf, "semctl(IPC_SET, arg)" );
+	 PRE_MEM_READ( "semctl(IPC_SET, arg->buf)", buf, 
+			sizeof(struct vki_semid64_ds) );
+	 break;
+      }
+      case VKI_GETALL|VKI_IPC_64:
+      {
+         Addr array = deref_Addr( tid, (Addr)&arg->array, "semctl(IPC_GETALL, arg)" );
+         UInt nsems = get_sem_count( ARG2 );
+	 PRE_MEM_WRITE( "semctl(IPC_GETALL, arg->array)", array, 
+			sizeof(short) * nsems );
+	 break;
+      }
+      case VKI_SETALL|VKI_IPC_64:
+      {
+         Addr array = deref_Addr( tid, (Addr)&arg->array, "semctl(IPC_SETALL, arg)" );
+         UInt nsems = get_sem_count( ARG2 );
+	 PRE_MEM_READ( "semctl(IPC_SETALL, arg->array)", array, 
+			sizeof(short) * nsems );
+	 break;
+      }
+      case VKI_SETVAL|VKI_IPC_64:
+      {
+	 PRE_MEM_READ( "semctl(IPC_SETVAL, arg->val)",
+                        (Addr)&arg->val, sizeof(arg->val) );
+	 break;
+      }
+      default:
+	 break;
+      }
+      break;
+   }
+   case VKI_SEMTIMEDOP:
+      PRE_MEM_READ( "semtimedop(sops)", ARG5, 
+		     ARG3 * sizeof(struct vki_sembuf) );
+      if (ARG6 != 0)
+         PRE_MEM_READ( "semtimedop(timeout)", ARG6, 
+                        sizeof(struct vki_timespec) );
+      tst->sys_flags |= MayBlock;
+      break;
+   case VKI_MSGSND:
+   {
+      struct vki_msgbuf *msgp = (struct vki_msgbuf *)ARG5;
+      Int msgsz = ARG3;
+
+      PRE_MEM_READ( "msgsnd(msgp->mtype)", 
+		     (Addr)&msgp->mtype, sizeof(msgp->mtype) );
+      PRE_MEM_READ( "msgsnd(msgp->mtext)", 
+		     (Addr)msgp->mtext, msgsz );
+
+      if ((ARG4 & VKI_IPC_NOWAIT) == 0)
+         tst->sys_flags |= MayBlock;
+      break;
+   }
+   case VKI_MSGRCV:
+   {
+      struct vki_msgbuf *msgp;
+      Int msgsz = ARG3;
+ 
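+      /* For MSGRCV via ipc(2), ARG5 points at a struct vki_ipc_kludge
+         holding the real msgp and msgtyp, so fetch the msgp pointer
+         from there. */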
+      msgp = (struct vki_msgbuf *)deref_Addr( tid,
+					  (Addr) (&((struct vki_ipc_kludge *)ARG5)->msgp),
+					  "msgrcv(msgp)" );
+
+      PRE_MEM_WRITE( "msgrcv(msgp->mtype)", 
+		     (Addr)&msgp->mtype, sizeof(msgp->mtype) );
+      PRE_MEM_WRITE( "msgrcv(msgp->mtext)", 
+		     (Addr)msgp->mtext, msgsz );
+
+      if ((ARG4 & VKI_IPC_NOWAIT) == 0)
+         tst->sys_flags |= MayBlock;
+      break;
+   }
+   case VKI_MSGGET:
+      break;
+   case VKI_MSGCTL:
+   {
+      switch (ARG3 /* cmd */) {
+      case VKI_IPC_INFO:
+      case VKI_MSG_INFO:
+	 PRE_MEM_WRITE( "msgctl(IPC_INFO, buf)", ARG5, 
+			sizeof(struct vki_msginfo) );
+	 break;
+      case VKI_IPC_STAT:
+      case VKI_MSG_STAT:
+	 PRE_MEM_WRITE( "msgctl(IPC_STAT, buf)", ARG5, 
+			sizeof(struct vki_msqid_ds) );
+	 break;
+      case VKI_IPC_SET:
+	 PRE_MEM_READ( "msgctl(IPC_SET, buf)", ARG5, 
+			sizeof(struct vki_msqid_ds) );
+	 break;
+      case VKI_IPC_INFO|VKI_IPC_64:
+      case VKI_MSG_INFO|VKI_IPC_64:
+	 PRE_MEM_WRITE( "msgctl(IPC_INFO, buf)", ARG5, 
+			sizeof(struct vki_msginfo) );
+	 break;
+      case VKI_IPC_STAT|VKI_IPC_64:
+      case VKI_MSG_STAT|VKI_IPC_64:
+	 PRE_MEM_WRITE( "msgctl(IPC_STAT, buf)", ARG5, 
+			sizeof(struct vki_msqid64_ds) );
+	 break;
+      case VKI_IPC_SET|VKI_IPC_64:
+	 PRE_MEM_READ( "msgctl(IPC_SET, buf)", ARG5, 
+			sizeof(struct vki_msqid64_ds) );
+	 break;
+      default:
+	 break;
+      }
+      break;
+   }
+   case VKI_SHMAT:
+   {
+      UInt shmid = ARG2;
+      UInt segmentSize = get_shm_size ( shmid );
+      
+      /* If they didn't ask for a particular address, then place it
+	 like an mmap. */
+      if (ARG5 == 0)
+	 ARG5 = VG_(find_map_space)(0, segmentSize, True);
+      else if (!VG_(valid_client_addr)(ARG5, segmentSize, tid, "shmat"))
+	 SET_RESULT( -VKI_EINVAL );
+      break;
+   }
+   case VKI_SHMDT:
+      if (!VG_(valid_client_addr)(ARG5, 1, tid, "shmdt"))
+	 SET_RESULT( -VKI_EINVAL );
+      break;
+   case VKI_SHMGET:
+      break;
+   case VKI_SHMCTL:
+   {
+      switch (ARG3 /* cmd */) {
+      case VKI_IPC_INFO:
+	 PRE_MEM_WRITE( "shmctl(IPC_INFO, buf)", ARG5, 
+			sizeof(struct vki_shminfo) );
+	 break;
+      case VKI_SHM_INFO:
+	 PRE_MEM_WRITE( "shmctl(SHM_INFO, buf)", ARG5, 
+			sizeof(struct vki_shm_info) );
+	 break;
+      case VKI_IPC_STAT:
+      case VKI_SHM_STAT:
+	 PRE_MEM_WRITE( "shmctl(IPC_STAT, buf)", ARG5, 
+			sizeof(struct vki_shmid_ds) );
+	 break;
+      case VKI_IPC_SET:
+	 PRE_MEM_READ( "shmctl(IPC_SET, buf)", ARG5, 
+			sizeof(struct vki_shmid_ds) );
+	 break;
+      case VKI_IPC_INFO|VKI_IPC_64:
+	 PRE_MEM_WRITE( "shmctl(IPC_INFO, buf)", ARG5, 
+			sizeof(struct vki_shminfo64) );
+	 break;
+      case VKI_SHM_INFO|VKI_IPC_64:
+	 PRE_MEM_WRITE( "shmctl(SHM_INFO, buf)", ARG5, 
+			sizeof(struct vki_shm_info) );
+	 break;
+      case VKI_IPC_STAT|VKI_IPC_64:
+      case VKI_SHM_STAT|VKI_IPC_64:
+	 PRE_MEM_WRITE( "shmctl(IPC_STAT, buf)", ARG5, 
+			sizeof(struct vki_shmid64_ds) );
+	 break;
+      case VKI_IPC_SET|VKI_IPC_64:
+	 PRE_MEM_READ( "shmctl(IPC_SET, buf)", ARG5, 
+			sizeof(struct vki_shmid_ds) );
+	 break;
+      default:
+	 break;
+      }
+      break;
+   }
+   default:
+      VG_(message)(Vg_DebugMsg, "FATAL: unhandled syscall(ipc) %d", ARG1 );
+      VG_(core_panic)("... bye!\n");
+      break; /*NOTREACHED*/
+   }   
+}
+
+POST(sys_ipc)
+{
+   switch (ARG1 /* call */) {
+   case VKI_SEMOP:
+   case VKI_SEMGET:
+      break;
+   case VKI_SEMCTL:
+   {
+      union vki_semun *arg = (union vki_semun *)ARG5;
+      switch (ARG4 /* cmd */) {
+      case VKI_IPC_INFO:
+      case VKI_SEM_INFO:
+      {
+         Addr buf = deref_Addr( tid, (Addr)&arg->__buf, "semctl(arg)" );
+	 POST_MEM_WRITE( buf, sizeof(struct vki_seminfo) );
+	 break;
+      }
+      case VKI_IPC_STAT:
+      case VKI_SEM_STAT:
+      {
+         Addr buf = deref_Addr( tid, (Addr)&arg->buf, "semctl(arg)" );
+	 POST_MEM_WRITE( buf, sizeof(struct vki_semid_ds) );
+	 break;
+      }
+      case VKI_GETALL:
+      {
+         Addr array = deref_Addr( tid, (Addr)&arg->array, "semctl(arg)" );
+         UInt nsems = get_sem_count( ARG2 );
+	 POST_MEM_WRITE( array, sizeof(short) * nsems );
+	 break;
+      }
+      case VKI_IPC_INFO|VKI_IPC_64:
+      case VKI_SEM_INFO|VKI_IPC_64:
+      {
+         Addr buf = deref_Addr( tid, (Addr)&arg->__buf, "semctl(arg)" );
+	 POST_MEM_WRITE( buf, sizeof(struct vki_seminfo) );
+	 break;
+      }
+      case VKI_IPC_STAT|VKI_IPC_64:
+      case VKI_SEM_STAT|VKI_IPC_64:
+      {
+         Addr buf = deref_Addr( tid, (Addr)&arg->buf, "semctl(arg)" );
+	 POST_MEM_WRITE( buf, sizeof(struct vki_semid64_ds) );
+	 break;
+      }
+      case VKI_GETALL|VKI_IPC_64:
+      {
+         Addr array = deref_Addr( tid, (Addr)&arg->array, "semctl(arg)" );
+         UInt nsems = get_sem_count( ARG2 );
+	 POST_MEM_WRITE( array, sizeof(short) * nsems );
+	 break;
+      }
+      default:
+	 break;
+      }
+      break;
+   }
+   case VKI_SEMTIMEDOP:
+   case VKI_MSGSND:
+      break;
+   case VKI_MSGRCV:
+   {
+      struct vki_msgbuf *msgp;
+ 
+      msgp = (struct vki_msgbuf *)deref_Addr( tid,
+					  (Addr) (&((struct vki_ipc_kludge *)ARG5)->msgp),
+					  "msgrcv(msgp)" );
+      if ( RES > 0 ) {
+	 POST_MEM_WRITE( (Addr)&msgp->mtype, sizeof(msgp->mtype) );
+	 POST_MEM_WRITE( (Addr)msgp->mtext, RES );
+      }
+      break;
+   }
+   case VKI_MSGGET:
+      break;
+   case VKI_MSGCTL:
+   {
+      switch (ARG3 /* cmd */) {
+      case VKI_IPC_INFO:
+      case VKI_MSG_INFO:
+	 POST_MEM_WRITE( ARG5, sizeof(struct vki_msginfo) );
+	 break;
+      case VKI_IPC_STAT:
+      case VKI_MSG_STAT:
+	 POST_MEM_WRITE( ARG5, sizeof(struct vki_msqid_ds) );
+	 break;
+      case VKI_IPC_SET:
+	 break;
+      case VKI_IPC_INFO|VKI_IPC_64:
+      case VKI_MSG_INFO|VKI_IPC_64:
+	 POST_MEM_WRITE( ARG5, sizeof(struct vki_msginfo) );
+	 break;
+      case VKI_IPC_STAT|VKI_IPC_64:
+      case VKI_MSG_STAT|VKI_IPC_64:
+	 POST_MEM_WRITE( ARG5, sizeof(struct vki_msqid64_ds) );
+	 break;
+      case VKI_IPC_SET|VKI_IPC_64:
+	 break;
+      default:
+	 break;
+      }
+      break;
+   }
+   case VKI_SHMAT:
+   {
+      Int shmid = ARG2;
+      Int shmflag = ARG3;
+      Addr addr;
+
+      /* Force readability.  Before the syscall it is indeed
+       * uninitialized, as can be seen in
+       * glibc/sysdeps/unix/sysv/linux/shmat.c. */
+      POST_MEM_WRITE( ARG4, sizeof( ULong ) );
+
+      addr = deref_Addr ( tid, ARG4, "shmat(addr)" );
+      if ( addr > 0 ) { 
+	 UInt segmentSize = get_shm_size ( shmid );
+	 if ( segmentSize > 0 ) {
+	    UInt prot = VKI_PROT_READ|VKI_PROT_WRITE;
+	    /* we don't distinguish whether it's read-only or
+	     * read-write -- it doesn't matter really. */
+	    VG_TRACK( new_mem_mmap, addr, segmentSize, True, True, False );
+
+	    if (!(shmflag & 010000)) /* = SHM_RDONLY */
+	       prot &= ~VKI_PROT_WRITE;
+	    VG_(map_segment)(addr, segmentSize, prot, SF_SHARED|SF_SHM);
+	 }
+      }
+      break;
+   }
+   case VKI_SHMDT:
+   {
+      Segment *s = VG_(find_segment)(ARG5);
+
+      if (s != NULL && (s->flags & SF_SHM) && VG_(seg_contains)(s, ARG5, 1)) {
+	 VG_TRACK( die_mem_munmap, s->addr, s->len );
+	 VG_(unmap_range)(s->addr, s->len);
+      }
+      break;
+   }
+   case VKI_SHMGET:
+      break;
+   case VKI_SHMCTL:
+   {
+      switch (ARG3 /* cmd */) {
+      case VKI_IPC_INFO:
+	 POST_MEM_WRITE( ARG5, sizeof(struct vki_shminfo) );
+	 break;
+      case VKI_SHM_INFO:
+	 POST_MEM_WRITE( ARG5, sizeof(struct vki_shm_info) );
+	 break;
+      case VKI_IPC_STAT:
+      case VKI_SHM_STAT:
+	 POST_MEM_WRITE( ARG5, sizeof(struct vki_shmid_ds) );
+	 break;
+      case VKI_IPC_INFO|VKI_IPC_64:
+	 POST_MEM_WRITE( ARG5, sizeof(struct vki_shminfo64) );
+	 break;
+      case VKI_SHM_INFO|VKI_IPC_64:
+	 POST_MEM_WRITE( ARG5, sizeof(struct vki_shm_info) );
+	 break;
+      case VKI_IPC_STAT|VKI_IPC_64:
+      case VKI_SHM_STAT|VKI_IPC_64:
+	 POST_MEM_WRITE( ARG5, sizeof(struct vki_shmid64_ds) );
+	 break;
+      default:
+	 break;
+      }
+      break;
+   }
+   default:
+      VG_(message)(Vg_DebugMsg,
+		   "FATAL: unhandled syscall(ipc) %d",
+		   ARG1 );
+      VG_(core_panic)("... bye!\n");
+      break; /*NOTREACHED*/
+   }
+}
+
+PRE(sys_sigaction, SIG_SIM)
+{
+   PRINT("sys_sigaction ( %d, %p, %p )", ARG1,ARG2,ARG3);
+   PRE_REG_READ3(int, "sigaction",
+                 int, signum, const struct old_sigaction *, act,
+                 struct old_sigaction *, oldact)
+   if (ARG2 != 0)
+      PRE_MEM_READ( "sigaction(act)", ARG2, sizeof(struct vki_old_sigaction));
+   if (ARG3 != 0)
+      PRE_MEM_WRITE( "sigaction(oldact)", ARG3, sizeof(struct vki_old_sigaction));
+
+   if (SIGNAL_SIMULATION)
+      VG_(do_sys_sigaction)(tid);
+}
+
+POST(sys_sigaction)
+{
+   if (RES == 0 && ARG3 != 0)
+      POST_MEM_WRITE( ARG3, sizeof(struct vki_old_sigaction));
+}
+
 #undef PRE
 #undef POST
 
@@ -386,7 +874,7 @@
 
    GENX_(__NR_getpgrp,           sys_getpgrp),        // 65
    GENX_(__NR_setsid,            sys_setsid),         // 66
-   GENXY(__NR_sigaction,         sys_sigaction),      // 67
+   PLAXY(__NR_sigaction,         sys_sigaction),      // 67
    //   (__NR_sgetmask,          sys_sgetmask),       // 68 */* (ANSI C)
    //   (__NR_ssetmask,          sys_ssetmask),       // 69 */* (ANSI C)
 
@@ -446,7 +934,7 @@
 
    //   (__NR_swapoff,           sys_swapoff),        // 115 */Linux 
    LINXY(__NR_sysinfo,           sys_sysinfo),        // 116
-   GENXY(__NR_ipc,               sys_ipc),            // 117
+   PLAXY(__NR_ipc,               sys_ipc),            // 117
    GENX_(__NR_fsync,             sys_fsync),          // 118
    //   (__NR_sigreturn,         sys_sigreturn),      // 119 ?/Linux
 
diff --git a/coregrind/x86-linux/vki_unistd.h b/coregrind/x86-linux/vki_unistd.h
index 569f281..7676ea7 100644
--- a/coregrind/x86-linux/vki_unistd.h
+++ b/coregrind/x86-linux/vki_unistd.h
@@ -27,7 +27,7 @@
 #ifndef __X86_LINUX_VKI_UNISTD_H_
 #define __X86_LINUX_VKI_UNISTD_H_
 
-// From linux-2.6.0-test1/include/asm-i386/unistd.h
+// From linux-2.6.9/include/asm-i386/unistd.h
 
 #define __NR_restart_syscall      0
 #define __NR_exit		  1
@@ -313,5 +313,6 @@
 #define __NR_mq_notify		(__NR_mq_open+4)
 #define __NR_mq_getsetattr	(__NR_mq_open+5)
 #define __NR_sys_kexec_load	283
+#define __NR_wait		284
 
 #endif /* __X86_LINUX_VKI_UNISTD_H_ */
diff --git a/coregrind/x86/core_arch.h b/coregrind/x86/core_arch.h
index 064123f..657b7a9 100644
--- a/coregrind/x86/core_arch.h
+++ b/coregrind/x86/core_arch.h
@@ -1,6 +1,6 @@
 
 /*--------------------------------------------------------------------*/
-/*--- x86-specific stuff for the core.             x86/core_arch.h ---*/
+/*--- Arch-specific stuff for the core.            x86/core_arch.h ---*/
 /*--------------------------------------------------------------------*/
 
 /*
@@ -68,16 +68,12 @@
    asm("movl %%ebp, %0" : "=r" (ebp));       \
 } while (0)
 
-// So the dispatch loop can find %EIP
-extern Int vgoff_m_eip;
-
-
 /* ---------------------------------------------------------------------
    Elf stuff
    ------------------------------------------------------------------ */
 
 #define VG_ELF_ENDIANNESS     ELFDATA2LSB
-#define VG_ELF_MACHINE        EM_386       
+#define VG_ELF_MACHINE        EM_386
 #define VG_ELF_CLASS          ELFCLASS32
 
 
diff --git a/coregrind/x86/libpthread.c b/coregrind/x86/libpthread.c
index 0830dab..0901de1 100644
--- a/coregrind/x86/libpthread.c
+++ b/coregrind/x86/libpthread.c
@@ -1,6 +1,6 @@
 
 /*--------------------------------------------------------------------*/
-/*--- x86-specific libpthread code.               x86/libpthread.c ---*/
+/*--- Arch-specific libpthread code.              x86/libpthread.c ---*/
 /*--------------------------------------------------------------------*/
 
 /*
diff --git a/coregrind/x86/signals.c b/coregrind/x86/signals.c
index f41f24e..3391637 100644
--- a/coregrind/x86/signals.c
+++ b/coregrind/x86/signals.c
@@ -1,6 +1,6 @@
 
 /*--------------------------------------------------------------------*/
-/*--- x86 signals, etc.                              x86/signals.c ---*/
+/*--- Arch-specific signals stuff.                   x86/signals.c ---*/
 /*--------------------------------------------------------------------*/
 
 /*
diff --git a/coregrind/x86/state.c b/coregrind/x86/state.c
index 0454a50..5459656 100644
--- a/coregrind/x86/state.c
+++ b/coregrind/x86/state.c
@@ -1,6 +1,6 @@
 
 /*--------------------------------------------------------------------*/
-/*--- x86 registers, etc.                              x86/state.c ---*/
+/*--- Arch-specific registers, etc.                    x86/state.c ---*/
 /*--------------------------------------------------------------------*/
 
 /*
diff --git a/coregrind/x86/x86_private.h b/coregrind/x86/x86_private.h
index e307d8c..548232f 100644
--- a/coregrind/x86/x86_private.h
+++ b/coregrind/x86/x86_private.h
@@ -1,6 +1,6 @@
 
 /*--------------------------------------------------------------------*/
-/*--- Private x86 specific header.               x86/x86_private.h ---*/
+/*--- Private arch-specific header.              x86/x86_private.h ---*/
 /*--------------------------------------------------------------------*/
 
 /*
diff --git a/helgrind/hg_main.c b/helgrind/hg_main.c
index 2b6175b..e93bdea 100644
--- a/helgrind/hg_main.c
+++ b/helgrind/hg_main.c
@@ -153,6 +153,7 @@
    VgeInitStatus;
 
 
+// XXX: not 64-bit clean!
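+// (Each shadow word packs an ExeContext* or a code address, shifted right
+// by STATE_BITS, together with a 2-bit state -- hence OTHER_BITS and
+// STATE_BITS must add up to 32.)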
 /* Should add up to 32 to fit in one word */
 #define OTHER_BITS      30
 #define STATE_BITS      2
@@ -244,11 +245,6 @@
    return ((UWord)ec) >> STATE_BITS;
 }
 
-static inline ExeContext *unpackEC(UInt i)
-{
-   return (ExeContext *)(i << STATE_BITS);
-}
-
 /* Lose 2 LSB of IP */
 static inline UInt packIP(Addr ip)
 {
diff --git a/include/amd64-linux/Makefile.am b/include/amd64-linux/Makefile.am
new file mode 100644
index 0000000..2b7b7c0
--- /dev/null
+++ b/include/amd64-linux/Makefile.am
@@ -0,0 +1,5 @@
+incincdir = $(includedir)/valgrind/amd64-linux
+
+incinc_HEADERS = \
+    vki_arch.h \
+    vki_arch_posixtypes.h
diff --git a/include/amd64-linux/vki_arch.h b/include/amd64-linux/vki_arch.h
new file mode 100644
index 0000000..c912cc3
--- /dev/null
+++ b/include/amd64-linux/vki_arch.h
@@ -0,0 +1,525 @@
+
+/*--------------------------------------------------------------------*/
+/*--- AMD64/Linux-specific kernel interface.                       ---*/
+/*---                                       amd64-linux/vki_arch.h ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, an extensible x86 protected-mode
+   emulator for monitoring program execution on x86-Unixes.
+
+   Copyright (C) 2000-2004 Julian Seward 
+      jseward@acm.org
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#ifndef __AMD64_LINUX_VKI_ARCH_H
+#define __AMD64_LINUX_VKI_ARCH_H
+
+// AMD64 is little-endian.
+#define VKI_LITTLE_ENDIAN  1
+
+//----------------------------------------------------------------------
+// From linux-2.6.9/include/asm-x86_64/types.h
+//----------------------------------------------------------------------
+
+typedef unsigned char __vki_u8;
+
+typedef __signed__ short __vki_s16;
+typedef unsigned short __vki_u16;
+
+typedef unsigned int __vki_u32;
+
+typedef __signed__ long long __vki_s64;
+typedef unsigned long long __vki_u64;
+
+typedef unsigned short vki_u16;
+
+typedef unsigned int vki_u32;
+
+//----------------------------------------------------------------------
+// From linux-2.6.9/include/asm-x86_64/page.h
+//----------------------------------------------------------------------
+
+#define VKI_PAGE_SHIFT	12
+#define VKI_PAGE_SIZE	(1UL << VKI_PAGE_SHIFT)
+
+//----------------------------------------------------------------------
+// From linux-2.6.9/include/asm-x86_64/signal.h
+//----------------------------------------------------------------------
+
+#define _VKI_NSIG	64
+#define _VKI_NSIG_BPW	64
+#define _VKI_NSIG_WORDS	(_VKI_NSIG / _VKI_NSIG_BPW)
+
+typedef unsigned long vki_old_sigset_t;		/* at least 32 bits */
+
+typedef struct {
+	unsigned long sig[_VKI_NSIG_WORDS];
+} vki_sigset_t;
+
+#define VKI_SIGHUP		 1
+#define VKI_SIGINT		 2
+#define VKI_SIGQUIT		 3
+#define VKI_SIGILL		 4
+#define VKI_SIGTRAP		 5
+#define VKI_SIGABRT		 6
+#define VKI_SIGBUS		 7
+#define VKI_SIGFPE		 8
+#define VKI_SIGKILL		 9
+#define VKI_SIGUSR1		10
+#define VKI_SIGSEGV		11
+#define VKI_SIGUSR2		12
+#define VKI_SIGPIPE		13
+#define VKI_SIGALRM		14
+#define VKI_SIGTERM		15
+#define VKI_SIGSTKFLT		16
+#define VKI_SIGCHLD		17
+#define VKI_SIGCONT		18
+#define VKI_SIGSTOP		19
+#define VKI_SIGTSTP		20
+#define VKI_SIGTTIN		21
+#define VKI_SIGTTOU		22
+#define VKI_SIGURG		23
+#define VKI_SIGXCPU		24
+#define VKI_SIGXFSZ		25
+#define VKI_SIGVTALRM		26
+#define VKI_SIGPROF		27
+#define VKI_SIGWINCH		28
+#define VKI_SIGIO		29
+#define VKI_SIGPWR		30
+#define VKI_SIGSYS		31
+#define	VKI_SIGUNUSED		31
+
+#define VKI_SIGRTMIN		32
+#define VKI_SIGRTMAX		_VKI_NSIG
+
+#define VKI_SA_NOCLDSTOP	0x00000001
+#define VKI_SA_NOCLDWAIT	0x00000002
+#define VKI_SA_SIGINFO		0x00000004
+#define VKI_SA_ONSTACK		0x08000000
+#define VKI_SA_RESTART		0x10000000
+#define VKI_SA_NODEFER		0x40000000
+#define VKI_SA_RESETHAND	0x80000000
+
+#define VKI_SA_NOMASK	VKI_SA_NODEFER
+#define VKI_SA_ONESHOT	VKI_SA_RESETHAND
+
+#define VKI_SA_RESTORER	0x04000000
+
+#define VKI_SS_ONSTACK	1
+#define VKI_SS_DISABLE	2
+
+#define VKI_MINSIGSTKSZ	2048
+
+#define VKI_SIG_BLOCK          0	/* for blocking signals */
+#define VKI_SIG_UNBLOCK        1	/* for unblocking signals */
+#define VKI_SIG_SETMASK        2	/* for setting the signal mask */
+
+typedef void __vki_signalfn_t(int);
+typedef __vki_signalfn_t __user *__vki_sighandler_t;
+
+typedef void __vki_restorefn_t(void);
+typedef __vki_restorefn_t __user *__vki_sigrestore_t;
+
+#define VKI_SIG_DFL	((__vki_sighandler_t)0)	/* default signal handling */
+#define VKI_SIG_IGN	((__vki_sighandler_t)1)	/* ignore signal */
+
+struct vki_sigaction {
+        // [[Nb: a 'k' prefix is added to "sa_handler" because
+        // bits/sigaction.h (which gets dragged in somehow via signal.h)
+        // #defines it as something else.  Since that is done for glibc's
+        // purposes, which we don't care about here, we use our own name.]]
+	__vki_sighandler_t ksa_handler;
+	unsigned long sa_flags;
+	__vki_sigrestore_t sa_restorer;
+	vki_sigset_t sa_mask;		/* mask last for extensibility */
+};
+
+typedef struct vki_sigaltstack {
+	void __user *ss_sp;
+	int ss_flags;
+	vki_size_t ss_size;
+} vki_stack_t;
+
+//----------------------------------------------------------------------
+// From linux-2.6.9/include/asm-x86_64/sigcontext.h
+//----------------------------------------------------------------------
+
+struct _vki_fpstate {
+	__vki_u16	cwd;
+	__vki_u16	swd;
+	__vki_u16	twd;	/* Note this is not the same as the 32bit/x87/FSAVE twd */
+	__vki_u16	fop;
+	__vki_u64	rip;
+	__vki_u64	rdp; 
+	__vki_u32	mxcsr;
+	__vki_u32	mxcsr_mask;
+	__vki_u32	st_space[32];	/* 8*16 bytes for each FP-reg */
+	__vki_u32	xmm_space[64];	/* 16*16 bytes for each XMM-reg  */
+	__vki_u32	reserved2[24];
+};
+
+struct vki_sigcontext { 
+	unsigned long r8;
+	unsigned long r9;
+	unsigned long r10;
+	unsigned long r11;
+	unsigned long r12;
+	unsigned long r13;
+	unsigned long r14;
+	unsigned long r15;
+	unsigned long rdi;
+	unsigned long rsi;
+	unsigned long rbp;
+	unsigned long rbx;
+	unsigned long rdx;
+	unsigned long rax;
+	unsigned long rcx;
+	unsigned long rsp;
+	unsigned long rip;
+	unsigned long eflags;		/* RFLAGS */
+	unsigned short cs;
+	unsigned short gs;
+	unsigned short fs;
+	unsigned short __pad0; 
+	unsigned long err;
+	unsigned long trapno;
+	unsigned long oldmask;
+	unsigned long cr2;
+	struct _vki_fpstate __user *fpstate;	/* zero when no FPU context */
+	unsigned long reserved1[8];
+};
+
+//----------------------------------------------------------------------
+// From linux-2.6.9/include/asm-x86_64/mman.h
+//----------------------------------------------------------------------
+
+#define VKI_PROT_READ	0x1		/* page can be read */
+#define VKI_PROT_WRITE	0x2		/* page can be written */
+#define VKI_PROT_EXEC	0x4		/* page can be executed */
+//#define VKI_PROT_NONE	0x0		/* page can not be accessed */
+
+#define VKI_MAP_SHARED	0x01		/* Share changes */
+#define VKI_MAP_PRIVATE	0x02		/* Changes are private */
+#define VKI_MAP_FIXED	0x10		/* Interpret addr exactly */
+#define VKI_MAP_ANONYMOUS	0x20	/* don't use a file */
+
+//----------------------------------------------------------------------
+// From linux-2.6.9/include/asm-x86_64/fcntl.h
+//----------------------------------------------------------------------
+
+#define VKI_O_RDONLY	     00
+#define VKI_O_WRONLY	     01
+#define VKI_O_CREAT	   0100	/* not fcntl */
+#define VKI_O_EXCL	   0200	/* not fcntl */
+#define VKI_O_TRUNC	  01000	/* not fcntl */
+#define VKI_O_APPEND	  02000
+#define VKI_O_NONBLOCK	  04000
+
+#define VKI_F_DUPFD		0	/* dup */
+#define VKI_F_GETFD		1	/* get close_on_exec */
+#define VKI_F_SETFD		2	/* set/clear close_on_exec */
+#define VKI_F_GETFL		3	/* get file->f_flags */
+#define VKI_F_SETFL		4	/* set file->f_flags */
+#define VKI_F_GETLK		5
+#define VKI_F_SETLK		6
+#define VKI_F_SETLKW		7
+
+#define VKI_F_SETOWN		8	/*  for sockets. */
+#define VKI_F_GETOWN		9	/*  for sockets. */
+#define VKI_F_SETSIG		10	/*  for sockets. */
+#define VKI_F_GETSIG		11	/*  for sockets. */
+
+#define VKI_FD_CLOEXEC	1	/* actually anything with low bit set goes */
+
+#define VKI_F_LINUX_SPECIFIC_BASE	1024
+
+//----------------------------------------------------------------------
+// From linux-2.6.9/include/asm-x86_64/resource.h
+//----------------------------------------------------------------------
+
+#define VKI_RLIMIT_DATA		2	/* max data size */
+#define VKI_RLIMIT_STACK	3	/* max stack size */
+#define VKI_RLIMIT_CORE		4	/* max core file size */
+#define VKI_RLIMIT_NOFILE	7	/* max number of open files */
+
+//----------------------------------------------------------------------
+// From linux-2.6.9/include/asm-x86_64/socket.h
+//----------------------------------------------------------------------
+
+#define VKI_SOL_SOCKET	1
+
+#define VKI_SO_TYPE	3
+
+//----------------------------------------------------------------------
+// From linux-2.6.9/include/asm-x86_64/sockios.h
+//----------------------------------------------------------------------
+
+#define VKI_SIOCSPGRP	0x8902
+#define VKI_SIOCGPGRP	0x8904
+#define VKI_SIOCGSTAMP	0x8906		/* Get stamp */
+
+//----------------------------------------------------------------------
+// From linux-2.6.9/include/asm-x86_64/stat.h
+//----------------------------------------------------------------------
+
+struct vki_stat {
+	unsigned long	st_dev;
+	unsigned long	st_ino;
+	unsigned long	st_nlink;
+
+	unsigned int	st_mode;
+	unsigned int	st_uid;
+	unsigned int	st_gid;
+	unsigned int	__pad0;
+	unsigned long	st_rdev;
+	long		st_size;
+	long		st_blksize;
+	long		st_blocks;	/* Number 512-byte blocks allocated. */
+
+	unsigned long	st_atime;
+	unsigned long 	st_atime_nsec; 
+	unsigned long	st_mtime;
+	unsigned long	st_mtime_nsec;
+	unsigned long	st_ctime;
+	unsigned long   st_ctime_nsec;
+  	long		__unused[3];
+};
+
+//----------------------------------------------------------------------
+// From linux-2.6.9/include/asm-x86_64/statfs.h
+//----------------------------------------------------------------------
+
+struct vki_statfs {
+	long f_type;
+	long f_bsize;
+	long f_blocks;
+	long f_bfree;
+	long f_bavail;
+	long f_files;
+	long f_ffree;
+	__vki_kernel_fsid_t f_fsid;
+	long f_namelen;
+	long f_frsize;
+	long f_spare[5];
+};
+
+//----------------------------------------------------------------------
+// From linux-2.6.9/include/asm-x86_64/termios.h
+//----------------------------------------------------------------------
+
+struct vki_winsize {
+	unsigned short ws_row;
+	unsigned short ws_col;
+	unsigned short ws_xpixel;
+	unsigned short ws_ypixel;
+};
+
+#define VKI_NCC 8
+struct vki_termio {
+	unsigned short c_iflag;		/* input mode flags */
+	unsigned short c_oflag;		/* output mode flags */
+	unsigned short c_cflag;		/* control mode flags */
+	unsigned short c_lflag;		/* local mode flags */
+	unsigned char c_line;		/* line discipline */
+	unsigned char c_cc[VKI_NCC];	/* control characters */
+};
+
+//----------------------------------------------------------------------
+// From linux-2.6.9/include/asm-x86_64/termbits.h
+//----------------------------------------------------------------------
+
+typedef unsigned char	vki_cc_t;
+typedef unsigned int	vki_tcflag_t;
+
+#define VKI_NCCS 19
+struct vki_termios {
+	vki_tcflag_t c_iflag;		/* input mode flags */
+	vki_tcflag_t c_oflag;		/* output mode flags */
+	vki_tcflag_t c_cflag;		/* control mode flags */
+	vki_tcflag_t c_lflag;		/* local mode flags */
+	vki_cc_t c_line;		/* line discipline */
+	vki_cc_t c_cc[VKI_NCCS];	/* control characters */
+};
+
+
+//----------------------------------------------------------------------
+// From linux-2.6.9/include/asm-x86_64/ioctl.h
+//----------------------------------------------------------------------
+
+#define _VKI_IOC_NRBITS		8
+#define _VKI_IOC_TYPEBITS	8
+#define _VKI_IOC_SIZEBITS	14
+#define _VKI_IOC_DIRBITS	2
+
+#define _VKI_IOC_SIZEMASK	((1 << _VKI_IOC_SIZEBITS)-1)
+#define _VKI_IOC_DIRMASK	((1 << _VKI_IOC_DIRBITS)-1)
+
+#define _VKI_IOC_NRSHIFT	0
+#define _VKI_IOC_TYPESHIFT	(_VKI_IOC_NRSHIFT+_VKI_IOC_NRBITS)
+#define _VKI_IOC_SIZESHIFT	(_VKI_IOC_TYPESHIFT+_VKI_IOC_TYPEBITS)
+#define _VKI_IOC_DIRSHIFT	(_VKI_IOC_SIZESHIFT+_VKI_IOC_SIZEBITS)
+
+#define _VKI_IOC_NONE	0U
+#define _VKI_IOC_WRITE	1U
+#define _VKI_IOC_READ	2U
+
+#define _VKI_IOC(dir,type,nr,size) \
+	(((dir)  << _VKI_IOC_DIRSHIFT) | \
+	 ((type) << _VKI_IOC_TYPESHIFT) | \
+	 ((nr)   << _VKI_IOC_NRSHIFT) | \
+	 ((size) << _VKI_IOC_SIZESHIFT))
+
+#define _VKI_IO(type,nr)		_VKI_IOC(_VKI_IOC_NONE,(type),(nr),0)
+#define _VKI_IOR(type,nr,size)	_VKI_IOC(_VKI_IOC_READ,(type),(nr),sizeof(size))
+#define _VKI_IOW(type,nr,size)	_VKI_IOC(_VKI_IOC_WRITE,(type),(nr),sizeof(size))
+#define _VKI_IOWR(type,nr,size)	_VKI_IOC(_VKI_IOC_READ|_VKI_IOC_WRITE,(type),(nr),sizeof(size))
+
+#define _VKI_IOC_DIR(nr)		(((nr) >> _VKI_IOC_DIRSHIFT) & _VKI_IOC_DIRMASK)
+#define _VKI_IOC_SIZE(nr)		(((nr) >> _VKI_IOC_SIZESHIFT) & _VKI_IOC_SIZEMASK)
+
+//----------------------------------------------------------------------
+// From linux-2.6.9/include/asm-x86_64/ioctls.h
+//----------------------------------------------------------------------
+
+#define VKI_TCGETS	0x5401
+#define VKI_TCSETS	0x5402
+#define VKI_TCSETSW	0x5403
+#define VKI_TCSETSF	0x5404
+#define VKI_TCGETA	0x5405
+#define VKI_TCSETA	0x5406
+#define VKI_TCSETAW	0x5407
+#define VKI_TCSETAF	0x5408
+#define VKI_TCSBRK	0x5409
+#define VKI_TCXONC	0x540A
+#define VKI_TCFLSH	0x540B
+#define VKI_TIOCSCTTY	0x540E
+#define VKI_TIOCGPGRP	0x540F
+#define VKI_TIOCSPGRP	0x5410
+#define VKI_TIOCOUTQ	0x5411
+#define VKI_TIOCGWINSZ	0x5413
+#define VKI_TIOCSWINSZ	0x5414
+#define VKI_TIOCMBIS	0x5416
+#define VKI_TIOCMBIC	0x5417
+#define VKI_TIOCMSET	0x5418
+#define VKI_FIONREAD	0x541B
+#define VKI_TIOCLINUX	0x541C
+#define VKI_FIONBIO	0x5421
+#define VKI_TCSBRKP	0x5425	/* Needed for POSIX tcsendbreak() */
+#define VKI_TIOCGPTN	_VKI_IOR('T',0x30, unsigned int) /* Get Pty Number (of pty-mux device) */
+#define VKI_TIOCSPTLCK	_VKI_IOW('T',0x31, int)  /* Lock/unlock Pty */
+
+#define VKI_FIOASYNC	0x5452
+
+//----------------------------------------------------------------------
+// From linux-2.6.9/include/asm-x86_64/poll.h
+//----------------------------------------------------------------------
+
+#define VKI_POLLIN		0x0001
+
+struct vki_pollfd {
+	int fd;
+	short events;
+	short revents;
+};
+
+//----------------------------------------------------------------------
+// From linux-2.6.9/include/asm-x86_64/user.h
+//----------------------------------------------------------------------
+
+struct vki_user_regs_struct {
+	unsigned long r15,r14,r13,r12,rbp,rbx,r11,r10;
+	unsigned long r9,r8,rax,rcx,rdx,rsi,rdi,orig_rax;
+	unsigned long rip,cs,eflags;
+	unsigned long rsp,ss;
+  	unsigned long fs_base, gs_base;
+	unsigned long ds,es,fs,gs; 
+}; 
+
+//----------------------------------------------------------------------
+// From linux-2.6.9/include/asm-x86_64/elf.h
+//----------------------------------------------------------------------
+
+typedef unsigned long vki_elf_greg_t;
+
+#define VKI_ELF_NGREG (sizeof (struct vki_user_regs_struct) / sizeof(vki_elf_greg_t))
+typedef vki_elf_greg_t vki_elf_gregset_t[VKI_ELF_NGREG];
+
+//----------------------------------------------------------------------
+// From linux-2.6.9/include/asm-x86_64/ucontext.h
+//----------------------------------------------------------------------
+
+struct vki_ucontext {
+	unsigned long		uc_flags;
+	struct vki_ucontext    *uc_link;
+	vki_stack_t		uc_stack;
+	struct vki_sigcontext	uc_mcontext;
+	vki_sigset_t		uc_sigmask;	/* mask last for extensibility */
+};
+
+//----------------------------------------------------------------------
+// From linux-2.6.9/include/asm-x86_64/segment.h
+//----------------------------------------------------------------------
+
+#define VKI_GDT_ENTRY_TLS_ENTRIES 3
+
+#define VKI_GDT_ENTRY_TLS_MIN 11
+#define VKI_GDT_ENTRY_TLS_MAX 13
+
+//----------------------------------------------------------------------
+// From linux-2.6.9/include/asm-x86_64/ldt.h
+//----------------------------------------------------------------------
+
+// I think this LDT stuff will have to be reinstated for amd64, but I'm not
+// certain.  Note that the type here is very slightly different to the
+// type for x86 (the final 'lm' field is added);  I'm not sure about the
+// significance of that... --njn
+
+#if 0
+/* [[Nb: This is the structure passed to the modify_ldt syscall.  Just so as
+   to confuse and annoy everyone, this is _not_ the same as a
+   VgLdtEntry and has to be translated into such.  The logic for doing
+   so, in vg_ldt.c, is copied from the kernel sources.]] */
+struct vki_user_desc {
+	unsigned int  entry_number;
+	unsigned long base_addr;
+	unsigned int  limit;
+	unsigned int  seg_32bit:1;
+	unsigned int  contents:2;
+	unsigned int  read_exec_only:1;
+	unsigned int  limit_in_pages:1;
+	unsigned int  seg_not_present:1;
+	unsigned int  useable:1;
+        unsigned int  lm:1;
+};
+
+// [[Nb: for our convenience within Valgrind, use a more specific name]]
+typedef struct vki_user_desc vki_modify_ldt_t;
+#endif
+
+//----------------------------------------------------------------------
+// And that's it!
+//----------------------------------------------------------------------
+
+#endif // __AMD64_LINUX_VKI_ARCH_H
+
+/*--------------------------------------------------------------------*/
+/*--- end                                                          ---*/
+/*--------------------------------------------------------------------*/
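[Aside, not part of the patch: the _VKI_IOC* macros in the new amd64 vki_arch.h above mirror the kernel's ioctl encoding (8 number bits, 8 type bits, 14 size bits, 2 direction bits). As a quick sanity check, the standalone sketch below recomputes VKI_TIOCGPTN, i.e. _VKI_IOR('T', 0x30, unsigned int), with the shift amounts written out; the constant names here are local to the sketch, not Valgrind's.]

   /* Standalone sketch: recompute the VKI_TIOCGPTN ioctl number. */
   #include <stdio.h>

   #define NRSHIFT    0     /* matches _VKI_IOC_NRSHIFT   */
   #define TYPESHIFT  8     /* matches _VKI_IOC_TYPESHIFT */
   #define SIZESHIFT  16    /* matches _VKI_IOC_SIZESHIFT */
   #define DIRSHIFT   30    /* matches _VKI_IOC_DIRSHIFT  */
   #define IOC_READ   2U    /* matches _VKI_IOC_READ      */

   int main(void)
   {
      unsigned int req = (IOC_READ << DIRSHIFT)
                       | ('T'      << TYPESHIFT)
                       | (0x30     << NRSHIFT)
                       | (sizeof(unsigned int) << SIZESHIFT);
      printf("TIOCGPTN = %#x\n", req);   /* prints 0x80045430 */
      return 0;
   }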
diff --git a/include/amd64-linux/vki_arch_posixtypes.h b/include/amd64-linux/vki_arch_posixtypes.h
new file mode 100644
index 0000000..54f1eed
--- /dev/null
+++ b/include/amd64-linux/vki_arch_posixtypes.h
@@ -0,0 +1,68 @@
+
+/*--------------------------------------------------------------------*/
+/*--- AMD64/Linux-specific kernel interface: posix types.          ---*/
+/*---                            amd64-linux/vki_arch_posixtypes.h ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, an extensible x86 protected-mode
+   emulator for monitoring program execution on x86-Unixes.
+
+   Copyright (C) 2000-2004 Julian Seward 
+      jseward@acm.org
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#ifndef __AMD64_LINUX_VKI_ARCH_POSIXTYPES_H
+#define __AMD64_LINUX_VKI_ARCH_POSIXTYPES_H
+
+//----------------------------------------------------------------------
+// From linux-2.6.9/include/asm-x86_64/posix_types.h
+//----------------------------------------------------------------------
+
+typedef unsigned int	__vki_kernel_mode_t;
+typedef long		__vki_kernel_off_t;
+typedef int		__vki_kernel_pid_t;
+typedef int		__vki_kernel_ipc_pid_t;
+typedef unsigned int	__vki_kernel_uid_t;
+typedef unsigned int	__vki_kernel_gid_t;
+typedef unsigned long	__vki_kernel_size_t;
+typedef long		__vki_kernel_time_t;
+typedef long		__vki_kernel_suseconds_t;
+typedef long		__vki_kernel_clock_t;
+typedef int		__vki_kernel_timer_t;
+typedef int		__vki_kernel_clockid_t;
+typedef char *		__vki_kernel_caddr_t;
+
+typedef long long	__vki_kernel_loff_t;
+
+typedef struct {
+	int	val[2];
+} __vki_kernel_fsid_t;
+
+typedef unsigned short __vki_kernel_old_uid_t;
+typedef unsigned short __vki_kernel_old_gid_t;
+typedef __vki_kernel_uid_t __vki_kernel_uid32_t;
+typedef __vki_kernel_gid_t __vki_kernel_gid32_t;
+
+#endif // __AMD64_LINUX_VKI_ARCH_POSIXTYPES_H
+
+/*--------------------------------------------------------------------*/
+/*--- end                                                          ---*/
+/*--------------------------------------------------------------------*/
diff --git a/include/amd64/Makefile.am b/include/amd64/Makefile.am
new file mode 100644
index 0000000..5deb528
--- /dev/null
+++ b/include/amd64/Makefile.am
@@ -0,0 +1,4 @@
+incincdir = $(includedir)/valgrind/amd64
+
+incinc_HEADERS = tool_arch.h
+
diff --git a/include/amd64/tool_arch.h b/include/amd64/tool_arch.h
new file mode 100644
index 0000000..b307f83
--- /dev/null
+++ b/include/amd64/tool_arch.h
@@ -0,0 +1,46 @@
+/*--------------------------------------------------------------------*/
+/*---                                            amd64/tool_arch.h ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, an extensible x86 protected-mode
+   emulator for monitoring program execution on x86-Unixes.
+
+   Copyright (C) 2000-2004 Nicholas Nethercote
+      njn25@cam.ac.uk
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#ifndef __AMD64_TOOL_ARCH_H
+#define __AMD64_TOOL_ARCH_H
+
+/*====================================================================*/
+/*=== Registers, etc                                               ===*/
+/*====================================================================*/
+
+#define REGPARM(n)
+
+#define MIN_INSTR_SIZE     1
+#define MAX_INSTR_SIZE    16
+
+#endif   // __AMD64_TOOL_ARCH_H
+
+/*--------------------------------------------------------------------*/
+/*--- end                                                          ---*/
+/*--------------------------------------------------------------------*/
diff --git a/include/arm-linux/vki_arch_posixtypes.h b/include/arm-linux/vki_arch_posixtypes.h
index 9f57856..b923708 100644
--- a/include/arm-linux/vki_arch_posixtypes.h
+++ b/include/arm-linux/vki_arch_posixtypes.h
@@ -1,7 +1,7 @@
 
 /*--------------------------------------------------------------------*/
 /*--- ARM/Linux-specific kernel interface: posix types.            ---*/
-/*---                                        vki_arch_posixtypes.h ---*/
+/*---                              arm-linux/vki_arch_posixtypes.h ---*/
 /*--------------------------------------------------------------------*/
 
 /*
@@ -61,7 +61,6 @@
 	int	val[2];
 } __vki_kernel_fsid_t;
 
-
 #endif // __ARM_LINUX_VKI_ARCH_POSIXTYPES_H
 
 /*--------------------------------------------------------------------*/
diff --git a/include/tool.h.base b/include/tool.h.base
index 1681351..9722c2f 100644
--- a/include/tool.h.base
+++ b/include/tool.h.base
@@ -274,9 +274,10 @@
 /* 64-bit counter for the number of basic blocks done. */
 extern ULong VG_(bbs_done);
 
-/* Check if an address is 4-byte aligned */
-#define IS_ALIGNED4_ADDR(aaa_p) (0 == (((UInt)(aaa_p)) & 3))
-#define IS_ALIGNED8_ADDR(aaa_p) (0 == (((UInt)(aaa_p)) & 7))
+/* Check if an address is aligned */
+#define IS_ALIGNED4_ADDR(aaa_p)     (0 == (((Addr)(aaa_p)) & 3))
+#define IS_ALIGNED8_ADDR(aaa_p)     (0 == (((Addr)(aaa_p)) & 7))
+#define IS_WORD_ALIGNED_ADDR(aaa_p) (0 == (((Addr)(aaa_p)) & (sizeof(Addr)-1)))
 
 
 /* ------------------------------------------------------------------ */
@@ -541,7 +542,7 @@
 extern Int VG_(sigaltstack) ( const vki_stack_t* ss, vki_stack_t* oss );
 
 extern Int VG_(kill)        ( Int pid, Int signo );
-extern Int VG_(tkill)       ( Int pid, Int signo );
+extern Int VG_(tkill)       ( ThreadId tid, Int signo );
 extern Int VG_(sigpending)  ( vki_sigset_t* set );
 
 extern Int VG_(waitpid)	    ( Int pid, Int *status, Int options );
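[Aside, not part of the patch: the tool.h.base hunk above switches the alignment macros from UInt to Addr and adds IS_WORD_ALIGNED_ADDR, which keys off sizeof(Addr) so the same check means 4-byte alignment on a 32-bit build and 8-byte alignment on a 64-bit one. A minimal standalone sketch of that behaviour, with uintptr_t standing in for Addr:]

   /* Sketch: word alignment follows the pointer width. */
   #include <stdio.h>
   #include <stdint.h>

   #define IS_ALIGNED4_ADDR(p)     (0 == (((uintptr_t)(p)) & 3))
   #define IS_WORD_ALIGNED_ADDR(p) (0 == (((uintptr_t)(p)) & (sizeof(uintptr_t)-1)))

   int main(void)
   {
      long  x[2];
      char* p = (char*)&x[0] + 4;
      /* On LP64 p is 4-byte aligned but not word (8-byte) aligned;
         on a 32-bit build both checks succeed. */
      printf("aligned4=%d word-aligned=%d\n",
             IS_ALIGNED4_ADDR(p), IS_WORD_ALIGNED_ADDR(p));
      return 0;
   }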
diff --git a/include/valgrind.h.in b/include/valgrind.h.in
index 9718c0f..cf28de9 100644
--- a/include/valgrind.h.in
+++ b/include/valgrind.h.in
@@ -98,6 +98,50 @@
    eg. x86/ subdirectories like we do within the rest of Valgrind.
 */
 
+#ifdef __arm__
+// XXX: temporary, until MAGIC_SEQUENCE is written properly
+extern int printf (__const char *__restrict __format, ...);
+extern void exit (int __status);
+#define VALGRIND_MAGIC_SEQUENCE(                                        \
+        _zzq_rlval, _zzq_default, _zzq_request,                         \
+        _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4)                     \
+                                                                        \
+  { volatile unsigned int _zzq_args[5];                                 \
+    _zzq_args[0] = (volatile unsigned int)(_zzq_request);               \
+    _zzq_args[1] = (volatile unsigned int)(_zzq_arg1);                  \
+    _zzq_args[2] = (volatile unsigned int)(_zzq_arg2);                  \
+    _zzq_args[3] = (volatile unsigned int)(_zzq_arg3);                  \
+    _zzq_args[4] = (volatile unsigned int)(_zzq_arg4);                  \
+    (_zzq_rlval) = (_zzq_default);/* temporary only */  \
+    printf("argh: MAGIC_SEQUENCE"); exit(1); \
+    asm volatile("");                                                   \
+  }
+// XXX: make sure that the register holding the args and the register taking
+// the return value match ARCH_CLREQ_ARGS and ARCH_CLREQ_RET in
+// arm/core_arch.h!
+#endif  // __arm__
+#ifdef __amd64__
+// XXX: temporary, until MAGIC_SEQUENCE is written properly
+extern int printf (__const char *__restrict __format, ...);
+extern void exit (int __status);
+#define VALGRIND_MAGIC_SEQUENCE(                                        \
+        _zzq_rlval, _zzq_default, _zzq_request,                         \
+        _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4)                     \
+                                                                        \
+  { volatile unsigned long _zzq_args[5];                                \
+    _zzq_args[0] = (volatile unsigned long)(_zzq_request);              \
+    _zzq_args[1] = (volatile unsigned long)(_zzq_arg1);                 \
+    _zzq_args[2] = (volatile unsigned long)(_zzq_arg2);                 \
+    _zzq_args[3] = (volatile unsigned long)(_zzq_arg3);                 \
+    _zzq_args[4] = (volatile unsigned long)(_zzq_arg4);                 \
+    (_zzq_rlval) = (_zzq_default);/* temporary only */  \
+    printf("argh: MAGIC_SEQUENCE"); exit(1); \
+    asm volatile("");                                                   \
+  }
+// XXX: make sure that the register holding the args and the register taking
+// the return value match ARCH_CLREQ_ARGS and ARCH_CLREQ_RET in
+// amd64/core_arch.h!
+#endif  // __amd64__
 #ifdef __x86__
 #define VALGRIND_MAGIC_SEQUENCE(                                        \
         _zzq_rlval, _zzq_default, _zzq_request,                         \
@@ -121,29 +165,6 @@
                 );                                                      \
   }
 #endif  // __x86__
-#ifdef __arm__
-// XXX: termporary, until MAGIC_SEQUENCE is written properly
-extern int printf (__const char *__restrict __format, ...);
-extern void exit (int __status);
-#define VALGRIND_MAGIC_SEQUENCE(                                        \
-        _zzq_rlval, _zzq_default, _zzq_request,                         \
-        _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4)                     \
-                                                                        \
-  { volatile unsigned int _zzq_args[5];                                 \
-    _zzq_args[0] = (volatile unsigned int)(_zzq_request);               \
-    _zzq_args[1] = (volatile unsigned int)(_zzq_arg1);                  \
-    _zzq_args[2] = (volatile unsigned int)(_zzq_arg2);                  \
-    _zzq_args[3] = (volatile unsigned int)(_zzq_arg3);                  \
-    _zzq_args[4] = (volatile unsigned int)(_zzq_arg4);                  \
-    (_zzq_rlval) = (_zzq_default);/* temporary only */  \
-    printf("argh: MAGIC_SEQUENCE"); exit(1); \
-    asm volatile("");                                                   \
-  }
-// XXX: make sure that the register holding the args and the register taking
-// the return value match ARCH_CLREQ_ARGS and ARCH_CLREQ_RET in
-// arm/core_arch.h!
-#endif  // __arm__
-
 // Insert assembly code for other architectures here...
 
 #else  /* NVALGRIND */
@@ -241,13 +262,13 @@
 int
 VALGRIND_PRINTF(const char *format, ...)
 {
-   unsigned int _qzz_res;
+   unsigned long _qzz_res;
    va_list vargs;
    va_start(vargs, format);
    VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0, VG_USERREQ__PRINTF,
-                           (unsigned int)format, (unsigned int)vargs, 0, 0);
+                           (unsigned long)format, (unsigned long)vargs, 0, 0);
    va_end(vargs);
-   return _qzz_res;
+   return (int)_qzz_res;
 }
 
 int VALGRIND_PRINTF_BACKTRACE(const char *format, ...)
@@ -256,13 +277,13 @@
 int
 VALGRIND_PRINTF_BACKTRACE(const char *format, ...)
 {
-   unsigned int _qzz_res;
+   unsigned long _qzz_res;
    va_list vargs;
    va_start(vargs, format);
    VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0, VG_USERREQ__PRINTF_BACKTRACE,
-                           (unsigned int)format, (unsigned int)vargs, 0, 0);
+                           (unsigned long)format, (unsigned long)vargs, 0, 0);
    va_end(vargs);
-   return _qzz_res;
+   return (int)_qzz_res;
 }
 
 #else /* NVALGRIND */
@@ -275,7 +296,7 @@
 /* These requests allow control to move from the simulated CPU to the
    real CPU, calling an arbitary function */
 #define VALGRIND_NON_SIMD_CALL0(_qyy_fn)                       \
-   ({unsigned int _qyy_res;                                    \
+   ({unsigned long _qyy_res;                                   \
     VALGRIND_MAGIC_SEQUENCE(_qyy_res, 0 /* default return */,  \
                             VG_USERREQ__CLIENT_CALL0,          \
                             _qyy_fn,                           \
@@ -284,7 +305,7 @@
    })
 
 #define VALGRIND_NON_SIMD_CALL1(_qyy_fn, _qyy_arg1)            \
-   ({unsigned int _qyy_res;                                    \
+   ({unsigned long _qyy_res;                                   \
     VALGRIND_MAGIC_SEQUENCE(_qyy_res, 0 /* default return */,  \
                             VG_USERREQ__CLIENT_CALL1,          \
                             _qyy_fn,                           \
@@ -293,7 +314,7 @@
    })
 
 #define VALGRIND_NON_SIMD_CALL2(_qyy_fn, _qyy_arg1, _qyy_arg2) \
-   ({unsigned int _qyy_res;                                    \
+   ({unsigned long _qyy_res;                                   \
     VALGRIND_MAGIC_SEQUENCE(_qyy_res, 0 /* default return */,  \
                             VG_USERREQ__CLIENT_CALL2,          \
                             _qyy_fn,                           \
@@ -302,7 +323,7 @@
    })
 
 #define VALGRIND_NON_SIMD_CALL3(_qyy_fn, _qyy_arg1, _qyy_arg2, _qyy_arg3)  \
-   ({unsigned int _qyy_res;                                          \
+   ({unsigned long _qyy_res;                                         \
     VALGRIND_MAGIC_SEQUENCE(_qyy_res, 0 /* default return */,        \
                             VG_USERREQ__CLIENT_CALL3,                \
                             _qyy_fn,                                 \
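[Aside, not part of the patch: widening _qzz_res/_qyy_res to unsigned long in the valgrind.h.in hunks above matters because on AMD64 a client request result may be pointer-sized; the old unsigned int slot would truncate it. Client usage is unchanged. A hypothetical client-side example (my_alloc and alloc_on_real_cpu are made-up names; VALGRIND_NON_SIMD_CALL1 is the real macro from this header):]

   /* Sketch: run a function on the real CPU and keep its pointer result. */
   #include "valgrind.h"

   extern void* my_alloc ( unsigned long nbytes );

   void* alloc_on_real_cpu ( unsigned long nbytes )
   {
      /* The request's result slot is now long-sized, so the returned
         pointer survives intact on AMD64. */
      return (void*)VALGRIND_NON_SIMD_CALL1( my_alloc, nbytes );
   }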
diff --git a/include/x86-linux/vki_arch.h b/include/x86-linux/vki_arch.h
index f1c4488..5a4075b 100644
--- a/include/x86-linux/vki_arch.h
+++ b/include/x86-linux/vki_arch.h
@@ -433,7 +433,7 @@
 // From linux-2.6.8.1/include/asm-i386/ioctl.h
 //----------------------------------------------------------------------
 
-#define _VKI_IOC_NRBITS	8
+#define _VKI_IOC_NRBITS		8
 #define _VKI_IOC_TYPEBITS	8
 #define _VKI_IOC_SIZEBITS	14
 #define _VKI_IOC_DIRBITS	2
@@ -693,6 +693,20 @@
 	long msgtyp;
 };
 
+#define VKI_SEMOP		 1
+#define VKI_SEMGET		 2
+#define VKI_SEMCTL		 3
+#define VKI_SEMTIMEDOP	 	 4
+#define VKI_MSGSND		11
+#define VKI_MSGRCV		12
+#define VKI_MSGGET		13
+#define VKI_MSGCTL		14
+#define VKI_SHMAT		21
+#define VKI_SHMDT		22
+#define VKI_SHMGET		23
+#define VKI_SHMCTL		24
+
+
 //----------------------------------------------------------------------
 // From linux-2.6.8.1/include/asm-i386/shmbuf.h
 //----------------------------------------------------------------------
@@ -726,7 +740,7 @@
 };
 
 //----------------------------------------------------------------------
-// From linux-2.6.9/include/asm-i386/shmbuf.h
+// From linux-2.6.9/include/asm-i386/ptrace.h
 //----------------------------------------------------------------------
 
 #define VKI_PTRACE_GETREGS            12
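[Aside, not part of the patch: the VKI_SEM*/VKI_MSG*/VKI_SHM* values added above are the sub-call numbers that x86's multiplexed sys_ipc dispatches on (AMD64 has separate syscalls instead), so the wrapper can now switch on named constants rather than magic numbers. A purely hypothetical sketch of that shape; the function and helper names below are illustrative, not Valgrind's:]

   /* Sketch: dispatch on the first sys_ipc argument using the VKI_* names. */
   static void pre_sys_ipc_sketch(unsigned int call, unsigned long* args)
   {
      (void)args;   /* unused in this sketch */
      switch (call) {
         case VKI_SEMOP:   /* check semop args   */ break;
         case VKI_MSGSND:  /* check msgsnd args  */ break;
         case VKI_SHMAT:   /* check shmat args   */ break;
         /* ... remaining VKI_* sub-calls ... */
         default:          /* unknown sub-call   */ break;
      }
   }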
diff --git a/include/x86-linux/vki_arch_posixtypes.h b/include/x86-linux/vki_arch_posixtypes.h
index 5bc6a76..9a0555e 100644
--- a/include/x86-linux/vki_arch_posixtypes.h
+++ b/include/x86-linux/vki_arch_posixtypes.h
@@ -1,7 +1,7 @@
 
 /*--------------------------------------------------------------------*/
 /*--- x86/Linux-specific kernel interface: posix types.            ---*/
-/*---                                        vki_arch_posixtypes.h ---*/
+/*---                              x86-linux/vki_arch_posixtypes.h ---*/
 /*--------------------------------------------------------------------*/
 
 /*
diff --git a/memcheck/mac_leakcheck.c b/memcheck/mac_leakcheck.c
index df2a907..edae5a8 100644
--- a/memcheck/mac_leakcheck.c
+++ b/memcheck/mac_leakcheck.c
@@ -501,7 +501,7 @@
       print_record = ( MAC_(clo_show_reachable) || Proper != p_min->loss_mode );
       is_suppressed = 
          VG_(unique_error) ( tid, LeakErr, (UInt)i+1,
-                             (Char*)n_lossrecords, (void*) p_min,
+                             (Char*)(UWord)n_lossrecords, (void*) p_min,
                              p_min->allocated_at, print_record,
                              /*allow_GDB_attach*/False, /*count_error*/False );
 
diff --git a/memcheck/mac_needs.c b/memcheck/mac_needs.c
index 14edf23..91ce790 100644
--- a/memcheck/mac_needs.c
+++ b/memcheck/mac_needs.c
@@ -320,8 +320,8 @@
       }
       case LeakErr: {
          /* Totally abusing the types of these spare fields... oh well. */
-         UInt n_this_record   = (UInt)VG_(get_error_address)(err);
-         UInt n_total_records = (UInt)VG_(get_error_string) (err);
+         UInt n_this_record   = (UWord)VG_(get_error_address)(err);
+         UInt n_total_records = (UWord)VG_(get_error_string) (err);
 
          MAC_(pp_LeakError)(err_extra, n_this_record, n_total_records);
          break;
diff --git a/memcheck/mac_replace_strmem.c b/memcheck/mac_replace_strmem.c
index c268a2d..5dd8d09 100644
--- a/memcheck/mac_replace_strmem.c
+++ b/memcheck/mac_replace_strmem.c
@@ -65,7 +65,7 @@
    circumstance, presumably).
 */
 static __inline__
-Bool is_overlap ( void* dst, const void* src, UInt dstlen, UInt srclen )
+Bool is_overlap ( void* dst, const void* src, SizeT dstlen, SizeT srclen )
 {
    Addr loS, hiS, loD, hiD;
 
@@ -155,11 +155,11 @@
    return dst_orig;
 }
 
-char* strncat ( char* dst, const char* src, unsigned int n )
+char* strncat ( char* dst, const char* src, SizeT n )
 {
    const Char* src_orig = src;
          Char* dst_orig = dst;
-   UInt  m = 0;
+   SizeT m = 0;
 
    while (*dst) dst++;
    while (m   < n && *src) { m++; *dst++ = *src++; } /* concat <= n chars */
@@ -176,16 +176,16 @@
    return dst_orig;
 }
 
-unsigned int strnlen ( const char* str, unsigned int n )
+SizeT strnlen ( const char* str, SizeT n )
 {
-   UInt i = 0;
+   SizeT i = 0;
    while (i < n && str[i] != 0) i++;
    return i;
 }
 
-unsigned int strlen ( const char* str )
+SizeT strlen ( const char* str )
 {
-   UInt i = 0;
+   SizeT i = 0;
    while (str[i] != 0) i++;
    return i;
 }
@@ -209,11 +209,11 @@
    return dst_orig;
 }
 
-char* strncpy ( char* dst, const char* src, unsigned int n )
+char* strncpy ( char* dst, const char* src, SizeT n )
 {
    const Char* src_orig = src;
          Char* dst_orig = dst;
-   UInt  m = 0;
+   SizeT m = 0;
 
    while (m   < n && *src) { m++; *dst++ = *src++; }
    /* Check for overlap after copying; all n bytes of dst are relevant,
@@ -225,9 +225,9 @@
    return dst_orig;
 }
 
-int strncmp ( const char* s1, const char* s2, unsigned int nmax )
+int strncmp ( const char* s1, const char* s2, SizeT nmax )
 {
-   unsigned int n = 0;
+   SizeT n = 0;
    while (True) {
       if (n >= nmax) return 0;
       if (*s1 == 0 && *s2 == 0) return 0;
@@ -257,9 +257,9 @@
    return 0;
 }
 
-void* memchr(const void *s, int c, unsigned int n)
+void* memchr(const void *s, int c, SizeT n)
 {
-   unsigned int i;
+   SizeT i;
    UChar c0 = (UChar)c;
    UChar* p = (UChar*)s;
    for (i = 0; i < n; i++)
@@ -267,7 +267,7 @@
    return NULL;
 }
 
-void* memcpy( void *dst, const void *src, unsigned int len )
+void* memcpy( void *dst, const void *src, SizeT len )
 {
    register char *d;
    register char *s;
@@ -308,7 +308,7 @@
    return dst;
 }
 
-int memcmp ( const void *s1V, const void *s2V, unsigned int n )
+int memcmp ( const void *s1V, const void *s2V, SizeT n )
 {
    int res;
    unsigned char a0;
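[Aside, not part of the patch: switching the length parameters in mac_replace_strmem.c from unsigned int to SizeT keeps these replacements matching the real libc prototypes on AMD64, where size_t is 64 bits; a 32-bit parameter would silently truncate lengths above 4GB. A standalone illustration of that truncation (plain C, outside Valgrind):]

   /* Sketch: what a 32-bit length parameter does to a >4GB size on LP64. */
   #include <stdio.h>
   #include <stddef.h>

   static size_t take_uint(unsigned int n) { return n; }
   static size_t take_size(size_t n)       { return n; }

   int main(void)
   {
      size_t five_gb = (size_t)5 * 1024 * 1024 * 1024;
      printf("as unsigned int: %zu\n", take_uint(five_gb)); /* 1073741824 */
      printf("as size_t:       %zu\n", take_size(five_gb)); /* 5368709120 */
      return 0;
   }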
diff --git a/memcheck/mac_shared.h b/memcheck/mac_shared.h
index 15bc80f..a20634e 100644
--- a/memcheck/mac_shared.h
+++ b/memcheck/mac_shared.h
@@ -139,6 +139,7 @@
    struct _MAC_Chunk {
       struct _MAC_Chunk* next;
       Addr          data;           // ptr to actual block
+      // XXX: not 64-bit clean!
       UInt          size : 30;      // size requested
       MAC_AllocKind allockind : 2;  // which wrapper did the allocation
       ExeContext*   where;          // where it was allocated
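[Aside, not part of the patch: the "not 64-bit clean" XXX above refers to the 30-bit size bitfield in MAC_Chunk, which tops out just under 1GB; an allocation larger than that would be recorded modulo 2^30. A tiny illustration of the wraparound (struct name is made up):]

   /* Sketch: a 30-bit unsigned bitfield wraps large sizes modulo 2^30. */
   #include <stdio.h>

   struct chunk_sketch { unsigned int size : 30; };

   int main(void)
   {
      struct chunk_sketch c;
      c.size = (1UL << 30) + 123;                     /* a >1GB request */
      printf("recorded size = %u\n", (unsigned)c.size); /* prints 123 */
      return 0;
   }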
diff --git a/memcheck/tests/amd64/Makefile.am b/memcheck/tests/amd64/Makefile.am
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/memcheck/tests/amd64/Makefile.am
diff --git a/none/tests/amd64/Makefile.am b/none/tests/amd64/Makefile.am
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/none/tests/amd64/Makefile.am
diff --git a/tests/cputest.c b/tests/cputest.c
index ac6d786..9f464a5 100644
--- a/tests/cputest.c
+++ b/tests/cputest.c
@@ -15,13 +15,43 @@
 typedef int    Bool;
 
 char* all_archs[] = {
-   "x86",
-   "arm",
    "amd64",
+   "arm",
    "ppc",
+   "x86",
    NULL
 };
 
+#ifdef __amd64__
+static Bool go(char* cpu)
+{
+   if ( strcmp( cpu, "amd64" ) == 0 )
+      return True;
+   else 
+      return False;
+}
+#endif // __amd64__
+
+#ifdef __arm__
+static Bool go(char* cpu)
+{
+   if ( strcmp( cpu, "arm" ) == 0 )
+      return True;
+   else 
+      return False;
+}
+#endif // __arm__
+
+#ifdef __ppc__
+static Bool go(char* cpu)
+{
+   if ( strcmp( cpu, "ppc" ) == 0 )
+      return True;
+   else 
+      return False;
+}
+#endif // __ppc__
+
 #ifdef __x86__
 static __inline__ void cpuid(unsigned int n,
                              unsigned int *a, unsigned int *b,
@@ -74,26 +104,6 @@
 #endif // __x86__
 
 
-#ifdef __arm__
-static Bool go(char* cpu)
-{
-   if ( strcmp( cpu, "arm" ) == 0 )
-      return True;
-   else 
-      return False;
-}
-#endif // __arm__
-
-#ifdef __ppc__
-static Bool go(char* cpu)
-{
-   if ( strcmp( cpu, "ppc" ) == 0 )
-      return True;
-   else 
-      return False;
-}
-#endif // __ppc__
-
 int main(int argc, char **argv)
 {
    int i;
diff --git a/valgrind.spec.in b/valgrind.spec.in
index f1b46b7..5c4e3d3 100644
--- a/valgrind.spec.in
+++ b/valgrind.spec.in
@@ -39,8 +39,14 @@
 /usr/include/valgrind/basic_types.h
 /usr/include/valgrind/tool.h
 /usr/include/valgrind/tool_asm.h
+/usr/include/valgrind/amd64/tool_arch.h
+/usr/include/valgrind/arm/tool_arch.h
 /usr/include/valgrind/x86/tool_arch.h
 /usr/include/valgrind/linux/vki.h
+/usr/include/valgrind/amd64-linux/vki_arch.h
+/usr/include/valgrind/amd64-linux/vki_arch_posixtypes.h
+/usr/include/valgrind/arm-linux/vki_arch.h
+/usr/include/valgrind/arm-linux/vki_arch_posixtypes.h
 /usr/include/valgrind/x86-linux/vki_arch.h
 /usr/include/valgrind/x86-linux/vki_arch_posixtypes.h
 /usr/bin/valgrind